Compare commits

...

103 Commits

Author SHA1 Message Date
085c8b75a6 release 2013.02.02 2013-02-02 14:45:38 +01:00
dbf2ba3d61 Better help for new options 2013-02-02 14:44:22 +01:00
b47bbac393 Disable Stanford OC test for now, and enable escapist 2013-02-02 14:40:41 +01:00
229cac754a Improve cookie error handling 2013-02-02 13:51:54 +01:00
0e33684194 Switch to m4a by default (Closes #240) 2013-02-01 18:23:20 +01:00
9e982f9e4e Added "min-filesize" and "max-filesize" options 2013-02-01 18:09:34 +01:00
c7a725cfad Merge remote-tracking branch 'dcoppa/master' 2013-02-01 18:05:42 +01:00
450a30cae8 Add PyPi upload to release script 2013-02-01 18:01:53 +01:00
9cd5e4fce8 release 2013.02.01 2013-02-01 17:57:32 +01:00
edba5137b8 Fix Facebook IE 2013-02-01 17:56:22 +01:00
233a22960a Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) 2013-02-01 17:46:03 +01:00
3b024e17af Work around buggy HTML Parser in Python < 2.7.3 (Closes #662) 2013-02-01 17:29:50 +01:00
a32b573ccb Try setuptools first, then fallback to distutils.core 2013-01-30 15:31:38 +01:00
ec71c13ab8 release 2013.01.28 2013-01-27 18:33:58 +01:00
f0bad2b026 Fix Stanford (Closes #653) 2013-01-27 15:23:26 +01:00
25580f3251 8tracks: Ignore hashes 2013-01-27 04:15:12 +01:00
da4de959df 8tracks: Better default titles 2013-01-27 04:05:53 +01:00
d0d51a8afa 8tracks: Include performer as uploader 2013-01-27 03:27:46 +01:00
c67598c3e1 Remove space before shebang 2013-01-27 03:07:07 +01:00
811d253bc2 Merge remote-tracking branch 'jaimeMF/makefilePythonversion' 2013-01-27 03:06:32 +01:00
c3a1642ead release 2013.01.27 2013-01-27 03:03:02 +01:00
ccf65f9dee 8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00
b954070d70 Fix Facebook (Closes #375) 2013-01-25 16:54:48 +01:00
30e9f4496b Drop md5: spec for now (unused and breaks int values) 2013-01-25 16:54:25 +01:00
271d3fbdaa Option in makefile to select python interpreter 2013-01-25 15:11:03 +01:00
6df40dcbe0 Guard against sys.getfilesystemencoding() == None (#503) 2013-01-20 01:48:05 +01:00
97f194c1fb twitch.tv: Use id as title if no title is present (Closes #638) 2013-01-16 09:55:45 +01:00
4da769ccca Do not backup version.py (under version control and frankly, not that complex) 2013-01-12 23:04:46 +01:00
253d96f2e2 Force build removal 2013-01-12 22:25:54 +01:00
bbc3e2753a release 2013.01.13 2013-01-12 22:18:13 +01:00
67353612ba Revert "Move update to front"
This reverts commit db30f02b50.
2013-01-12 22:10:36 +01:00
bffbd5f038 Download progress hooks 2013-01-12 20:34:50 +01:00
d8bbf2018e Aggressive test timeout to catch hanging servers 2013-01-12 20:33:03 +01:00
187f491ad2 [RBMA] Do not fail if thumbnail is empty 2013-01-12 18:45:50 +01:00
335959e778 Correct Blip.tv on 2.6, where HTTP headers are case-sensitive (wtf?) 2013-01-12 18:38:23 +01:00
3b83bf8f6a correct pushes in release script 2013-01-12 18:37:21 +01:00
51719893bf Default to py3 in sign-versions 2013-01-12 18:14:07 +01:00
1841f65e64 Python 2-proof versions.py 2013-01-12 18:12:24 +01:00
bb28998920 fix location of updates_key in devscripts/release 2013-01-12 18:07:31 +01:00
fbc5f99db9 release 2013.01.12 2013-01-12 17:59:58 +01:00
ca0a0bbeec RBMA IE (Closes #630) 2013-01-12 17:58:39 +01:00
6119f78cb9 Add location field 2013-01-12 17:34:31 +01:00
539679c7f9 Make uploader and upload_date fields optional 2013-01-12 17:34:09 +01:00
b642cd44c1 restore youtube-dl (update) binary 2013-01-12 17:07:12 +01:00
fffec3b9d9 Credit jefftimesten for YouPornIE, PornoTubeIE, YouJizzIE 2013-01-12 16:51:20 +01:00
3446dfb7cb Proper support for changing User-Agents from IEs 2013-01-12 16:49:13 +01:00
db16276b7c Improve YouJizz 2013-01-12 16:41:04 +01:00
629fcdd135 Add agecheck and various improvements to YouPorn IE 2013-01-12 16:10:35 +01:00
64ce2aada8 _request_webpage helper methods for queries that need the final URL 2013-01-12 16:10:16 +01:00
565f751967 Clean up porno IEs 2013-01-12 15:17:04 +01:00
6017964580 Merge remote-tracking branch 'jefftimesten/master' 2013-01-12 15:12:50 +01:00
1d16b0c3fe Keep file without any PPs (oops, missed the obvious case) 2013-01-12 15:12:28 +01:00
7851b37993 --recode-video option (Closes #18) 2013-01-12 15:09:09 +01:00
d81edc573e Merge 'jaimeMF/videoconversion' (sans actual option for now) 2013-01-12 14:04:30 +01:00
ef0c8d5f9f Make ustream IE more robust 2013-01-12 13:49:14 +01:00
db30f02b50 Move update to front 2013-01-12 13:45:39 +01:00
4ba7262467 Less confusing player version 2013-01-12 13:35:16 +01:00
67d0c25eab Add a PostProcessor for converting video format 2013-01-11 20:50:49 +01:00
09f9552b40 Less git acrobatics in devscripts/release.sh 2013-01-11 08:28:37 +01:00
142d38f776 release 2013.01.11 2013-01-11 08:05:30 +01:00
6dd3471900 Add Makefile in tarball (Closes #626) 2013-01-11 08:00:27 +01:00
280d67896a Correct documentation (Closes #625) 2013-01-10 23:20:26 +01:00
510e6f6dc1 Support --audio-format=opus 2013-01-10 19:15:04 +01:00
712e86b999 Fix broken ffmpeg (Closes #623) 2013-01-09 14:46:19 +01:00
74fdba620d release 2013.01.08 2013-01-08 10:29:53 +01:00
dc1c479a6f Merge pull request #621 from atomizer/master
justin.tv tweaks
2013-01-08 00:57:46 -08:00
119d536e07 Merge branch 'my-origin/master' 2013-01-07 17:03:58 +04:00
fa1bf9c653 justin.tv tweaks
- download all parts of a broadcast, fixes #614
- set "uploader" variable to channel_name if available
- catch api errors even if http status is 200
2013-01-07 16:59:39 +04:00
814eed0ea1 Fix tar target (--exclude-vcs is not supported everywhere, and reading . while writing to it can fail randomly) 2013-01-07 12:48:07 +01:00
0aa3068e9e Do not check in test_coverage 2013-01-06 23:38:36 +01:00
db2d6124b1 correct quoting 2013-01-06 23:14:56 +01:00
039dc61bd2 Simplify Makefile 2013-01-06 23:02:31 +01:00
4b879984ea release 2013.01.06 2013-01-06 22:52:04 +01:00
55e286ba55 read -n is bash-specific 2013-01-06 22:50:20 +01:00
9450bfa26e fixed tests (used the --test option) so that they pass. go figure 2013-01-06 16:33:37 -05:00
18be482a6f oops - didn't remove some reminders 2013-01-06 15:52:33 -05:00
ca6710ee41 made changes recommended in pull request 2013-01-06 15:40:50 -05:00
9314810243 fix ComedyCentral IE in Python3 2013-01-06 21:36:01 +01:00
7717ae19fa Add tests for ComedyCentral IE 2013-01-06 21:35:20 +01:00
32635ec685 Switch comedycentral IE to http downloads 2013-01-06 21:26:31 +01:00
caec7618a1 re-fixed XNXX regex problem 2013-01-05 16:05:23 -05:00
7e7ab2815c Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:01:03 -05:00
d7744f2219 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:00:50 -05:00
7161829de5 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 15:59:28 -05:00
991ba7fae3 Added extractors for 3 porn sites 2013-01-05 15:59:01 -05:00
a7539296ce Added extractors for 3 porn sites 2013-01-05 15:42:35 -05:00
258d5850c9 Merge branch 'master' of https://github.com/rg3/youtube-dl
Conflicts:
	.gitignore
	LATEST_VERSION
	Makefile
	youtube-dl
	youtube-dl.exe
	youtube_dl/InfoExtractors.py
	youtube_dl/__init__.py
2013-01-05 15:03:54 -05:00
20759b340a Disable travis irc notifications
travis is much too verbose for that, with random IEs constantly failing
2013-01-04 00:34:02 +01:00
8e5f761870 Merge pull request #617 from jaimeMF/steamIE
[steamIE]Allow downloading videos with other characters in their titles
2013-01-03 15:16:27 -08:00
26714799c9 steamIE remove the HTMLparser object 2013-01-03 23:56:02 +01:00
5e9d042d8f steamIE follow @phihag suggestions 2013-01-03 23:51:48 +01:00
9cf98a2bcc Allow downloading videos with other characters in their titles
Especially html entities
2013-01-03 21:17:35 +01:00
f5ebb61495 Support page URL in RTMP downloads 2013-01-03 20:26:38 +01:00
431d88dd31 Also generate SHA2-256 2013-01-03 19:49:06 +01:00
876f1a86af Also publish hashsums 2013-01-03 19:18:55 +01:00
01951dda7a Make ExtractorError usable for other causes 2013-01-03 15:39:55 +01:00
6e3dba168b release.sh edits based on 2013.01.02 experience 2013-01-02 23:40:24 +01:00
187da2c093 added YouJizz extractor 2012-12-16 00:26:27 -05:00
9a2cf56d51 Fixed a problem with the XNXXIE Regex 2012-12-15 23:22:07 -05:00
5f7ad21633 Strip HTML out of uploader name 2012-11-13 17:48:30 -05:00
089d47f8d5 Removed the README.md build target in the makefile. It is broken... 2012-11-13 17:48:10 -05:00
fdef722fa1 Added YouPorn infoExtractor 2012-11-13 13:10:56 -05:00
110d4f4c91 Added Pornotube support (for Laborers of Love) 2012-11-12 16:17:55 -05:00
16 changed files with 822 additions and 348 deletions

View File

@ -1,17 +0,0 @@
updates_key.pem
*.pyc
*.pyo
youtube-dl.exe
wine-py2exe/
py2exe.log
*.kate-swp
build/
dist/
MANIFEST
*.DS_Store
youtube-dl.tar.gz
.coverage
cover/
__pycache__/
.git/
*~

View File

@ -8,7 +8,7 @@ notifications:
email:
- filippo.valsorda@gmail.com
- phihag@phihag.de
irc:
channels:
- "irc.freenode.org#youtube-dl"
skip_join: true
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

View File

@ -1 +1 @@
9999.99.99
2012.12.99

View File

@ -1,12 +1,13 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
clean:
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
PREFIX=/usr/local
BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
PYTHON=/usr/bin/env python
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR)
@ -20,12 +21,14 @@ test:
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
nosetests --verbose test
.PHONY: all clean install test
tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
echo '#!/usr/bin/env python' > youtube-dl
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip
chmod a+x youtube-dl
@ -42,6 +45,17 @@ youtube-dl.1: README.md
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
youtube-dl.tar.gz: all
tar -cvzf youtube-dl.tar.gz -s "|^./|./youtube-dl/|" \
--exclude-from=".tarignore" -- .
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache' \
--exclude '.git' \
-- \
bin devscripts test youtube_dl \
CHANGELOG LICENSE README.md README.txt \
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
youtube-dl

View File

@ -9,8 +9,8 @@ youtube-dl
# DESCRIPTION
**youtube-dl** is a small command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter, version
2.x (x being at least 6), and it is not platform specific. It should work in
your Unix box, in Windows or in Mac OS X. It is released to the public domain,
2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like.
# OPTIONS
@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like.
--reject-title REGEX skip download for matching titles (regex or
caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g.
50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g.
50k or 44.6m)
## Filesystem Options:
-t, --title use title in file name
@ -105,11 +109,13 @@ which means you can modify it, redistribute it or use it however you like.
## Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
best by default
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
"wav"; best by default
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
value between 0 (better) and 9 (worse) for VBR or a
specific bitrate like 128K (default 5)
--recode-video FORMAT Encode the video to another format if necessary
(currently supported: mp4|flv|ogg|webm)
-k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post-
@ -117,7 +123,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
# OUTPUT TEMPLATE

View File

@ -4,12 +4,17 @@ import rsa
import json
from binascii import hexlify
try:
input = raw_input
except NameError:
pass
versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
del versions_info['signature']
print('Enter the PKCS1 private key, followed by a blank line:')
privkey = ''
privkey = b''
while True:
try:
line = input()
@ -17,8 +22,7 @@ while True:
break
if line == '':
break
privkey += line + '\n'
privkey = bytes(privkey, 'ascii')
privkey += line.encode('ascii') + b'\n'
privkey = rsa.PrivateKey.load_pkcs1(privkey)
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()

View File

@ -1,13 +1,17 @@
#!/bin/sh
#!/bin/bash
# IMPORTANT: the following assumptions are made
# * you did --set-upstream
# * the GH repo is on the origin remote
# * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set
# You will need
# pip install coverage nose rsa
# TODO
# release notes
# make hash on local files
set -e
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
@ -21,7 +25,7 @@ make clean
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
echo "\n### Changing version in version.py..."
sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
make README.md
@ -34,41 +38,52 @@ git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "update_staging/$version"
mv youtube-dl youtube-dl.exe "update_staging/$version"
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz"
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in update_staging/$version/*; do gpg --detach-sig "$f"; done
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/
rm -r update_staging
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
echo "\n### Now switching to gh-pages..."
MASTER=$(git rev-parse --abbrev-ref HEAD)
git checkout gh-pages
git checkout "$MASTER" -- devscripts/gh-pages/
git reset devscripts/gh-pages/
devscripts/gh-pages/add-version.py $version
devscripts/gh-pages/sign-versions.py < updates_key.pem
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/update-copyright.py
rm -r test_coverage
mv cover test_coverage
git add *.html *.html.in update test_coverage
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
git clone --branch gh-pages --single-branch . build/gh-pages
ROOT=$(pwd)
(
set -e
ORIGIN_URL=$(git config --get remote.origin.url)
cd build/gh-pages
"$ROOT/devscripts/gh-pages/add-version.py" $version
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
rm -rf build
echo "Uploading to PyPi ..."
pip sdist upload
echo "\n### DONE!"
git checkout $MASTER

View File

@ -2,10 +2,14 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from distutils.core import setup
import pkg_resources
import sys
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
try:
import py2exe
"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""

View File

@ -26,6 +26,7 @@ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(10)
def _try_rm(filename):
""" Remove a file if it exists """
@ -81,6 +82,11 @@ def generator(test_case):
fd.add_info_extractor(ie())
for ien in test_case.get('add_ie', []):
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
finished_hook_called = set()
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
fd.add_progress_hook(_hook)
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
@ -92,7 +98,8 @@ def generator(test_case):
for tc in test_cases:
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']))
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
if 'md5' in tc:
md5_for_file = _file_md5(tc['file'])
@ -100,11 +107,7 @@ def generator(test_case):
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, value) in tc.get('info_dict', {}).items():
if value.startswith('md5:'):
md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
self.assertEqual(value[3:], md5_info_value)
else:
self.assertEqual(value, info_dict.get(info_field))
self.assertEqual(value, info_dict.get(info_field))
finally:
for tc in test_cases:
_try_rm(tc['file'])

View File

@ -35,6 +35,24 @@
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
"file": "939581.flv"
},
{
"name": "YouPorn",
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
"file": "505835.mp4"
},
{
"name": "Pornotube",
"md5": "374dd6dcedd24234453b295209aa69b6",
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
"file": "1689755.flv"
},
{
"name": "YouJizz",
"md5": "07e15fa469ba384c7693fd246905547c",
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
"file": "2189178.flv"
},
{
"name": "Vimeo",
"md5": "8879b6cc097e987f02484baf890129e5",
@ -58,7 +76,8 @@
"name": "StanfordOpenClassroom",
"md5": "544a9468546059d4e80d76265b0443b8",
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
"file": "PracticalUnix_intro-environment.mp4"
"file": "PracticalUnix_intro-environment.mp4",
"skip": "Currently offline"
},
{
"name": "XNXX",
@ -95,8 +114,7 @@
"name": "Escapist",
"url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
"file": "6618-Breaking-Down-Baldurs-Gate.flv",
"md5": "c6793dbda81388f4264c1ba18684a74d",
"skip": "Fails with timeout on Travis"
"md5": "c6793dbda81388f4264c1ba18684a74d"
},
{
"name": "GooglePlus",
@ -160,5 +178,102 @@
"params": {
"skip_download": true
}
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
"title": "thedailyshow-kristen-stewart part 1"
}
},
{
"name": "RBMARadio",
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
"info_dict": {
"title": "Live at Primavera Sound 2011",
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
"uploader": "Ford & Lopatin",
"uploader_id": "ford-lopatin",
"location": "Spain"
}
},
{
"name": "Facebook",
"url": "https://www.facebook.com/photo.php?v=120708114770723",
"file": "120708114770723.mp4",
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
"info_dict": {
"title": "PEOPLE ARE AWESOME 2013",
"duration": 279
}
},
{
"name": "EightTracks",
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
"playlist": [
{
"file": "11885610.m4a",
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
"info_dict": {
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885608.m4a",
"md5": "4ab26f05c1f7291ea460a3920be8021f",
"info_dict": {
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
}
}
]
}
]

View File

@ -81,6 +81,9 @@ class FileDownloader(object):
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader.
keepvideo: Keep the video file after post-processing
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
@ -94,6 +97,7 @@ class FileDownloader(object):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._pps = []
self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@ -216,12 +220,15 @@ class FileDownloader(object):
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
tb, if given, is additional traceback information.
"""
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
if tb is None:
tb = u''.join(traceback.format_list(traceback.extract_stack()))
tb_data = traceback.format_list(traceback.extract_stack())
tb = u''.join(tb_data)
self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
raise DownloadError(message)
@ -497,7 +504,7 @@ class FileDownloader(object):
try:
videos = ie.extract(url)
except ExtractorError as de: # An error we somewhat expected
self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
break
except Exception as e:
if self.params.get('ignoreerrors', False):
@ -526,15 +533,29 @@ class FileDownloader(object):
return self._download_retcode
def post_process(self, filename, ie_info):
"""Run the postprocessing chain on the given file."""
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
keep_video = None
for pp in self._pps:
info = pp.run(info)
if info is None:
break
try:
keep_video_wish,new_info = pp.run(info)
if keep_video_wish is not None:
if keep_video_wish:
keep_video = keep_video_wish
elif keep_video is None:
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.to_stderr(u'WARNING: Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url):
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@ -548,7 +569,11 @@ class FileDownloader(object):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['-W', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@ -572,8 +597,15 @@ class FileDownloader(object):
retval = 0
break
if retval == 0:
self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@ -581,22 +613,29 @@ class FileDownloader(object):
def _do_download(self, filename, info_dict):
url = info_dict['url']
player_url = info_dict.get('player_url', None)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url, player_url)
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
@ -653,6 +692,10 @@ class FileDownloader(object):
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
@ -671,6 +714,15 @@ class FileDownloader(object):
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
@ -711,6 +763,14 @@ class FileDownloader(object):
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
@ -727,4 +787,31 @@ class FileDownloader(object):
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disk
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)

View File

@ -5,6 +5,7 @@ from __future__ import absolute_import
import base64
import datetime
import itertools
import netrc
import os
import re
@ -35,15 +36,16 @@ class InfoExtractor(object):
url: Final video URL.
title: Video title, unescaped.
ext: Video filename extension.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The .srt file contents.
urlhandle: [internal] The urlHandle to be used to download the file,
@ -106,18 +108,23 @@ class InfoExtractor(object):
def IE_NAME(self):
return type(self).__name__[:-2]
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None:
note = u'Downloading video webpage'
self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
try:
urlh = compat_urllib_request.urlopen(url_or_request)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
return compat_urllib_request.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)))
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the data of the page as a string """
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
class YoutubeIE(InfoExtractor):
@ -1974,62 +1981,14 @@ class DepositFilesIE(InfoExtractor):
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook'
_available_formats = ['video', 'highqual', 'lowqual']
_video_extensions = {
'video': 'mp4',
'highqual': 'mp4',
'lowqual': 'mp4',
}
IE_NAME = u'facebook'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def _reporter(self, message):
"""Add header and report message."""
self._downloader.to_screen(u'[facebook] %s' % message)
def report_login(self):
"""Report attempt to log in."""
self._reporter(u'Logging in')
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self._reporter(u'%s: Downloading video webpage' % video_id)
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self._reporter(u'%s: Extracting video information' % video_id)
def _parse_page(self, video_webpage):
"""Extract video information from page"""
# General data
data = {'title': r'\("video_title", "(.*?)"\)',
'description': r'<div class="datawrap">(.*?)</div>',
'owner': r'\("video_owner_name", "(.*?)"\)',
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
}
video_info = {}
for piece in data.keys():
mobj = re.search(data[piece], video_webpage)
if mobj is not None:
video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Video urls
video_urls = {}
for fmt in self._available_formats:
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
if mobj is not None:
# URL is in a Javascript segment inside an escaped Unicode format within
# the generally utf-8 page
video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
video_info['video_urls'] = video_urls
return video_info
self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
def _real_initialize(self):
if self._downloader is None:
@ -2082,100 +2041,35 @@ class FacebookIE(InfoExtractor):
return
video_id = mobj.group('ID')
# Get video webpage
self.report_video_webpage_download(video_id)
request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
try:
page = compat_urllib_request.urlopen(request)
video_webpage = page.read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
webpage = self._download_webpage(url, video_id)
# Start extracting information
self.report_information_extraction(video_id)
BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
if not m:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_url = params['hd_src']
video_duration = int(params['video_duration'])
# Extract information
video_info = self._parse_page(video_webpage)
m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
if not m:
raise ExtractorError(u'Cannot find title in webpage')
video_title = unescapeHTML(m.group(1))
# uploader
if 'owner' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = video_info['owner']
info = {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': params['thumbnail_src'],
}
return [info]
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = video_info['title']
video_title = video_title.decode('utf-8')
# thumbnail image
if 'thumbnail' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
video_thumbnail = ''
else:
video_thumbnail = video_info['thumbnail']
# upload date
upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
if timetuple is not None:
try:
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
except:
pass
# description
video_description = video_info.get('description', 'No description available.')
url_map = video_info['video_urls']
if url_map:
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
if format_limit is not None and format_limit in self._available_formats:
format_list = self._available_formats[self._available_formats.index(format_limit):]
else:
format_list = self._available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
self._downloader.trouble(u'ERROR: no known formats available for video')
return
if req_format is None:
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
elif req_format == '-1':
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific format
if req_format not in url_map:
self._downloader.trouble(u'ERROR: requested format not available')
return
video_url_list = [(req_format, url_map[req_format])] # Specific format
results = []
for format_param, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(format_param, 'mp4')
results.append({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'upload_date': upload_date,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description.decode('utf-8'),
})
return results
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
@ -2204,6 +2098,7 @@ class BlipTVIE(InfoExtractor):
cchar = '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
try:
@ -2224,8 +2119,7 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh
}
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
@ -2258,13 +2152,13 @@ class BlipTVIE(InfoExtractor):
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl']
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
except (ValueError,KeyError) as err:
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
return
std_headers['User-Agent'] = 'iTunes/10.6.1'
return [info]
@ -2333,7 +2227,6 @@ class ComedyCentralIE(InfoExtractor):
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
$"""
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@ -2361,16 +2254,12 @@ class ComedyCentralIE(InfoExtractor):
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
def report_config_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
def report_config_download(self, episode_id, media_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
def report_index_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
@ -2409,6 +2298,7 @@ class ComedyCentralIE(InfoExtractor):
try:
htmlHandle = compat_urllib_request.urlopen(req)
html = htmlHandle.read()
webpage = html.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@ -2423,29 +2313,20 @@ class ComedyCentralIE(InfoExtractor):
return
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html)
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html)
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
return
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
self.report_index_download(epTitle)
@ -2459,7 +2340,7 @@ class ComedyCentralIE(InfoExtractor):
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for itemEl in itemEls:
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
@ -2469,7 +2350,7 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
configReq = compat_urllib_request.Request(configUrl)
self.report_config_download(epTitle)
self.report_config_download(epTitle, shortMediaId)
try:
configXml = compat_urllib_request.urlopen(configReq).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -2491,7 +2372,7 @@ class ComedyCentralIE(InfoExtractor):
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
format,rtmp_video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
@ -2499,18 +2380,16 @@ class ComedyCentralIE(InfoExtractor):
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
format, rtmp_video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
video_url = base + m.group('finalid')
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
'id': shortMediaId,
'url': video_url,
@ -2521,9 +2400,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': None #playerUrl
}
results.append(info)
return results
@ -2603,7 +2480,6 @@ class EscapistIE(InfoExtractor):
return [info]
class CollegeHumorIE(InfoExtractor):
"""Information extractor for collegehumor.com"""
@ -2995,8 +2871,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
raise ExtractorError(u'Invalid URL: %s' % url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
@ -3033,12 +2908,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
self.report_download_webpage(info['id'])
try:
coursepage = compat_urllib_request.urlopen(url).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
coursepage = self._download_webpage(url, info['id'],
note='Downloading course info page',
errnote='Unable to download course info page')
m = re.search('<h1>([^<]+)</h1>', coursepage)
if m:
@ -3062,7 +2934,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
else: # Root page
info = {
'id': 'Stanford OpenClassroom',
@ -3290,7 +3161,7 @@ class YoukuIE(InfoExtractor):
class XNXXIE(InfoExtractor):
"""Information extractor for xnxx.com"""
_VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
IE_NAME = u'xnxx'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
@ -3542,17 +3413,25 @@ class JustinTVIE(InfoExtractor):
return
response = json.loads(webpage)
if type(response) != list:
error_text = response.get('error', 'unknown error')
self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
return
info = []
for clip in response:
video_url = clip['video_file_url']
if video_url:
video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['created_on'][:10])
video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({
'id': clip['id'],
'id': video_id,
'url': video_url,
'title': clip['title'],
'uploader': clip.get('user_id', clip.get('channel_id')),
'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date,
'ext': video_extension,
})
@ -3571,7 +3450,7 @@ class JustinTVIE(InfoExtractor):
paged = True
api += '/channel/archives/%s.json'
else:
api += '/clip/show/%s.json'
api += '/broadcast/by_archive/%s.json'
api = api % (video_id,)
self.report_extraction(video_id)
@ -3694,8 +3573,8 @@ class SteamIE(InfoExtractor):
videourl = 'http://store.steampowered.com/video/%s/' % gameID
webpage = self._download_webpage(videourl, gameID)
mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
titles = list(re.finditer(namesRE, webpage))
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
titles = re.finditer(namesRE, webpage)
videos = []
for vid,vtitle in zip(mweb,titles):
video_id = vid.group('videoID')
@ -3707,15 +3586,15 @@ class SteamIE(InfoExtractor):
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': title
'title': unescapeHTML(title)
}
videos.append(info)
return videos
class UstreamIE(InfoExtractor):
_VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)'
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
IE_NAME = u'ustream'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
@ -3734,6 +3613,292 @@ class UstreamIE(InfoExtractor):
}
return [info]
class RBMARadioIE(InfoExtractor):
    """Information extractor for shows on rbmaradio.com."""
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the show at url.

        Raises ExtractorError if url does not match _VALID_URL, if the show
        metadata cannot be found in the page, or if it is not valid JSON.
        """
        m = re.match(self._VALID_URL, url)
        if m is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)
        # The metadata is embedded in the page as the JavaScript value
        # assigned to gon.show
        m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
        if not m:
            raise ExtractorError(u'Cannot find metadata')
        json_data = m.group(1)

        try:
            data = json.loads(json_data)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + compat_str(e))

        # NOTE(review): cbr=256 presumably selects the bitrate - confirm
        video_url = data['akamai_url'] + '&cbr=256'
        url_parts = compat_urllib_parse_urlparse(video_url)
        # The extension is whatever follows the last dot of the URL path
        video_ext = url_parts.path.rpartition('.')[2]

        # "host" and "image" may be missing or null in the metadata; treat
        # both cases as an empty mapping so the optional fields become None.
        host = data.get('host') or {}
        image = data.get('image') or {}
        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': image.get('large_url_2x'),
            'duration': data.get('duration'),
        }
        return [info]
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for fmt in formats:
            print(u'%s\t\t%s' % (fmt['ext'], fmt['format']))

    def _specific(self, req_format, formats):
        """Return the first entry of formats whose 'format' is req_format, or None."""
        for x in formats:
            if x["format"] == req_format:
                return x
        return None

    def _real_extract(self, url):
        """Return a list of info dictionaries for the requested format(s).

        Returns None (after reporting trouble) for an invalid URL or an
        unavailable requested format, and also after printing the format
        list when 'listformats' is set.  Raises ExtractorError if the title,
        the download list or the download links cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('videoid')

        # The age check is bypassed by sending the verification cookie
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get the video title
        result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
        if result is None:
            raise ExtractorError(u'ERROR: unable to extract video title')
        video_title = result.group('title').strip()

        # Get the video date
        result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
        if result is None:
            self._downloader.to_stderr(u'WARNING: unable to extract video date')
            upload_date = None
        else:
            upload_date = result.group('date').strip()

        # Get the video uploader
        result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
        if result is None:
            self._downloader.to_stderr(u'ERROR: unable to extract uploader')
            video_uploader = None
        else:
            video_uploader = result.group('uploader').strip()
            video_uploader = clean_html(video_uploader)

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        result = re.search(DOWNLOAD_LIST_RE, webpage)
        if result is None:
            raise ExtractorError(u'Unable to extract download list')
        download_list_html = result.group('download_list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # The fourth path component starts with "<size>_<bitrate>_"
            size, bitrate = format_parts = path.split('/')[4].split('_')[:2]
            format_name = "-".join(format_parts)
            title = u'%s-%s-%s' % (video_title, size, bitrate)

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': title,
                'ext': extension,
                'format': format_name,
                'thumbnail': None,
                'description': None,
                'player_url': None
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self._downloader.to_screen(u'[youporn] Format: %s' % req_format)

        # The first link is treated as the best format, the last as the worst
        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            selected = self._specific(req_format, formats)
            # Check the lookup result itself; "result" still holds the
            # download-list match at this point and is never None.
            if selected is None:
                self._downloader.trouble(u'ERROR: requested format not available')
                return
            return [selected]
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        The id and title are taken from the URL itself; the video URL and
        the upload date are scraped from the page.  Returns None (after
        reporting trouble) if any of them cannot be determined.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('videoid')
        video_title = mobj.group('title')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video URL
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        result = re.search(VIDEO_URL_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video url')
            return
        video_url = compat_urllib_parse.unquote(result.group('url'))

        # Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        result = re.search(VIDEO_UPLOADED_RE, webpage)
        if result is None:
            # This lookup is for the upload date, so say so in the message
            self._downloader.trouble(u'ERROR: unable to extract upload date')
            return
        upload_date = result.group('date')

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': upload_date,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv'}

        return [info]
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        The title comes from the watch page and the media URL from the
        embed page that the watch page links to.  Returns None (after
        reporting trouble) for an invalid URL; raises ExtractorError when
        the title, the embed page or the media URL cannot be found.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        # Watch page: only the title is taken from here
        watch_page = self._download_webpage(url, url_match.group('videoid'))
        title_match = re.search(r'<title>(?P<title>.*)</title>', watch_page)
        if title_match is None:
            raise ExtractorError(u'ERROR: unable to extract video title')
        video_title = title_match.group('title').strip()

        # Embed page: its URL carries the numeric id used from here on
        embed_match = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', watch_page)
        if embed_match is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')
        embed_page_url = embed_match.group(0).strip()
        video_id = embed_match.group('videoid')
        embed_page = self._download_webpage(embed_page_url, video_id)

        # Media URL, as passed to the player in the embed page
        source_match = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', embed_page)
        if source_match is None:
            raise ExtractorError(u'ERROR: unable to extract video url')

        return [{
            'id': video_id,
            'url': source_match.group('source'),
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }]
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes."""
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        """Return one info dictionary per track of the mix at url.

        Tracks are requested one at a time from the play/next endpoints
        until the response reports the last track.  Raises ExtractorError
        for an invalid URL or when the mix data is not found in the page.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group('id')

        webpage = self._download_webpage(url, playlist_id)

        mix_match = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
        if not mix_match:
            raise ExtractorError(u'Cannot find trax information')
        data = json.loads(mix_match.group(1))

        # A random session number is sent with every request of this run
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']

        tracks = []
        next_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        for position in itertools.count(1):
            api_json = self._download_webpage(
                next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(position), track_count),
                errnote=u'Failed to download song information')
            track_set = json.loads(api_json)[u'set']
            track_data = track_set['track']
            tracks.append({
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            })
            if track_set['at_last_track']:
                break
            # The following track is requested relative to the current one
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        return tracks
def gen_extractors():
""" Return a list of an instance of every supported extractor.
@ -3768,6 +3933,9 @@ def gen_extractors():
MTVIE(),
YoukuIE(),
XNXXIE(),
YouJizzIE(),
PornotubeIE(),
YouPornIE(),
GooglePlusIE(),
ArteTvIE(),
NBAIE(),
@ -3776,6 +3944,8 @@ def gen_extractors():
TweetReelIE(),
SteamIE(),
UstreamIE(),
RBMARadioIE(),
EightTracksIE(),
GenericIE()
]

View File

@ -45,31 +45,24 @@ class PostProcessor(object):
one has an extra field called "filepath" that points to the
downloaded file.
When this method returns None, the postprocessing chain is
stopped. However, this method may return an information
dictionary that will be passed to the next postprocessing
object in the chain. It can be the one it received after
changing some fields.
This method returns a tuple, the first element of which describes
whether the original file should be kept (i.e. not deleted - None for
no preference), and the second of which is the updated information.
In addition, this method may raise a PostProcessingError
exception that will be taken into account by the downloader
it was called from.
exception if post processing fails.
"""
return information # by default, do nothing
return None, information # by default, keep file and do nothing
class AudioConversionError(BaseException):
def __init__(self, message):
self.message = message
class FFmpegPostProcessorError(PostProcessingError):
pass
class FFmpegExtractAudioPP(PostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False, nopostoverwrites=False):
class AudioConversionError(PostProcessingError):
pass
class FFmpegPostProcessor(PostProcessor):
def __init__(self,downloader=None):
PostProcessor.__init__(self, downloader)
if preferredcodec is None:
preferredcodec = 'best'
self._preferredcodec = preferredcodec
self._preferredquality = preferredquality
self._keepvideo = keepvideo
self._nopostoverwrites = nopostoverwrites
self._exes = self.detect_executables()
@staticmethod
@ -83,10 +76,37 @@ class FFmpegExtractAudioPP(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs)
def run_ffmpeg(self, path, out_path, opts):
    """Run avconv (preferred) or ffmpeg to convert path into out_path.

    opts is a list of extra command line arguments placed between the
    input file and the output file.  An existing output file is
    overwritten (-y).

    Raises FFmpegPostProcessorError if neither program was detected, or
    if the program exits with a non-zero status; in the latter case the
    message is the last line the program wrote to stderr.
    """
    if not self._exes['ffmpeg'] and not self._exes['avconv']:
        raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
    cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
           + opts +
           [encodeFilename(self._ffmpeg_filename_argument(out_path))])
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        # communicate() hands back bytes; decode before splitting so that
        # the text separator '\n' also works on Python 3.
        stderr = stderr.decode('utf-8', 'replace')
        msg = stderr.strip().split('\n')[-1]
        raise FFmpegPostProcessorError(msg)
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'):
return u'./' + fn
return fn
class FFmpegExtractAudioPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
    """Store the audio extraction settings; a codec of None means 'best'."""
    FFmpegPostProcessor.__init__(self, downloader)
    self._preferredcodec = 'best' if preferredcodec is None else preferredcodec
    self._preferredquality = preferredquality
    self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', '--', encodeFilename(path)]
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
@ -108,31 +128,27 @@ class FFmpegExtractAudioPP(PostProcessor):
acodec_opts = []
else:
acodec_opts = ['-acodec', codec]
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn']
+ acodec_opts + more_opts +
['--', encodeFilename(out_path)])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if p.returncode != 0:
msg = stderr.strip().split('\n')[-1]
raise AudioConversionError(msg)
opts = ['-vn'] + acodec_opts + more_opts
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
raise AudioConversionError(err.message)
def run(self, information):
path = information['filepath']
filecodec = self.get_audio_codec(path)
if filecodec is None:
self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
return None
raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if self._preferredcodec == 'm4a' and filecodec == 'aac':
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = self._preferredcodec
extension = 'm4a'
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis']:
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
acodec = 'copy'
extension = filecodec
@ -152,7 +168,7 @@ class FFmpegExtractAudioPP(PostProcessor):
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
extension = self._preferredcodec
more_opts = []
if self._preferredquality is not None:
@ -181,10 +197,10 @@ class FFmpegExtractAudioPP(PostProcessor):
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
msg = u'audio conversion failed: ' + e.message
else:
self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg'))
return None
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
@ -193,12 +209,24 @@ class FFmpegExtractAudioPP(PostProcessor):
except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
if not self._keepvideo:
try:
os.remove(encodeFilename(path))
except (IOError, OSError):
self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
return None
information['filepath'] = new_path
return information
return False,information
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post-processor that converts the downloaded video file to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        """Remember the target format.

        downloader is passed unchanged to the FFmpegPostProcessor constructor.
        preferedformat is the target format name; it is compared against
        information['ext'] and used as the extension of the output file.
        """
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format if needed.

        Returns a (flag, information) tuple:
        - (True, information) when the file already has the target extension
          and nothing was converted;
        - (False, information) after a conversion, with 'filepath', 'format'
          and 'ext' updated to describe the new file.
        NOTE(review): the flag presumably tells the caller whether the
        original file must be kept - confirm against the caller.
        """
        # Local import: this block is the only place that needs os.path here.
        import os

        path = information['filepath']
        if information['ext'] == self._preferedformat:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
            return True, information

        # splitext only strips a real extension of the last path component, so
        # a file without an extension (or a dot in a directory name) no longer
        # produces a truncated output path.
        outpath = os.path.splitext(path)[0] + u'.' + self._preferedformat
        self._downloader.to_screen(u'[ffmpeg] Converting video from %s to %s, Destination: %s' % (information['ext'], self._preferedformat, outpath))
        self.run_ffmpeg(path, outpath, [])
        information['filepath'] = outpath
        information['format'] = self._preferedformat
        information['ext'] = self._preferedformat
        return False, information

View File

@ -22,6 +22,7 @@ __authors__ = (
'Christian Albrecht',
'Dave Vasilevsky',
'Jaime Marquínez Ferrándiz',
'Jeff Crouse',
)
__license__ = 'Public Domain'
@ -149,6 +150,9 @@ def parseOpts():
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username')
@ -175,7 +179,6 @@ def parseOpts():
action='store', dest='subtitleslang', metavar='LANG',
help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate',
@ -248,9 +251,11 @@ def parseOpts():
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@ -284,10 +289,13 @@ def _real_main():
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
sys.exit(u'ERROR: unable to open cookie file')
if opts.verbose:
traceback.print_exc()
sys.stderr.write(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@ -347,6 +355,16 @@ def _real_main():
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.min_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
if numeric_limit is None:
parser.error(u'invalid min_filesize specified')
opts.min_filesize = numeric_limit
if opts.max_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
if numeric_limit is None:
parser.error(u'invalid max_filesize specified')
opts.max_filesize = numeric_limit
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
@ -370,12 +388,15 @@ def _real_main():
except (TypeError, ValueError) as err:
parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error(u'invalid audio format specified')
if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K')
if not opts.audioquality.isdigit():
parser.error(u'invalid audio quality specified')
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
parser.error(u'invalid video recode format specified')
if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
@ -432,6 +453,9 @@ def _real_main():
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize
})
if opts.verbose:
@ -453,7 +477,9 @@ def _real_main():
# PostProcessors
if opts.extractaudio:
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites))
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
# Update version
if opts.update_self:

View File

@ -8,6 +8,7 @@ import locale
import os
import re
import sys
import traceback
import zlib
import email.utils
import json
@ -279,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# Only installed on interpreters older than 2.7.3.
if sys.version_info < (2, 7, 3):
    def _parse_endtag_compat(self, i):
        """Skip a literal "</scr'+'ipt>" found at offset i of the raw data.

        Returns the offset just past that text; for anything else the stock
        HTMLParser.parse_endtag is used.
        """
        marker = "</scr'+'ipt>"
        if self.rawdata.startswith(marker, i):
            return i + len(marker)
        return compat_html_parser.HTMLParser.parse_endtag(self, i)

    AttrParser.parse_endtag = _parse_endtag_compat
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
@ -408,18 +415,24 @@ def encodeFilename(s):
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s
else:
return s.encode(sys.getfilesystemencoding(), 'ignore')
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
# Exception type for failures during info extraction; optionally carries the
# traceback of the underlying error so it can be printed later.
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None):
# NOTE(review): the two string lines below read as the earlier and the
# current wording of the same docstring shown side by side - confirm which
# one the file actually contains.
""" tb is the original traceback (so that it can be printed out) """
""" tb, if given, is the original traceback (so that it can be printed out). """
# msg becomes the regular exception message.
super(ExtractorError, self).__init__(msg)
# With no explicit tb, use the traceback of the exception currently being
# handled (sys.exc_info()[2]); that is None when no exception is active.
# NOTE(review): the enclosing hunk header (-408,18 +415,24) suggests these two
# lines may belong to the removed side of the change - confirm before relying
# on this fallback.
if tb is None:
tb = sys.exc_info()[2]
self.traceback = tb
def format_traceback(self):
# Returns the stored traceback formatted with traceback.format_tb and joined
# into one unicode string, or None when no traceback was recorded.
if self.traceback is None:
return None
return u''.join(traceback.format_tb(self.traceback))
class DownloadError(Exception):
"""Download Error exception.
@ -446,7 +459,8 @@ class PostProcessingError(Exception):
This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task.
"""
pass
def __init__(self, msg):
    """Create the error with a human-readable description.

    msg is stored on the instance as .msg and is also handed to the
    Exception base class.
    """
    # Initialise the base class too (as ExtractorError in this file does), so
    # the standard exception state is set up the regular way; on Python 2 this
    # is what fills in .message.
    super(PostProcessingError, self).__init__(msg)
    self.msg = msg
class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """
@ -511,14 +525,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
return ret
def http_request(self, req):
    """Adjust the headers of an outgoing request and return the request.

    Steps, in order:
    1. Every entry of std_headers is applied to the request, replacing an
       existing header stored under the same key.
    2. If the request carries the internal 'Youtubedl-no-compression'
       header, 'Accept-encoding' is removed (when present) and the internal
       header itself is dropped.
    3. If the request carries the internal 'Youtubedl-user-agent' header,
       its value replaces 'User-agent' and the internal header is dropped.
    """
    for h, v in std_headers.items():
        # Remove the entry stored under this exact key before adding ours.
        if h in req.headers:
            del req.headers[h]
        req.add_header(h, v)

    if 'Youtubedl-no-compression' in req.headers:
        if 'Accept-encoding' in req.headers:
            del req.headers['Accept-encoding']
        # The marker header is internal only; it must not be left on the request.
        del req.headers['Youtubedl-no-compression']

    if 'Youtubedl-user-agent' in req.headers:
        if 'User-agent' in req.headers:
            del req.headers['User-agent']
        req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
        # Same as above: drop the internal marker once it has been applied.
        del req.headers['Youtubedl-user-agent']

    return req
def http_response(self, req, resp):

View File

@ -1,2 +1,2 @@
# Version string of this release.
__version__ = '2013.02.02'