Compare commits


160 Commits

Author SHA1 Message Date
b26559878f release 2013.12.26 2013-12-26 21:56:23 +01:00
fd46a318a2 Print out encoding information in -v (#2046) 2013-12-26 21:55:42 +01:00
5d4f3985be Document that format_id field should be present 2013-12-26 21:19:00 +01:00
360babf799 [theplatform] Use centralized sorting 2013-12-26 21:18:18 +01:00
a1b92edbb3 [channel 9] Use centralized format sorting 2013-12-26 21:14:43 +01:00
12c978739a [internetvideoarchive] Use centralized format sorting 2013-12-26 21:08:52 +01:00
4bc60dafeb [blinkx] Use centralized format sorting 2013-12-26 21:05:30 +01:00
bf5b0a1bfb [ivi] Use centralized format sorting 2013-12-26 18:40:16 +01:00
bfe9de8510 [youporn] Add support for multiple formats 2013-12-26 18:37:12 +01:00
5ecd3c6a09 [bandcamp] Add support for multiple formats 2013-12-26 14:08:57 +01:00
608d11f515 [cnn] Add multiple formats, duration, and upload_date 2013-12-26 13:49:44 +01:00
723f839911 Remove unused imports 2013-12-25 15:33:19 +01:00
61224dbcdd [zdf] Make width extraction more robust 2013-12-25 15:33:09 +01:00
c3afc93a69 Merge remote-tracking branch 'origin/master' 2013-12-25 15:24:44 +01:00
7b8af56340 [appletrailers] Use centralized format selection 2013-12-25 15:24:41 +01:00
539179f45b [wistia] Use centralized sorting 2013-12-25 15:20:14 +01:00
7217e148fb [yahoo] Use centralized sorting, and add tbr field 2013-12-25 15:18:40 +01:00
d29b5e812b Merge pull request #2042 from dstftw/master
[smotri] Fix typo
2013-12-25 04:34:05 -08:00
dst f7e9d77f34 [smotri] Fix typo 2013-12-25 09:02:35 +07:00
b874fe2da8 [mdr] Use centralized format selection 2013-12-24 23:34:11 +01:00
c7deaa4c74 [zdf] Use centralized sorting 2013-12-24 23:32:04 +01:00
e6812ac99d [spiegel] Use centralized sorting 2013-12-24 12:40:23 +01:00
719d3927d7 [mit] Add support for multiple formats 2013-12-24 12:38:08 +01:00
55e663a8d7 [dreisat] Use centralized format sorting 2013-12-24 12:35:08 +01:00
2c62dc26c8 [youtube] Simplify format specification 2013-12-24 12:34:09 +01:00
3d4a70b821 Add more tests for format selection 2013-12-24 12:33:33 +01:00
4bcc7bd1f2 Add temporary _sort_formats helper function 2013-12-24 12:31:42 +01:00
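
The "Use centralized format sorting" commits above all delegate to this helper. A minimal sketch of the idea, assuming only the documented info_dict fields (the real helper weighs more criteria, such as codecs and filesize):

    def _sort_formats(formats):
        # Order formats worst-to-best, so '-f best' and the default
        # selection can simply pick the last element.
        def _key(f):
            return (
                f.get('preference') if f.get('preference') is not None else -1,
                f.get('height') if f.get('height') is not None else -1,
                f.get('width') if f.get('width') is not None else -1,
                f.get('tbr') if f.get('tbr') is not None else -1,
                f.get('format_id') or '',
            )
        formats.sort(key=_key)
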
f49d89ee04 Add a resolution field and improve general --list-formats output 2013-12-24 11:56:02 +01:00
dabc127362 Remove dead code 2013-12-23 16:03:06 +01:00
c25c991809 [mplayer] Fix error introduced by downloader separation 2013-12-23 16:00:48 +01:00
f45f96f8f8 [myvideo] Use RTMP instead of RTMPT (Fixes #2032) 2013-12-23 15:57:43 +01:00
1538eff6d8 [bliptv] Remove support for direct downloads
This is now handled by the generic IE
2013-12-23 15:49:21 +01:00
00b2685b9c Merge remote-tracking branch 'origin/master' 2013-12-23 13:52:15 +01:00
8e3e03229e [YoutubeDL] fix tests (Closes #2036) 2013-12-23 13:51:56 +01:00
9d8d675e0e [subtitles-tests] Fix youtube test
It now returns a single info_dict
2013-12-23 10:40:28 +01:00
933605d7e8 YoutubeDL: rename _fd_progress_hooks back to _progress_hooks
In the future it may report more things.
2013-12-23 10:37:27 +01:00
b3d9ef88ec YoutubeDL: only set the ‘formats’ field of the info_dict if it was already set before
It caused a circular reference error when trying to dump it to JSON (for example with the test video for myvideo.de, or any other video without formats)
2013-12-23 10:23:13 +01:00
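
The circular reference is easy to reproduce in isolation (hypothetical values):

    import json

    info_dict = {'id': 'testvid', 'url': 'http://example.com/video.mp4'}
    # For a video without explicit formats, the only "format" is info_dict itself
    formats = [info_dict]
    info_dict['formats'] = formats  # info_dict now contains itself ...
    json.dumps(info_dict)           # ... so this raises: Circular reference detected

The `formats[0] is not info_dict` guard visible in the YoutubeDL.py diff below skips the assignment in exactly this case.
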
8958b6916c release 2013.12.23.4 2013-12-23 05:08:35 +01:00
9fc3bef87a Merge remote-tracking branch 'jaimeMF/split-downloaders' 2013-12-23 05:03:32 +01:00
d80044c235 [youtube] Prefer videos with sound 2013-12-23 04:51:42 +01:00
bc2103f3bf release 2013.12.23.3 2013-12-23 04:39:55 +01:00
f82b18efc1 Merge remote-tracking branch 'rzhxeo/youtube' 2013-12-23 04:37:40 +01:00
504c668d3b release 2013.12.23.2 2013-12-23 04:31:45 +01:00
466617f539 [bliptv] Simplify (From #2000) 2013-12-23 04:31:38 +01:00
196938835a Remove debugging code
Introduced by accident in 5d681e960d
2013-12-23 04:30:57 +01:00
a94e129a65 release 2013.12.23.1 2013-12-23 04:20:25 +01:00
5d681e960d Use bidiv instead of fribidi if available (Fixes #1912) 2013-12-23 04:19:50 +01:00
c7b487d96b release 2013.12.23 2013-12-23 03:45:02 +01:00
7dbf5ae587 [smotri] Add support for moderated (?) videos (Fixes #2030) 2013-12-23 03:44:47 +01:00
8d0bdeba18 [smotri] Make optional attributes optional 2013-12-23 03:38:29 +01:00
1b969041d7 [blinkx] Support mobile URLs (Closes #2022) 2013-12-22 07:43:54 +01:00
e302f9ce32 [youtube:user] Speed up --match-title 2013-12-22 03:57:42 +01:00
5a94982abe Remove unused import 2013-12-22 03:52:12 +01:00
7115ca84aa [vimeo/generic] Add support for embedded SWF vimeo videos 2013-12-22 03:34:13 +01:00
04ff34ab89 Show all matching URLs 2013-12-22 03:25:55 +01:00
bbafbe20c2 [vimeo] Better formatting for regexp 2013-12-22 03:21:28 +01:00
c4d55a33fc [brightcove] Test checksum changed 2013-12-20 17:28:50 +01:00
147e4aece0 [vbox7] New video checksum 2013-12-20 17:27:43 +01:00
bd1488ae64 [mdr] Remove test
For context, see http://de.wikipedia.org/wiki/Depublizieren
2013-12-20 17:24:48 +01:00
79fed2a4df [crunchyroll] Fix test (#1721) 2013-12-20 17:20:39 +01:00
304cbe981e Merge remote-tracking branch 'rzhxeo/crunchyroll' 2013-12-20 17:13:26 +01:00
3fefbf50e3 Merge pull request #2005 from dstftw/ivi.ru
Add support for ivi.ru
2013-12-20 08:12:38 -08:00
f65c1d2be0 release 2013.12.20 2013-12-20 17:08:16 +01:00
aa94a6d315 [aparat] Add support (Fixes #2012) 2013-12-20 17:05:39 +01:00
768df74538 [blinkx] Add support for youtube videos 2013-12-19 21:02:25 +01:00
1f9da9049b [generic] Support YouTube swf embed (Fixes #2010) 2013-12-19 20:44:30 +01:00
c0d0b01f0e [generic] Detect ooyala videos (fixes #2013) 2013-12-19 20:32:12 +01:00
7c86a5b864 Merge pull request #2011 from dstftw/master
[imdb] Add support for mobile site URLs
2013-12-19 11:28:34 -08:00
dst 97e302a419 [imdb] Add support for mobile site URLs 2013-12-20 00:21:04 +07:00
71507a11c8 [soundcloud] Support mobile URLs (Fixes #2009) 2013-12-19 16:39:01 +01:00
dst a51e37af62 [ivi] Simplify 2013-12-19 10:53:38 +07:00
1fb8f09273 Merge pull request #2006 from dstftw/master
[smotri] Fix duration field name
2013-12-18 15:40:40 -08:00
dst 6c6db72ed4 [ivi] Skip tests for travis build 2013-12-19 06:19:41 +07:00
dst 0cc83dc54b [smotri] Fix duration field name 2013-12-19 05:56:48 +07:00
dst 5ce54a8205 [ivi] Neat import 2013-12-19 05:53:34 +07:00
dst 8c21b7c647 [ivi] Add playlist tests 2013-12-19 05:39:22 +07:00
dst 77aa6b329d [ivi] Add support for ivi.ru 2013-12-19 05:28:16 +07:00
62d68c43ed Make prefer_free_formats sorting more robust 2013-12-18 21:25:13 +01:00
bfaae0a768 Filter and sort videos before calling list_formats 2013-12-18 21:24:39 +01:00
e56f22ae20 [YoutubeIE] Sort formats by resolution 2013-12-18 21:22:37 +01:00
dbd1988ed9 [YoutubeIE] Add width and height to format dict 2013-12-18 21:21:25 +01:00
4ea3be0a5c [YoutubeIE] Externalize format selection 2013-12-18 03:30:55 +01:00
3e78514568 [generic] Support application/ogg for direct links
Also remove some debugging code.
2013-12-17 16:26:34 +01:00
e029b8bd43 [utils] Remove duplicated line
This line was added by accident in 42393ce234
2013-12-17 16:12:20 +01:00
f5567e401c Merge pull request #1997 from rg3/simplify-url_basename
Simplify url_basename
2013-12-17 07:08:48 -08:00
9b8aaeed85 Simplify url_basename
Use urlparse from the standard library.
2013-12-17 14:56:29 +01:00
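
The simplified implementation is essentially the following (shown with the Python 3 urllib for self-containedness; youtube-dl itself goes through its compat layer):

    from urllib.parse import urlparse

    def url_basename(url):
        path = urlparse(url).path
        return path.strip('/').split('/')[-1]

    assert url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4') == 'trailer.mp4'
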
6086d121cb release 2013.12.17.2 2013-12-17 12:35:57 +01:00
7de6e075b4 [radiofrance] remove unused imports 2013-12-17 12:35:16 +01:00
946135aa2a [academicearth] remove unused imports 2013-12-17 12:34:30 +01:00
42393ce234 Add support for direct links to a video (#1973) 2013-12-17 12:33:55 +01:00
d6c7a367e8 [utils] Fix url_basename 2013-12-17 12:32:58 +01:00
cecaaf3f58 [generic] Do not use compatibility result fallback 2013-12-17 12:04:33 +01:00
f09828b4e1 release 2013.12.17.1 2013-12-17 04:13:41 +01:00
29eb517403 Add webpage_url_basename info_dict field (Fixes #1938) 2013-12-17 04:13:36 +01:00
44c471c3b8 release 2013.12.17 2013-12-17 02:51:22 +01:00
46374a56b2 [youtube] Do not warn for videos with allow_rating=0
This fixes #1982
Test video: http://www.youtube.com/watch?v=gi2uH3YxohU
2013-12-17 02:49:56 +01:00
ec98946ef9 [academicearth] Support playlists (Closes #1976) 2013-12-17 02:41:34 +01:00
fa77b742ac [radiofrance] Fill in test details 2013-12-16 23:07:57 +01:00
8b4e274610 [rtlnow] Fix URL calculation (Closes #1989) 2013-12-16 22:28:52 +01:00
d6756d3758 [playlist-test] require a string 2013-12-16 22:25:02 +01:00
11b68f6e1b release 2013.12.16.7 2013-12-16 22:18:58 +01:00
88bb52ee18 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 22:18:37 +01:00
d90df974c3 [academicearth] Add support for courses (#1976) 2013-12-16 22:18:27 +01:00
5c541b2cb7 [mtv] Add support for urls from the mobile site (fixes #1959) 2013-12-16 22:05:28 +01:00
87a28127d2 _search_regex's "isatty" call fails under Py2exe
_search_regex calls the sys.stderr.isatty() function on Unix systems.

Py2exe uses a custom Stderr() stream which doesn't have an `isatty()`
function, leading to a crash.

This is easily fixed by checking that it's a Unix system first.
2013-12-16 21:50:26 +01:00
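
A sketch of that fix, assuming the clear-line logic lives in a small helper:

    import os
    import sys

    def _clear_line_prefix():
        # Py2exe replaces sys.stderr with a custom Stderr() object that has
        # no isatty(); only probe it once we know we are on a Unix system.
        if os.name != 'nt' and sys.stderr.isatty():
            return '\r\x1b[K'  # carriage return + ANSI "erase line"
        return '\r'
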
ebce53b3d8 [vevo] Add support for videoplayer. URLs (#1957) 2013-12-16 21:48:38 +01:00
83c632dc43 release 2013.12.16.6 2013-12-16 21:46:16 +01:00
ff07a05575 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 21:46:11 +01:00
f25571ffbf Add support for embedded vevo player (Fixes #1957) 2013-12-16 21:45:21 +01:00
f7a6892572 [arte:ddc] Remove test
The video seems to expire after 7 days, like arte+7 videos
2013-12-16 21:42:41 +01:00
8fe56478f8 release 2013.12.16.5 2013-12-16 21:34:47 +01:00
0e2a436dce [radiofrance] Add support (Fixes #1942) 2013-12-16 21:34:41 +01:00
24050dd11c release 2013.12.16.4 2013-12-16 21:10:18 +01:00
8c8e3eec79 [facebook] Recognize #! URLs (Fixes #1988) 2013-12-16 21:10:06 +01:00
7ebc9dee69 Merge pull request #1987 from rzhxeo/blip
[GenericIE] Add support for embedded blip.tv
2013-12-16 11:28:34 -08:00
ee3e63e477 [GenericIE] Add support for embedded blip.tv 2013-12-16 20:08:23 +01:00
e9c424c144 Merge pull request #1984 from alimirjamali/patch-1
Incorrect variable is used to check whether thumbnail exists
2013-12-16 09:04:36 -08:00
0a9ce268ba Incorrect variable is used to check whether thumbnail exists
Dear @phihag

I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn'

Kindly advise

With kind regards
Ali
2013-12-16 20:14:28 +03:30
4b2da48ea7 release 2013.12.16.3 2013-12-16 14:44:29 +01:00
e64eaaa97d Fix execution under Python 3 2013-12-16 14:44:17 +01:00
780603027f [videopremium] Skip test 2013-12-16 14:42:07 +01:00
00902cd601 release 2013.12.16.2 2013-12-16 14:13:51 +01:00
d67b0b1596 Reorder info_dict documentation 2013-12-16 14:13:40 +01:00
d7dda16888 [blinkx] Add extractor (Fixes #1972) 2013-12-16 13:56:30 +01:00
a19fd00cc4 Simplify --playlist-start / --playlist-end interface 2013-12-16 13:16:20 +01:00
d66152a898 [ndtv] Remove unused imports 2013-12-16 08:16:38 +01:00
8c5f0c9fbc [mdr] Clean up 2013-12-16 08:16:11 +01:00
6888a874a1 release 2013.12.16.1 2013-12-16 05:45:15 +01:00
09dacfa57f [mdr] Simplify 2013-12-16 05:44:34 +01:00
b2ae513586 Merge remote-tracking branch 'mc2avr/master' 2013-12-16 05:14:03 +01:00
e4a0489f6e Merge remote-tracking branch 'dstftw/channel9'
Conflicts:
	youtube_dl/extractor/__init__.py
2013-12-16 05:14:00 +01:00
b83be81d27 Credit @mjorlitzky for pornhd (#1961) 2013-12-16 05:11:19 +01:00
6f5dcd4eee [pornhd] Simplify 2013-12-16 05:10:42 +01:00
1bb2fc98e0 Merge remote-tracking branch 'mjorlitzky/master' 2013-12-16 05:07:58 +01:00
e3946f989e Set process title to youtube-dl
This allows killing all youtube-dl processes with killall youtube-dl, and shows up more nicely in some programs.
2013-12-16 05:04:55 +01:00
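
On Linux this can be implemented with a prctl(2) call via ctypes; roughly (a sketch, not necessarily the exact code added here):

    import ctypes

    def setproctitle(title):
        try:
            libc = ctypes.cdll.LoadLibrary('libc.so.6')
        except OSError:
            return  # not Linux: silently do nothing
        title_bytes = title.encode('utf-8')
        buf = ctypes.create_string_buffer(len(title_bytes) + 1)
        buf.value = title_bytes
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)  # 15 == PR_SET_NAME

    setproctitle('youtube-dl')
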
8863d0de91 release 2013.12.16 2013-12-16 04:45:32 +01:00
7b6fefc9d4 Apply --no-overwrites for --write-* files as well (Fixes #1980) 2013-12-16 04:39:13 +01:00
525ef9227f Add --get-duration (Fixes #859) 2013-12-16 04:15:10 +01:00
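
--get-duration needs a pretty-printer for the duration field; a sketch consistent with the forceduration handling in the YoutubeDL.py diff below (the real helper may differ in detail):

    def formatSeconds(secs):
        if secs > 3600:
            return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
        elif secs > 60:
            return '%d:%02d' % (secs // 60, secs % 60)
        else:
            return '%d' % secs

    print(formatSeconds(33163))  # prints 9:12:43
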
c0ba0f4859 Document duration field 2013-12-16 04:09:43 +01:00
b466b7029d [youtube] Make duration an integer or None 2013-12-16 04:09:05 +01:00
fa3ae234e0 [cbs] Add extractor (Fixes #1977) 2013-12-16 03:53:43 +01:00
48462108f3 [theplatform] Fix geographic restriction check 2013-12-16 03:43:45 +01:00
f8b56e95b8 [theplatform] Detect geoblocked content 2013-12-16 03:34:46 +01:00
5fe18bdbde Add --min-views / --max-views (Fixes #1979) 2013-12-16 03:09:49 +01:00
dca02c80bc Fix detection of the extension if 'extractaudio' is given and improve the error message (#1969)
Using 'foo.mp4' shouldn't raise an error.
If 'foo' is given, suggest using 'foo.%(ext)s' for the template.
2013-12-15 11:42:38 +01:00
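
A sketch of the corrected check (function name assumed): a template with a real extension such as 'foo.mp4' passes, while a bare 'foo' produces the %(ext)s suggestion:

    import os.path

    def check_audio_outtmpl(outtmpl):
        ext = os.path.splitext(outtmpl)[1]
        if not ext and '%(ext)s' not in outtmpl:
            raise ValueError(
                'Cannot download a video and extract audio into the same file! '
                'Use "%s.%%(ext)s" instead of "%s" as the output template'
                % (outtmpl, outtmpl))

    check_audio_outtmpl('foo.mp4')  # OK: has an extension
    # check_audio_outtmpl('foo')    # would raise with the %(ext)s suggestion
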
9ee859b683 [dailymotion] Add support for urls from the mobile site (fixes #1953)
It uses the 'touch' subdomain and adds a '#' before 'video'
2013-12-14 14:20:12 +01:00
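
A simplified sketch of a URL pattern covering both forms (the real _VALID_URL is more permissive):

    import re

    # Desktop: http://www.dailymotion.com/video/x1a2b3c_some-title
    # Mobile:  http://touch.dailymotion.com/#video=x1a2b3c
    pattern = re.compile(
        r'https?://(?:(?:www|touch)\.)?dailymotion\.com/(?:#)?video[/=](?P<id>[^/?_]+)')

    assert pattern.match('http://touch.dailymotion.com/#video=x1a2b3c').group('id') == 'x1a2b3c'
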
8e05c870b4 Add support for pornhd.com. 2013-12-13 22:24:32 -05:00
5d574e143f [ign] Update one of test video's title 2013-12-13 17:04:40 +01:00
2a203a6cda Merge pull request #1956 from dstftw/master
Fix typo in month name
2013-12-13 07:41:34 -08:00
dst dadb8184e4 Fix typo in month name 2013-12-13 22:27:37 +07:00
7a563df90a [daum] Recognize mobile urls (#1952) 2013-12-12 13:05:38 +01:00
24b173fa5c [naver] Recognize mobile urls (fixes #1951) 2013-12-12 13:04:02 +01:00
dst 9b17ba0fa5 [channel9] Fix test description md5 2013-12-12 16:10:17 +07:00
dst 211f555d4c [channel9] Missing import in __init__ 2013-12-12 15:55:31 +07:00
dst 4d2ebb6bd7 [channel9] Cleanup 2013-12-12 15:19:23 +07:00
dst df53747436 [channel9] Initial implementation (#1885) 2013-12-12 15:13:45 +07:00
3bc2ddccc8 Move FileDownloader to its own module and create a new class for each download process
A suitable downloader can be found using the 'get_suitable_downloader' function.

Each subclass implements 'real_download'; to download an info dict you call the 'download' method, which first checks whether the video has already been downloaded
2013-12-11 16:18:48 +01:00
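
With this split, the dispatch looks roughly like the following (a sketch; module and class names as introduced by this branch):

    from youtube_dl.utils import determine_ext
    from youtube_dl.downloader.hls import HlsFD
    from youtube_dl.downloader.http import HttpFD
    from youtube_dl.downloader.mplayer import MplayerFD
    from youtube_dl.downloader.rtmp import RtmpFD

    def get_suitable_downloader(info_dict):
        """Get the downloader class that can handle the given info dict."""
        url = info_dict['url']
        if url.startswith('rtmp'):
            return RtmpFD
        if determine_ext(url) == 'm3u8':
            return HlsFD
        if url.startswith('mms') or url.startswith('rtsp'):
            return MplayerFD
        return HttpFD
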
8ab470f1b2 Now a new FileDownloader is created when downloading a video
Progress hooks can be added using the "add_downloader_progress_hook" method
2013-12-11 16:04:42 +01:00
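
A hook is an ordinary callable receiving the status dicts documented in add_progress_hook further down; for example:

    from youtube_dl import YoutubeDL

    def report_finished(status):
        if status['status'] == 'finished':
            print('Finished downloading %s' % status['filename'])

    ydl = YoutubeDL({})
    ydl.add_progress_hook(report_finished)  # name as of commit 933605d7e8
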
df1d7da2af add MDRIE 2013-12-10 18:40:50 +01:00
c8434e8316 Add support for crunchyroll.com 2013-11-09 11:25:12 +01:00
67 changed files with 2912 additions and 1448 deletions

View File

@@ -39,7 +39,8 @@ which means you can modify it, redistribute it or use it however you like.
/youtube-dl .
--no-cache-dir Disable filesystem caching
--bidi-workaround Work around terminals that lack bidirectional
text support. Requires fribidi executable in PATH
text support. Requires bidiv or fribidi
executable in PATH
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
@@ -56,6 +57,10 @@ which means you can modify it, redistribute it or use it however you like.
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
--min-views COUNT Do not download any videos with less than COUNT
views
--max-views COUNT Do not download any videos with more than COUNT
views
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not listed in the archive
@@ -127,6 +132,7 @@ which means you can modify it, redistribute it or use it however you like.
--get-id simulate, quiet but print id
--get-thumbnail simulate, quiet but print thumbnail URL
--get-description simulate, quiet but print video description
--get-duration simulate, quiet but print video length
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information

View File

@@ -71,7 +71,7 @@ setup(
author_email='ytdl@yt-dl.org',
maintainer='Philipp Hagemeister',
maintainer_email='phihag@phihag.de',
packages=['youtube_dl', 'youtube_dl.extractor'],
packages=['youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader'],
# Provokes warning on most systems (why?!)
# test_suite = 'nose.collector',

View File

@@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
class YDL(FakeYDL):
@@ -33,6 +34,8 @@ class TestFormatSelection(unittest.TestCase):
{u'ext': u'mp4', u'height': 460},
]
info_dict = {u'formats': formats, u'extractor': u'test'}
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'webm')
@@ -45,28 +48,46 @@ class TestFormatSelection(unittest.TestCase):
{u'ext': u'mp4', u'height': 1080},
]
info_dict[u'formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'mp4')
# No prefer_free_formats => keep original formats order
# No prefer_free_formats => prefer mp4 and flv for greater compatibility
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{u'ext': u'webm', u'height': 720},
{u'ext': u'mp4', u'height': 720},
{u'ext': u'flv', u'height': 720},
]
info_dict[u'formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'mp4')
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{u'ext': u'flv', u'height': 720},
{u'ext': u'webm', u'height': 720},
]
info_dict[u'formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'flv')
def test_format_limit(self):
formats = [
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
{u'format_id': u'good', u'url': u'http://example.com/good'},
{u'format_id': u'great', u'url': u'http://example.com/great'},
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
{u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1},
{u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2},
{u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3},
{u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},
]
info_dict = {
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
@@ -78,12 +99,12 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'format_limit': 'good'})
assert ydl.params['format_limit'] == 'good'
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'good')
ydl = YDL({'format_limit': 'great', 'format': 'all'})
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
@@ -91,44 +112,80 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['format_limit'] = 'excellent'
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'excellent')
def test_format_selection(self):
formats = [
{u'format_id': u'35', u'ext': u'mp4'},
{u'format_id': u'45', u'ext': u'webm'},
{u'format_id': u'47', u'ext': u'webm'},
{u'format_id': u'2', u'ext': u'flv'},
{u'format_id': u'35', u'ext': u'mp4', 'preference': 1},
{u'format_id': u'45', u'ext': u'webm', 'preference': 2},
{u'format_id': u'47', u'ext': u'webm', 'preference': 3},
{u'format_id': u'2', u'ext': u'flv', 'preference': 4},
]
info_dict = {u'formats': formats, u'extractor': u'test'}
ydl = YDL({'format': u'20/47'})
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47')
ydl = YDL({'format': u'20/71/worst'})
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
ydl = YDL()
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'2')
ydl = YDL({'format': u'webm/mp4'})
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47')
ydl = YDL({'format': u'3gp/40/mp4'})
ydl.process_ie_result(info_dict)
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '139', '171',
]
for f1id, f2id in zip(order, order[1:]):
f1 = YoutubeIE._formats[f1id].copy()
f1['format_id'] = f1id
f2 = YoutubeIE._formats[f2id].copy()
f2['format_id'] = f2id
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',

View File

@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
JustinTVIE,
YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
def test_no_duplicates(self):
ies = gen_extractors()
for tc in get_testcases():
url = tc['url']
for ie in ies:
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))

View File

@@ -90,7 +90,7 @@ def generator(test_case):
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook)
ydl.add_progress_hook(_hook)
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))

View File

@@ -12,6 +12,7 @@ from test.helper import FakeYDL
from youtube_dl.extractor import (
AcademicEarthCourseIE,
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
@@ -26,7 +27,8 @@ from youtube_dl.extractor import (
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
SmotriUserIE,
IviCompilationIE
)
@@ -158,5 +160,34 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Inspector')
self.assertTrue(len(result['entries']) >= 9)
def test_AcademicEarthCourse(self):
dl = FakeYDL()
ie = AcademicEarthCourseIE(dl)
result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'building-dynamic-websites')
self.assertEqual(result['title'], u'Building Dynamic Websites')
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
self.assertEqual(len(result['entries']), 10)
def test_ivi_compilation(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'dezhurnyi_angel')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)')
self.assertTrue(len(result['entries']) >= 36)
def test_ivi_compilation_season(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
if __name__ == '__main__':
unittest.main()

View File

@@ -36,10 +36,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()

View File

@@ -13,20 +13,22 @@ import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
timeconvert,
sanitize_filename,
unescapeHTML,
orderedSet,
DateRange,
unified_strdate,
encodeFilename,
find_xpath_attr,
get_meta_content,
xpath_with_ns,
smuggle_url,
unsmuggle_url,
orderedSet,
parse_duration,
sanitize_filename,
shell_quote,
encodeFilename,
smuggle_url,
str_to_int,
timeconvert,
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_basename,
xpath_with_ns,
)
if sys.version_info < (3, 0):
@@ -181,6 +183,22 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
def test_url_basename(self):
self.assertEqual(url_basename(u'http://foo.de/'), u'')
self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
self.assertEqual(
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
u'trailer.mp4')
def test_parse_duration(self):
self.assertEqual(parse_duration(None), None)
self.assertEqual(parse_duration('1'), 1)
self.assertEqual(parse_duration('1337:12'), 80232)
self.assertEqual(parse_duration('9:12:43'), 33163)
self.assertEqual(parse_duration('x:y'), None)
if __name__ == '__main__':
unittest.main()
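
An implementation consistent with the test_parse_duration expectations above; a sketch (the real function may accept more notations):

    import re

    def parse_duration(s):
        if s is None:
            return None
        m = re.match(
            r'(?:(?:(?P<hours>\d+):)?(?P<mins>\d+):)?(?P<secs>\d+)$', s)
        if m is None:
            return None
        res = int(m.group('secs'))
        if m.group('mins'):
            res += int(m.group('mins')) * 60
            if m.group('hours'):
                res += int(m.group('hours')) * 3600
        return res
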

View File

@@ -1,724 +1,12 @@
import os
import re
import subprocess
import sys
import time
from .utils import (
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
determine_ext,
encodeFilename,
format_bytes,
sanitize_open,
timeconvert,
)
class FileDownloader(object):
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._progress_hooks = []
self.params = params
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, **kargs)
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None)
if rate_limit is None or byte_counter == 0:
return
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
if filename.endswith(u'.part'):
return filename[:-len(u'.part')]
return filename
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError):
self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
return
if not os.path.isfile(encodeFilename(filename)):
return
timestr = last_modified_hdr
if timestr is None:
return
filetime = timeconvert(timestr)
if filetime is None:
return filetime
# Ignore obviously invalid dates
if filetime == 0:
return
try:
os.utime(filename, (time.time(), filetime))
except:
pass
return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self._report_progress_status(
(u'100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))),
is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
# Check for rtmpdump first
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
if conn:
basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'rtmpdump exited with code %d' % retval)
return False
def _download_with_mplayer(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
try:
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
return False
# Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'mplayer exited with code %d' % retval)
return False
def _download_m3u8_with_ffmpeg(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
from .downloader import FileDownloader as RealFileDownloader
from .downloader import get_suitable_downloader
# This class reproduces the old behaviour of FileDownloader
class FileDownloader(RealFileDownloader):
def _do_download(self, filename, info_dict):
url = info_dict['url']
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None),
info_dict.get('rtmp_live', False),
info_dict.get('rtmp_conn', None))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
# m3u8 manifest are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range','bytes=0-10240')
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', False):
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
# Establish connection
try:
if count == 0 and 'urlhandle' in info_dict:
data = info_dict['urlhandle']
data = compat_urllib_request.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error(u'giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
after = time.time()
if len(data_block) == 0:
break
byte_counter += len(data_block)
# Open file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error(u'unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if stream is None:
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True
def _hook_progress(self, status):
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)
real_fd.add_progress_hook(ph)
return real_fd.download(filename, info_dict)

View File

@@ -34,6 +34,7 @@ from .utils import (
encodeFilename,
ExtractorError,
format_bytes,
formatSeconds,
get_term_width,
locked_file,
make_HTTPS_handler,
@@ -46,12 +47,13 @@ from .utils import (
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
url_basename,
write_json_file,
write_string,
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
from .downloader import get_suitable_downloader
from .version import __version__
@@ -94,6 +96,7 @@ class YoutubeDL(object):
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
forceduration: Force printing duration.
forcejson: Force printing info_dict as JSON.
simulate: Do not download the video files.
format: Video format code.
@@ -127,7 +130,16 @@ class YoutubeDL(object):
noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
download_archive: File name of a file where all downloads are recorded.
min_views: An integer representing the minimum view count the video
must have in order to not be skipped.
Videos without view count information are always
downloaded. None for no limit.
max_views: An integer representing the maximum view count.
Videos that are more popular than that are not
downloaded.
Videos without view count information are always
downloaded. None for no limit.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
cookiefile: File name where cookies should be read from and dumped to.
@@ -171,12 +183,18 @@ class YoutubeDL(object):
width_args = []
else:
width_args = ['-w', str(width)]
self._fribidi = subprocess.Popen(
['fribidi', '-c', 'UTF-8'] + width_args,
sp_kwargs = dict(
stdin=subprocess.PIPE,
stdout=slave,
stderr=self._err_file)
self._fribidi_channel = os.fdopen(master, 'rb')
try:
self._output_process = subprocess.Popen(
['bidiv'] + width_args, **sp_kwargs
)
except OSError:
self._output_process = subprocess.Popen(
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
@@ -193,8 +211,6 @@ class YoutubeDL(object):
u'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@@ -230,15 +246,20 @@ class YoutubeDL(object):
self._pps.append(pp)
pp.set_downloader(self)
def add_progress_hook(self, ph):
"""Add the progress hook (currently only for the file downloader)"""
self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
if not hasattr(self, '_fribidi_channel'):
if not hasattr(self, '_output_channel'):
return message
assert hasattr(self, '_output_process')
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
self._fribidi.stdin.flush()
res = u''.join(self._fribidi_channel.readline().decode('utf-8')
self._output_process.stdin.write((message + u'\n').encode('utf-8'))
self._output_process.stdin.flush()
res = u''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
@@ -355,22 +376,6 @@ class YoutubeDL(object):
error_message = u'%s %s' % (_msg_header, message)
self.trouble(error_message, tb)
def report_writedescription(self, descfn):
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
def report_writesubtitles(self, sub_filename):
""" Report that the subtitles file is being written """
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
""" Report that the annotations file has been written. """
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
@@ -415,13 +420,14 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', u'video'))
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
@@ -430,14 +436,21 @@ class YoutubeDL(object):
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
view_count = info_dict.get('view_count', None)
if view_count is not None:
min_views = self.params.get('min_views')
if min_views is not None and view_count < min_views:
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views:
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
age_limit = self.params.get('age_limit')
if age_limit is not None:
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return (u'%s has already been recorded in archive'
% info_dict.get('title', info_dict.get('id', u'video')))
return u'%s has already been recorded in archive' % video_title
return None
@staticmethod
@@ -481,6 +494,7 @@ class YoutubeDL(object):
{
'extractor': ie.IE_NAME,
'webpage_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
if process:
@@ -528,7 +542,7 @@ class YoutubeDL(object):
def make_result(embedded_info):
new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
'entries', 'urlhandle', 'ie_key', 'duration',
'entries', 'ie_key', 'duration',
'subtitles', 'annotations', 'format',
'thumbnail', 'thumbnails'):
if f in new_result:
@@ -554,16 +568,16 @@ class YoutubeDL(object):
n_all_entries = len(ie_result['entries'])
playliststart = self.params.get('playliststart', 1) - 1
playlistend = self.params.get('playlistend', -1)
playlistend = self.params.get('playlistend', None)
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
entries = ie_result['entries'][playliststart:]
else:
entries = ie_result['entries'][playliststart:playlistend]
playlistend = None
entries = ie_result['entries'][playliststart:playlistend]
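# Editor's note: entries[playliststart:None] slices to the end of the list,
# so mapping the legacy -1 to None above preserves the old "-1 means the
# whole remaining playlist" behaviour with a single slice expression.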
n_entries = len(entries)
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
self.to_screen(
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
for i, entry in enumerate(entries, 1):
@@ -573,6 +587,7 @@ class YoutubeDL(object):
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
@@ -593,6 +608,7 @@ class YoutubeDL(object):
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
return r
@@ -629,7 +645,7 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
# These extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku']:
if info_dict['extractor'] in [u'Youku']:
if download:
self.process_info(info_dict)
return info_dict
@@ -655,24 +671,23 @@ class YoutubeDL(object):
if 'ext' not in format:
format['ext'] = determine_ext(format['url'])
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
format_limit = self.params.get('format_limit', None)
if format_limit:
formats = list(takewhile_inclusive(
lambda f: f['format_id'] != format_limit, formats
))
if self.params.get('prefer_free_formats'):
def _free_formats_key(f):
try:
ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
except ValueError:
ext_ord = -1
# We only compare the extension if they have the same height and width
return (f.get('height'), f.get('width'), ext_ord)
formats = sorted(formats, key=_free_formats_key)
# TODO Central sorting goes here
if formats[0] is not info_dict:
# only set the 'formats' field if the original info_dict lists them;
# otherwise we end up with a circular reference: the first (and only)
# element in the 'formats' field in info_dict would be info_dict itself,
# which can't be exported to JSON
info_dict['formats'] = formats
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
req_format = self.params.get('format', 'best')
if req_format is None:
@@ -748,6 +763,8 @@ class YoutubeDL(object):
self.to_stdout(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
self.to_stdout(filename)
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
if self.params.get('forceformat', False):
self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
@@ -770,28 +787,34 @@ class YoutubeDL(object):
return
if self.params.get('writedescription', False):
try:
descfn = filename + u'.description'
self.report_writedescription(descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning(u'There\'s no description to write.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
return
descfn = filename + u'.description'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen(u'[info] Video description is already present')
else:
try:
self.to_screen(u'[info] Writing video description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning(u'There\'s no description to write.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
return
if self.params.get('writeannotations', False):
try:
annofn = filename + u'.annotations.xml'
self.report_writeannotations(annofn)
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning(u'There are no annotations to write.')
except (OSError, IOError):
self.report_error(u'Cannot write annotations file: ' + annofn)
return
annofn = filename + u'.annotations.xml'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen(u'[info] Video annotations are already present')
else:
try:
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning(u'There are no annotations to write.')
except (OSError, IOError):
self.report_error(u'Cannot write annotations file: ' + annofn)
return
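# Editor's sketch: the description and annotations branches above share the
# same check-then-write shape. A hypothetical helper (name and signature
# invented here, not part of the diff) would make the pattern explicit:
#
#     def _write_info_file(self, info_dict, key, path, kind):
#         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(path)):
#             self.to_screen(u'[info] %s is already present' % kind)
#             return
#         try:
#             self.to_screen(u'[info] Writing %s to: %s' % (kind, path))
#             with io.open(encodeFilename(path), 'w', encoding='utf-8') as outf:
#                 outf.write(info_dict[key])
#         except (KeyError, TypeError):
#             self.report_warning(u'There is no %s to write.' % kind)
#         except (OSError, IOError):
#             self.report_error(u'Cannot write %s file %s' % (kind, path))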
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -807,45 +830,57 @@ class YoutubeDL(object):
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen(u'[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + sub_filename)
return
if self.params.get('writeinfojson', False):
infofn = os.path.splitext(filename)[0] + u'.info.json'
self.report_writeinfojson(infofn)
try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
write_json_file(json_info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
return
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen(u'[info] Video description metadata is already present')
else:
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
try:
write_json_file(info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
return
if self.params.get('writethumbnail', False):
if info_dict.get('thumbnail') is not None:
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
self.to_screen(u'[%s] %s: Thumbnail is already present' %
(info_dict['extractor'], info_dict['id']))
else:
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
success = True
else:
try:
success = self.fd._do_download(filename, info_dict)
fd = get_suitable_downloader(info_dict)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
success = fd.download(filename, info_dict)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return
@@ -963,13 +998,15 @@ class YoutubeDL(object):
def format_resolution(format, default='unknown'):
if format.get('vcodec') == 'none':
return 'audio only'
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('resolution') is not None:
return format['resolution']
if format.get('height') is not None:
if format.get('width') is not None:
res = u'%sx%s' % (format['width'], format['height'])
else:
res = u'%sp' % format['height']
elif format.get('width') is not None:
res = u'?x%d' % format['width']
else:
res = default
return res
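# Editor's note, illustrative inputs/outputs derived from the branches above:
#   {'vcodec': 'none'}             -> 'audio only'
#   {'resolution': '1080p'}        -> '1080p'      (explicit override)
#   {'width': 1280, 'height': 720} -> '1280x720'
#   {'height': 480}                -> '480p'
#   {'width': 640}                 -> '?x640'
#   {}                             -> 'unknown'    (the default)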
@@ -977,15 +1014,19 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def format_note(fdict):
res = u''
if fdict.get('ext') in ['f4f', 'f4m']:
res += u'(unsupported) '
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
if fdict.get('tbr') is not None:
res += u'%4dk ' % fdict['tbr']
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
res += u'%-5s@' % fdict['vcodec']
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += u'video@'
if fdict.get('vbr') is not None:
res += u'@%4dk' % fdict['vbr']
res += u'%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += u', '
@@ -1020,7 +1061,7 @@ class YoutubeDL(object):
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))

View File

@@ -37,12 +37,14 @@ __authors__ = (
'Anton Larionov',
'Takuya Tsuchida',
'Sergey M.',
'Michael Orlitzky',
)
__license__ = 'Public Domain'
import codecs
import getpass
import locale
import optparse
import os
import random
@@ -55,13 +57,13 @@ from .utils import (
compat_print,
DateRange,
decodeOption,
determine_ext,
get_term_width,
DownloadError,
get_cachedir,
MaxDownloadsReached,
preferredencoding,
SameFileError,
setproctitle,
std_headers,
write_string,
)
@@ -193,13 +195,17 @@ def parseOpts(overrideArguments=None):
type=float, default=None, help=optparse.SUPPRESS_HELP)
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
selection.add_option('--playlist-start',
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
selection.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
help='playlist video to start at (default is %default)')
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='playlist video to end at (default is last)')
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER',
@@ -210,6 +216,14 @@ def parseOpts(overrideArguments=None):
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option(
'--min-views', metavar='COUNT', dest='min_views',
default=None, type=int,
help="Do not download any videos with less than COUNT views",)
selection.add_option(
'--max-views', metavar='COUNT', dest='max_views',
default=None, type=int,
help="Do not download any videos with more than COUNT views",)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
help='download only videos suitable for the given age',
@@ -290,6 +304,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-description',
action='store_true', dest='getdescription',
help='simulate, quiet but print video description', default=False)
verbosity.add_option('--get-duration',
action='store_true', dest='getduration',
help='simulate, quiet but print video length', default=False)
verbosity.add_option('--get-filename',
action='store_true', dest='getfilename',
help='simulate, quiet but print output filename', default=False)
@@ -457,15 +474,20 @@ def parseOpts(overrideArguments=None):
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
return parser, opts, args
def _real_main(argv=None):
# Compatibility fixes for Windows
if sys.platform == 'win32':
# https://github.com/rg3/youtube-dl/issues/820
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
setproctitle(u'youtube-dl')
parser, opts, args = parseOpts(argv)
# Set user agent
@@ -505,7 +527,6 @@ def _real_main(argv=None):
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)]
all_urls = [url for url in all_urls if url not in matchedUrls]
for mu in matchedUrls:
compat_print(u' ' + mu)
sys.exit(0)
@@ -560,18 +581,10 @@ def _real_main(argv=None):
if numeric_buffersize is None:
parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize
try:
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
raise ValueError(u'Playlist end must be greater than playlist start')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error(u'invalid audio format specified')
@@ -604,27 +617,30 @@ def _real_main(argv=None):
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(title)s-%(id)s.%(ext)s')
if '%(ext)s' not in outtmpl and opts.extractaudio:
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
u' template'.format(outtmpl))
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
ydl_opts = {
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'quiet': (opts.quiet or any_printing),
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forceid': opts.getid,
'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription,
'forceduration': opts.getduration,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'skip_download': (opts.skip_download or opts.simulate or any_printing),
'format': opts.format,
'format_limit': opts.format_limit,
'listformats': opts.listformats,
@@ -668,6 +684,8 @@ def _real_main(argv=None):
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'min_views': opts.min_views,
'max_views': opts.max_views,
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,

View File

@@ -1,4 +1,4 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):
return decrypted_data
def aes_cbc_decrypt(data, key, iv):
"""
Decrypt with aes in CBC mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
decrypted_data = []
previous_cipher_block = iv
for i in range(block_count):
block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
block += [0]*(BLOCK_SIZE_BYTES - len(block))
decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
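# Editor's sketch (not part of this diff): a round-trip check for
# aes_cbc_decrypt. The key and message are made up; key_expansion,
# aes_encrypt and xor are this module's own functions.
def _cbc_selftest():
    key = [0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
           0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c]
    iv = [0] * 16
    plaintext = [ord(c) for c in 'sixteen byte msg']
    # CBC-encrypt a single block by hand: xor with the IV, then encrypt it
    ciphertext = aes_encrypt(xor(plaintext, iv), key_expansion(key))
    assert aes_cbc_decrypt(ciphertext, key, iv) == plaintext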
def key_expansion(data):
"""
Generate key schedule
@@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
@@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
return data
def aes_decrypt(data, expanded_key):
"""
Decrypt one block with aes
@param {int[]} data 16-Byte cipher
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte state
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
for i in range(rounds, 0, -1):
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
if i != rounds:
data = mix_columns_inv(data)
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
@@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1),
(1,2,3,1),
(1,1,2,3),
(3,1,1,2))
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
(0x1,0x2,0x3,0x1),
(0x1,0x1,0x2,0x3),
(0x3,0x1,0x1,0x2))
MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
(0x9,0xE,0xB,0xD),
(0xD,0x9,0xE,0xB),
(0xB,0xD,0x9,0xE))
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data):
return [SBOX[x] for x in data]
def sub_bytes_inv(data):
return [SBOX_INV[x] for x in data]
def rotate(data):
return data[1:] + [data[0]]
@@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
def mix_column(data):
def rijndael_mul(a, b):
if(a==0 or b==0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
def mix_column(data, matrix):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
addend = data[column]
if MIX_COLUMN_MATRIX[row][column] in (2,3):
addend <<= 1
if addend > 0xff:
addend &= 0xff
addend ^= 0x1b
if MIX_COLUMN_MATRIX[row][column] == 3:
addend ^= data[column]
mixed ^= addend & 0xff
# in GF(2^8), xor plays the role of both addition and subtraction
mixed ^= rijndael_mul(data[column], matrix[row][column])
data_mixed.append(mixed)
return data_mixed
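# Editor's note, a worked example of rijndael_mul (GF(2^8) multiplication
# via the exp/log tables, replacing the removed shift-and-xor branch):
#   rijndael_mul(0x02, 0x03)
#     = RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[0x02] + RIJNDAEL_LOG_TABLE[0x03]) % 0xFF]
#     = RIJNDAEL_EXP_TABLE[(0x19 + 0x01) % 0xFF]
#     = RIJNDAEL_EXP_TABLE[0x1A]
#     = 0x06
# which matches x * (x + 1) = x^2 + x = 0b110 in GF(2^8).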
def mix_columns(data):
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
data_mixed += mix_column(column)
data_mixed += mix_column(column, matrix)
return data_mixed
def mix_columns_inv(data):
return mix_columns(data, MIX_COLUMN_MATRIX_INV)
def shift_rows(data):
data_shifted = []
for column in range(4):
@@ -191,6 +292,13 @@ def shift_rows(data):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
def shift_rows_inv(data):
data_shifted = []
for column in range(4):
for row in range(4):
data_shifted.append( data[((column - row) & 0b11) * 4 + row] )
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):

View File

@@ -0,0 +1,23 @@
from .common import FileDownloader
from .hls import HlsFD
from .http import HttpFD
from .mplayer import MplayerFD
from .rtmp import RtmpFD
from ..utils import (
determine_ext,
)
def get_suitable_downloader(info_dict):
"""Get the downloader class that can handle the info dict."""
url = info_dict['url']
if url.startswith('rtmp'):
return RtmpFD
if determine_ext(url) == u'm3u8':
return HlsFD
if url.startswith('mms') or url.startswith('rtsp'):
return MplayerFD
else:
return HttpFD
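# Editor's note, dispatch examples (host names are placeholders):
#   get_suitable_downloader({'url': 'rtmp://host/live'})       -> RtmpFD
#   get_suitable_downloader({'url': 'http://host/list.m3u8'})  -> HlsFD
#   get_suitable_downloader({'url': 'mms://host/stream'})      -> MplayerFD
#   get_suitable_downloader({'url': 'http://host/video.mp4'})  -> HttpFD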

View File

@@ -0,0 +1,317 @@
import os
import re
import sys
import time
from ..utils import (
encodeFilename,
timeconvert,
format_bytes,
)
class FileDownloader(object):
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
Subclasses of this one must re-define the real_download method.
"""
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._progress_hooks = []
self.params = params
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
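# Editor's note, a worked example with made-up numbers: if the last read
# returned 65536 bytes in 1.0 s, then new_min = 32768, new_max = 131072 and
# rate = 65536, so the next block size is 65536 -- the buffer tracks the
# observed throughput, clamped to [bytes/2, bytes*2] and capped at 4 MB.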
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
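# Editor's note, illustrative values derived from the regex above:
#   parse_bytes('500')   -> 500
#   parse_bytes('10.5k') -> 10752    (10.5 * 1024)
#   parse_bytes('2M')    -> 2097152  (2 * 1024 ** 2)
#   parse_bytes('x10')   -> None     (no match)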
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, **kargs)
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None)
if rate_limit is None or byte_counter == 0:
return
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
if filename.endswith(u'.part'):
return filename[:-len(u'.part')]
return filename
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
self.report_error(u'unable to rename file: %s' % str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
return
if not os.path.isfile(encodeFilename(filename)):
return
timestr = last_modified_hdr
if timestr is None:
return
filetime = timeconvert(timestr)
if filetime is None:
return filetime
# Ignore obviously invalid dates
if filetime == 0:
return
try:
os.utime(filename, (time.time(), filetime))
except:
pass
return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self._report_progress_status(
(u'100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))),
is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def download(self, filename, info_dict):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
raise NotImplementedError(u'This method must be implemented by subclasses')
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disk
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)
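# Editor's sketch: a minimal hook consuming the dictionary described above
# (the hook name and output format are invented for illustration):
def example_progress_hook(status):
    if status['status'] == 'downloading' and status.get('total_bytes'):
        done = status.get('downloaded_bytes', 0)
        print('%s: %.1f%%' % (status['filename'],
                              done * 100.0 / status['total_bytes']))
    elif status['status'] == 'finished':
        print('%s: done' % status['filename'])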

View File

@@ -0,0 +1,44 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class HlsFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (program, fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False

View File

@@ -0,0 +1,186 @@
import os
import time
from .common import FileDownloader
from ..utils import (
compat_urllib_request,
compat_urllib_error,
ContentTooShortError,
encodeFilename,
sanitize_open,
format_bytes,
)
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range','bytes=0-10240')
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', False):
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
# Establish connection
try:
data = compat_urllib_request.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error(u'giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
after = time.time()
if len(data_block) == 0:
break
byte_counter += len(data_block)
# Open file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error(u'unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if stream is None:
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True

View File

@@ -0,0 +1,40 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class MplayerFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
try:
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
return False
# Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'mplayer exited with code %d' % retval)
return False

View File

@@ -0,0 +1,178 @@
import os
import re
import subprocess
import sys
import time
from .common import FileDownloader
from ..utils import (
encodeFilename,
format_bytes,
)
class RtmpFD(FileDownloader):
def real_download(self, filename, info_dict):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
url = info_dict['url']
player_url = info_dict.get('player_url', None)
page_url = info_dict.get('page_url', None)
play_path = info_dict.get('play_path', None)
tc_url = info_dict.get('tc_url', None)
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn', None)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
# Check for rtmpdump first
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', tc_url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
if conn:
basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'rtmpdump exited with code %d' % retval)
return False

View File

@@ -1,6 +1,8 @@
from .appletrailers import AppleTrailersIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import (
@@ -13,6 +15,7 @@ from .arte import (
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .breakcom import BreakIE
@@ -20,6 +23,8 @@ from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .channel9 import Channel9IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
@@ -28,6 +33,7 @@ from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
@@ -78,6 +84,10 @@ from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE
from .ivi import (
IviIE,
IviCompilationIE
)
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
@@ -87,6 +97,7 @@ from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
@ -111,9 +122,11 @@ from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE

View File

@@ -0,0 +1,31 @@
import re
from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
IE_NAME = u'AcademicEarth:Course'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
playlist_id = m.group('id')
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
description = self._html_search_regex(
r'<p class="excerpt">(.*?)</p>',
webpage, u'description', fatal=False)
urls = re.findall(
r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
webpage)
entries = [self.url_result(u) for u in urls]
return {
'_type': 'playlist',
'id': playlist_id,
'title': title,
'description': description,
'entries': entries,
}

View File

@ -0,0 +1,56 @@
#coding: utf-8
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
HEADRequest,
)
class AparatIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
u'url': u'http://www.aparat.com/v/wP8On',
u'file': u'wP8On.mp4',
u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
u'info_dict': {
u"title": u"تیم گلکسی 11 - زومیت",
},
#u'skip': u'Extremely unreliable',
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
video_id + u'/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
req, video_id, note=u'Testing video URL %d' % i, errnote=False)
if res:
break
else:
raise ExtractorError(u'No working video URLs found')
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
thumbnail = self._search_regex(
r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
}

View File

@@ -110,7 +110,8 @@ class AppleTrailersIE(InfoExtractor):
'width': format['width'],
'height': int(format['height']),
})
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
self._sort_formats(formats)
playlist.append({
'_type': 'video',

View File

@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
_TEST = {
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
u'file': u'049881-009_PLUS7-D.flv',
u'info_dict': {
u'title': u'Mit offenen Karten',
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
u'upload_date': u'20131207',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
if lang == 'folge':

View File

@@ -10,14 +10,14 @@ from ..utils import (
class BandcampIE(InfoExtractor):
IE_NAME = u'Bandcamp'
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
u'md5': u'c557841d5e50261777a6585648adf439',
u'info_dict': {
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
u"duration": 10,
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
}]
@@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor):
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
d = data[0]
duration = int(round(d['duration']))
formats = []
for format_id, format_url in d['file'].items():
ext, _, abr_str = format_id.partition('-')
formats.append({
'format_id': format_id,
'url': format_url,
'ext': format_id.partition('-')[0],
'vcodec': 'none',
'acodec': format_id.partition('-')[0],
'abr': int(format_id.partition('-')[2]),
})
self._sort_formats(formats)
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
'duration': duration,
}
else:
raise ExtractorError(u'No free songs found')
else:
raise ExtractorError(u'No free songs found')
download_link = m_download.group(1)
id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
webpage, re.MULTILINE|re.DOTALL).group('id')
video_id = re.search(
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
webpage, re.MULTILINE | re.DOTALL).group('id')
download_webpage = self._download_webpage(download_link, id,
download_webpage = self._download_webpage(download_link, video_id,
'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
info = re.search(r'items: (.*?),$',
@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor):
m_url = re.match(re_url, initial_url)
# We build the url we will use to get the final track url
# This url is built by Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
track_info = {'id':id,
'title' : info[u'title'],
'ext' : 'mp3',
'url' : final_url,
'thumbnail' : info[u'thumb_url'],
'uploader' : info[u'artist']
}
return [track_info]
return {
'id': video_id,
'title': info[u'title'],
'ext': 'mp3',
'vcodec': 'none',
'url': final_url,
'thumbnail': info[u'thumb_url'],
'uploader': info[u'artist'],
}
class BandcampAlbumIE(InfoExtractor):
@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor):
webpage = self._download_webpage(url, title)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError(u'The page doesn\'t contain any track')
raise ExtractorError(u'The page doesn\'t contain any tracks')
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths]
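
In the rewritten track code above, every key of d['file'] carries both the container and the audio bitrate, and str.partition('-') splits them apart. A quick illustration, assuming Bandcamp format ids of the form 'mp3-128':

# Assumed format id layout: '<ext>-<abr>'
format_id = 'mp3-128'
ext, _, abr_str = format_id.partition('-')
print(ext, int(abr_str))  # mp3 128 -> feeds 'ext', 'acodec' and 'abr'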

View File

@ -0,0 +1,93 @@
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class BlinkxIE(InfoExtractor):
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_IE_NAME = u'blinkx'
_TEST = {
u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
u'file': u'8aQUy7GV.mp4',
u'md5': u'2e9a07364af40163a908edbf10bb2492',
u'info_dict': {
u"title": u"Police Car Rolls Away",
u"uploader": u"stupidvideos.com",
u"upload_date": u"20131215",
u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
u"duration": 14.886,
u"thumbnails": [{
"width": 100,
"height": 76,
"url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
}],
},
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
display_id = video_id[:8]
api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
u'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
upload_date = dt.strftime('%Y%m%d')
duration = None
thumbnails = []
formats = []
for m in data['media']:
if m['type'] == 'jpg':
thumbnails.append({
'url': m['link'],
'width': int(m['w']),
'height': int(m['h']),
})
elif m['type'] == 'original':
duration = m['d']
elif m['type'] == 'youtube':
yt_id = m['link']
self.to_screen(u'Youtube video detected: %s' % yt_id)
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
format_id = (u'%s-%sk-%s' %
(vcodec,
tbr,
m['w']))
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
'abr': int(m['abr']) // 1000,
'vbr': int(m['vbr']) // 1000,
'tbr': tbr,
'width': int(m['w']),
'height': int(m['h']),
})
self._sort_formats(formats)
return {
'id': display_id,
'fullid': video_id,
'title': data['title'],
'formats': formats,
'uploader': data['channel_name'],
'upload_date': upload_date,
'description': data.get('description'),
'thumbnails': thumbnails,
'duration': duration,
}
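
In the flv/mp4 branch above, the API reports per-stream bitrates in bit/s; the code reduces them to kbit/s and bakes vcodec, total bitrate and width into the format id. Tracing a hypothetical media entry:

# Hypothetical API media entry; 'ff' codec prefixes are stripped via remove_start
m = {'vcodec': 'ffh264', 'acodec': 'ffaac',
     'vbr': 500000, 'abr': 112000, 'w': 640}
vcodec = m['vcodec'][2:]  # what remove_start(m['vcodec'], 'ff') yields: 'h264'
tbr = (int(m['vbr']) + int(m['abr'])) // 1000  # 612
format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])  # 'h264-612k-640'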

View File

@ -1,6 +1,5 @@
import datetime
import json
import os
import re
import socket
@ -21,7 +20,7 @@ from ..utils import (
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_VALID_URL = r'^(?:https?://)?(?:www\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_URL_EXT = r'^.*\.([a-z0-9]+)$'
IE_NAME = u'blip.tv'
_TEST = {
@ -58,7 +57,6 @@ class BlipTVIE(InfoExtractor):
url = 'http://blip.tv/a/a-' + file_id
return self._real_extract(url)
if '?' in url:
cchar = '&'
else:
@ -67,67 +65,49 @@ class BlipTVIE(InfoExtractor):
request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
'id': title,
'url': url,
'uploader': None,
'upload_date': None,
'title': title,
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
try:
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
return {
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'ext': ext,
'urlhandle': urlh
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
try:
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
info = {
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'ext': ext,
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
except (ValueError,KeyError) as err:
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
return [info]
except (ValueError, KeyError) as err:
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
class BlipTVUserIE(InfoExtractor):

View File

@ -26,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
u'md5': u'5423e113865d26e40624dce2e4b45d95',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',

View File

@ -0,0 +1,30 @@
import re
from .common import InfoExtractor
class CBSIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
_TEST = {
u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
u'file': u'4JUVEwq3wUT7.flv',
u'info_dict': {
u'title': u'Connect Chat feat. Garth Brooks',
u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
u'duration': 1495,
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",
webpage, u'real video ID')
return self.url_result(u'theplatform:%s' % real_id)

View File

@ -0,0 +1,271 @@
# encoding: utf-8
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class Channel9IE(InfoExtractor):
'''
Common extractor for channel9.msdn.com.
The type of the provided URL (video or playlist) is determined from the
Search.PageType meta tag in the page HTML rather than from the URL itself,
since that is not always possible.
'''
IE_DESC = u'Channel 9'
IE_NAME = u'channel9'
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_TESTS = [
{
u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
u'info_dict': {
u'title': u'Developer Kick-Off Session: Stuff We Love',
u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
u'duration': 4576,
u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
u'session_code': u'KOS002',
u'session_day': u'Day 1',
u'session_room': u'Arena 1A',
u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
},
},
{
u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
u'info_dict': {
u'title': u'Self-service BI with Power BI - nuclear testing',
u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
u'duration': 1540,
u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
u'authors': [ u'Mike Wilmot' ],
},
}
]
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
# Sorted by quality
_known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
def _restore_bytes(self, formatted_size):
if not formatted_size:
return 0
m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
if not m:
return 0
units = m.group('units')
try:
exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
except ValueError:
return 0
size = float(m.group('size'))
return int(size * (1024 ** exponent))
def _formats_from_html(self, html):
FORMAT_REGEX = r'''
(?x)
<a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
<span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
(?:<div\s+class="popup\s+rounded">\s*
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
</div>)? # File size part may be missing
'''
# Extract known formats
formats = [{
'url': x.group('url'),
'format_id': x.group('quality'),
'format_note': x.group('note'),
'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
'preference': self._known_formats.index(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
self._sort_formats(formats)
return formats
def _extract_title(self, html):
title = self._html_search_meta(u'title', html, u'title')
if title is None:
title = self._og_search_title(html)
TITLE_SUFFIX = u' (Channel 9)'
if title is not None and title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
return title
def _extract_description(self, html):
DESCRIPTION_REGEX = r'''(?sx)
<div\s+class="entry-content">\s*
<div\s+id="entry-body">\s*
(?P<description>.+?)\s*
</div>\s*
</div>
'''
m = re.search(DESCRIPTION_REGEX, html)
if m is not None:
return m.group('description')
return self._html_search_meta(u'description', html, u'description')
def _extract_duration(self, html):
m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
def _extract_slides(self, html):
m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
return m.group('slidesurl') if m is not None else None
def _extract_zip(self, html):
m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
return m.group('zipurl') if m is not None else None
def _extract_avg_rating(self, html):
m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
return float(m.group('avgrating')) if m is not None else 0
def _extract_rating_count(self, html):
m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
def _extract_view_count(self, html):
m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
def _extract_comment_count(self, html):
m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
def _fix_count(self, count):
return int(str(count).replace(',', '')) if count is not None else None
def _extract_authors(self, html):
m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
if m is None:
return None
return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
def _extract_session_code(self, html):
m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
return m.group('code') if m is not None else None
def _extract_session_day(self, html):
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
return m.group('day') if m is not None else None
def _extract_session_room(self, html):
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
return m.group('room') if m is not None else None
def _extract_session_speakers(self, html):
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
def _extract_content(self, html, content_path):
# Look for downloadable content
formats = self._formats_from_html(html)
slides = self._extract_slides(html)
zip_ = self._extract_zip(html)
# Nothing to download
if len(formats) == 0 and slides is None and zip_ is None:
self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
return
# Extract meta
title = self._extract_title(html)
description = self._extract_description(html)
thumbnail = self._og_search_thumbnail(html)
duration = self._extract_duration(html)
avg_rating = self._extract_avg_rating(html)
rating_count = self._extract_rating_count(html)
view_count = self._extract_view_count(html)
comment_count = self._extract_comment_count(html)
common = {'_type': 'video',
'id': content_path,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'avg_rating': avg_rating,
'rating_count': rating_count,
'view_count': view_count,
'comment_count': comment_count,
}
result = []
if slides is not None:
d = common.copy()
d.update({ 'title': title + '-Slides', 'url': slides })
result.append(d)
if zip_ is not None:
d = common.copy()
d.update({ 'title': title + '-Zip', 'url': zip_ })
result.append(d)
if len(formats) > 0:
d = common.copy()
d.update({ 'title': title, 'formats': formats })
result.append(d)
return result
def _extract_entry_item(self, html, content_path):
contents = self._extract_content(html, content_path)
if contents is None:
return contents
authors = self._extract_authors(html)
for content in contents:
content['authors'] = authors
return contents
def _extract_session(self, html, content_path):
contents = self._extract_content(html, content_path)
if contents is None:
return contents
session_meta = {'session_code': self._extract_session_code(html),
'session_day': self._extract_session_day(html),
'session_room': self._extract_session_room(html),
'session_speakers': self._extract_session_speakers(html),
}
for content in contents:
content.update(session_meta)
return contents
def _extract_list(self, content_path):
rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
entries = [self.url_result(session_url.text, 'Channel9')
for session_url in rss.findall('./channel/item/link')]
title_text = rss.find('./channel/title').text
return self.playlist_result(entries, content_path, title_text)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
content_path = mobj.group('contentpath')
webpage = self._download_webpage(url, content_path, u'Downloading web page')
page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
if page_type_m is None:
raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
page_type = page_type_m.group('pagetype')
if page_type == 'List': # List page, may contain list of 'item'-like objects
return self._extract_list(content_path)
elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
return self._extract_entry_item(webpage, content_path)
elif page_type == 'Session': # Event session page, may contain downloadable content
return self._extract_session(webpage, content_path)
else:
raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
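
Two details above deserve a worked example: _restore_bytes turns the approximate human-readable size back into a byte count, and preference is simply the index into _known_formats, so higher-quality downloads sort later. With assumed inputs:

# '1.2 GB' -> units index 3 in [B, KB, MB, GB, ...]
size = int(1.2 * (1024 ** 3))  # 1288490188 bytes (approximate by design)
known = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4',
         'High Quality WMV', 'High Quality MP4']
preference = known.index('High Quality MP4')  # 5 -> best quality, sorts last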

View File

@ -1,7 +1,10 @@
import re
from .common import InfoExtractor
from ..utils import determine_ext
from ..utils import (
int_or_none,
parse_duration,
)
class CNNIE(InfoExtractor):
@ -15,6 +18,8 @@ class CNNIE(InfoExtractor):
u'info_dict': {
u'title': u'Nadal wins 8th French Open title',
u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
u'duration': 135,
u'upload_date': u'20130609',
},
},
{
@ -35,22 +40,58 @@ class CNNIE(InfoExtractor):
info = self._download_xml(info_url, page_title)
formats = []
rex = re.compile(r'''(?x)
(?P<width>[0-9]+)x(?P<height>[0-9]+)
(?:_(?P<bitrate>[0-9]+)k)?
''')
for f in info.findall('files/file'):
mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
if mf is not None:
formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
formats = sorted(formats)
(_,_,_, video_path) = formats[-1]
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
fdct = {
'format_id': f.attrib['bitrate'],
'url': video_url,
}
mf = rex.match(f.attrib['bitrate'])
if mf:
fdct['width'] = int(mf.group('width'))
fdct['height'] = int(mf.group('height'))
fdct['tbr'] = int_or_none(mf.group('bitrate'))
else:
mf = rex.search(f.text)
if mf:
fdct['width'] = int(mf.group('width'))
fdct['height'] = int(mf.group('height'))
fdct['tbr'] = int_or_none(mf.group('bitrate'))
else:
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
if mi:
if mi.group(1) == 'audio':
fdct['vcodec'] = 'none'
fdct['ext'] = 'm4a'
else:
fdct['tbr'] = int(mi.group(1))
formats.append(fdct)
self._sort_formats(formats)
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
return {'id': info.attrib['id'],
'title': info.find('headline').text,
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': thumbnails[-1][1],
'thumbnails': thumbs_dict,
'description': info.find('description').text,
}
metas_el = info.find('metas')
upload_date = (
metas_el.attrib.get('version') if metas_el is not None else None)
duration_el = info.find('length')
duration = parse_duration(duration_el.text)
return {
'id': info.attrib['id'],
'title': info.find('headline').text,
'formats': formats,
'thumbnail': thumbnails[-1][1],
'thumbnails': thumbs_dict,
'description': info.find('description').text,
'duration': duration,
'upload_date': upload_date,
}
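
The new CNN code replaces the ad-hoc tuple sort with named fields parsed out of the bitrate attribute. Assuming attribute values shaped like '416x234_550k' (the trailing bitrate part is optional in the regex):

import re
rex = re.compile(r'''(?x)
    (?P<width>[0-9]+)x(?P<height>[0-9]+)
    (?:_(?P<bitrate>[0-9]+)k)?
''')
m = rex.match('416x234_550k')  # hypothetical attribute value
print(m.group('width'), m.group('height'), m.group('bitrate'))  # 416 234 550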

View File

@ -9,6 +9,7 @@ import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_str,
clean_html,
@ -18,6 +19,7 @@ from ..utils import (
sanitize_filename,
unescapeHTML,
)
_NO_DEFAULT = object()
class InfoExtractor(object):
@ -34,15 +36,49 @@ class InfoExtractor(object):
The dictionaries must include the following fields:
id: Video identifier.
url: Final video URL.
title: Video title, unescaped.
ext: Video filename extension.
Instead of url and ext, formats can also be specified.
Additionally, it must contain either a formats entry or a url one:
formats: A list of dictionaries for each format available, ordered
from worst to best quality.
Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height,
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19").
Technically optional, but strongly recommended.
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* resolution Textual description of width and height
* tbr Average bitrate of audio and video in KBit/s
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
"http", "https", "rtsp", "rtmp" or so.
* preference Order number of this format. If this field is
present, the formats get sorted by this field.
-1 for default (order by other properties),
-2 or smaller for less than default.
url: Final video URL.
ext: Video filename extension.
format: The video format, defaults to ext (used for --get-format)
player_url: SWF Player URL (used for rtmpdump).
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image.
@ -51,35 +87,14 @@ class InfoExtractor(object):
upload_date: Video upload date (YYYYMMDD).
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height,
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
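
Put together, the reshuffled docstring says an extractor must now return either a plain url/ext pair or a formats list. A minimal info_dict following the new layout (all values hypothetical):

info_dict = {
    'id': '42',
    'title': u'Some video',
    'formats': [  # ordered worst to best, or handed to _sort_formats
        {'format_id': 'sd', 'url': 'http://example.com/sd.mp4',
         'width': 640, 'height': 360},
        {'format_id': 'hd', 'url': 'http://example.com/hd.mp4',
         'width': 1280, 'height': 720},
    ],
}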
@ -166,6 +181,8 @@ class InfoExtractor(object):
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is False:
return False
if errnote is None:
errnote = u'Unable to download webpage'
errmsg = u'%s: %s' % (errnote, compat_str(err))
@ -238,6 +255,11 @@ class InfoExtractor(object):
xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def report_warning(self, msg, video_id=None):
idstr = u'' if video_id is None else u'%s: ' % video_id
self._downloader.report_warning(
u'[%s] %s%s' % (self.IE_NAME, idstr, msg))
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@ -259,7 +281,8 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
def url_result(self, url, ie=None, video_id=None):
@staticmethod
def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
@ -268,7 +291,8 @@ class InfoExtractor(object):
if video_id is not None:
video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
@ -278,7 +302,7 @@ class InfoExtractor(object):
video_info['title'] = playlist_title
return video_info
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@ -292,7 +316,7 @@ class InfoExtractor(object):
mobj = re.search(p, string, flags)
if mobj: break
if sys.stderr.isatty() and os.name != 'nt':
if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name
else:
_name = name
@ -300,7 +324,7 @@ class InfoExtractor(object):
if mobj:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
elif default is not None:
elif default is not _NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError(u'Unable to extract %s' % _name)
@ -309,7 +333,7 @@ class InfoExtractor(object):
u'please report this issue on http://yt-dl.org/bug' % _name)
return None
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
@ -418,6 +442,56 @@ class InfoExtractor(object):
}
return RATING_TABLE.get(rating.lower(), None)
def _sort_formats(self, formats):
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
if not f.get('ext') and 'url' in f:
f['ext'] = determine_ext(f['url'])
preference = f.get('preference')
if preference is None:
proto = f.get('protocol')
if proto is None:
proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
preference = 0 if proto in ['http', 'https'] else -0.1
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
preference -= 0.5
if f.get('vcodec') == 'none': # audio only
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
else:
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
ext_preference = 0
try:
audio_ext_preference = ORDER.index(f['ext'])
except ValueError:
audio_ext_preference = -1
else:
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'flv', u'mp4', u'webm']
else:
ORDER = [u'webm', u'flv', u'mp4']
try:
ext_preference = ORDER.index(f['ext'])
except ValueError:
ext_preference = -1
audio_ext_preference = 0
return (
preference,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_preference,
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
class SearchInfoExtractor(InfoExtractor):
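
_formats_key reduces every format to one comparison tuple, so a single stable sort orders by protocol preference first, then height, width, extension rank, bitrates, filesize and finally format_id. Tracing two hypothetical formats:

# Both are plain http mp4, so preference (0) and ext rank tie;
# the height component then decides the order:
f_sd = {'protocol': 'http', 'ext': 'mp4', 'height': 360}
f_hd = {'protocol': 'http', 'ext': 'mp4', 'height': 720}
# key(f_sd) < key(f_hd) -> after formats.sort(key=_formats_key)
# f_hd ends up last, i.e. is treated as the best quality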

View File

@ -0,0 +1,171 @@
# encoding: utf-8
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
clean_html,
)
from ..aes import (
aes_cbc_decrypt,
inc,
)
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
u'file': u'645513.flv',
#u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
u'info_dict': {
u'title': u'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
u'upload_date': u'20131013',
},
u'params': {
# rtmp
u'skip_download': True,
},
}]
_FORMAT_IDS = {
u'360': (u'60', u'106'),
u'480': (u'61', u'106'),
u'720': (u'62', u'106'),
u'1080': (u'80', u'108'),
}
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
id = int(id)
def obfuscate_key_aux(count, modulo, start):
output = list(start)
for _ in range(count):
output.append(output[-1] + output[-2])
# cut off start values
output = output[2:]
output = list(map(lambda x: x % modulo + 33, output))
return output
def obfuscate_key(key):
num1 = int(floor(pow(2, 25) * sqrt(6.9)))
num2 = (num1 ^ key) << 5
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
key = obfuscate_key(id)
class Counter:
__value = iv
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
def _convert_subtitles_to_srt(self, subtitles):
i=1
output = u''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
start = start.replace(u'.', u',')
end = end.replace(u'.', u',')
text = clean_html(text)
text = text.replace(u'\\N', u'\n')
if not text:
continue
output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://www.' + mobj.group('url')
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
if note_m:
raise ExtractorError(note_m)
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
video_title = re.sub(r' {2,}', u' ', video_title)
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
if not video_description:
video_description = None
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt+u'p'
streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
formats.append({
u'url': video_url,
u'play_path': video_play_path,
u'ext': 'flv',
u'format': video_format,
u'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note=u'Downloading subtitles for '+sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
u'id': video_id,
u'title': video_title,
u'description': video_description,
u'thumbnail': video_thumbnail,
u'uploader': video_uploader,
u'upload_date': video_upload_date,
u'subtitles': subtitles,
u'formats': formats,
}
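
_convert_subtitles_to_srt above only has to renumber the cues, swap the decimal point for a comma and unescape \N line breaks. For one hypothetical decrypted event:

# <event start="0:00:01.33" end="0:00:03.10" text="Hello\NWorld">
# becomes the SRT cue:
#
# 1
# 0:00:01,33 --> 0:00:03,10
# Hello
# World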

View File

@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = u'dailymotion'
_FORMATS = [
@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_id = mobj.group('id')
url = 'http://www.dailymotion.com/video/%s' % video_id

View File

@ -9,7 +9,7 @@ from ..utils import (
class DaumIE(InfoExtractor):
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
IE_NAME = u'daum.net'
_TEST = {

View File

@ -4,7 +4,6 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
unified_strdate,
)
@ -52,18 +51,12 @@ class DreiSatIE(InfoExtractor):
'width': int(fe.find('./width').text),
'height': int(fe.find('./height').text),
'url': fe.find('./url').text,
'ext': determine_ext(fe.find('./url').text),
'filesize': int(fe.find('./filesize').text),
'video_bitrate': int(fe.find('./videoBitrate').text),
'3sat_qualityname': fe.find('./quality').text,
} for fe in format_els
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
def _sortkey(format):
qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname'])
prefer_http = 1 if 'rtmp' in format['url'] else 0
return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey)
self._sort_formats(formats)
return {
'_type': 'video',

View File

@ -17,7 +17,7 @@ from ..utils import (
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
u'file': u'120708114770723.mp4',
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
u'info_dict': {
u"duration": 279,
u"duration": 279,
u"title": u"PEOPLE ARE AWESOME 2013"
}
}

View File

@ -11,10 +11,14 @@ from ..utils import (
compat_urlparse,
ExtractorError,
HEADRequest,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
class GenericIE(InfoExtractor):
@ -71,6 +75,27 @@ class GenericIE(InfoExtractor):
u'skip_download': True,
},
},
# Direct link to a video
{
u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
u'file': u'trailer.mp4',
u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
u'info_dict': {
u'id': u'trailer',
u'title': u'trailer',
u'upload_date': u'20100513',
}
},
# ooyala video
{
u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
u'info_dict': {
u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
u'ext': u'mp4',
u'title': u'2cc213299525360.mov', #that's what we get
},
},
]
def report_download_webpage(self, video_id):
@ -83,23 +108,20 @@ class GenericIE(InfoExtractor):
"""Report information extraction."""
self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
def _test_redirect(self, url):
def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
class HeadRequest(compat_urllib_request.Request):
def get_method(self):
return "HEAD"
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
"""
Subclass the HTTPRedirectHandler to make it use our
HeadRequest also on the redirected URL
HEADRequest also on the redirected URL
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
if code in (301, 302, 303, 307):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
return HeadRequest(newurl,
return HEADRequest(newurl,
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
@ -128,32 +150,49 @@ class GenericIE(InfoExtractor):
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
response = opener.open(HeadRequest(url))
response = opener.open(HEADRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
new_url = response.geturl()
if url == new_url:
return False
self.report_following_redirect(new_url)
return new_url
return response
def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
video_id = os.path.splitext(url.split('/')[-1])[0]
try:
new_url = self._test_redirect(url)
if new_url:
return [self.url_result(new_url)]
response = self._send_head(url)
# Check for redirect
new_url = response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
return self.url_result(new_url)
# Check for direct link to a video
content_type = response.headers.get('Content-Type', '')
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
if m:
upload_date = response.headers.get('Last-Modified')
if upload_date:
upload_date = unified_strdate(upload_date)
return {
'id': video_id,
'title': os.path.splitext(url_basename(url))[0],
'formats': [{
'format_id': m.group('format_id'),
'url': url,
'vcodec': u'none' if m.group('type') == 'audio' else None
}],
'upload_date': upload_date,
}
except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so
pass
video_id = url.split('/')[-1]
try:
webpage = self._download_webpage(url, video_id)
except ValueError:
@ -183,7 +222,7 @@ class GenericIE(InfoExtractor):
self.to_screen(u'Brightcove video detected.')
return self.url_result(bc_url, 'Brightcove')
# Look for embedded Vimeo player
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
if mobj:
@ -191,9 +230,18 @@ class GenericIE(InfoExtractor):
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl, 'Vimeo')
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
# Look for embedded YouTube player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
matches = re.findall(r'''(?x)
(?:<iframe[^>]+?src=|embedSWF\(\s*)
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
(?:embed|v)/.+?)
\1''', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
@ -222,6 +270,18 @@ class GenericIE(InfoExtractor):
'id': video_id,
}
# Look for embedded blip.tv player
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
if mobj:
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
player_page = self._download_webpage(player_url, mobj.group(1))
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
if blip_video_id:
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
@ -229,6 +289,22 @@ class GenericIE(InfoExtractor):
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
# Look for embedded Vevo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group(1))
# Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Aparat')
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
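
The new direct-link branch classifies the HEAD response by its Content-Type; the lookahead admits application/* only for Ogg. A quick check of the regex against sample values:

import re
ct_re = r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$'
for ct in ('video/mp4', 'audio/mpeg', 'application/ogg', 'application/json'):
    m = re.match(ct_re, ct)
    print(ct, '->', m.group('format_id') if m else 'not a direct download')
# video/mp4 -> mp4
# audio/mpeg -> mpeg
# application/ogg -> ogg
# application/json -> not a direct download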

View File

@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
{
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
u'info_dict': {
u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
},
},

View File

@ -11,7 +11,7 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
@ -27,7 +27,7 @@ class ImdbIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url,video_id)
webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,

View File

@ -5,7 +5,6 @@ from ..utils import (
compat_urlparse,
compat_urllib_parse,
xpath_with_ns,
determine_ext,
)
@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor):
for content in item.findall(_bp('media:group/media:content')):
attr = content.attrib
f_url = attr['url']
width = int(attr['width'])
bitrate = int(attr['bitrate'])
format_id = '%d-%dk' % (width, bitrate)
formats.append({
'format_id': format_id,
'url': f_url,
'ext': determine_ext(f_url),
'width': int(attr['width']),
'bitrate': int(attr['bitrate']),
'width': width,
'tbr': bitrate,
})
formats = sorted(formats, key=lambda f: f['bitrate'])
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -0,0 +1,156 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
ExtractorError,
)
class IviIE(InfoExtractor):
IE_DESC = u'ivi.ru'
IE_NAME = u'ivi'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
u'url': u'http://www.ivi.ru/watch/53141',
u'file': u'53141.mp4',
u'md5': u'6ff5be2254e796ed346251d117196cf4',
u'info_dict': {
u'title': u'Иван Васильевич меняет профессию',
u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
u'duration': 5498,
u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
},
u'skip': u'Only works from Russia',
},
# Series episode
{
u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
u'file': u'74791.mp4',
u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
u'info_dict': {
u'title': u'Дежурный ангел - 1 серия',
u'duration': 2490,
u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
},
u'skip': u'Only works from Russia',
}
]
# Sorted by quality
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
# Sorted by size
_known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
def _extract_description(self, html):
m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
return m.group('description') if m is not None else None
def _extract_comment_count(self, html):
m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
return int(m.group('commentcount')) if m is not None else 0
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
api_url = 'http://api.digitalaccess.ru/api/json/'
data = {u'method': u'da.content.get',
u'params': [video_id, {u'site': u's183',
u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
u'contentid': video_id
}
]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
video_json = json.loads(video_json_page)
if u'error' in video_json:
error = video_json[u'error']
if error[u'origin'] == u'NoRedisValidData':
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
result = video_json[u'result']
formats = [{
'url': x[u'url'],
'format_id': x[u'content_format'],
'preference': self._known_formats.index(x[u'content_format']),
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
self._sort_formats(formats)
if not formats:
raise ExtractorError(u'No media links available for %s' % video_id)
duration = result[u'duration']
compilation = result[u'compilation']
title = result[u'title']
title = '%s - %s' % (compilation, title) if compilation is not None else title
previews = result[u'preview']
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
video_page = self._download_webpage(url, video_id, u'Downloading video page')
description = self._extract_description(video_page)
comment_count = self._extract_comment_count(video_page)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'comment_count': comment_count,
'formats': formats,
}
class IviCompilationIE(InfoExtractor):
IE_DESC = u'ivi.ru compilations'
IE_NAME = u'ivi:compilation'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
compilation_id = mobj.group('compilationid')
season_id = mobj.group('seasonid')
if season_id is not None: # Season link
season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
playlist_title = self._html_search_meta(u'title', season_page, u'title')
entries = self._extract_entries(season_page, compilation_id)
else: # Compilation link
compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
playlist_id = compilation_id
playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
if len(seasons) == 0: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
for season_id in seasons:
season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
compilation_id, u'Downloading season %s web page' % season_id)
entries.extend(self._extract_entries(season_page, compilation_id))
return self.playlist_result(entries, playlist_id, playlist_title)

View File

@ -0,0 +1,64 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
# No tests, MDR regularly deletes its videos
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('video_id')
domain = m.group('domain')
# determine title and media streams from webpage
html = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
xmlurl = self._search_regex(
r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
doc = self._download_xml(domain + xmlurl, video_id)
formats = []
for a in doc.findall('./assets/asset'):
url_el = a.find('.//progressiveDownloadUrl')
if url_el is None:
continue
abr = int(a.find('bitrateAudio').text) // 1000
media_type = a.find('mediaType').text
format = {
'abr': abr,
'filesize': int(a.find('fileSize').text),
'url': url_el.text,
}
vbr_el = a.find('bitrateVideo')
if vbr_el is None:
format.update({
'vcodec': 'none',
'format_id': u'%s-%d' % (media_type, abr),
})
else:
vbr = int(vbr_el.text) // 1000
format.update({
'vbr': vbr,
'width': int(a.find('frameWidth').text),
'height': int(a.find('frameHeight').text),
'format_id': u'%s-%d' % (media_type, vbr),
})
formats.append(format)
if not formats:
raise ExtractorError(u'Could not find any valid formats')
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
}

View File

@ -33,8 +33,18 @@ class TechTVMITIE(InfoExtractor):
raw_page, u'base url')
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
u'video formats')
formats = json.loads(formats_json)
formats = sorted(formats, key=lambda f: f['bitrate'])
formats_mit = json.loads(formats_json)
formats = [
{
'format_id': f['label'],
'url': base_url + f['url'].partition(':')[2],
'ext': f['url'].partition(':')[0],
'format': f['label'],
'width': f['width'],
'vbr': f['bitrate'],
}
for f in formats_mit
]
title = get_element_by_id('edit-title', clean_page)
description = clean_html(get_element_by_id('edit-description', clean_page))
@ -43,8 +53,7 @@ class TechTVMITIE(InfoExtractor):
return {'id': video_id,
'title': title,
'url': base_url + formats[-1]['url'].replace('mp4:', ''),
'ext': 'mp4',
'formats': formats,
'description': description,
'thumbnail': thumbnail,
}

View File

@ -93,7 +93,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_VALID_URL = r'''(?x)^https?://
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
@ -127,16 +129,17 @@ class MTVIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
uri = mobj.group('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1)
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
return self._get_videos_info(uri)

View File

@ -143,8 +143,10 @@ class MyVideoIE(InfoExtractor):
if mobj:
video_url = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self._downloader.report_warning(u'forcing RTMPT ...')
video_url = video_url.replace('rtmpe://', 'rtmpt://')
self.report_warning(
u'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
if not video_url:
# extract non rtmp videos

View File

@ -9,7 +9,7 @@ from ..utils import (
class NaverIE(InfoExtractor):
_VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
_TEST = {
u'url': u'http://tvcast.naver.com/v/81652',

View File

@ -1,6 +1,4 @@
import json
import re
import time
from .common import InfoExtractor
from ..utils import month_by_name

View File

@ -22,6 +22,11 @@ class OoyalaIE(InfoExtractor):
def _url_for_embed_code(embed_code):
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
@classmethod
def _build_url_result(cls, embed_code):
return cls.url_result(cls._url_for_embed_code(embed_code),
ie=cls.ie_key())
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',

View File

@ -0,0 +1,38 @@
import re
from .common import InfoExtractor
from ..utils import compat_urllib_parse
class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
_TEST = {
u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
u'file': u'1962.flv',
u'md5': u'35272469887dca97abd30abecc6cdf75',
u'info_dict': {
u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
video_title = mobj.group('video_title')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'&hd=(http.+?)&', webpage, u'video URL')
video_url = compat_urllib_parse.unquote(video_url)
age_limit = 18
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
'age_limit': age_limit,
}

View File

@ -0,0 +1,55 @@
# coding: utf-8
import re
from .common import InfoExtractor
class RadioFranceIE(InfoExtractor):
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = u'radiofrance'
_TEST = {
u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
u'file': u'one-one.ogg',
u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
u'info_dict': {
u"title": u"One to one",
u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
u"uploader": u"Thomas Hercouët",
},
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
description = self._html_search_regex(
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
webpage, u'description', fatal=False)
uploader = self._html_search_regex(
r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
webpage, u'uploader', fatal=False)
formats_str = self._html_search_regex(
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
webpage, u'audio URLs')
formats = [
{
'format_id': fm[0],
'url': fm[1],
'vcodec': 'none',
}
for fm in
re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
]
# No sorting, we don't know any more about these formats
return {
'id': video_id,
'title': title,
'formats': formats,
'description': description,
'uploader': uploader,
}
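The data-source attribute captured in formats_str is a small JavaScript object literal mapping a codec name to an audio URL; the findall() above flattens it into (format_id, url) pairs. Illustration with hypothetical URLs:

    import re

    formats_str = "{mp3: 'http://example.invalid/one-one.mp3', ogg: 'http://example.invalid/one-one.ogg'}"
    print(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
    # [('mp3', 'http://example.invalid/one-one.mp3'), ('ogg', 'http://example.invalid/one-one.ogg')]

Each key becomes a format_id ('mp3', 'ogg'); nothing more is known about these streams, hence the comment that no sorting is attempted.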

View File

@ -10,7 +10,7 @@ from ..utils import (
class RTLnowIE(InfoExtractor):
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://' + mobj.group('url')
video_page_url = u'http://' + mobj.group('base_url')
video_page_url = u'http://' + mobj.group('domain') + u'/'
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)

View File

@ -1,5 +1,6 @@
# encoding: utf-8
import os.path
import re
import json
import hashlib
@ -10,6 +11,7 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
url_basename,
)
@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', video_page,
u'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
u'Video %s may not be available; smotri said: %s' %
(video_id, warning))
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
video_title = video_url.rsplit('/', 1)[-1]
video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
if video_description.endswith(END_TEXT):
if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
if video_description.startswith(START_TEXT):
if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
if upload_date_str:
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
if upload_date_m else None
)
else:
video_upload_date = None
duration_str = self._search_meta(u'duration', video_page)
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
if duration_str:
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
if duration_m else None
)
else:
video_duration = None
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor):
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'video_duration': video_duration,
'duration': video_duration,
'view_count': video_view_count,
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
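Both new guards above make the page metadata optional. When the values are present, parsing is mechanical; a worked example with hypothetical values (Python 3 spelling of the imports):

    import os.path
    import re
    from urllib.parse import urlparse

    # Title fallback via url_basename(), which this compare adds to utils.py:
    video_url = 'http://cdn.smotri.example/video/a1b2c3.flv'  # hypothetical URL
    basename = urlparse(video_url).path.strip('/').split('/')[-1]
    print(os.path.splitext(basename)[0])  # a1b2c3

    # Duration meta values look like 'T00H01M30S':
    m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S',
                  'T00H01M30S')
    print(int(m.group('hours')) * 3600 + int(m.group('minutes')) * 60
          + int(m.group('seconds')))  # 90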

View File

@ -24,7 +24,7 @@ class SoundcloudIE(InfoExtractor):
"""
_VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)

View File

@ -51,9 +51,10 @@ class SpiegelIE(InfoExtractor):
# Blacklist type 6, it's extremely LQ and not available on the same server
if n.tag.startswith('type') and n.tag != 'type6'
]
formats.sort(key=lambda f: f['vbr'])
duration = float(idoc[0].findall('./duration')[0].text)
self._sort_formats(formats)
info = {
'id': video_id,
'title': video_title,

View File

@ -3,6 +3,7 @@ import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
xpath_with_ns,
)
@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == u'Geographic Restriction')
except StopIteration:
pass
else:
raise ExtractorError(error_msg, expected=True)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
@ -43,15 +55,21 @@ class ThePlatformIE(InfoExtractor):
formats = []
for f in switch.findall(_x('smil:video')):
attr = f.attrib
width = int(attr['width'])
height = int(attr['height'])
vbr = int(attr['system-bitrate']) // 1000
format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
'format_id': format_id,
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
'width': int(attr['width']),
'height': int(attr['height']),
'vbr': int(attr['system-bitrate']),
'width': width,
'height': height,
'vbr': vbr,
})
formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
self._sort_formats(formats)
return {
'id': video_id,
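The SMIL reports system-bitrate in bit/s; integer-dividing by 1000 yields a kbit/s vbr that is comparable across extractors and keeps the derived format_id compact:

    width, height, vbr = 1280, 720, 1500  # hypothetical values from the SMIL attributes
    print('%dx%d_%dk' % (width, height, vbr))  # 1280x720_1500k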

View File

@ -15,7 +15,7 @@ class Vbox7IE(InfoExtractor):
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}

View File

@ -15,7 +15,12 @@ class VevoIE(InfoExtractor):
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',

View File

@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
u'params': {
u'skip_download': True,
},
u'skip': u'Test file has been deleted.',
}
def _real_extract(self, url):

View File

@ -16,11 +16,20 @@ from ..utils import (
unsmuggle_url,
)
class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_VALID_URL = r'''(?x)
(?P<proto>https?://)?
(?:(?:www|(?P<player>player))\.)?
vimeo(?P<pro>pro)?\.com/
(?:.*?/)?
(?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
(?:videos?/)?
(?P<id>[0-9]+)
/?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [

View File

@ -45,7 +45,8 @@ class WistiaIE(InfoExtractor):
'filesize': a['size'],
'ext': a['ext'],
})
formats.sort(key=lambda a: a['filesize'])
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]

View File

@ -6,8 +6,8 @@ from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urlparse,
determine_ext,
clean_html,
int_or_none,
)
@ -68,9 +68,9 @@ class YahooIE(InfoExtractor):
formats = []
for s in info['streams']:
format_info = {
'width': s.get('width'),
'height': s.get('height'),
'bitrate': s.get('bitrate'),
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
'tbr': int_or_none(s.get('bitrate')),
}
host = s['host']
@ -84,10 +84,10 @@ class YahooIE(InfoExtractor):
else:
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
format_info['ext'] = determine_ext(format_url)
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -1,5 +1,4 @@
import json
import os
import re
import sys
@ -16,6 +15,7 @@ from ..aes import (
aes_decrypt_text
)
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = {
@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor):
u'file': u'505835.mp4',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
u"uploader": u"Ask Dan And Jennifer",
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
u"uploader": u"Ask Dan And Jennifer",
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
u"age_limit": 18,
}
@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor):
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
links.append(link)
if not links:
raise ExtractorError(u'ERROR: no known formats available for video')
formats = []
for link in links:
# A link looks like this:
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
# A path looks like this:
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
video_url = unescapeHTML(link)
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format_parts = path.split('/')[4].split('_')[:2]
# size = format[0]
# bitrate = format[1]
format = "-".join(format)
# title = u'%s-%s-%s' % (video_title, size, bitrate)
dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0]
resolution = format_parts[0]
height = int(resolution[:-len('p')])
bitrate = int(format_parts[1][:-len('k')])
format = u'-'.join(format_parts) + u'-' + dn
formats.append({
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'height': height,
'tbr': bitrate,
'resolution': resolution,
})
# Sort and remove doubles
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
for i in range(len(formats)-1,0,-1):
if formats[i]['format_id'] == formats[i-1]['format_id']:
del formats[i]
self._sort_formats(formats)
if not formats:
raise ExtractorError(u'No known formats available for video')
return {
'id': video_id,
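Tracing the sample path from the comments above shows how height, bitrate and the final format_id fall out of the fixed CDN layout:

    path = '/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4'
    format_parts = path.split('/')[4].split('_')[:2]  # ['480p', '370k']
    print(int(format_parts[0][:-len('p')]))  # 480 (height)
    print(int(format_parts[1][:-len('k')]))  # 370 (tbr, kbit/s)

    netloc = 'cdn1.download.youporn.phncdn.com'  # from the sample link above
    dn = netloc.partition('.')[0]
    print('-'.join(format_parts) + '-' + dn)  # 480p-370k-cdn1

Appending the CDN label keeps format_ids unique when the same resolution/bitrate pair is served from several hosts, which is why the old sort-and-deduplicate pass could be dropped in favour of _sort_formats.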

View File

@ -150,168 +150,68 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(?(1).+)? # if we found the ID, everything can follow
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '171', '139',
]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '102', '84', '101', '83', '100', '82',
# Dash video
'138', '248', '137', '247', '136', '246', '245',
'244', '135', '243', '134', '242', '133', '160',
# Dash audio
'172', '141', '171', '140', '139',
]
_video_formats_map = {
'flv': ['35', '34', '6', '5'],
'3gp': ['36', '17', '13'],
'mp4': ['38', '37', '22', '18'],
'webm': ['46', '45', '44', '43'],
}
_video_extensions = {
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
_formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240},
'6': {'ext': 'flv', 'width': 450, 'height': 270},
'13': {'ext': '3gp'},
'17': {'ext': '3gp', 'width': 176, 'height': 144},
'18': {'ext': 'mp4', 'width': 640, 'height': 360},
'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
'34': {'ext': 'flv', 'width': 640, 'height': 360},
'35': {'ext': 'flv', 'width': 854, 'height': 480},
'36': {'ext': '3gp', 'width': 320, 'height': 240},
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
'43': {'ext': 'webm', 'width': 640, 'height': 360},
'44': {'ext': 'webm', 'width': 854, 'height': 480},
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
# 3d videos
'82': 'mp4',
'83': 'mp4',
'84': 'mp4',
'85': 'mp4',
'100': 'webm',
'101': 'webm',
'102': 'webm',
'82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
'83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
'84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
'85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
'100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
'101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
'102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
# Apple HTTP Live Streaming
'92': 'mp4',
'93': 'mp4',
'94': 'mp4',
'95': 'mp4',
'96': 'mp4',
'132': 'mp4',
'151': 'mp4',
'92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
'93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
'94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
'95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
'96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
'132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
'151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
# Dash mp4
'133': 'mp4',
'134': 'mp4',
'135': 'mp4',
'136': 'mp4',
'137': 'mp4',
'138': 'mp4',
'160': 'mp4',
# DASH mp4 video
'133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
'134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
'138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
# Dash mp4 audio
'139': 'm4a',
'140': 'm4a',
'141': 'm4a',
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# Dash webm
'171': 'webm',
'172': 'webm',
'242': 'webm',
'243': 'webm',
'244': 'webm',
'245': 'webm',
'246': 'webm',
'247': 'webm',
'248': 'webm',
}
_video_dimensions = {
'5': '400x240',
'6': '???',
'13': '???',
'17': '176x144',
'18': '640x360',
'22': '1280x720',
'34': '640x360',
'35': '854x480',
'36': '320x240',
'37': '1920x1080',
'38': '4096x3072',
'43': '640x360',
'44': '854x480',
'45': '1280x720',
'46': '1920x1080',
'82': '360p',
'83': '480p',
'84': '720p',
'85': '1080p',
'92': '240p',
'93': '360p',
'94': '480p',
'95': '720p',
'96': '1080p',
'100': '360p',
'101': '480p',
'102': '720p',
'132': '240p',
'151': '72p',
'133': '240p',
'134': '360p',
'135': '480p',
'136': '720p',
'137': '1080p',
'138': '>1080p',
'139': '48k',
'140': '128k',
'141': '256k',
'160': '192p',
'171': '128k',
'172': '256k',
'242': '240p',
'243': '360p',
'244': '480p',
'245': '480p',
'246': '480p',
'247': '720p',
'248': '1080p',
}
_special_itags = {
'82': '3D',
'83': '3D',
'84': '3D',
'85': '3D',
'100': '3D',
'101': '3D',
'102': '3D',
'133': 'DASH Video',
'134': 'DASH Video',
'135': 'DASH Video',
'136': 'DASH Video',
'137': 'DASH Video',
'138': 'DASH Video',
'139': 'DASH Audio',
'140': 'DASH Audio',
'141': 'DASH Audio',
'160': 'DASH Video',
'171': 'DASH Audio',
'172': 'DASH Audio',
'242': 'DASH Video',
'243': 'DASH Video',
'244': 'DASH Video',
'245': 'DASH Video',
'246': 'DASH Video',
'247': 'DASH Video',
'248': 'DASH Video',
'242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
'245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
'246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
}
IE_NAME = u'youtube'
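With the three legacy itag tables merged into _formats, assembling a format is a plain dictionary merge — exactly what dct.update(self._formats[itag]) does further down in this diff. Illustration with one entry (the URL is hypothetical):

    _formats = {
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},  # excerpt of the table above
    }

    dct = {'format_id': '22', 'url': 'http://example.invalid/videoplayback'}
    dct.update(_formats['22'])
    print(dct['ext'], dct['width'], dct['height'])  # mp4 1280 720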
@ -1153,13 +1053,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
@ -1172,48 +1065,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
"""
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
existing_formats = [x for x in self._formats if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimited sequence.
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
# available in the specified format. For example,
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if rf in self._video_formats_map:
for srf in self._video_formats_map[rf]:
if srf in url_map:
video_url_list = [(srf, url_map[srf])]
break
else:
continue
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
video_url_list.reverse() # order worst to best
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
@ -1361,7 +1217,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_description = u''
def _extract_count(klass):
count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
count = self._search_regex(
r'class="%s">([\d,]+)</span>' % re.escape(klass),
video_webpage, klass, default=None)
if count is not None:
return int(count.replace(',', ''))
return None
@ -1377,9 +1235,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if 'length_seconds' not in video_info:
self._downloader.report_warning(u'unable to extract video duration')
video_duration = ''
video_duration = None
else:
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
# annotations
video_annotations = None
@ -1460,50 +1318,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
else:
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = []
formats = []
for itag, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(itag, 'flv')
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
self._video_dimensions.get(itag, '???'),
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
results.append({
'id': video_id,
'url': video_real_url,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'ext': video_extension,
'format': video_format,
dct = {
'format_id': itag,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
})
return results
'url': video_real_url,
'player_url': player_url,
}
dct.update(self._formats[itag])
formats.append(dct)
self._sort_formats(formats)
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'formats': formats,
}
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
@ -1715,7 +1566,7 @@ class YoutubeUserIE(InfoExtractor):
# page by page until there are no video ids - it means we got
# all of them.
video_ids = []
url_results = []
for pagenum in itertools.count(0):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@ -1733,10 +1584,17 @@ class YoutubeUserIE(InfoExtractor):
break
# Extract video identifiers
ids_in_page = []
for entry in response['feed']['entry']:
ids_in_page.append(entry['id']['$t'].split('/')[-1])
video_ids.extend(ids_in_page)
entries = response['feed']['entry']
for entry in entries:
title = entry['title']['$t']
video_id = entry['id']['$t'].split('/')[-1]
url_results.append({
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
'id': video_id,
'title': title,
})
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
@ -1744,12 +1602,9 @@ class YoutubeUserIE(InfoExtractor):
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
if len(entries) < self._GDATA_PAGE_SIZE:
break
url_results = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)

View File

@ -1,10 +1,10 @@
# coding: utf-8
import operator
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
)
@ -67,29 +67,13 @@ class ZDFIE(InfoExtractor):
''', format_id)
ext = format_m.group('container')
is_supported = ext != 'f4f'
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
try:
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
except ValueError:
proto_pref = -999
proto = format_m.group('proto').lower()
quality = fnode.find('./quality').text
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
try:
quality_pref = -QUALITY_ORDER.index(quality)
except ValueError:
quality_pref = -999
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr = int(fnode.find('./videoBitrate').text) // 1000
pref = (is_available, is_supported,
proto_pref, quality_pref, vbr, abr)
format_note = u''
if not is_supported:
format_note += u'(unsupported)'
if not format_note:
format_note = None
@ -101,18 +85,20 @@ class ZDFIE(InfoExtractor):
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': int(fnode.find('./width').text),
'height': int(fnode.find('./height').text),
'filesize': int(fnode.find('./filesize').text),
'width': int_or_none(fnode.find('./width').text),
'height': int_or_none(fnode.find('./height').text),
'filesize': int_or_none(fnode.find('./filesize').text),
'format_note': format_note,
'_pref': pref,
'protocol': proto,
'_available': is_available,
}
format_nodes = doc.findall('.//formitaeten/formitaet')
formats = sorted(filter(lambda f: f['_available'],
map(xml_to_format, format_nodes)),
key=operator.itemgetter('_pref'))
formats = list(filter(
lambda f: f['_available'],
map(xml_to_format, format_nodes)))
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import ctypes
import datetime
import email.utils
import errno
@ -766,6 +767,10 @@ def unified_strdate(date_str):
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except:
pass
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
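The new fallback covers RFC 2822 dates (the format used in HTTP headers and feeds), which none of the strptime expressions handle. For example:

    import datetime
    import email.utils

    timetuple = email.utils.parsedate_tz('Wed, 11 Dec 2013 09:00:00 +0100')
    if timetuple:
        print(datetime.datetime(*timetuple[:6]).strftime('%Y%m%d'))  # 20131211

The timezone offset (timetuple[9]) is dropped, since upload_date only needs a calendar date.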
@ -1051,7 +1056,7 @@ def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
ENGLISH_NAMES = [
u'Januar', u'February', u'March', u'April', u'May', u'June',
u'January', u'February', u'March', u'April', u'May', u'June',
u'July', u'August', u'September', u'October', u'November', u'December']
try:
return ENGLISH_NAMES.index(name) + 1
@ -1062,3 +1067,54 @@ def month_by_name(name):
def fix_xml_all_ampersand(xml_str):
"""Replace all the '&' by '&amp;' in XML"""
return xml_str.replace(u'&', u'&amp;')
def setproctitle(title):
assert isinstance(title, type(u''))
try:
libc = ctypes.cdll.LoadLibrary("libc.so.6")
except OSError:
return
title_bytes = title.encode('utf-8')
# Size the buffer from the encoded byte length; len(title) counts
# characters and undercounts for non-ASCII titles.
buf = ctypes.create_string_buffer(len(title_bytes) + 1)
buf.value = title_bytes
try:
libc.prctl(15, ctypes.byref(buf), 0, 0, 0)  # 15 == PR_SET_NAME
except AttributeError:
return # Strange libc, just skip this
def remove_start(s, start):
if s.startswith(start):
return s[len(start):]
return s
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip(u'/').split(u'/')[-1]
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return "HEAD"
def int_or_none(v):
return v if v is None else int(v)
def parse_duration(s):
if s is None:
return None
m = re.match(
r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
if not m:
return None
res = int(m.group('secs'))
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
return res
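A few illustrative calls against the regex above:

    assert parse_duration('9') == 9
    assert parse_duration('1:23') == 83
    assert parse_duration('1:02:03') == 3723
    assert parse_duration('not a duration') is None
    assert parse_duration(None) is None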

View File

@ -1,2 +1,2 @@
__version__ = '2013.12.11.2'
__version__ = '2013.12.26'