Compare commits

...

457 Commits

Author SHA1 Message Date
Sergey M․
a46d9e5b41 release 2019.04.07 2019-04-07 04:19:46 +07:00
Sergey M․
aa5338118e [ChangeLog] Actualize
[ci skip]
2019-04-07 04:16:45 +07:00
Sergey M․
8410653f24 [ruutu] Add support for audio podcasts (closes #20473, closes #20545) 2019-04-07 03:18:10 +07:00
Sergey M․
f4da808036 [xvideos] Extract all thumbnails (closes #20432) 2019-04-07 02:59:09 +07:00
Martin Michlmayr
f412970164 [README.md] Fix lists formatting (closes #20558)
Lists have to be separated from the previous paragraph by a blank line
in certain variants of Markdown, otherwise they are not interpreted as
lists.

This change ensures that that the youtube-dl.1 man page, which is
generated from README.md with the help of pandoc, is formatted
correctly.
2019-04-07 02:28:31 +07:00
Sergey M․
059cd768b9 [vk] Remove unused import 2019-04-07 02:17:54 +07:00
Sergey M․
c701472fc9 [platzi] Add extractor (closes #20562) 2019-04-07 02:15:52 +07:00
Remita Amine
19591facea [dvtv] remove unnecessary comments and spaces 2019-04-06 16:45:11 +01:00
Jan Friesse
b9aad6c427 [dvtv] Fix extraction (closes #18514) 2019-04-06 16:37:07 +01:00
Remita Amine
9f182c23ba [vrv] add basic support for individual movie links(#19229) 2019-04-06 09:22:25 +01:00
Remita Amine
4810655cd6 [bfi:player] Add new extractor(#19235) 2019-04-05 19:35:35 +01:00
Remita Amine
a7978f8e2a [hbo] fix extraction and extract subtitles(closes #14629)(closes #13709) 2019-04-05 18:08:43 +01:00
Remita Amine
19041a3877 [youtube] extract srv[1-3] subtitle formats(#20566) 2019-04-05 16:18:57 +01:00
Remita Amine
afb7496416 [adultswim] fix extraction(closes #18025) 2019-04-05 11:45:49 +01:00
Remita Amine
69e6efac16 [teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) 2019-04-05 08:26:04 +01:00
Remita Amine
2bbde1d09a [adn] fix subtitle compatibility with ffmpeg 2019-04-04 17:59:20 +01:00
Remita Amine
b966740cf7 [adn] fix extraction and add support for positioning styles(closes #20549) 2019-04-04 14:50:16 +01:00
Remita Amine
220828f2d6 [vk] use a more unique video id(closes #17848) 2019-04-03 11:08:42 +01:00
Remita Amine
977a782110 [rtl2] update player_url 2019-04-03 10:20:01 +01:00
Remita Amine
a2b6f946f1 [newstube] fix extraction 2019-04-03 10:19:36 +01:00
Remita Amine
4f7db46887 [rtl2] improve _VALID_URL regex 2019-04-03 01:00:02 +01:00
Remita Amine
d7d86fdd49 [download/external] pass rtmp_conn to ffmpeg 2019-04-02 22:41:23 +01:00
Remita Amine
f8987163fb [adobeconnect] Add new extractor(closes #20283) 2019-04-02 22:40:39 +01:00
Remita Amine
313e8b2b18 [gaia] add support for authentication(closes #14605) 2019-04-02 15:50:06 +01:00
Sergey M․
c0b7d11713 [YoutubeDL] Add ffmpeg_location to post processor options (closes #20532) 2019-04-02 01:29:44 +07:00
Sergey M․
efee62ac7f [mediasite] Add support for dashed ids and named catalogs (closes #20531) 2019-04-02 01:13:52 +07:00
Sergey M․
38287d251d release 2019.04.01 2019-04-01 23:55:17 +07:00
Sergey M․
25d9243141 [ChangeLog] Actualize
[ci skip]
2019-04-01 23:53:28 +07:00
RexYuan
93bb6b1bae [weibo] Extend _VALID_URL (#20496) 2019-03-31 01:31:33 +07:00
Sergey M․
b43c5f474a [xhamster] Add support for xhamster.one (closes #20508) 2019-03-31 01:27:45 +07:00
Sergey M․
4014a48622 [mediasite:catalog] Add extractor (closes #20507) 2019-03-31 01:21:53 +07:00
Remita Amine
99fe330070 [teamtreehouse] Add new extractor(closes #9836) 2019-03-28 16:55:57 +01:00
Remita Amine
c4c888697e [ina] add support for audio URLs 2019-03-27 18:49:29 +01:00
Remita Amine
b27a71e66c [ina] improve extraction 2019-03-27 18:29:24 +01:00
Remita Amine
de74ef83b7 [cwtv] fix episode number extraction(closes #20461) 2019-03-27 18:01:51 +01:00
Sergey M․
8cb10807ed [npo] Improve DRM detection 2019-03-23 21:43:50 +07:00
Sergey M․
b8526c78f9 [pornhub] Add support for DASH formats (closes #20403) 2019-03-23 01:09:33 +07:00
Sergey M․
5e1271c56d [utils] Improve int_or_none and float_or_none (#20403) 2019-03-23 01:08:54 +07:00
Jesse de Zwart
050afa60c6 Check for valid --min-sleep-interval when --max-sleep-interval is specified 2019-03-21 22:55:03 +07:00
Sergey M․
c4580580f5 [svtplay] Update API endpoint (closes #20430) 2019-03-21 22:39:35 +07:00
Sergey M․
0a8e251b35 release 2019.03.18 2019-03-18 01:36:41 +07:00
Sergey M․
2ed2ebdb36 [ChangeLog] Actualize
[ci skip]
2019-03-18 01:33:37 +07:00
Sergey M․
8428fdccf2 [yandexvideo] Add extractor 2019-03-17 17:33:07 +07:00
Sergey M․
04988b55b5 [openload] Improve embed detection 2019-03-17 15:34:21 +07:00
Sergey M․
034f5fb5ee [radiocanada:audiovideo] Fix typo 2019-03-17 14:34:48 +07:00
wolfy1339
a63782b581 [corus] Add support for bigbrothercanada.ca (#20357) 2019-03-17 14:20:21 +07:00
Lukas Anzinger
0146c6cde6 [orf:radio] Extract series (#20012) 2019-03-17 09:57:02 +07:00
Sergey M․
ddff25c5d1 [extractors] Remove superfluous whitespace 2019-03-17 09:41:16 +07:00
Tyler Szabo
81dada0b4b [cbc:watch] Add support for gem.cbc.ca (closes #20251, #20359) 2019-03-17 09:37:29 +07:00
Sergey M․
eba3a2f9ef [anysex] Remove extractor (closes #19279) 2019-03-17 09:15:24 +07:00
Sergey M․
d493f15c11 [extractor/common] Improve HTML5 entries extraction and add some realworld tests 2019-03-17 09:09:32 +07:00
Sergey M․
0dc41787af [utils] Introduce parse_bitrate 2019-03-17 09:07:47 +07:00
Austin de Coup-Crank
e5cfb779ea [ciscolive] Add support for new URL schema (closes #20320, #20351) 2019-03-17 07:18:54 +07:00
utlasidyo
77d95677b7 [youtube] Add support for invidiou.sh (#20309) 2019-03-17 07:15:15 +07:00
Sergey M․
ffbd1368df [update] Hide update URLs behind redirect 2019-03-17 07:08:20 +07:00
Sergey M․
2e27421c70 [test_InfoExtractor] Add test for #20346 2019-03-15 01:20:24 +07:00
Sergey M․
79d2077edc [extractor/common] Fix url meta field for unfragmented DASH formats (closes #20346) 2019-03-15 00:42:14 +07:00
charon2019
6db03a29d1 [anitube] Remove extractor
site no longer exists
2019-03-13 10:47:55 +01:00
Sergey M․
47f9792620 [ruleporn] Remove extractor (closes #15344, closes #20324)
Covered by generic extractor
2019-03-12 22:55:32 +07:00
Remita Amine
68867668cf [npr] fix extraction(closes #10793)(closes #13440) 2019-03-12 16:52:28 +01:00
Remita Amine
e7e3ec828b [biqle] fix extraction(closes #11471)(closes #15313) 2019-03-12 08:08:54 +01:00
Remita Amine
e585547284 [viddler] Modernize 2019-03-11 15:53:44 +01:00
Remita Amine
4993132344 [pandatv] fix condition syntax 2019-03-11 15:37:19 +01:00
Remita Amine
8569058f88 [moevideo] fix extraction 2019-03-11 15:18:09 +01:00
Remita Amine
612a159510 [primesharetv] remove extractor 2019-03-11 09:51:26 +01:00
Remita Amine
f517c0c0a7 [hypem] Modernize and extract more metadata(closes #15320) 2019-03-11 09:43:06 +01:00
Remita Amine
197cf8b794 [veoh] fix extraction 2019-03-11 09:04:06 +01:00
Remita Amine
fddb4a3a8b [escapist] Modernize 2019-03-11 08:18:57 +01:00
Remita Amine
99534feaff [videomega] remove extractor(closes #10108) 2019-03-11 07:27:57 +01:00
Sergey M․
cdc7baffca [beeg] Add support for beeg.porn (closes #20306) 2019-03-11 04:44:40 +07:00
Sergey M․
067aa17edf Start moving to ytdl-org 2019-03-11 04:00:54 +07:00
Remita Amine
2765503713 [vimeo:review] improve config url extraction and extract original format(closes #20305) 2019-03-10 15:03:32 +01:00
Remita Amine
0d08bcdb70 [fox] detect geo restriction and authentication errors(#20208) 2019-03-10 09:37:40 +01:00
Sergey M․
10734553fe release 2019.03.09 2019-03-09 02:53:18 +07:00
Sergey M․
bba35695eb [ChangeLog] Actualize
[ci skip]
2019-03-09 02:52:08 +07:00
Sergey M․
9d74ea6d36 [francetv:site] Relax video id regex and update test (closes #20268) 2019-03-08 23:28:24 +07:00
Remita Amine
7b6e760870 [toutv] detect invalid login error 2019-03-06 09:28:14 +01:00
Remita Amine
829685b88a [toutv] fix authentication(closes #20261) 2019-03-06 09:20:39 +01:00
Sergey M․
fca9baf0da [test] Fix test_compat_etree_Element 2019-03-06 02:46:26 +07:00
0x9fff00
d347b52b63 [urplay] Extract timestamp (#20235) 2019-03-06 02:11:32 +07:00
yonaikerlol
97157c692c [openload] Add support for oload.space 2019-03-06 01:34:34 +07:00
Sergey M․
a551768acf [facebook] Improve uploader extraction (closes #20250) 2019-03-06 01:27:22 +07:00
Sergey M․
ee0ba927aa Use compat_etree_Element 2019-03-06 01:21:57 +07:00
Sergey M․
399f76870d [compat] Introduce compat_etree_Element 2019-03-06 01:18:52 +07:00
Sergey M․
e5ada4f3ad [extractor/common] Fallback url to base URL for DASH formats 2019-03-06 00:33:08 +07:00
Sergey M․
bb6f112d9d [npo] Improve ISM extraction 2019-03-05 23:58:46 +07:00
Sergey M․
c17eb5b4b0 [rai] Improve extraction (closes #20253) 2019-03-05 23:56:42 +07:00
Sergey M․
d9eb580a79 [extractor/common] Do not fail on invalid data while parsing F4M manifest in non fatal mode 2019-03-05 23:56:33 +07:00
Remita Amine
5dcd630dca [paramountnetwork] fix mgid extraction(closes #20241) 2019-03-04 22:26:55 +01:00
Sergey M․
c790e93ab5 [extractor/common] Clarify url and manifest_url meta fields 2019-03-05 00:41:53 +07:00
Sergey M․
39c780fdec [extractor/common] Return MPD manifest as format's url meta field (#20242)
For symmetry with other segmented media
2019-03-05 00:40:57 +07:00
remitamine
e7e62441cd [utils] strip #HttpOnly_ prefix from cookies files (#20219) 2019-03-03 19:23:59 +07:00
Remita Amine
0a5baf9c21 [libsyn] improve extraction(closes #20229) 2019-03-03 06:18:51 +01:00
dimqua
8ae113ca9d [youtube] Add more invidious instances
See [Invidious-Instances](https://github.com/omarroth/invidious/wiki/Invidious-Instances) for the reference.
2019-03-03 08:19:36 +07:00
cclauss
7aeb788e56 [travis] Remove sudo: false
Travis now recommends removing `sudo: false` from configuration: https://blog.travis-ci.com/2018-11-19-required-linux-infrastructure-migration.
2019-03-03 08:16:48 +07:00
Sergey M․
7465e0aee2 [spankbang] Fix extraction (closes #20023) 2019-03-03 06:25:45 +07:00
Remita Amine
a8f83f0c56 [crunchyroll] fix is_logged check 2019-03-02 08:25:47 +01:00
Remita Amine
dca0e0040a Revert "use older login method(closes #11572)"
This reverts commit cc6a960e13.
2019-03-02 08:01:42 +01:00
Remita Amine
398e1e21d6 [espn] extend _VALID_URL regex(closes #20013) 2019-03-01 15:34:05 +01:00
Remita Amine
c5b02efe20 [sixplay] handle videos with empty assets(closes #20016) 2019-03-01 15:08:11 +01:00
Remita Amine
06242d44fe [vimeo] add support for Vimeo Pro portfolio protected videos(closes #20070) 2019-03-01 08:14:34 +01:00
Sergey M․
04c33bdfb3 release 2019.03.01 2019-03-01 01:03:51 +07:00
Sergey M․
333f617b12 [ChangeLog] Actualize
[ci skip]
2019-03-01 01:02:36 +07:00
Sergey M․
ff60ec8f02 [npo] Fix extraction (#20084) 2019-03-01 00:47:18 +07:00
Sergey M․
9d9a8676dc [francetv:site] Extend video id regex (closes #20029, closes #20071) 2019-02-28 23:26:52 +07:00
Sergey M․
db1c3a9d3f [periscope] Extract width and height (closes #20015) 2019-02-27 03:41:15 +07:00
Sergey M․
55b8588f0e [servus] Fix extraction (closes #19297) 2019-02-24 23:20:06 +07:00
Sergey M․
f0228f56fb [bbccouk] Make subtitles non fatal (#19651) 2019-02-24 21:01:25 +07:00
Sergey M․
8c80603f1a [downloader/external] Add support for rate limit and retries for wget 2019-02-23 01:00:03 +07:00
Sergey M․
37b239b3b6 [downloader/external] Fix infinite retries for curl (closes #19303) 2019-02-23 00:43:29 +07:00
Sergey M․
caf48f557a [metacafe] Fix family filter bypass (closes #19287) 2019-02-21 05:59:07 +07:00
Sergey M․
77a842c892 release 2019.02.18 2019-02-18 02:11:11 +07:00
Sergey M․
c76fc5b22a [ChangeLog] Actualize
[ci skip]
2019-02-18 02:10:06 +07:00
Sergey M․
388cfbd3d8 [tvp:website] Improve support 2019-02-17 14:27:00 +07:00
Sergey M․
d93083789b [tvp:series] Fix extraction 2019-02-17 14:09:30 +07:00
Sergey M․
34568dc296 [tvp] Detect unavailable videos 2019-02-17 13:39:00 +07:00
Sergey M․
3c9647372e [tvp] Fix description extraction, make thumbnail optional and fix tests 2019-02-17 13:38:21 +07:00
Sergey M․
659e93fcf5 [linuxacademy] Add extractor (closes #12207) 2019-02-17 07:12:10 +07:00
Sergey M․
c9a0ea6e51 [bilibili] Update keys (closes #19233) 2019-02-17 05:00:16 +07:00
Sergey M․
d7d513891b [udemy] Extend _VALID_URLs (closes #14330, closes #15883) 2019-02-17 01:05:01 +07:00
Sergey M․
ae65c93a26 [udemy] Update User-Agent and detect captcha (closes #14713, closes #15839, closes #18126) 2019-02-17 00:58:13 +07:00
Sergey M․
ba2e3730d1 [noovo] Fix extraction (closes #19230) 2019-02-16 22:45:53 +07:00
Sergey M․
2b2da3ba10 [rai] Relax _VALID_URL (closes #19232) 2019-02-15 23:56:29 +07:00
Sergey M․
794c1b6e02 [vshare] Pass Referer to download request (closes #19205, closes #19221) 2019-02-14 23:43:16 +07:00
yonaikerlol
7bee705d8f [openload] Add support for oload.live 2019-02-14 22:28:16 +07:00
bitraid
6f5c1807f4 [imgur] Use video id as title fallback (closes #18590) 2019-02-13 00:02:29 +07:00
Sergey M․
985637cbbf [twitch] Add new source format detection approach (closes #19193) 2019-02-12 00:13:50 +07:00
Sergey M․
7d8b89163c [tvplayhome] Fix video id extraction (closes #19190) 2019-02-11 04:41:28 +07:00
Sergey M․
d777f3e81c [tvplayhome] Fix episode metadata extraction (closes #19190) 2019-02-11 04:39:23 +07:00
Sergey M․
4c0e0dc9dc [rutube:embed] Fix extraction and add support private videos (closes #19163) 2019-02-11 00:49:51 +07:00
Sergey M․
f516f44094 [soundcloud] Extract more metadata 2019-02-10 23:44:08 +07:00
Sergey M․
e9dee7f1b2 [trunews] Add extractor (closes #19153) 2019-02-09 23:50:27 +07:00
Remita Amine
91effe22a0 [linkedin:learning] extract chapter_number and chapter_id(closes #19162) 2019-02-08 07:21:50 +01:00
Sergey M․
04eacf5453 release 2019.02.08 2019-02-08 01:12:51 +07:00
Sergey M․
f1f5b47255 [ChangeLog] Actualize
[ci skip]
2019-02-08 01:10:12 +07:00
Sergey M․
1211bb6dac [YoutubeDL] Improve _make_archive_id (closes #19149) 2019-02-08 01:08:48 +07:00
Sergey M․
4de3cb883c [malltv] Fix issues and simplify (closes #17856) 2019-02-08 00:43:31 +07:00
Ales Jirasek
22f5f5c6fc [malltv] Add extractor (closes #18058) 2019-02-08 00:43:26 +07:00
Sergey M․
49bd993fd9 [spankbang:playlist] Add extractor (closes #19145) 2019-02-08 00:09:50 +07:00
Sergey M․
f06a1cabe8 [spankbang] Extend _VALID_URL 2019-02-08 00:07:29 +07:00
Remita Amine
241c5d7d38 [trutv] fix extraction(closes #17336) 2019-02-06 19:38:10 +01:00
Remita Amine
8fecc7353d [toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 13:59:33 +01:00
Sergey M․
5dda1edef9 [pornhub] Improve and simplify (closes #19135) 2019-02-05 23:09:24 +07:00
JChris246
d2d970d07e [pornhub] Fix tags and categories extraction (closes #13720) 2019-02-05 23:08:49 +07:00
Sergey M․
48fb963b2f [pornhd] Fix formats extraction 2019-02-05 00:07:37 +07:00
JChris246
70c3ee1367 [pornhd] Extract like count 2019-02-05 00:06:04 +07:00
Remita Amine
07fbfef1c7 [radiocanada] switch to the new media requests(closes #19115) 2019-02-03 12:10:41 +01:00
Remita Amine
eecf788b90 [teachable] add support for courses.workitdaily.com (closes #18871) 2019-02-03 09:10:35 +01:00
Sergey M․
0efcb5a2fe [vporn] Remove extractor (closes #16276)
Handled by generic extractor
2019-02-03 00:33:45 +07:00
Sergey M․
7c5307f4c4 [soundcloud:pagedplaylist] Improve (closes #19086) 2019-02-02 23:40:30 +07:00
Cory Hall
6cc6e0c34d [soundcloud:pagedplaylist] Add ie and title to entries (#19022)
rel: https://github.com/rg3/youtube-dl/issues/19022
2019-02-02 23:40:22 +07:00
JChris246
b9bc1cff72 [drtuber] Extract duration 2019-02-02 06:04:00 +07:00
Sergey M․
e9fef7ee4e [YoutubeDL] Fallback to ie_key of matching extractor while making download archive id when no explicit ie_key is provided (#19022) 2019-02-02 05:44:31 +07:00
Sergey M․
b6423e6ca2 [soundcloud:user] Update tests 2019-02-02 04:11:32 +07:00
Sergey M․
3ef2da2d21 [soundcloud] Fix paged playlists extraction, add support for albums and update client id 2019-02-02 04:00:29 +07:00
Sergey M․
49fe4175ae [drtv] Improve preference (closes #19079) 2019-02-01 01:49:33 +07:00
Sergey M․
9613e14a92 [openload] Add support for openload.pw and oload.pw (closes #18930) 2019-01-31 00:15:45 +07:00
Batuhan's Unmaintained Account
15e832ff2a [openload] Add support for oload.info 2019-01-30 23:39:02 +07:00
Remita Amine
645c4885cf [crackle] authorize media detail request(closes #16931) 2019-01-30 14:44:23 +01:00
Sergey M․
7b0f9df23d release 2019.01.30.1 2019-01-30 06:19:36 +07:00
Sergey M․
c2a0fe2ea7 [ChangeLog] Actualize
[ci skip]
2019-01-30 06:17:25 +07:00
Sergey M․
ce52c7c111 [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (closes #19067) 2019-01-30 06:15:23 +07:00
Sergey M․
1063b4c707 release 2019.01.30 2019-01-30 00:08:39 +07:00
Sergey M․
ca01e5f903 [ChangeLog] Actualize
[ci skip]
2019-01-30 00:05:32 +07:00
Sergey M․
5496754ae4 [fox] Remove unused imports 2019-01-30 00:03:19 +07:00
Sergey M․
9868f1ab18 [yourporn] Improve (closes #19061) 2019-01-30 00:01:17 +07:00
JChris246
41cff90c41 [yourporn] Fix extraction and extract duration (closes #18815, closes #18852)
change cdn to cdn4 for the video_url
2019-01-29 23:58:44 +07:00
Sergey M․
a2d821d711 [drtv] Improve extraction (closes #19039)
+ Add support for EncryptedUri videos
+ Extract more metadata
* Fix subtitles extraction
2019-01-29 23:33:09 +07:00
Remita Amine
6df196f32e [fox] add support for locked videos using cookies(closes #19060) 2019-01-29 00:31:49 +01:00
Remita Amine
41c2c254d3 [fox] fix extraction for free videos(#19060) 2019-01-28 22:39:19 +01:00
Alexander Seiler
a81daba231 [zattoo] Add support for tv.salt.ch 2019-01-29 02:20:46 +07:00
Sergey M․
61ff92e11e [postprocessor/ffmpeg] Wrap loglevel args in encodeArgument 2019-01-29 01:59:56 +07:00
Sergey M
1397a790ff [postprocessor/ffmpeg] Disable "Last message repeated" messages which cause non-zero exit status (#19025) 2019-01-28 22:59:00 +07:00
Tatsh
7f903dd8bf [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding subtitles (closes #19042)
Related issue: https://trac.ffmpeg.org/ticket/6016
2019-01-28 22:57:14 +07:00
Andrew Udvare
2b3afe6b0f [postprocessor/ffmpeg] Disable "Last message repeated" messages which cause non-zero exit status 2019-01-27 22:24:37 -05:00
Sergey M․
e71be6ee9f release 2019.01.27 2019-01-27 21:28:09 +07:00
Sergey M․
bf8ebc9cfe [ChangeLog] Actualize
[ci skip]
2019-01-27 21:26:28 +07:00
Remita Amine
1fcc91663b [vice] fix extraction for locked videos(closes #16248) 2019-01-27 10:53:38 +01:00
Remita Amine
30cd1a5f39 [wakanim] detect DRM protected videos 2019-01-26 22:53:09 +01:00
Sergey M․
458fd30f56 [extractor/common] Extract season in _json_ld 2019-01-27 04:36:58 +07:00
Sergey M․
845333acf6 [wakanim] Add extractor (closes #14374) 2019-01-27 04:14:54 +07:00
Sergey M
252abb1e8b [README.md] Mention more convenience extraction functions 2019-01-26 15:29:19 +07:00
Remita Amine
ae18d58297 [usatoday] fix extraction for videos with custom brightcove partner id(closes #18990) 2019-01-25 11:02:15 +01:00
Sergey M․
1602a240a7 [drtv] Fix extraction (closes #18989) 2019-01-25 04:16:49 +07:00
Sergey M․
0eba178fce [nhk] Extend _VALID_URL (closes #18968) 2019-01-25 04:04:58 +07:00
Remita Amine
eb35b163ad [postprocessor/ffmpeg] fallback to ffmpeg/avconv for audio codec detection(closes #681) 2019-01-24 20:23:04 +01:00
Remita Amine
118afcf52f [go] fix adobe pass requests for Disney Now(closes #18901) 2019-01-23 22:16:52 +01:00
Sergey M․
9713d1d1e0 [openload] Add support for oload.club (closes #18969) 2019-01-24 02:30:12 +07:00
Sergey M․
a1e171233d release 2019.01.24 2019-01-24 01:46:23 +07:00
Sergey M․
7d311586ed [ChangeLog] Actualize
[ci skip]
2019-01-24 01:44:09 +07:00
Sergey M․
e118a8794f [YoutubeDL] Fix typo in string negation implementation and add more tests (closes #18961) 2019-01-24 01:39:39 +07:00
Sergey M․
435e382423 release 2019.01.23 2019-01-23 04:46:55 +07:00
Sergey M․
0670bdd8f2 [ChangeLog] Actualize
[ci skip]
2019-01-23 04:43:55 +07:00
Sergey M․
71a1f61700 [pornhub] Apply scrape detection bypass for all extractors 2019-01-23 04:12:06 +07:00
Sergey M․
6510a3aa97 [crunchyroll] Extend _VALID_URL (closes #18955) 2019-01-23 03:55:41 +07:00
Sergey M․
278d061a0c [pornhub] Bypass scrape detection (closes #5930) 2019-01-23 03:51:29 +07:00
Remita Amine
503b604a31 [vrv] fix oauth signing for python 2(#14307) 2019-01-22 18:21:37 +01:00
Remita Amine
4b85f0f9db [vrv] add support for authentication(closes #14307) 2019-01-22 14:38:40 +01:00
Sergey M․
19d6991312 [videomore] Improve extraction and fix season extractor (closes #18908) 2019-01-22 03:03:53 +07:00
Sergey M․
07f9febc4b [tnaflix] Pass Referer in metadata request (closes #18925) 2019-01-20 22:07:01 +07:00
Sergey M․
fad4ceb534 [utils] Fix urljoin for paths with non-http(s) schemes 2019-01-20 20:22:19 +07:00
Remita Amine
6945b9e78f [extractor/common] improve jwplayer relative url handling(closes #18892) 2019-01-20 13:31:52 +01:00
Alexandre Huot
29cfcb43da [radiocanada] Relax DRM check 2019-01-20 18:33:09 +07:00
Sergey M․
a1a4607598 [vimeo] Fix video password verification for videos protected by Referer HTTP header 2019-01-20 18:21:31 +07:00
Sergey M․
73c19aaa9f [hketv] Improve and simplify (closes #18696) 2019-01-20 17:43:11 +07:00
Anthony Fok
289ef490f7 [hketv] Add extractor 2019-01-20 17:43:11 +07:00
yonaikerlol
6ca3fa898c [streamango] Add support for fruithosts.net 2019-01-20 16:24:21 +07:00
jhwgh1968
31fbedc06a [instagram] Add base extractor for playlists and tag extractor 2019-01-20 16:10:46 +07:00
aviperes
15870747f0 [odnoklassniki] Detect paid videos 2019-01-20 15:15:01 +07:00
Remita Amine
fc746c3fdd [test/test_InfoExtractor] add test for #18923 2019-01-20 09:05:12 +01:00
Sergey M
4e58d9fabb [README.md] Fix formatting 2019-01-20 14:23:35 +07:00
Sergey M․
2cc779f497 [YoutubeDL] Add negation support for string comparisons in format selection expressions (closes #18600, closes #18805) 2019-01-20 13:48:49 +07:00
Remita Amine
379306ef55 [extractor/common] fix typo 2019-01-19 21:35:02 +01:00
Remita Amine
f28363ad1f [ted] correct acodec for http formats(#18923) 2019-01-19 21:25:53 +01:00
Remita Amine
2bfc1d9d68 [extractor/common] imporove HLS video only format detection(closes #18923) 2019-01-19 21:25:15 +01:00
Remita Amine
e2dd132f05 [cartoonnetwork] fix extraction(closes #15664)(closes #17224) 2019-01-17 23:56:37 +01:00
Remita Amine
79fec976b0 [vimeo] fix extraction for password protected player URLs(closes #18889) 2019-01-17 09:44:29 +01:00
Sergey M․
29639b363d release 2019.01.17 2019-01-17 10:27:17 +07:00
Sergey M․
f53cecd796 [ChangeLog] Actualize
[ci skip]
2019-01-17 10:25:50 +07:00
Sergey M․
fa4ac365f6 [youtube] Extend JS player signature function name regexes (closes #18890, closes #18891, closes #18893) 2019-01-17 10:24:44 +07:00
Sergey M․
bfc8eeea57 release 2019.01.16 2019-01-16 02:24:08 +07:00
Sergey M․
b0d73a7456 [ChangeLog] Actualize
[ci skip]
2019-01-16 02:20:10 +07:00
Sergey M․
4fe54c128a [youtube] Update tests and add a tests for #18804 2019-01-16 02:18:27 +07:00
Sergey M․
a16c7c033a [test/helper] Add support for maxcount and count collection len test checkers 2019-01-16 02:17:49 +07:00
Sergey M․
2f483bc1c3 [youtube] Skip unsupported adaptive stream type (#18804) 2019-01-16 01:28:50 +07:00
Sergey M․
561b456e2d [youtube] Extract DASH formats from player response (closes #18804) 2019-01-16 01:12:58 +07:00
Remita Amine
929ba3997b [funimation] fix extraction(closes #14089) 2019-01-15 10:25:31 +01:00
Sergey M․
10026329c2 [skylinewebcams] Fix extraction (closes #18853) 2019-01-14 23:23:51 +07:00
Remita Amine
3b983ee471 [curiositystream] add support for non app urls 2019-01-13 15:46:54 +01:00
Remita Amine
f1ab3b7de7 [downloader/hls] fix uplynk ad skipping(closes #18824) 2019-01-13 10:03:27 +01:00
Sergey M․
d65f6e734b [bitchute] Check formats (#18833) 2019-01-13 03:59:38 +07:00
Sergey M․
ed8db0a25c [wistia] Extend _VALID_URL (closes #18823) 2019-01-12 04:58:01 +07:00
Atlas Sullivan
60a899bb7e [README.md] Fix typo 2019-01-12 02:15:48 +07:00
Sergey M․
cbdc688c41 [postprocessor/ffmpeg] Relax ubuntu ffmpeg version regex 2019-01-12 00:30:06 +07:00
Sergey M․
5caa531a1a [postprocessor/ffmpeg] PEP 8 2019-01-11 23:47:23 +07:00
Remita Amine
a64646e417 [postprocessor/ffmpeg] sanitize ffmpeg version for Ubuntu and Arch Linux systems(closes #18813) 2019-01-11 15:12:10 +01:00
Remita Amine
c469e8808c [playplustv] add support for playplus.com(#18789) 2019-01-10 18:49:13 +01:00
Sergey M․
b64f6e690f release 2019.01.10 2019-01-10 23:26:54 +07:00
Sergey M․
a4491dd55c [ChangeLog] Actualize
[ci skip]
2019-01-10 23:23:19 +07:00
Sergey M․
c3e543893b [youtube] Extract live HLS URL from player response (closes #18799) 2019-01-10 22:46:53 +07:00
Remita Amine
432aba1c5e [outsidetv] Add new extractor(closes #18774) 2019-01-10 10:54:46 +01:00
Remita Amine
7c072f00d6 [jwplatform] use JW Platform Delivery API V2 and add support for more urls 2019-01-10 10:50:18 +01:00
Remita Amine
96c186e1fd [fox] add support National Geographic(closes #17985)(closes #15333)(closes #14698) 2019-01-10 09:05:00 +01:00
Remita Amine
4ad159c7b0 [playplustv] Add new extractor(closes #18789) 2019-01-09 20:39:48 +01:00
Remita Amine
65615be368 [globo] set GLBID cookie manually(closes #17346) 2019-01-09 19:17:58 +01:00
Remita Amine
3c1089dba4 [gaia] Add new extractor(#14605) 2019-01-09 14:23:26 +01:00
Sergey M․
6089ff40e7 [youporn] Fix title and description extraction (closes #18748) 2019-01-09 00:37:01 +07:00
Sergey M․
2543938bbe [hungama] Add support for videos (closes #17402) 2019-01-08 10:03:44 +07:00
Sergey M․
440863ade1 [extractor/common] Use episode name as title in _json_ld 2019-01-08 10:02:49 +07:00
Sergey M․
391256dc0e [extractor/common] Add support for movies in _json_ld 2019-01-08 10:02:00 +07:00
Sergey M․
06b4b90c70 [hungama] Fix code and extract more metadata (closes #18771) 2019-01-08 09:14:47 +07:00
Awal Garg
8cb5c2181a [hungama] Add extractor 2019-01-08 09:14:22 +07:00
Sergey M․
0266854f63 [dtube] Update test 2019-01-08 08:46:34 +07:00
4rensiker
bcc334a3c6 [dtube] Fix extraction (closes #18741) 2019-01-08 08:44:42 +07:00
Sergey M․
e9a50fba86 [utils] Fix typo 2019-01-07 01:02:34 +07:00
Sergey M․
04fb6928da [postprocessor/ffmpeg] Embed subtitles with non-standard language codes (refs #18765) 2019-01-07 00:57:55 +07:00
Sergey M․
b7acc83550 [utils] Add language codes replaced in 1989 revision of ISO 639 to ISO639Utils (closes #18765) 2019-01-07 00:57:48 +07:00
Sergey M․
de0359c0af [tvnow] Fix and rework extractors, prepare for a switch to the new API (closes #17245, closes #18499) 2019-01-05 03:44:19 +07:00
Sergey M․
c87f65e43d [carambatv:page] Fix extraction (closes #18739) 2019-01-04 22:21:53 +07:00
Sergey M․
d7c3af7a72 release 2019.01.02 2019-01-02 23:52:54 +07:00
Sergey M․
aeb72b3a41 [ChangeLog] Actualize
[ci skip]
2019-01-02 23:51:23 +07:00
nyuszika7h
2122d7151d [discovery] Use geo verification headers 2019-01-02 23:46:07 +07:00
Sergey M․
751e051557 [packtpub] Add support for subscription.packtpub.com (closes #18718) 2019-01-02 17:26:15 +07:00
Elan Ruusamäe
d226c560a6 Refactor code to use url_result 2019-01-01 23:56:05 +07:00
Sergey M
8437f5089f [README.md] Add long lines policy to coding conventions 2019-01-01 23:50:02 +07:00
v-delta
1d803085d7 [yourporn] Fix extraction 2019-01-01 23:26:59 +07:00
Sergey M
696f4e4114 [README.md] Add more guide lines for regular expressions 2019-01-01 23:13:39 +07:00
biwubo
0e713dbb11 [acast:channel] Add support for play.acast.com (closes #18587) 2019-01-01 22:48:06 +07:00
iwconfig
9b5c8751ee [extractors] Add missing age limits 2019-01-01 21:39:18 +07:00
Sergey M․
d9f1123c08 [rmcdecouverte] Improve, bypass geo restriction and add support for live (closes #18697) 2019-01-01 20:51:19 +07:00
Anarky
3d8eb6beb9 [rmcdecouverte] Update _VALID_URL (closes #18595) 2019-01-01 20:50:57 +07:00
Sergey M․
38d15ba7f9 [manyvids] Fix extraction (closes #18604, closes #18614) 2019-01-01 20:31:48 +07:00
Sergey M․
6b688b8942 [bitchute] Fix extraction (closes #18567) 2019-01-01 18:12:44 +07:00
Sergey M․
9d9daed464 release 2018.12.31 2018-12-31 23:59:52 +07:00
Sergey M․
32ac3d49ae [ChangeLog] Actualize
[ci skip]
2018-12-31 23:57:46 +07:00
Sergey M․
373941c5f0 [bbc] Add support for another embed pattern (closes #18643) 2018-12-31 23:20:40 +07:00
Sergey M․
4e1ddc8da9 [npo:live] Add support for npostart.nl (closes #18644) 2018-12-31 21:05:07 +07:00
Sergey M․
e4d51e751e [beeg] Fix extraction (closes #18610, closes #18626) 2018-12-31 20:59:53 +07:00
Sergey M․
c2dd2dc086 [youtube] Unescape HTML for series (closes #18641) 2018-12-31 19:57:01 +07:00
Remita Amine
140a13f5de [youtube] extract more format metadata 2018-12-26 10:56:10 +01:00
Remita Amine
825cd268a3 [youtube] detect DRM protected videos(#1774) 2018-12-26 10:56:10 +01:00
Sergey M․
63529e935c [youtube] Relax html5 player regexes (closes #18465, closes #18466) 2018-12-23 16:57:10 +07:00
Sergey M․
4273caf5c7 [youtube] Extend html5 player regex (closes #17516) 2018-12-23 16:40:16 +07:00
Remita Amine
e1a0628797 [liveleak] add support for another embed type and restore original format extraction 2018-12-20 23:22:51 +01:00
Remita Amine
835e45abab [crackle] extract ism and http formats 2018-12-19 22:08:01 +01:00
Elan Ruusamäe
904bb599be [README.md] Add flake8 instructions 2018-12-20 03:22:10 +07:00
Remita Amine
65e29cdac3 [twitter] pass referer with card request(closes #18579) 2018-12-18 22:49:01 +01:00
Sergey M․
4ee1845454 [mediasite] Extend _VALID_URL even more 2018-12-18 01:55:13 +07:00
Daan van Vugt
cfd13c4c45 [mediasite] Relax _VALID_URL 2018-12-18 00:03:00 +07:00
Sergey M․
386d1fea79 [lecturio] Add support for lecturio.de (closes #18562) 2018-12-17 23:43:12 +07:00
Remita Amine
7216e9bff7 [discovery] Add support for Scripps Networks watch domains(closes #17947) 2018-12-17 16:35:29 +01:00
Sergey M․
4cee62ade0 release 2018.12.17 2018-12-17 05:37:50 +07:00
Sergey M․
cbb3e4b14f [ChangeLog] Actualize
[ci skip]
2018-12-17 05:34:55 +07:00
Sergey M․
752582183a [ard:beta] Improve extraction robustness, fix subtitles extraction, improve geo restricted videos extraction 2018-12-17 05:29:59 +07:00
Sergey M․
1c82122741 [ard:beta] Relax _VALID_URL (closes #18441) 2018-12-17 04:51:57 +07:00
Sergey M․
50a498a68e [acast] Extend _VALID_URL 2018-12-17 04:32:59 +07:00
Tim Landscheidt
252e172dea [acast] Add support for embed.acast.com 2018-12-17 04:29:12 +07:00
yonaikerlol
90046d7761 [iprima] Relax _VALID_URL (closes #18515) 2018-12-17 04:10:36 +07:00
Remita Amine
c8b3751086 [vrv] fix initial state extraction 2018-12-16 14:29:18 +01:00
Sergey M․
21c340b83f [youtube] Fix mark watched (closes #18546) 2018-12-16 19:40:49 +07:00
Sergey M
c984196cf1 [README.md] Bind info dict URLs to a fixed blob (closes #18492) 2018-12-15 23:59:17 +07:00
Sergey M․
7f41a598b3 [safari] Add support for learning.oreilly.com (closes #18510) 2018-12-15 23:08:14 +07:00
Sergey M․
8fe104947d [youtube] Fix multifeed extraction (closes #18531) 2018-12-15 22:25:12 +07:00
Sergey M․
0a05cfabb6 [lecturio] Improve subtitles extraction (closes #18488) 2018-12-11 23:45:02 +07:00
Remita Amine
13e17cd28e [uol] fix format url extraction(closes 18480) 2018-12-10 15:02:54 +01:00
Sergey M․
102a4e54c5 [teachable] Remove debug output 2018-12-10 10:10:28 +07:00
Sergey M․
6e29458f24 [test/testdata/cookies/session_cookies.txt] Fix empty expires test data 2018-12-10 04:30:00 +07:00
Sergey M․
59c3940165 [ard:mediathek] Add support for classic.ardmediathek.de (closes #18473) 2018-12-10 01:37:10 +07:00
Sergey M․
cefe42c412 release 2018.12.09 2018-12-09 23:11:32 +07:00
Sergey M․
24cc64254c [ChangeLog] Actualize
[ci skip]
2018-12-09 23:08:16 +07:00
Sergey M․
9e02c2c704 [YoutubeDLCookieJar] Add test for keeping session cookies 2018-12-09 22:57:00 +07:00
Sergey M․
5ee7ae5c75 [teachable] Add support for teachable based platform sites (closes #5451, closes #18150, closes #18272) 2018-12-09 22:36:36 +07:00
Remita Amine
3ad6dabd33 [aenetworks] add support for History Vault(closes #18460) 2018-12-09 10:04:00 +01:00
Remita Amine
5f47a60c5d [imgur] improve gallery and album detection and extraction(closes #9133)(closes #16577)(closes #17223)(closes #18404) 2018-12-09 09:35:17 +01:00
Sergey M․
1bab343704 [YoutubeDL] Introduce YoutubeDLCookieJar and clarify the rationale behind session cookies (closes #12929) 2018-12-09 06:47:49 +07:00
aegamesi
1d88b3e6e6 [YoutubeDL] Recognize expires=0 as session cookies and send session cookies with requests 2018-12-09 06:05:37 +07:00
Sergey M․
9235b5091c [iprima] Relax _VALID_URL (closes #18453) 2018-12-09 00:01:11 +07:00
Remita Amine
c3c098dcf2 [hotstar] fix video data extraction(closes #18386) 2018-12-07 18:52:01 +01:00
Alexander Seiler
8c5879715f [ard:mediathek] Fix title and description extraction (closes #18349) 2018-12-07 03:41:02 +07:00
ealgase
ebb0449049 [xvideos] Switch to HTTPS (closes #18422) 2018-12-07 03:36:08 +07:00
Sergey M․
dfe0a3a9d2 [lecturio] Add extractor (closes #18405) 2018-12-07 03:27:11 +07:00
Sergey M․
c976873c5b [nrktv:series] Add support for extra materials 2018-12-07 00:54:58 +07:00
Sergey M․
15699ec8b0 [nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258) 2018-12-07 00:49:24 +07:00
Sergey M․
33cc1ea586 [nrktv] Relax _VALID_URL (closes #18304, closes #18387) 2018-12-07 00:00:06 +07:00
v-delta
ae9d77dab5 [yourporn] Fix extraction (closes #18424) 2018-12-06 23:24:35 +07:00
Remita Amine
8bb0c9cc16 [tbs] fix info extraction(fixes #18403) 2018-12-05 07:03:00 +01:00
Remita Amine
5547014ad9 [gamespot] add support reviews URLs 2018-12-02 20:01:56 +01:00
Sergey M․
ab896fa894 release 2018.12.03 2018-12-03 00:10:20 +07:00
Sergey M․
1fa59a928e [ChangeLog] Actualize
[ci skip]
2018-12-03 00:06:54 +07:00
Sergey M․
ce18a19be9 [tiktok] Improve extraction and add support for user pages (closes #18135) 2018-12-02 02:42:56 +07:00
Ken Swenson
1ead840d2c [tiktok] Add extractor (closes #18108) 2018-12-02 02:42:56 +07:00
Alexander Seiler
aa374bc78e [utils] Fix random_birthday to generate existing dates only 2018-12-02 00:05:15 +07:00
Sergey M․
3430ff9b07 [pornhub] Use actual URL host for requests (closes #18359) 2018-12-01 16:45:51 +07:00
Hakim Boyles
f012823082 [lynda] Fix authentication (closes #18158) 2018-11-30 01:20:27 +07:00
Jimm Stout
16597c2f94 [gfycat] Update API endpoint (closes #18333) 2018-11-30 01:07:07 +07:00
Sergey M․
adbbdefc81 [hotstar] Add support for alternative app state layout (closes #18320) 2018-11-30 00:48:15 +07:00
Alexander Seiler
053e5b12b2 [azmedien] Fix extraction (closes #18334) 2018-11-30 00:12:18 +07:00
Remita Amine
d9df8f120b [vimeo] extract VHX subtitles 2018-11-28 20:13:36 +01:00
Remita Amine
ca01d17884 [vimeo] Add support for VHX(Vimeo OTT)(#14835) 2018-11-28 19:53:45 +01:00
Alexander Seiler
d19600df07 [joj] Fix extraction (closes #18280) 2018-11-24 22:14:27 +07:00
Sergey M․
641e86e3cf [wistia] Add support for fast.wistia.com (closes #18287) 2018-11-24 21:47:41 +07:00
Sergey M․
6864855eb1 [tests] Fix invalid escape sequences 2018-11-23 00:43:42 +07:00
Sergey M․
d861a9d581 release 2018.11.23 2018-11-23 00:16:45 +07:00
Sergey M․
66173211c4 [ChangeLog] Actualize
[ci skip]
2018-11-23 00:14:43 +07:00
Remita Amine
6f2883a2df [mixcloud] base64 decode before decryption 2018-11-21 23:25:38 +01:00
Remita Amine
560020da30 [mixcloud] fallback to hardcoded decryption key(closes #18016) 2018-11-21 23:21:05 +01:00
Sergey M․
305ce767d5 [travis] Add python 3.8-dev build 2018-11-22 02:34:35 +07:00
Sergey M․
157eef3e63 [setup.py] Add python 3.8 classifier 2018-11-22 02:08:41 +07:00
Sergey M․
bd2d553c7b [travis] Add python 3.7 build 2018-11-22 02:01:39 +07:00
Sergey M․
af60e81e3c [setup.py] Add more relevant classifiers 2018-11-22 02:01:39 +07:00
Remita Amine
a843464a7e [nbc] fix NBCNews article extraction(closes #16194) 2018-11-21 12:10:06 +01:00
Remita Amine
6866f24494 [foxsports] update test 2018-11-21 12:08:46 +01:00
Remita Amine
4e33e0792a [loc] update test 2018-11-21 12:00:50 +01:00
Remita Amine
35328915b5 [foxsports] fix extraction(closes #17543) 2018-11-21 09:46:36 +01:00
Remita Amine
6c882aa899 [loc] relax _VALID_URL regex and improve formats extraction 2018-11-21 09:46:36 +01:00
Sergey M․
183417a50f [ciscolive:search] Add support for pagination 2018-11-21 06:10:43 +07:00
Sergey M․
6a6d7f0641 [ciscolive] Fix issues and improve extraction (closes #17984) 2018-11-21 06:10:39 +07:00
Austin de Coup-Crank
05bd5e9c77 [ciscolive] Add extractor 2018-11-21 06:10:30 +07:00
Alexander Seiler
15ed5a2784 [nzz] Relax kaltura regex 2018-11-21 02:50:40 +07:00
Remita Amine
2e1280ed43 [sixplay] fix format extraction 2018-11-19 18:15:51 +01:00
Remita Amine
8578ea4dcb [bitchute] use _html_search_regex for title extraction 2018-11-18 16:15:27 +01:00
Remita Amine
9b27a78a88 [kaltura] limit requested MediaEntry fields 2018-11-18 16:15:27 +01:00
Sergey M․
964b989dc8 [americastestkitchen] Add support for zype embeds (closes #18225) 2018-11-18 20:45:25 +07:00
Sergey M․
f97c099131 [pornhub] Move test to correct place 2018-11-18 11:14:46 +07:00
Sergey M․
1febf99da1 [pornhub] Add pornhub.net alias 2018-11-18 06:26:08 +07:00
Sergey M․
4167148fa4 [nova:embed] Fix extraction (closes #18222) 2018-11-18 01:11:10 +07:00
Sergey M․
5bb0479269 release 2018.11.18 2018-11-18 00:11:54 +07:00
Sergey M․
02df855e13 [ChangeLog] Actualize
[ci skip]
2018-11-18 00:07:40 +07:00
Sergey M․
006374e3ae [wwe] Fix issues, extract subtitles and add support for playlists (closes #14781, closes #17450) 2018-11-17 23:59:52 +07:00
mttronc
11d19ff503 [wwe] Add extractor 2018-11-17 23:59:46 +07:00
aviperes
a640c4d226 [vk] Detect geo restriction 2018-11-17 20:59:13 +07:00
Sergey M․
d0058c76d5 [openload] Use original host during extraction (closes #18211) 2018-11-17 16:59:20 +07:00
NeroBurner
0919cd4d01 [atvat] Fix extraction (closes #18041) 2018-11-17 00:18:50 +07:00
Sergey M․
2599956c9f [rte] Add support for new API endpoint (closes #18206) 2018-11-17 00:07:59 +07:00
Sergey M․
9b9b3501c5 [tnaflixnetwork:embed] Fix extraction (closes #18205) 2018-11-16 22:55:35 +07:00
Sergey M․
730c0d12a0 [picarto] Extract more metadata (closes #16518) 2018-11-11 16:17:59 +07:00
Patrick Griffis
f17a24a6df [picarto] Use API and add token support
This is just more reliable than trying to extract
it from the page itself.
2018-11-11 16:17:56 +07:00
Sergey M․
83852e57bf [zype] Add extractor (closes #18143) 2018-11-11 00:44:49 +07:00
Sergey M․
96a91b1551 [vivo] Fix extraction (closes #18139) 2018-11-10 23:37:27 +07:00
Sergey M․
cab26223bf [ruutu] Update API endpoint (closes #18138) 2018-11-10 15:26:04 +07:00
Sergey M․
532782ade1 release 2018.11.07 2018-11-07 01:38:25 +07:00
Sergey M․
f81d44aab6 [ChangeLog] Actualize
[ci skip]
2018-11-07 09:58:08 +07:00
Sergey M․
2511eee215 [youtube] Add another JS signature function name regex (closes #18091, closes #18093, closes #18094) 2018-11-07 09:55:59 +07:00
Remita Amine
0df514f07e [facebook] fix tahoe request(closes #17171) 2018-11-06 21:22:27 +01:00
Sergey M․
432cd48410 [cliphinter] Fix extraction (closes #18083) 2018-11-06 23:29:42 +07:00
Sergey M․
c0345b825f [youtube:playlist] Add support for invidio.us (closes #18077) 2018-11-05 19:08:39 +07:00
Sergey M․
2004e2210b [osnateltv] Update host 2018-11-05 17:09:57 +07:00
Sergey M․
16d896b2a7 [zattoo] Arrange API hosts for derived extractors (closes #18035) 2018-11-05 15:52:46 +07:00
Sergey M․
22e07ce502 [README.md] Improve documentation on safe metadata extraction and add more examples 2018-11-05 00:11:36 +07:00
Sergey M․
dbdaaa231a [youtube] Add fallback metadata extraction from videoDetails (closes #18052) 2018-11-03 06:26:16 +07:00
Sergey M․
38c32dbf19 release 2018.11.03 2018-11-03 02:57:48 +07:00
Sergey M․
a085410936 [ChangeLog] Actualize
[ci skip]
2018-11-03 02:56:14 +07:00
Sergey M․
6895ea4d3f [laola1tv:embed] Set correct stream access URL scheme (closes #16341) 2018-11-03 02:45:37 +07:00
Alexander Seiler
faac1c1f70 [ehftv] Add extractor (closes #15408) 2018-11-03 02:45:32 +07:00
Sergey M․
573531dcfb [azmedien] Simplify (closes #17746) 2018-11-03 01:33:42 +07:00
Alexander Seiler
da56fb631f [azmedien] Adopt to major site redesign (closes #17745) 2018-11-03 01:33:36 +07:00
Xiao Di Guan
95e42d7336 [extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447) 2018-11-03 01:18:20 +07:00
Sergey M․
cf0db4d997 [twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:28:25 +07:00
sichuan-pepper
036f905161 [twitcasting] Add extractor 2018-11-03 00:28:21 +07:00
Sergey M․
4b6aca17cc [orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) 2018-11-02 23:47:23 +07:00
Sebastian Haas
c620694c97 [orf:tvthek] Fix extraction (closes #17737)
use _extract_m3u8_formats and _extract_f4m_formats helper functions
closes #17737
2018-11-02 23:47:17 +07:00
yonaikerlol
061ea3a776 [openload] Add support for oload.fun 2018-11-02 23:08:41 +07:00
Sergey M․
c70ba664f1 [njpwworld] Fix authentication (closes #17427) 2018-11-01 01:40:02 +07:00
Sergey M․
f16679e843 [cnbc:video] Fix _VALID_URL (#17110) 2018-10-30 04:57:51 +07:00
Remita Amine
b14475724b [linkedin:learning:course] use url_transparent type for playlist entries 2018-10-29 21:49:12 +01:00
Remita Amine
aa7e974a2a [linkedin:learning] Add new extractor(closes #13545) 2018-10-29 19:28:29 +01:00
Sergey M․
9aac22c195 [theplatform] Improve error detection (#13222) 2018-10-30 00:22:44 +07:00
Sergey M․
94db1f7f3b [cnbc] Simplify extraction (closes #14280, closes #17110) 2018-10-29 23:55:55 +07:00
gfabiano
ffa7b2bfee [cbnc] Add support for new URL schema (closes #14193) 2018-10-29 23:54:14 +07:00
Sergey M․
2943397e87 [aparat] Improve extraction and extract more metadata (closes #17445, closes #18008) 2018-10-29 23:32:45 +07:00
Ali Irani
9c4a83a1be [aparat] Fix extraction 2018-10-29 23:31:24 +07:00
Sergey M․
9ff558f67f release 2018.10.29 2018-10-29 00:39:29 +07:00
Sergey M․
c2fe21efaa [ChangeLog] Actualize
[ci skip]
2018-10-29 00:38:06 +07:00
Sergey M․
476cf548e1 [sportbox] Improve extraction, add support for matchtv.ru and fix video id (closes #17978) 2018-10-29 00:21:50 +07:00
Sergey M․
bebef10909 [extractor/common] Add validation for JSON-LD URLs 2018-10-29 00:21:45 +07:00
Alexey Trofimov
4c237ab787 [sportbox] Fix extraction 2018-10-29 00:21:40 +07:00
Sergey M․
a1d1c63678 [screencast] Improve extraction (closes #14617, closes #17990) 2018-10-28 23:26:30 +07:00
sichuan-pepper
1fafb32984 [screencast] Fix extraction (closes #14590) 2018-10-28 23:26:30 +07:00
yonaikerlol
c901cc38e5 [openload] Add support for oload.icu 2018-10-28 22:51:29 +07:00
Sergey M․
022218f2f0 [ivi] Add support for ivi.tv 2018-10-28 22:49:10 +07:00
Sergey M․
08c7d3dade [crunchyroll] Improve extraction failsafeness (closes #17991) 2018-10-28 22:12:54 +07:00
Remita Amine
5e733b066a [dailymail] fix format extraction(closes #17976) 2018-10-26 05:41:57 +01:00
Remita Amine
7d9e858132 [viewster] reduce format requests 2018-10-26 05:40:49 +01:00
Remita Amine
b99b0bcfa0 [cwtv] handle api errors(closes #17905) 2018-10-17 06:22:56 +01:00
Sergey M․
baeabf7742 [rutube] Use geo verification headers (closes #17897) 2018-10-16 23:19:44 +07:00
Remita Amine
582797d780 [brightcove] remove unused variable 2018-10-15 20:47:12 +01:00
Remita Amine
160c2773f6 [brightcove:legacy] add another fall back to brightcove:new 2018-10-15 18:41:57 +01:00
Remita Amine
ee5fe42e44 [brightcove:legacy] fall back to brightcove:new(#13912) 2018-10-15 17:54:38 +01:00
Remita Amine
f0ee386851 [tv3] remove extractor(closes #10461)(closes #15339) 2018-10-15 16:26:29 +01:00
Remita Amine
a94e7c195e [ted] fix extraction for http and rtmp formats(closes #5941)(closes #17572)(closes #17894) 2018-10-15 11:52:01 +01:00
yonaikerlol
5d90a8a5f3 [openload] Add support for oload.cc 2018-10-07 20:05:45 +07:00
Remita Amine
19a352854f [patreon] extract post_file url(#17792) 2018-10-05 22:45:04 +01:00
Remita Amine
c9d891f19a [patreon] fix extraction(closes #14502)(closes #10471) 2018-10-05 20:11:01 +01:00
Sergey M․
d96f976b0c release 2018.10.05 2018-10-05 02:31:30 +07:00
Sergey M․
2e7ed29e34 [ChangeLog] Actualize
[ci skip]
2018-10-05 02:29:52 +07:00
Sergey M․
21c1a00dd7 [pluralsight] Improve authentication (closes #17762) 2018-10-05 02:27:14 +07:00
Sergey M․
0082f44a08 [dailymotion] Improve metadata extraction (closes #17706) 2018-10-05 02:03:25 +07:00
Enes
f60b9803a4 [dailymotion] Fix extraction (closes #17699) 2018-10-05 02:03:16 +07:00
Remita Amine
d98cb62e55 [crunchyroll] switch to HTTPS for RpcApi(closes #17749) 2018-10-02 19:43:06 +01:00
Remita Amine
05e7c184da [hotstar] fix extraction in python 2(closes #17696) 2018-10-02 06:07:43 +01:00
Sergey M․
66d106f270 [philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705) 2018-10-01 23:29:49 +07:00
Sergey M․
3c7da54c92 [jamendo] Add support for licensing.jamendo.com (closes #17724) 2018-10-01 22:05:18 +07:00
Enes
9795d93316 [openload] Add support for oload.cloud (closes #17710) 2018-10-01 21:48:59 +07:00
Sergey M․
365343131d [pluralsight] Fix subtitles extraction (closes #17726, closes #17728) 2018-10-01 21:45:24 +07:00
Sergey M․
85fa80d5f9 [vimeo] Add another config regex (closes #17690) 2018-10-01 21:13:43 +07:00
Remita Amine
245cbb33bc [spike] fix Paramount Network extraction(closes #17677) 2018-09-28 15:13:25 +01:00
Remita Amine
85cd69adcb [hotstar] fix extraction(closes #14694)(closes #14931)(closes #17637) 2018-09-26 08:13:36 +01:00
Sergey M․
4c89a675dd release 2018.09.26 2018-09-26 11:58:25 +07:00
Sergey M․
3d3499742c [ChangeLog] Actualize
[ci skip]
2018-09-26 11:56:15 +07:00
Sergey M․
c17e100b96 [pluralsight] Fix subtitles extraction (closes #17671) 2018-09-26 09:27:40 +07:00
Sergey M․
8fd12a0831 [mediaset] Improve embed support (closes #17668) 2018-09-26 05:38:41 +07:00
Sergey M․
60ce0c67fd [README.md] Document channel meta fields for output template 2018-09-25 23:43:41 +07:00
Sergey M․
cd5a74a28e [youtube] Add support for invidio.us (closes #17613) 2018-09-24 00:14:49 +07:00
Sergey M․
f6d7f7b474 [zattoo] Add support for more zattoo platform sites 2018-09-23 23:35:37 +07:00
Alexander Seiler
21160a1792 [zattoo] Fix extraction (closes #17175) 2018-09-23 21:34:47 +07:00
Sergey M․
4ac73fc170 [popcorntv] Remove debug output 2018-09-19 22:16:43 +07:00
Leonardo Taccari
28fcb7b061 [raiplay:playlist] Remove a debug leftover print() 2018-09-19 09:48:39 +07:00
245 changed files with 9210 additions and 3641 deletions

View File

@@ -6,12 +6,12 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.09.18
[debug] youtube-dl version 2019.04.07
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
@@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
- Single video: https://youtu.be/BaW_jenozKc
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
---
### Description of your *issue*, suggested solution and other information
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.

View File

@@ -6,12 +6,12 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
@@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
- Single video: https://youtu.be/BaW_jenozKc
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
---
### Description of your *issue*, suggested solution and other information
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.

View File

@@ -7,8 +7,8 @@
---
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:

View File

@@ -9,12 +9,23 @@ python:
- "3.6"
- "pypy"
- "pypy3"
sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
include:
- python: 3.7
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.7
dist: xenial
env: YTDL_TEST_SET=download
- python: 3.8-dev
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.8-dev
dist: xenial
env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
fast_finish: true

View File

@@ -42,11 +42,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're
### Is the issue already documented?
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -98,7 +98,7 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
@@ -150,18 +150,22 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
### Regular expressions
When using regular expressions try to write them fuzzy and flexible.
#### Don't capture groups you don't use
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
##### Example
Don't capture id attribute name here since you can't use it for anything anyway.
Correct:
```python
r'(?:id|ID)=(?P<id>\d+)'
```
Incorrect:
```python
r'(id|ID)=(?P<id>\d+)'
```
#### Make regular expressions relaxed and flexible
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
#### Example
##### Example
Say you need to extract `title` from the following HTML code:
@@ -294,7 +320,49 @@ title = self._search_regex(
webpage, 'title', group='title')
```
### Use safe conversion functions
### Long lines policy
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
Correct:
```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
Incorrect:
```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
Use `try_get` for safe metadata extraction from parsed JSON.
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```
##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

524
ChangeLog
View File

@@ -1,3 +1,527 @@
version 2019.04.07
Core
+ [downloader/external] Pass rtmp_conn to ffmpeg
Extractors
+ [ruutu] Add support for audio podcasts (#20473, #20545)
+ [xvideos] Extract all thumbnails (#20432)
+ [platzi] Add support for platzi.com (#20562)
* [dvtv] Fix extraction (#18514, #19174)
+ [vrv] Add basic support for individual movie links (#19229)
+ [bfi:player] Add support for player.bfi.org.uk (#19235)
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
* [youtube] Extract srv[1-3] subtitle formats (#20566)
* [adultswim] Fix extraction (#18025)
* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339)
* [adn] Fix subtitle compatibility with ffmpeg
* [adn] Fix extraction and add support for positioning styles (#20549)
* [vk] Use unique video id (#17848)
* [newstube] Fix extraction
* [rtl2] Actualize extraction
+ [adobeconnect] Add support for adobeconnect.com (#20283)
+ [gaia] Add support for authentication (#14605)
+ [mediasite] Add support for dashed ids and named catalogs (#20531)
version 2019.04.01
Core
* [utils] Improve int_or_none and float_or_none (#20403)
* Check for valid --min-sleep-interval when --max-sleep-interval is specified
(#20435)
Extractors
+ [weibo] Extend URL regular expression (#20496)
+ [xhamster] Add support for xhamster.one (#20508)
+ [mediasite] Add support for catalogs (#20507)
+ [teamtreehouse] Add support for teamtreehouse.com (#9836)
+ [ina] Add support for audio URLs
* [ina] Improve extraction
* [cwtv] Fix episode number extraction (#20461)
* [npo] Improve DRM detection
+ [pornhub] Add support for DASH formats (#20403)
* [svtplay] Update API endpoint (#20430)
version 2019.03.18
Core
* [extractor/common] Improve HTML5 entries extraction
+ [utils] Introduce parse_bitrate
* [update] Hide update URLs behind redirect
* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346)
Extractors
+ [yandexvideo] Add extractor
* [openload] Improve embed detection
+ [corus] Add support for bigbrothercanada.ca (#20357)
+ [orf:radio] Extract series (#20012)
+ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359)
- [anysex] Remove extractor (#19279)
+ [ciscolive] Add support for new URL schema (#20320, #20351)
+ [youtube] Add support for invidiou.sh (#20309)
- [anitube] Remove extractor (#20334)
- [ruleporn] Remove extractor (#15344, #20324)
* [npr] Fix extraction (#10793, #13440)
* [biqle] Fix extraction (#11471, #15313)
* [viddler] Modernize
* [moevideo] Fix extraction
* [primesharetv] Remove extractor
* [hypem] Modernize and extract more metadata (#15320)
* [veoh] Fix extraction
* [escapist] Modernize
- [videomega] Remove extractor (#10108)
+ [beeg] Add support for beeg.porn (#20306)
* [vimeo:review] Improve config url extraction and extract original format
(#20305)
* [fox] Detect geo restriction and authentication errors (#20208)
version 2019.03.09
Core
* [extractor/common] Use compat_etree_Element
+ [compat] Introduce compat_etree_Element
* [extractor/common] Fallback url to base URL for DASH formats
* [extractor/common] Do not fail on invalid data while parsing F4M manifest
in non fatal mode
* [extractor/common] Return MPD manifest as format's url meta field (#20242)
* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
Extractors
* [francetv:site] Relax video id regular expression (#20268)
* [toutv] Detect invalid login error
* [toutv] Fix authentication (#20261)
+ [urplay] Extract timestamp (#20235)
+ [openload] Add support for oload.space (#20246)
* [facebook] Improve uploader extraction (#20250)
* [bbc] Use compat_etree_Element
* [crunchyroll] Use compat_etree_Element
* [npo] Improve ISM extraction
* [rai] Improve extraction (#20253)
* [paramountnetwork] Fix mgid extraction (#20241)
* [libsyn] Improve extraction (#20229)
+ [youtube] Add more invidious instances to URL regular expression (#20228)
* [spankbang] Fix extraction (#20023)
* [espn] Extend URL regular expression (#20013)
* [sixplay] Handle videos with empty assets (#20016)
+ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
version 2019.03.01
Core
+ [downloader/external] Add support for rate limit and retries for wget
* [downloader/external] Fix infinite retries for curl (#19303)
Extractors
* [npo] Fix extraction (#20084)
* [francetv:site] Extend video id regex (#20029, #20071)
+ [periscope] Extract width and height (#20015)
* [servus] Fix extraction (#19297)
* [bbccouk] Make subtitles non fatal (#19651)
* [metacafe] Fix family filter bypass (#19287)
version 2019.02.18
Extractors
* [tvp:website] Fix and improve extraction
+ [tvp] Detect unavailable videos
* [tvp] Fix description extraction and make thumbnail optional
+ [linuxacademy] Add support for linuxacademy.com (#12207)
* [bilibili] Update keys (#19233)
* [udemy] Extend URL regular expressions (#14330, #15883)
* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
* [noovo] Fix extraction (#19230)
* [rai] Relax URL regular expression (#19232)
+ [vshare] Pass Referer to download request (#19205, #19221)
+ [openload] Add support for oload.live (#19222)
* [imgur] Use video id as title fallback (#18590)
+ [twitch] Add new source format detection approach (#19193)
* [tvplayhome] Fix video id extraction (#19190)
* [tvplayhome] Fix episode metadata extraction (#19190)
* [rutube:embed] Fix extraction (#19163)
+ [rutube:embed] Add support private videos (#19163)
+ [soundcloud] Extract more metadata
+ [trunews] Add support for trunews.com (#19153)
+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
version 2019.02.08
Core
* [utils] Improve JSON-LD regular expression (#18058)
* [YoutubeDL] Fallback to ie_key of matching extractor while making
download archive id when no explicit ie_key is provided (#19022)
Extractors
+ [malltv] Add support for mall.tv (#18058, #17856)
+ [spankbang:playlist] Add support for playlists (#19145)
* [spankbang] Extend URL regular expression
* [trutv] Fix extraction (#17336)
* [toutv] Fix authentication (#16398, #18700)
* [pornhub] Fix tags and categories extraction (#13720, #19135)
* [pornhd] Fix formats extraction
+ [pornhd] Extract like count (#19123, #19125)
* [radiocanada] Switch to the new media requests (#19115)
+ [teachable] Add support for courses.workitdaily.com (#18871)
- [vporn] Remove extractor (#16276)
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
+ [drtuber] Extract duration (#19078)
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
* [soundcloud] Update client id
* [drtv] Improve preference (#19079)
+ [openload] Add support for openload.pw and oload.pw (#18930)
+ [openload] Add support for oload.info (#19073)
* [crackle] Authorize media detail request (#16931)
version 2019.01.30.1
Core
* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
version 2019.01.30
Core
* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
subtitles (#19024, #19042)
* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
Extractors
* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
* [drtv] Improve extraction (#19039)
+ Add support for EncryptedUri videos
+ Extract more metadata
* Fix subtitles extraction
+ [fox] Add support for locked videos using cookies (#19060)
* [fox] Fix extraction for free videos (#19060)
+ [zattoo] Add support for tv.salt.ch (#19059)
version 2019.01.27
Core
+ [extractor/common] Extract season in _json_ld
* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
(#681)
Extractors
* [vice] Fix extraction for locked videos (#16248)
+ [wakanim] Detect DRM protected videos
+ [wakanim] Add support for wakanim.tv (#14374)
* [usatoday] Fix extraction for videos with custom brightcove partner id
(#18990)
* [drtv] Fix extraction (#18989)
* [nhk] Extend URL regular expression (#18968)
* [go] Fix Adobe Pass requests for Disney Now (#18901)
+ [openload] Add support for oload.club (#18969)
version 2019.01.24
Core
* [YoutubeDL] Fix negation for string operators in format selection (#18961)
version 2019.01.23
Core
* [utils] Fix urljoin for paths with non-http(s) schemes
* [extractor/common] Improve jwplayer relative URL handling (#18892)
+ [YoutubeDL] Add negation support for string comparisons in format selection
expressions (#18600, #18805)
* [extractor/common] Improve HLS video-only format detection (#18923)
Extractors
* [crunchyroll] Extend URL regular expression (#18955)
* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
#17197, #18338 #18842, #18899)
+ [vrv] Add support for authentication (#14307)
* [videomore:season] Fix extraction
* [videomore] Improve extraction (#18908)
+ [tnaflix] Pass Referer in metadata request (#18925)
* [radiocanada] Relax DRM check (#18608, #18609)
* [vimeo] Fix video password verification for videos protected by
Referer HTTP header
+ [hketv] Add support for hkedcity.net (#18696)
+ [streamango] Add support for fruithosts.net (#18710)
+ [instagram] Add support for tags (#18757)
+ [odnoklassniki] Detect paid videos (#18876)
* [ted] Correct acodec for HTTP formats (#18923)
* [cartoonnetwork] Fix extraction (#15664, #17224)
* [vimeo] Fix extraction for password protected player URLs (#18889)
version 2019.01.17
Extractors
* [youtube] Extend JS player signature function name regular expressions
(#18890, #18891, #18893)
version 2019.01.16
Core
+ [test/helper] Add support for maxcount and count collection len checkers
* [downloader/hls] Fix uplynk ad skipping (#18824)
* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
Extractors
* [youtube] Skip unsupported adaptive stream type (#18804)
+ [youtube] Extract DASH formats from player response (#18804)
* [funimation] Fix extraction (#14089)
* [skylinewebcams] Fix extraction (#18853)
+ [curiositystream] Add support for non app URLs
+ [bitchute] Check formats (#18833)
* [wistia] Extend URL regular expression (#18823)
+ [playplustv] Add support for playplus.com (#18789)
version 2019.01.10
Core
* [extractor/common] Use episode name as title in _json_ld
+ [extractor/common] Add support for movies in _json_ld
* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
(#18765)
+ [utils] Add language codes replaced in 1989 revision of ISO 639
to ISO639Utils (#18765)
Extractors
* [youtube] Extract live HLS URL from player response (#18799)
+ [outsidetv] Add support for outsidetv.com (#18774)
* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
+ [fox] Add support National Geographic (#17985, #15333, #14698)
+ [playplustv] Add support for playplus.tv (#18789)
* [globo] Set GLBID cookie manually (#17346)
+ [gaia] Add support for gaia.com (#14605)
* [youporn] Fix title and description extraction (#18748)
+ [hungama] Add support for hungama.com (#17402, #18771)
* [dtube] Fix extraction (#18741)
* [tvnow] Fix and rework extractors and prepare for a switch to the new API
(#17245, #18499)
* [carambatv:page] Fix extraction (#18739)
version 2019.01.02
Extractors
* [discovery] Use geo verification headers (#17838)
+ [packtpub] Add support for subscription.packtpub.com (#18718)
* [yourporn] Fix extraction (#18583)
+ [acast:channel] Add support for play.acast.com (#18587)
+ [extractors] Add missing age limits (#18621)
+ [rmcdecouverte] Add support for live stream
* [rmcdecouverte] Bypass geo restriction
* [rmcdecouverte] Update URL regular expression (#18595, 18697)
* [manyvids] Fix extraction (#18604, #18614)
* [bitchute] Fix extraction (#18567)
version 2018.12.31
Extractors
+ [bbc] Add support for another embed pattern (#18643)
+ [npo:live] Add support for npostart.nl (#18644)
* [beeg] Fix extraction (#18610, #18626)
* [youtube] Unescape HTML for series (#18641)
+ [youtube] Extract more format metadata
* [youtube] Detect DRM protected videos (#1774)
* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
* [youtube] Extend HTML5 player regular expression (#17516)
+ [liveleak] Add support for another embed type and restore original
format extraction
+ [crackle] Extract ISM and HTTP formats
+ [twitter] Pass Referer with card request (#18579)
* [mediasite] Extend URL regular expression (#18558)
+ [lecturio] Add support for lecturio.de (#18562)
+ [discovery] Add support for Scripps Networks watch domains (#17947)
version 2018.12.17
Extractors
* [ard:beta] Improve geo restricted videos extraction
* [ard:beta] Fix subtitles extraction
* [ard:beta] Improve extraction robustness
* [ard:beta] Relax URL regular expression (#18441)
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
* [iprima] Relax URL regular expression (#18515, #18540)
* [vrv] Fix initial state extraction (#18553)
* [youtube] Fix mark watched (#18546)
+ [safari] Add support for learning.oreilly.com (#18510)
* [youtube] Fix multifeed extraction (#18531)
* [lecturio] Improve subtitles extraction (#18488)
* [uol] Fix format URL extraction (#18480)
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
version 2018.12.09
Core
* [YoutubeDL] Keep session cookies in cookie file between runs
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
Extractors
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
+ [aenetworks] Add support for historyvault.com (#18460)
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
#17223, #18404)
* [iprima] Relax URL regular expression (#18453)
* [hotstar] Fix video data extraction (#18386)
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
* [xvideos] Switch to HTTPS (#18422, #18427)
+ [lecturio] Add support for lecturio.com (#18405)
+ [nrktv:series] Add support for extra materials
* [nrktv:season,series] Fix extraction (#17159, #17258)
* [nrktv] Relax URL regular expression (#18304, #18387)
* [yourporn] Fix extraction (#18424, #18425)
* [tbs] Fix info extraction (#18403)
+ [gamespot] Add support for review URLs
version 2018.12.03
Core
* [utils] Fix random_birthday to generate existing dates only (#18284)
Extractors
+ [tiktok] Add support for tiktok.com (#18108, #18135)
* [pornhub] Use actual URL host for requests (#18359)
* [lynda] Fix authentication (#18158, #18217)
* [gfycat] Update API endpoint (#18333, #18343)
+ [hotstar] Add support for alternative app state layout (#18320)
* [azmedien] Fix extraction (#18334, #18336)
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
* [joj] Fix extraction (#18280, #18281)
+ [wistia] Add support for fast.wistia.com (#18287)
version 2018.11.23
Core
+ [setup.py] Add more relevant classifiers
Extractors
* [mixcloud] Fallback to hardcoded decryption key (#18016)
* [nbc:news] Fix article extraction (#16194)
* [foxsports] Fix extraction (#17543)
* [loc] Relax regular expression and improve formats extraction
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
* [nzz] Relax kaltura regex (#18228)
* [sixplay] Fix formats extraction
* [bitchute] Improve title extraction
* [kaltura] Limit requested MediaEntry fields
+ [americastestkitchen] Add support for zype embeds (#18225)
+ [pornhub] Add pornhub.net alias
* [nova:embed] Fix extraction (#18222)
version 2018.11.18
Extractors
+ [wwe] Extract subtitles
+ [wwe] Add support for playlistst (#14781)
+ [wwe] Add support for wwe.com (#14781, #17450)
* [vk] Detect geo restriction (#17767)
* [openload] Use original host during extraction (#18211)
* [atvat] Fix extraction (#18041)
+ [rte] Add support for new API endpoint (#18206)
* [tnaflixnetwork:embed] Fix extraction (#18205)
* [picarto] Use API and add token support (#16518)
+ [zype] Add support for player.zype.com (#18143)
* [vivo] Fix extraction (#18139)
* [ruutu] Update API endpoint (#18138)
version 2018.11.07
Extractors
+ [youtube] Add another JS signature function name regex (#18091, #18093,
#18094)
* [facebook] Fix tahoe request (#17171)
* [cliphunter] Fix extraction (#18083)
+ [youtube:playlist] Add support for invidio.us (#18077)
* [zattoo] Arrange API hosts for derived extractors (#18035)
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
version 2018.11.03
Core
* [extractor/common] Ensure response handle is not prematurely closed before
it can be read if it matches expected_status (#17195, #17846, #17447)
Extractors
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
+ [ehftv] Add support for ehftv.com (#15408)
* [azmedien] Adopt to major site redesign (#17745, #17746)
+ [twitcasting] Add support for twitcasting.tv (#17981)
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
+ [openload] Add support for oload.fun (#18045)
* [njpwworld] Fix authentication (#17427)
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
* [theplatform] Improve error detection (#13222)
* [cnbc] Simplify extraction (#14280, #17110)
+ [cbnc] Add support for new URL schema (#14193)
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
* [aparat] Fix extraction
version 2018.10.29
Core
+ [extractor/common] Add validation for JSON-LD URLs
Extractors
+ [sportbox] Add support for matchtv.ru
* [sportbox] Fix extraction (#17978)
* [screencast] Fix extraction (#14590, #14617, #17990)
+ [openload] Add support for oload.icu
+ [ivi] Add support for ivi.tv
* [crunchyroll] Improve extraction failsafeness (#17991)
* [dailymail] Fix formats extraction (#17976)
* [viewster] Reduce format requests
* [cwtv] Handle API errors (#17905)
+ [rutube] Use geo verification headers (#17897)
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
- [tv3] Remove extractor (#10461, #15339)
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
+ [openload] Add support for oload.cc (#17823)
+ [patreon] Extract post_file URL (#17792)
* [patreon] Fix extraction (#14502, #10471)
version 2018.10.05
Extractors
* [pluralsight] Improve authentication (#17762)
* [dailymotion] Fix extraction (#17699)
* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
* [philharmoniedeparis] Fix extraction (#17705)
+ [jamendo] Add support for licensing.jamendo.com (#17724)
+ [openload] Add support for oload.cloud (#17710)
* [pluralsight] Fix subtitles extraction (#17726, #17728)
+ [vimeo] Add another config regular expression (#17690)
* [spike] Fix Paramount Network extraction (#17677)
* [hotstar] Fix extraction (#14694, #14931, #17637)
version 2018.09.26
Extractors
* [pluralsight] Fix subtitles extraction (#17671)
* [mediaset] Improve embed support (#17668)
+ [youtube] Add support for invidio.us (#17613)
+ [zattoo] Add support for more zattoo platform sites
* [zattoo] Fix extraction (#17175, #17542)
version 2018.09.18
Core

133
README.md
View File

@@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
@@ -43,7 +43,7 @@ Or with [MacPorts](https://www.macports.org/):
sudo port install youtube-dl
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
# DESCRIPTION
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title
@@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date (YYYYMMDD)
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
- `view_count` (numeric): How many users have watched the video on the platform
@@ -640,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
You can also use special names to select particular edge case formats:
- `best`: Select the best quality format represented by a single file with video and audio.
- `worst`: Select the worst quality format represented by a single file with video and audio.
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
@@ -656,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
- `filesize`: The number of bytes, if known in advance
- `width`: Width of the video, if known
- `height`: Height of the video, if known
@@ -665,7 +669,8 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
@@ -673,6 +678,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
@@ -681,7 +688,7 @@ You can merge the video and audio of two formats into a single file using `-f <v
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
@@ -732,7 +739,7 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
### How do I update youtube-dl?
If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you've followed [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
@@ -742,7 +749,7 @@ As a last resort, you can also uninstall the version installed by your package m
sudo apt-get remove -y youtube-dl
Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@@ -776,7 +783,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d
### I get HTTP error 402 when trying to download a video. What's this?
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
### Do I need any other programs?
@@ -841,7 +848,7 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
### What is this binary file? Where has the code gone?
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws an error due to missing `MSVCR100.dll`
@@ -900,7 +907,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
@@ -940,7 +947,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -968,7 +975,7 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
@@ -1020,18 +1027,22 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
@@ -1043,7 +1054,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
@@ -1051,7 +1062,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
@@ -1127,11 +1138,33 @@ title = meta.get('title') or self._og_search_title(webpage)
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
### Regular expressions
When using regular expressions try to write them fuzzy and flexible.
#### Don't capture groups you don't use
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
##### Example
Don't capture id attribute name here since you can't use it for anything anyway.
Correct:
```python
r'(?:id|ID)=(?P<id>\d+)'
```
Incorrect:
```python
r'(id|ID)=(?P<id>\d+)'
```
#### Make regular expressions relaxed and flexible
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
#### Example
##### Example
Say you need to extract `title` from the following HTML code:
@@ -1164,13 +1197,55 @@ title = self._search_regex(
webpage, 'title', group='title')
```
### Use safe conversion functions
### Long lines policy
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
Correct:
```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
Incorrect:
```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
Use `try_get` for safe metadata extraction from parsed JSON.
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```
##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
# EMBEDDING YOUTUBE-DL
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
@@ -1183,7 +1258,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@@ -1224,7 +1299,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
@@ -1270,11 +1345,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're
### Is the issue already documented?
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?

View File

@@ -322,7 +322,7 @@ class GITBuilder(GITInfoBuilder):
class YoutubeDLBuilder(object):
authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
def __init__(self, **kwargs):
if self.repoName != 'youtube-dl':

View File

@@ -27,8 +27,8 @@ from youtube_dl.utils import (
class GitHubReleaser(object):
_API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
_UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
_API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases'
_UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s'
_NETRC_MACHINE = 'github.com'
def __init__(self, debuglevel=0):

View File

@@ -10,7 +10,7 @@ import textwrap
atom_template = textwrap.dedent("""\
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
<link rel="self" href="http://ytdl-org.github.io/youtube-dl/update/releases.atom" />
<title>youtube-dl releases</title>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
<updated>@TIMESTAMP@</updated>
@@ -21,7 +21,7 @@ entry_template = textwrap.dedent("""
<entry>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
<title>New version @VERSION@</title>
<link href="http://rg3.github.io/youtube-dl" />
<link href="http://ytdl-org.github.io/youtube-dl" />
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>

View File

@@ -96,7 +96,7 @@ git push origin "$version"
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
read -p "VM running? (y/n) " -n 1
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"

View File

@@ -24,7 +24,7 @@ total_bytes = 0
for page in itertools.count(1):
releases = json.loads(compat_urllib_request.urlopen(
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page
).read().decode('utf-8'))
if not releases:

View File

@@ -28,12 +28,13 @@
- **acast:channel**
- **AddAnime**
- **ADN**: Anime Digital Network
- **AdobeConnect**
- **AdobeTV**
- **AdobeTVChannel**
- **AdobeTVShow**
- **AdobeTVVideo**
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- **afreecatv**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
@@ -44,9 +45,7 @@
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **anitube.se**
- **Anvato**
- **AnySex**
- **APA**
- **Aparat**
- **AppleConnect**
@@ -84,8 +83,6 @@
- **awaan:season**
- **awaan:video**
- **AZMedien**: AZ Medien videos
- **AZMedienPlaylist**: AZ Medien playlists
- **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频
- **bambuser**
- **bambuser:channel**
@@ -98,12 +95,14 @@
- **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist**
- **BBVTV**
- **Beatport**
- **Beeg**
- **BehindKink**
- **Bellator**
- **BellMedia**
- **Bet**
- **bfi:player**
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
@@ -164,6 +163,8 @@
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- **CiscoLiveSearch**
- **CiscoLiveSession**
- **CJSW**
- **cliphunter**
- **Clippit**
@@ -177,6 +178,7 @@
- **Clyp**
- **cmt.com**
- **CNBC**
- **CNBCVideo**
- **CNN**
- **CNNArticle**
- **CNNBlogs**
@@ -250,7 +252,9 @@
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
- **ehftv**
- **eHow**
- **EinsUndEinsTV**
- **Einthusan**
- **eitb.tv**
- **EllenTube**
@@ -268,6 +272,7 @@
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
- **EWETV**
- **ExpoTV**
- **Expressen**
- **ExtremeTube**
@@ -315,6 +320,7 @@
- **Fusion**
- **Fux**
- **FXNetworks**
- **Gaia**
- **GameInformer**
- **GameOne**
- **gameone:playlist**
@@ -327,6 +333,7 @@
- **Gfycat**
- **GiantBomb**
- **Giga**
- **GlattvisionTV**
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GloboArticle**
@@ -340,7 +347,6 @@
- **Groupon**
- **Hark**
- **hbo**
- **hbo:episode**
- **HearThisAt**
- **Heise**
- **HellPorno**
@@ -354,9 +360,10 @@
- **hitbox**
- **hitbox:live**
- **HitRecord**
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
- **hotstar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
@@ -364,18 +371,22 @@
- **HRTiPlaylist**
- **Huajiao**: 花椒直播
- **HuffPost**: Huffington Post
- **Hungama**
- **HungamaSong**
- **Hypem**
- **Iconosquare**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
- **ImgurAlbum**
- **imgur:album**
- **imgur:gallery**
- **Ina**
- **Inc**
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
- **instagram:tag**: Instagram hashtag search
- **instagram:user**: Instagram user profile
- **Internazionale**
- **InternetVideoArchive**
@@ -429,6 +440,9 @@
- **Le**: 乐视网
- **Learnr**
- **Lecture2Go**
- **Lecturio**
- **LecturioCourse**
- **LecturioDeCourse**
- **LEGO**
- **Lemonde**
- **Lenta**
@@ -441,6 +455,9 @@
- **limelight:channel**
- **limelight:channel_list**
- **LineTV**
- **linkedin:learning**
- **linkedin:learning:course**
- **LinuxAcademy**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
@@ -459,6 +476,7 @@
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **MakerTV**
- **MallTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
@@ -471,6 +489,8 @@
- **Medialaan**
- **Mediaset**
- **Mediasite**
- **MediasiteCatalog**
- **MediasiteNamedCatalog**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
@@ -494,6 +514,7 @@
- **Mixer:vod**
- **MLB**
- **Mnet**
- **MNetTV**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
- **Mojvideo**
@@ -525,10 +546,10 @@
- **Myvi**
- **MyVidster**
- **MyviEmbed**
- **MyVisionTV**
- **n-tv.de**
- **natgeo**
- **natgeo:episodeguide**
- **natgeo:video**
- **NationalGeographicTV**
- **Naver**
- **NBA**
- **NBC**
@@ -550,6 +571,7 @@
- **netease:program**: 网易云音乐 - 电台节目
- **netease:singer**: 网易云音乐 - 歌手
- **netease:song**: 网易云音乐
- **NetPlus**
- **Netzkino**
- **Newgrounds**
- **NewgroundsPlaylist**
@@ -626,6 +648,8 @@
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **OsnatelTV**
- **OutsideTV**
- **PacktPub**
- **PacktPubCourse**
- **PandaTV**: 熊猫TV
@@ -649,7 +673,10 @@
- **Piksel**
- **Pinkbike**
- **Pladform**
- **Platzi**
- **PlatziCourse**
- **play.fm**
- **PlayPlusTV**
- **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
@@ -674,7 +701,6 @@
- **PornoXO**
- **PornTube**
- **PressTV**
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
@@ -686,6 +712,7 @@
- **qqmusic:playlist**: QQ音乐 - 歌单
- **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜
- **QuantumTV**
- **Quickline**
- **QuicklineLive**
- **R7**
@@ -693,7 +720,7 @@
- **radio.de**
- **radiobremen**
- **radiocanada**
- **RadioCanadaAudioVideo**
- **radiocanada:audiovideo**
- **radiofrance**
- **RadioJavan**
- **Rai**
@@ -740,7 +767,6 @@
- **RTVS**
- **Rudo**
- **RUHD**
- **RulePorn**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
@@ -753,6 +779,8 @@
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
- **SAKTV**
- **SaltTV**
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@@ -802,13 +830,14 @@
- **southpark.nl**
- **southparkstudios.dk**
- **SpankBang**
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBoxEmbed**
- **SportBox**
- **SportDeutschland**
- **SpringboardPlatform**
- **Sprout**
@@ -839,10 +868,13 @@
- **TastyTrade**
- **TBS**
- **TDSLifeway**
- **Teachable**
- **TeachableCourse**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- **TeamTreeHouse**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
@@ -871,6 +903,8 @@
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
- **TikTok**
- **TikTokUser**
- **tinypic**: tinypic.com videos
- **TMZ**
- **TMZArticle**
@@ -884,6 +918,7 @@
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
- **TruNews**
- **TruTV**
- **Tube8**
- **TubiTv**
@@ -899,7 +934,6 @@
- **TV2**
- **tv2.hu**
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
@@ -913,7 +947,9 @@
- **TVNet**
- **TVNoe**
- **TVNow**
- **TVNowList**
- **TVNowAnnual**
- **TVNowNew**
- **TVNowSeason**
- **TVNowShow**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
@@ -921,6 +957,7 @@
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
- **TwitCasting**
- **twitch:chapter**
- **twitch:clips**
- **twitch:profile**
@@ -945,8 +982,6 @@
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
- **Upskill**
- **UpskillCourse**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USANetwork**
@@ -965,6 +1000,7 @@
- **VevoPlaylist**
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **vhx:embed**
- **Viafree**
- **vice**
- **vice:article**
@@ -976,7 +1012,6 @@
- **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- **VideoMega**
- **videomore**
- **videomore:season**
- **videomore:video**
@@ -1027,7 +1062,6 @@
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
@@ -1035,12 +1069,15 @@
- **vrv**
- **vrv:series**
- **VShare**
- **VTXTV**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
- **VyboryMos**
- **Vzaar**
- **Wakanim**
- **Walla**
- **WalyTV**
- **washingtonpost**
- **washingtonpost:article**
- **wat.tv**
@@ -1066,6 +1103,7 @@
- **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal
- **WSJArticle**
- **WWE**
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
@@ -1090,6 +1128,7 @@
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YandexVideo**
- **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
@@ -1125,3 +1164,4 @@
- **ZDF**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn
- **Zype**

View File

@@ -104,7 +104,7 @@ setup(
version=__version__,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
url='https://github.com/rg3/youtube-dl',
url='https://github.com/ytdl-org/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
maintainer='Sergey M.',
@@ -124,6 +124,8 @@ setup(
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
@@ -132,6 +134,13 @@ setup(
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: IronPython',
'Programming Language :: Python :: Implementation :: Jython',
'Programming Language :: Python :: Implementation :: PyPy',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},

View File

@@ -7,6 +7,7 @@ import json
import os.path
import re
import types
import ssl
import sys
import youtube_dl.extractor
@@ -152,15 +153,27 @@ def expect_value(self, got, expected, field):
isinstance(got, compat_str),
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
got = 'md5:' + md5(got)
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
self.assertTrue(
isinstance(got, (list, dict)),
'Expected field %s to be a list or a dict, but it is of type %s' % (
field, type(got).__name__))
expected_num = int(expected.partition(':')[2])
assertGreaterEqual(
op, _, expected_num = expected.partition(':')
expected_num = int(expected_num)
if op == 'mincount':
assert_func = assertGreaterEqual
msg_tmpl = 'Expected %d items in field %s, but only got %d'
elif op == 'maxcount':
assert_func = assertLessEqual
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
elif op == 'count':
assert_func = assertEqual
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
else:
assert False
assert_func(
self, len(got), expected_num,
'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
msg_tmpl % (expected_num, field, len(got)))
return
self.assertEqual(
expected, got,
@@ -236,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
self.assertTrue(got >= expected, msg)
def assertLessEqual(self, got, expected, msg=None):
if not (got <= expected):
if msg is None:
msg = '%r not less than or equal to %r' % (got, expected)
self.assertTrue(got <= expected, msg)
def assertEqual(self, got, expected, msg=None):
if not (got == expected):
if msg is None:
msg = '%r not equal to %r' % (got, expected)
self.assertTrue(got == expected, msg)
def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning
@@ -244,3 +271,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w)
ydl.report_warning = _report_warning
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]

View File

@@ -9,11 +9,30 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, expect_dict, expect_value
from youtube_dl.compat import compat_etree_fromstring
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
import threading
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
def do_GET(self):
if self.path == '/teapot':
self.send_response(TEAPOT_RESPONSE_STATUS)
self.send_header('Content-Type', 'text/html; charset=utf-8')
self.end_headers()
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
else:
assert False
class TestIE(InfoExtractor):
@@ -42,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
<meta content='Foo' property=og:foobar>
<meta name="og:test1" content='foo > < bar'/>
<meta name="og:test2" content="foo >//< bar"/>
<meta property=og-test3 content='Ill-formatted opengraph'/>
'''
self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -50,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
@@ -86,6 +107,184 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
def test_parse_html5_media_entries(self):
# from https://www.r18.com/
# with kpbs in label
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://www.r18.com/',
r'''
<video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
<source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps">
<source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps">
<source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps">
<p>Your browser does not support the video tag.</p>
</video>
''', None)[0],
{
'formats': [{
'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
'ext': 'mp4',
'format_id': '300kbps',
'height': 240,
'tbr': 300,
}, {
'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
'ext': 'mp4',
'format_id': '1000kbps',
'height': 480,
'tbr': 1000,
}, {
'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
'ext': 'mp4',
'format_id': '1500kbps',
'height': 740,
'tbr': 1500,
}],
'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg'
})
# from https://www.csfd.cz/
# with width and height
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://www.csfd.cz/',
r'''
<video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
<source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
<track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
</video>
''', None)[0],
{
'formats': [{
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
'ext': 'mp4',
'width': 640,
'height': 360,
}, {
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
'ext': 'mp4',
'width': 1280,
'height': 720,
}, {
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
'ext': 'mp4',
'width': 1920,
'height': 1080,
}, {
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
'ext': 'webm',
'width': 640,
'height': 360,
}, {
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
'ext': 'webm',
'width': 1280,
'height': 720,
}, {
'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
'ext': 'webm',
'width': 1920,
'height': 1080,
}],
'subtitles': {
'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}]
},
'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360'
})
# from https://tamasha.com/v/Kkdjw
# with height in label
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://tamasha.com/v/Kkdjw',
r'''
<video crossorigin="anonymous">
<source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
<source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4"
label="240p" res="240"/>
<source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4"
label="144p" res="144"/>
</video>
''', None)[0],
{
'formats': [{
'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
}, {
'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
'ext': 'mp4',
'format_id': '240p',
'height': 240,
}, {
'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
'ext': 'mp4',
'format_id': '144p',
'height': 144,
}]
})
# from https://www.directvnow.com
# with data-src
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://www.directvnow.com',
r'''
<video id="vid1" class="header--video-masked active" muted playsinline>
<source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
</video>
''', None)[0],
{
'formats': [{
'ext': 'mp4',
'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
}]
})
# from https://www.directvnow.com
# with data-src
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://www.directvnow.com',
r'''
<video id="vid1" class="header--video-masked active" muted playsinline>
<source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
</video>
''', None)[0],
{
'formats': [{
'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
'ext': 'mp4',
}]
})
# from https://www.klarna.com/uk/
# with data-video-src
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://www.directvnow.com',
r'''
<video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
<source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
</video>
''', None)[0],
{
'formats': [{
'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
'ext': 'mp4',
}],
})
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -180,7 +379,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
def test_parse_m3u8_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/11507
# https://github.com/ytdl-org/youtube-dl/issues/11507
# http://pluzz.francetv.fr/videos/le_ministere.html
'pluzz_francetv_11507',
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
@@ -242,7 +441,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
}]
),
(
# https://github.com/rg3/youtube-dl/issues/11995
# https://github.com/ytdl-org/youtube-dl/issues/11995
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
'teamcoco_11995',
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
@@ -316,7 +515,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
}]
),
(
# https://github.com/rg3/youtube-dl/issues/12211
# https://github.com/ytdl-org/youtube-dl/issues/12211
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
'toggle_mobile_12211',
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
@@ -478,7 +677,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 1280,
'height': 720,
}]
)
),
(
# https://github.com/ytdl-org/youtube-dl/issues/18923
# https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
'ted_18923',
'http://hls.ted.com/talks/31241.m3u8',
[{
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '600k-Audio',
'vcodec': 'none',
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '68',
'vcodec': 'none',
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '163',
'acodec': 'none',
'width': 320,
'height': 180,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '481',
'acodec': 'none',
'width': 512,
'height': 288,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '769',
'acodec': 'none',
'width': 512,
'height': 288,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '984',
'acodec': 'none',
'width': 512,
'height': 288,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '1255',
'acodec': 'none',
'width': 640,
'height': 360,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '1693',
'acodec': 'none',
'width': 853,
'height': 480,
}, {
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
'format_id': '2462',
'acodec': 'none',
'width': 1280,
'height': 720,
}]
),
]
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
@@ -492,11 +748,12 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
def test_parse_mpd_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/13919
# https://github.com/ytdl-org/youtube-dl/issues/13919
# Also tests duplicate representation ids, see
# https://github.com/rg3/youtube-dl/issues/15111
# https://github.com/ytdl-org/youtube-dl/issues/15111
'float_duration',
'http://unknown/manifest.mpd',
'http://unknown/manifest.mpd', # mpd_url
None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'm4a',
@@ -574,9 +831,10 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'height': 1080,
}]
), (
# https://github.com/rg3/youtube-dl/pull/14844
# https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only',
'http://unknown/manifest.mpd',
'http://unknown/manifest.mpd', # mpd_url
None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
@@ -655,22 +913,68 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 1920,
'height': 1080,
}]
), (
# https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains
# Initialization tag
'unfragmented',
'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url
'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url
[{
'url': 'https://v.redd.it/hw1x7rcg7zl21/audio',
'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
'ext': 'm4a',
'format_id': 'AUDIO-1',
'format_note': 'DASH audio',
'container': 'm4a_dash',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 129.87,
'asr': 48000,
}, {
'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240',
'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
'ext': 'mp4',
'format_id': 'VIDEO-2',
'format_note': 'DASH video',
'container': 'mp4_dash',
'acodec': 'none',
'vcodec': 'avc1.4d401e',
'tbr': 608.0,
'width': 240,
'height': 240,
'fps': 30,
}, {
'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360',
'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
'ext': 'mp4',
'format_id': 'VIDEO-1',
'format_note': 'DASH video',
'container': 'mp4_dash',
'acodec': 'none',
'vcodec': 'avc1.4d401e',
'tbr': 804.261,
'width': 360,
'height': 360,
'fps': 30,
}]
)
]
for mpd_file, mpd_url, expected_formats in _TEST_CASES:
for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
mpd_url=mpd_url)
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/14660
# https://github.com/ytdl-org/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
@@ -743,6 +1047,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i])
def test_response_with_expected_status_returns_content(self):
# Checks for mitigations against the effects of
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
# or the underlying `_download_webpage_handle` returning no content
# when a response matches `expected_status`.
httpd = compat_http_server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
port = http_server_port(httpd)
server_thread = threading.Thread(target=httpd.serve_forever)
server_thread.daemon = True
server_thread.start()
(content, urlh) = self.ie._download_webpage_handle(
'http://127.0.0.1:%d/teapot' % port, None,
expected_status=TEAPOT_RESPONSE_STATUS)
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
if __name__ == '__main__':
unittest.main()

View File

@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
def test_format_selection_string_ops(self):
formats = [
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
{'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
]
info_dict = _make_result(formats)
# equals (=)
ydl = YDL({'format': '[format_id=abc-cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')
# does not equal (!=)
ydl = YDL({'format': '[format_id!=abc-cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
# starts with (^=)
ydl = YDL({'format': '[format_id^=abc]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')
# does not start with (!^=)
ydl = YDL({'format': '[format_id!^=abc]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
# ends with ($=)
ydl = YDL({'format': '[format_id$=cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')
# does not end with (!$=)
ydl = YDL({'format': '[format_id!$=cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
# contains (*=)
ydl = YDL({'format': '[format_id*=bc-cb]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')
# does not contain (!*=)
ydl = YDL({'format': '[format_id!*=bc-cb]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
ydl = YDL({'format': '[format_id!*=-]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
@@ -341,7 +411,7 @@ class TestFormatSelection(unittest.TestCase):
# For extractors with incomplete formats (all formats are audio-only or
# video-only) best and worst should fallback to corresponding best/worst
# video-only or audio-only formats (as per
# https://github.com/rg3/youtube-dl/pull/5556)
# https://github.com/ytdl-org/youtube-dl/pull/5556)
formats = [
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
@@ -372,7 +442,7 @@ class TestFormatSelection(unittest.TestCase):
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
# See https://github.com/rg3/youtube-dl/issues/10083
# See https://github.com/ytdl-org/youtube-dl/issues/10083
formats = [
{'format_id': 'regular', 'height': 360, 'url': TEST_URL},
{'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
@@ -783,7 +853,7 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
# see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
import os
import re
import sys
import tempfile
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.utils import YoutubeDLCookieJar
class TestYoutubeDLCookieJar(unittest.TestCase):
def test_keep_session_cookies(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True)
tf = tempfile.NamedTemporaryFile(delete=False)
try:
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
temp = tf.read().decode('utf-8')
self.assertTrue(re.search(
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
self.assertTrue(re.search(
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
finally:
tf.close()
os.remove(tf.name)
def test_strip_httponly_prefix(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True)
def assert_cookie_has_value(key):
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
if __name__ == '__main__':
unittest.main()

View File

@@ -110,7 +110,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
# https://github.com/rg3/youtube-dl/issues/1930
# https://github.com/ytdl-org/youtube-dl/issues/1930
def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
@@ -119,12 +119,12 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
# https://github.com/ytdl-org/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701
# https://github.com/ytdl-org/youtube-dl/issues/2701
self.assertMatch(
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])

View File

@@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_getenv,
compat_setenv,
compat_etree_Element,
compat_etree_fromstring,
compat_expanduser,
compat_shlex_split,
@@ -39,7 +40,7 @@ class TestCompat(unittest.TestCase):
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
test_str = 'C:\Documents and Settings\тест\Application Data'
test_str = r'C:\Documents and Settings\тест\Application Data'
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
compat_setenv('HOME', old_home or '')
@@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
def test_compat_etree_Element(self):
try:
compat_etree_Element.items
except AttributeError:
self.fail('compat_etree_Element is not a type')
def test_compat_etree_fromstring(self):
xml = '''
<root foo="bar" spam="中文">

View File

@@ -9,26 +9,16 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import try_rm
from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
TEST_SIZE = 10 * 1024

View File

@@ -8,6 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import http_server_port
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
@@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass

View File

@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')

View File

@@ -33,11 +33,13 @@ from youtube_dl.utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
get_element_by_class,
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
InAdvancePagedList,
int_or_none,
intlist_to_bytes,
is_html,
js_to_json,
@@ -55,6 +57,7 @@ from youtube_dl.utils import (
parse_count,
parse_iso8601,
parse_resolution,
parse_bitrate,
pkcs1pad,
read_batch_urls,
sanitize_filename,
@@ -467,6 +470,21 @@ class TestUtil(unittest.TestCase):
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_float_or_none(self):
self.assertEqual(float_or_none('42.42'), 42.42)
self.assertEqual(float_or_none('42'), 42.0)
self.assertEqual(float_or_none(''), None)
self.assertEqual(float_or_none(None), None)
self.assertEqual(float_or_none([]), None)
self.assertEqual(float_or_none(set()), None)
def test_int_or_none(self):
self.assertEqual(int_or_none('42'), 42)
self.assertEqual(int_or_none(''), None)
self.assertEqual(int_or_none(None), None)
self.assertEqual(int_or_none([]), None)
self.assertEqual(int_or_none(set()), None)
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
@@ -507,6 +525,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(urljoin('http://foo.de/', ''), None)
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
@@ -1028,6 +1048,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_resolution('4k'), {'height': 2160})
self.assertEqual(parse_resolution('8K'), {'height': 4320})
def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None)
self.assertEqual(parse_bitrate(''), None)
self.assertEqual(parse_bitrate('300kbps'), 300)
self.assertEqual(parse_bitrate('1500kbps'), 1500)
self.assertEqual(parse_bitrate('300 kbps'), 300)
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))

View File

@@ -0,0 +1,6 @@
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE

View File

@@ -0,0 +1,6 @@
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value

28
test/testdata/m3u8/ted_18923.m3u8 vendored Normal file
View File

@@ -0,0 +1,28 @@
#EXTM3U
#EXT-X-VERSION:4
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400

28
test/testdata/mpd/unfragmented.mpd vendored Normal file
View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
<Period duration="PT54.915S">
<AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
<Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360">
<BaseURL>DASH_360</BaseURL>
<SegmentBase indexRange="915-1114" indexRangeExact="true">
<Initialization range="0-914"/>
</SegmentBase>
</Representation>
<Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240">
<BaseURL>DASH_240</BaseURL>
<SegmentBase indexRange="913-1112" indexRangeExact="true">
<Initialization range="0-912"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet>
<Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>audio</BaseURL>
<SegmentBase indexRange="832-1007" indexRangeExact="true">
<Initialization range="0-831"/>
</SegmentBase>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@@ -7,7 +7,7 @@
# https://github.com/zsh-users/antigen
# Install youtube-dl:
# antigen bundle rg3/youtube-dl
# antigen bundle ytdl-org/youtube-dl
# Bundles installed by antigen are available for use immediately.
# Update youtube-dl (and all other antigen bundles):

View File

@@ -82,12 +82,14 @@ from .utils import (
sanitize_url,
sanitized_Request,
std_headers,
str_or_none,
subtitles_filename,
UnavailableVideoError,
url_basename,
version_tuple,
write_json_file,
write_string,
YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
)
@@ -307,6 +309,8 @@ class YoutubeDL(object):
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg.
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
@@ -558,7 +562,7 @@ class YoutubeDL(object):
self.restore_console_title()
if self.params.get('cookiefile') is not None:
self.cookiejar.save()
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
@@ -887,7 +891,7 @@ class YoutubeDL(object):
# url_transparent. In such cases outer metadata (from ie_result)
# should be propagated to inner one (info). For this to happen
# _type of info should be overridden with url_transparent. This
# fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
# fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
if new_result.get('_type') == 'url':
new_result['_type'] = 'url_transparent'
@@ -1062,21 +1066,24 @@ class YoutubeDL(object):
if not m:
STR_OPERATORS = {
'=': operator.eq,
'!=': operator.ne,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(filter_spec)
if m:
comparison_value = m.group('value')
op = STR_OPERATORS[m.group('op')]
str_op = STR_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not str_op(attr, value)
else:
op = str_op
if not m:
raise ValueError('Invalid filter specification %r' % filter_spec)
@@ -1601,7 +1608,7 @@ class YoutubeDL(object):
# by extractor are incomplete or not (i.e. whether extractor provides only
# video-only or audio-only formats) for proper formats selection for
# extractors with such incomplete formats (see
# https://github.com/rg3/youtube-dl/pull/5556).
# https://github.com/ytdl-org/youtube-dl/pull/5556).
# Since formats may be filtered during format selection and may not match
# the original formats the results may be incorrect. Thus original formats
# or pre-calculated metrics should be passed to format selection routines
@@ -1609,7 +1616,7 @@ class YoutubeDL(object):
# We will pass a context object containing all necessary additional data
# instead of just formats.
# This fixes incorrect format selection issue (see
# https://github.com/rg3/youtube-dl/issues/10083).
# https://github.com/ytdl-org/youtube-dl/issues/10083).
incomplete_formats = (
# All formats are video-only or
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
@@ -1805,7 +1812,7 @@ class YoutubeDL(object):
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
@@ -2056,15 +2063,24 @@ class YoutubeDL(object):
self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
if not video_id:
return
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key')
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + ' ' + info_dict['id']
url = str_or_none(info_dict.get('url'))
if not url:
return
# Try to find matching extractor for the URL and take its ie_key
for ie in self._ies:
if ie.suitable(url):
extractor = ie.ie_key()
break
else:
return
return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -2072,7 +2088,7 @@ class YoutubeDL(object):
return False
vid_id = self._make_archive_id(info_dict)
if vid_id is None:
if not vid_id:
return False # Incomplete video information
try:
@@ -2215,7 +2231,7 @@ class YoutubeDL(object):
return
if type('') is not compat_str:
# Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
# Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
self.report_warning(
'Your Python is broken! Update to a newer and supported version')
@@ -2297,10 +2313,9 @@ class YoutubeDL(object):
self.cookiejar = compat_cookiejar.CookieJar()
else:
opts_cookiefile = expand_path(opts_cookiefile)
self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile)
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):
self.cookiejar.load()
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
@@ -2310,7 +2325,7 @@ class YoutubeDL(object):
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = PerRequestProxyHandler(proxies)
@@ -2323,7 +2338,7 @@ class YoutubeDL(object):
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
# https://github.com/rg3/youtube-dl/issues/8227)
# https://github.com/ytdl-org/youtube-dl/issues/8227)
file_handler = compat_urllib_request.FileHandler()
def file_open(*args, **kwargs):
@@ -2335,7 +2350,7 @@ class YoutubeDL(object):
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener

View File

@@ -48,7 +48,7 @@ from .YoutubeDL import YoutubeDL
def _real_main(argv=None):
# Compatibility fixes for Windows
if sys.platform == 'win32':
# https://github.com/rg3/youtube-dl/issues/820
# https://github.com/ytdl-org/youtube-dl/issues/820
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
workaround_optparse_bug9161()
@@ -166,6 +166,8 @@ def _real_main(argv=None):
if opts.max_sleep_interval is not None:
if opts.max_sleep_interval < 0:
parser.error('max sleep interval must be positive or 0')
if opts.sleep_interval is None:
parser.error('min sleep interval must be specified, use --min-sleep-interval')
if opts.max_sleep_interval < opts.sleep_interval:
parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:

View File

@@ -2364,7 +2364,7 @@ except ImportError: # Python 2
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
# is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
# is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
def compat_urllib_parse_unquote_to_bytes(string):
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder):
pass
try:
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
# the following will crash with:
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
isinstance(None, xml.etree.ElementTree.Element)
from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError: # Python <=2.6
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2819,7 +2828,7 @@ else:
compat_socket_create_connection = socket.create_connection
# Fix https://github.com/rg3/youtube-dl/issues/4223
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
op = optparse.OptionParser()
@@ -2944,7 +2953,7 @@ if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4,
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
# 2. https://github.com/rg3/youtube-dl/pull/4392
# 2. https://github.com/ytdl-org/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
real = ctypes.WINFUNCTYPE(*args, **kwargs)
@@ -2969,6 +2978,7 @@ __all__ = [
'compat_cookiejar',
'compat_cookies',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',

View File

@@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
cmd += self._option('--limit-rate', 'ratelimit')
cmd += self._option('--retry', 'retries')
retry = self._option('--retry', 'retries')
if len(retry) == 2:
if retry[1] in ('inf', 'infinite'):
retry[1] = '2147483647'
cmd += retry
cmd += self._option('--max-filesize', 'max_filesize')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--proxy', 'proxy')
@@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
if retry[1] in ('inf', 'infinite'):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
@@ -229,7 +239,7 @@ class FFmpegFD(ExternalFD):
# setting -seekable prevents ffmpeg from guessing if the server
# supports seeking(by adding the header `Range: bytes=0-`), which
# can cause problems in some cases
# https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
@@ -279,6 +289,7 @@ class FFmpegFD(ExternalFD):
tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn')
if player_url is not None:
args += ['-rtmp_swfverify', player_url]
if page_url is not None:
@@ -293,6 +304,11 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_flashver', flash_version]
if live:
args += ['-rtmp_live', 'live']
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
elif isinstance(conn, compat_str):
args += ['-rtmp_conn', conn]
args += ['-i', url, '-c', 'copy']
@@ -324,7 +340,7 @@ class FFmpegFD(ExternalFD):
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/rg3/youtube-dl/issues/8300).
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if sys.platform != 'win32':
proc.communicate(b'q')
raise

View File

@@ -324,8 +324,8 @@ class F4mFD(FragmentFD):
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/rg3/youtube-dl/issues/7823)
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
@@ -409,7 +409,7 @@ class F4mFD(FragmentFD):
# In tests, segments may be truncated, and thus
# FlvReader may not be able to parse the whole
# chunk. If so, write the segment as is
# See https://github.com/rg3/youtube-dl/issues/9214
# See https://github.com/ytdl-org/youtube-dl/issues/9214
dest_stream.write(down_data)
break
raise

View File

@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
def is_ad_fragment(s):
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
def is_ad_fragment_end(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
media_frags = 0
ad_frags = 0
ad_frag_next = False
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
if not line:
continue
if line.startswith('#'):
if is_ad_fragment(line):
ad_frags += 1
if is_ad_fragment_start(line):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
continue
if ad_frag_next:
ad_frag_next = False
ad_frags += 1
continue
media_frags += 1
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
if line:
if not line.startswith('#'):
if ad_frag_next:
ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
@@ -148,8 +152,8 @@ class HlsFD(FragmentFD):
except compat_urllib_error.HTTPError as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
# See https://github.com/rg3/youtube-dl/issues/10165,
# https://github.com/rg3/youtube-dl/issues/10448).
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
# https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif is_ad_fragment(line):
elif is_ad_fragment_start(line):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
self._finish_frag_download(ctx)

View File

@@ -111,7 +111,7 @@ class HttpFD(FileDownloader):
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:

View File

@@ -17,25 +17,15 @@ from ..utils import (
class ACastIE(InfoExtractor):
IE_NAME = 'acast'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:embed|www)\.)?acast\.com/|
play\.acast\.com/s/
)
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
'''
_TESTS = [{
# test with one bling
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
'md5': 'ada3de5a1e3a2a381327d749854788bb',
'info_dict': {
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
'ext': 'mp3',
'title': '"Where Are You?": Taipei 101, Taiwan',
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
'timestamp': 1196172000,
'upload_date': '20071127',
'duration': 211,
'creator': 'Concierge',
'series': 'Condé Nast Traveler Podcast',
'episode': '"Where Are You?": Taipei 101, Taiwan',
}
}, {
# test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
'info_dict': {
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
'series': 'Spår',
'episode': '2. Raggarmordet - Röster ur det förflutna',
}
}, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
'only_matching': True,
}, {
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
class ACastChannelIE(InfoExtractor):
IE_NAME = 'acast:channel'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
_TEST = {
'url': 'https://www.acast.com/condenasttraveler',
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?acast\.com/|
play\.acast\.com/s/
)
(?P<id>[^/#?]+)
'''
_TESTS = [{
'url': 'https://www.acast.com/todayinfocus',
'info_dict': {
'id': '50544219-29bb-499e-a083-6087f4cb7797',
'title': 'Condé Nast Traveler Podcast',
'description': 'md5:98646dee22a5b386626ae31866638fbd',
'id': '4efc5294-5385-4847-98bd-519799ce5786',
'title': 'Today in Focus',
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
},
'playlist_mincount': 20,
}
_API_BASE_URL = 'https://www.acast.com/api/'
'playlist_mincount': 35,
}, {
'url': 'http://play.acast.com/s/ft-banking-weekly',
'only_matching': True,
}]
_API_BASE_URL = 'https://play.acast.com/api/'
_PAGE_SIZE = 10
@classmethod
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
channel_slug, note='Download page %d of channel data' % page)
for cast in casts:
yield self.url_result(
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
'ACast', cast['id'])
def _real_extract(self, url):

View File

@@ -21,7 +21,6 @@ from ..utils import (
intlist_to_bytes,
long_to_bytes,
pkcs1pad,
srt_subtitles_timecode,
strip_or_none,
urljoin,
)
@@ -42,6 +41,18 @@ class ADNIE(InfoExtractor):
}
_BASE_URL = 'http://animedigitalnetwork.fr'
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
@staticmethod
def _ass_subtitles_timecode(seconds):
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
@@ -49,14 +60,14 @@ class ADNIE(InfoExtractor):
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
video_id, fatal=False)
video_id, 'Downloading subtitles data', fatal=False)
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
bytes_to_intlist(binascii.unhexlify(self._K + '083db5aebd9353b4')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
@@ -67,23 +78,27 @@ class ADNIE(InfoExtractor):
subtitles = {}
for sub_lang, sub in subtitles_json.items():
srt = ''
for num, current in enumerate(sub):
start, end, text = (
ssa = '''[Script Info]
ScriptType:V4.00
[V4 Styles]
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
[Events]
Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for current in sub:
start, end, text, line_align, position_align = (
float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')),
current.get('text'))
current.get('text'), current.get('lineAlign'),
current.get('positionAlign'))
if start is None or end is None or text is None:
continue
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
text,
os.linesep,
))
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
self._ass_subtitles_timecode(start),
self._ass_subtitles_timecode(end),
'{\\a%d}' % alignment if alignment != 2 else '',
text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
if sub_lang == 'vostf':
sub_lang = 'fr'
@@ -91,8 +106,8 @@ class ADNIE(InfoExtractor):
'ext': 'json',
'data': json.dumps(sub),
}, {
'ext': 'srt',
'data': srt,
'ext': 'ssa',
'data': ssa,
}])
return subtitles
@@ -100,7 +115,15 @@ class ADNIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex(
r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
r'playerConfig\s*=\s*({.+});', webpage,
'player config', default='{}'), video_id, fatal=False)
if not player_config:
config_url = urljoin(self._BASE_URL, self._search_regex(
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
webpage, 'config url'))
player_config = self._download_json(
config_url, video_id,
'Downloading player config JSON metadata')['player']
video_info = {}
video_info_str = self._search_regex(
@@ -129,12 +152,15 @@ class ADNIE(InfoExtractor):
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode()
links_data = self._download_json(
urljoin(self._BASE_URL, links_url), video_id, headers={
urljoin(self._BASE_URL, links_url), video_id,
'Downloading links JSON metadata', headers={
'Authorization': 'Bearer ' + authorization,
})
links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {}
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
sub_path = sub_path or links_data.get('subtitles') or \
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
sub_path += '&token=' + token
error = links_data.get('error')
title = metas.get('title') or video_info['title']
@@ -142,9 +168,11 @@ class ADNIE(InfoExtractor):
for format_id, qualities in links.items():
if not isinstance(qualities, dict):
continue
for load_balancer_url in qualities.values():
for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {}
load_balancer_url, video_id,
'Downloading %s %s JSON metadata' % (format_id, quality),
fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
continue

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
class AdobeConnectIE(InfoExtractor):
_VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = []
for con_string in qs['conStrings'][0].split(','):
formats.append({
'format_id': con_string.split('://')[0],
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'ext': 'flv',
'play_path': 'mp4:' + qs['streamName'][0],
'rtmp_conn': 'S:' + qs['ticket'][0],
'rtmp_live': is_live,
'url': con_string,
})
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'formats': formats,
'is_live': is_live,
}

View File

@@ -1,13 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .turner import TurnerBaseIE
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
mimetype2ext,
parse_age_limit,
parse_iso8601,
strip_or_none,
url_or_none,
try_get,
)
@@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE):
'ext': 'mp4',
'title': 'Rick and Morty - Pilot',
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
'timestamp': 1493267400,
'upload_date': '20170427',
'timestamp': 1543294800,
'upload_date': '20181127',
},
'params': {
# m3u8 download
@@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
}, {
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': {
@@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE):
}, {
'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': {
'id': 'b7A69dzfRzuaXIECdxW8XQ',
'id': 'attack-on-titan',
'title': 'Attack on Titan',
'description': 'md5:6c8e003ea0777b47013e894767f5e114',
'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
},
'playlist_mincount': 12,
}, {
@@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
}]
def _real_extract(self, url):
show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path
webpage = self._download_webpage(url, display_id)
initial_data = self._parse_json(self._search_regex(
r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
webpage, 'initial data'), display_id)
is_stream = show_path == 'streams'
if is_stream:
if not episode_path:
episode_path = 'live-stream'
video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
video_id = video_data.get('stream')
if not video_id:
entries = []
for episode in video_data.get('archiveEpisodes', []):
episode_url = url_or_none(episode.get('url'))
if not episode_url:
continue
entries.append(self.url_result(
episode_url, 'AdultSwim', episode.get('id')))
return self.playlist_result(
entries, video_data.get('id'), video_data.get('title'),
strip_or_none(video_data.get('description')))
query = '''query {
getShowBySlug(slug:"%s") {
%%s
}
}''' % show_path
if episode_path:
query = query % '''title
getVideoBySlug(slug:"%s") {
_id
auth
description
duration
episodeNumber
launchDate
mediaID
seasonNumber
poster
title
tvRating
}''' % episode_path
['getVideoBySlug']
else:
show_data = initial_data['show']
query = query % '''metaDescription
title
videos(first:1000,sort:["episode_number"]) {
edges {
node {
_id
slug
}
}
}'''
show_data = self._download_json(
'https://www.adultswim.com/api/search', display_id,
data=json.dumps({'query': query}).encode(),
headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
if episode_path:
video_data = show_data['getVideoBySlug']
video_id = video_data['_id']
episode_title = title = video_data['title']
series = show_data.get('title')
if series:
title = '%s - %s' % (series, title)
info = {
'id': video_id,
'title': title,
'description': strip_or_none(video_data.get('description')),
'duration': float_or_none(video_data.get('duration')),
'formats': [],
'subtitles': {},
'age_limit': parse_age_limit(video_data.get('tvRating')),
'thumbnail': video_data.get('poster'),
'timestamp': parse_iso8601(video_data.get('launchDate')),
'series': series,
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode': episode_title,
'episode_number': int_or_none(video_data.get('episodeNumber')),
}
if not episode_path:
entries = []
for video in show_data.get('videos', []):
slug = video.get('slug')
if not slug:
auth = video_data.get('auth')
media_id = video_data.get('mediaID')
if media_id:
info.update(self._extract_ngtv_info(media_id, {
# CDN_TOKEN_APP_ID from:
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
}, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': auth,
}))
if not auth:
extract_data = self._download_json(
'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
video_id, query={'fields': 'stream'}, fatal=False) or {}
assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
for asset in assets:
asset_url = asset.get('url')
if not asset_url:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
'AdultSwim', video.get('id')))
return self.playlist_result(
entries, show_data.get('id'), show_data.get('title'),
strip_or_none(show_data.get('metadata', {}).get('description')))
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
if ext == 'm3u8':
info['formats'].extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
elif ext == 'f4m':
continue
# info['formats'].extend(self._extract_f4m_formats(
# asset_url, video_id, f4m_id='hds', fatal=False))
elif ext in ('scc', 'ttml', 'vtt'):
info['subtitles'].setdefault('en', []).append({
'url': asset_url,
})
self._sort_formats(info['formats'])
video_data = show_data['sluggedVideo']
video_id = video_data['id']
info = self._extract_cvp_info(
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
},
}, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': video_data.get('auth'),
})
info.update({
'id': video_id,
'display_id': display_id,
'description': info.get('description') or strip_or_none(video_data.get('description')),
})
if not is_stream:
info.update({
'duration': info.get('duration') or int_or_none(video_data.get('duration')),
'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
'episode': info['title'],
'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
})
info['series'] = video_data.get('collection_title') or info.get('series')
if info['series'] and info['series'] != info['title']:
info['title'] = '%s - %s' % (info['series'], info['title'])
return info
return info
else:
entries = []
for edge in show_data.get('videos', {}).get('edges', []):
video = edge.get('node') or {}
slug = video.get('slug')
if not slug:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
'AdultSwim', video.get('_id')))
return self.playlist_result(
entries, show_path, show_data.get('title'),
strip_or_none(show_data.get('metaDescription')))

View File

@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?P<domain>
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/
(?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
specials/(?P<special_display_id>[^/]+)/full-special
specials/(?P<special_display_id>[^/]+)/full-special|
collections/[^/]+/(?P<collection_display_id>[^/]+)
)
'''
_TESTS = [{
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
}, {
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
}
def _real_extract(self, url):
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id or special_display_id
webpage = self._download_webpage(url, display_id)
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id or special_display_id or collection_display_id
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
if show_path:
url_parts = show_path.split('/')
url_parts_len = len(url_parts)

View File

@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
external_id = ep_data.get('external_id') or ep_meta['external_id']
zype_id = ep_meta.get('zype_id')
if zype_id:
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
ie_key = 'Zype'
else:
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
external_id = ep_data.get('external_id') or ep_meta['external_id']
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
ie_key = 'Kaltura'
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, external_id),
'ie_key': 'Kaltura',
'url': embed_url,
'ie_key': ie_key,
'title': title,
'description': description,
'thumbnail': thumbnail,

View File

@@ -1,30 +0,0 @@
from __future__ import unicode_literals
from .nuevo import NuevoBaseIE
class AnitubeIE(NuevoBaseIE):
IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.anitube.se/video/36621',
'md5': '59d0eeae28ea0bc8c05e7af429998d43',
'info_dict': {
'id': '36621',
'ext': 'mp4',
'title': 'Recorder to Randoseru 01',
'duration': 180.19,
},
'skip': 'Blocked in the US',
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
key = self._search_regex(
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
return self._extract_nuevo(
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id)

View File

@@ -1,61 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)
class AnySexIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://anysex.com/156592/',
'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
'info_dict': {
'id': '156592',
'ext': 'mp4',
'title': 'Busty and sexy blondie in her bikini strips for you',
'description': 'md5:de9e418178e2931c10b62966474e1383',
'categories': ['Erotic'],
'duration': 270,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
description = self._html_search_regex(
r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
categories = re.findall(
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex(
r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
'description': description,
'thumbnail': thumbnail,
'categories': categories,
'duration': duration,
'view_count': view_count,
'age_limit': 18,
}

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
merge_dicts,
mimetype2ext,
url_or_none,
)
@@ -12,59 +13,83 @@ from ..utils import (
class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
'info_dict': {
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
'age_limit': 0,
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
'duration': 231,
'timestamp': 1387394859,
'upload_date': '20131218',
'view_count': int,
},
# 'skip': 'Extremely unreliable',
}
}, {
# multiple formats
'url': 'https://www.aparat.com/v/8dflw/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id)
# Provides more metadata
webpage = self._download_webpage(url, video_id, fatal=False)
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
if not webpage:
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id)
file_list = self._parse_json(
options = self._parse_json(
self._search_regex(
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
'file list'),
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
webpage, 'options', group='value'),
video_id)
player = options['plugins']['sabaPlayerPlugin']
formats = []
for item in file_list[0]:
file_url = url_or_none(item.get('file'))
if not file_url:
continue
ext = mimetype2ext(item.get('type'))
label = item.get('label')
formats.append({
'url': file_url,
'ext': ext,
'format_id': label or ext,
'height': int_or_none(self._search_regex(
r'(\d+)[pP]', label or '', 'height', default=None)),
})
self._sort_formats(formats)
for sources in player['multiSRC']:
for item in sources:
if not isinstance(item, dict):
continue
file_url = url_or_none(item.get('src'))
if not file_url:
continue
item_type = item.get('type')
if item_type == 'application/vnd.apple.mpegurl':
formats.extend(self._extract_m3u8_formats(
file_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
ext = mimetype2ext(item.get('type'))
label = item.get('label')
formats.append({
'url': file_url,
'ext': ext,
'format_id': 'http-%s' % (label or ext),
'height': int_or_none(self._search_regex(
r'(\d+)[pP]', label or '', 'height',
default=None)),
})
self._sort_formats(
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
thumbnail = self._search_regex(
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
info = self._search_json_ld(webpage, video_id, default={})
return {
if not info.get('title'):
info['title'] = player['title']
return merge_dicts(info, {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'age_limit': self._family_friendly_search(webpage),
'thumbnail': url_or_none(options.get('poster')),
'duration': int_or_none(player.get('duration')),
'formats': formats,
}
})

View File

@@ -8,20 +8,23 @@ from .generic import GenericIE
from ..utils import (
determine_ext,
ExtractorError,
qualities,
int_or_none,
parse_duration,
qualities,
str_or_none,
try_get,
unified_strdate,
xpath_text,
unified_timestamp,
update_url_query,
url_or_none,
xpath_text,
)
from ..compat import compat_etree_fromstring
class ARDMediathekIE(InfoExtractor):
IE_NAME = 'ARD:mediathek'
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
# available till 26.07.2022
@@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
'only_matching': True,
}, {
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
def _extract_media_info(self, media_info_url, webpage, video_id):
media_info = self._download_json(
media_info_url, video_id, 'Downloading media JSON')
@@ -173,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
'description', webpage, 'meta description')
'description', webpage, 'meta description', default=None)
if description is None:
description = self._html_search_regex(
r'<p\s+class="teasertext">(.+?)</p>',
webpage, 'teaser text', default=None)
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL
@@ -288,7 +303,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(InfoExtractor):
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
_TESTS = [{
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
@@ -302,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
'upload_date': '20180826',
'ext': 'mp4',
},
}, {
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
@@ -318,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
'display_id': display_id,
}
formats = []
subtitles = {}
geoblocked = False
for widget in data.values():
if widget.get('_geoblocked'):
raise ExtractorError('This video is not available due to geoblocking', expected=True)
if widget.get('_geoblocked') is True:
geoblocked = True
if '_duration' in widget:
res['duration'] = widget['_duration']
res['duration'] = int_or_none(widget['_duration'])
if 'clipTitle' in widget:
res['title'] = widget['clipTitle']
if '_previewImage' in widget:
res['thumbnail'] = widget['_previewImage']
if 'broadcastedOn' in widget:
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
if 'synopsis' in widget:
res['description'] = widget['synopsis']
if '_subtitleUrl' in widget:
res['subtitles'] = {'de': [{
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
if subtitle_url:
subtitles.setdefault('de', []).append({
'ext': 'ttml',
'url': widget['_subtitleUrl'],
}]}
'url': subtitle_url,
})
if '_quality' in widget:
format_url = widget['_stream']['json'][0]
if format_url.endswith('.f4m'):
format_url = url_or_none(try_get(
widget, lambda x: x['_stream']['json'][0]))
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.11.0',
video_id, f4m_id='hds', fatal=False))
elif format_url.endswith('m3u8'):
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
format_url, video_id, 'mp4', m3u8_id='hls',
fatal=False))
else:
# HTTP formats are not available when geoblocked is True,
# other formats are fine though
if geoblocked:
continue
quality = str_or_none(widget.get('_quality'))
formats.append({
'format_id': 'http-' + widget['_quality'],
'format_id': ('http-' + quality) if quality else 'http',
'url': format_url,
'preference': 10, # Plain HTTP, that's nice
})
if not formats and geoblocked:
self.raise_geo_restricted(
msg='This video is not available due to geoblocking',
countries=['DE'])
self._sort_formats(formats)
res['formats'] = formats
res.update({
'subtitles': subtitles,
'formats': formats,
})
return res

View File

@@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor):
f_url, video_id, mpd_id=kind, fatal=False))
elif kind == 'silverlight':
# TODO: process when ism is supported (see
# https://github.com/rg3/youtube-dl/issues/8118)
# https://github.com/ytdl-org/youtube-dl/issues/8118)
continue
else:
tbr = float_or_none(f.get('Bitrate'), 1000)

View File

@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex(
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
webpage, 'player data')), display_id)['config']['initial_video']
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
webpage, 'player data', group='json')),
display_id)['config']['initial_video']
video_id = video_data['id']
video_title = video_data['title']

View File

@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
if SoundcloudIE.suitable(api_response['url']):
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
return {
'id': compat_str(api_response.get('id', album_url_tag)),

View File

@@ -1,213 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
)
class AZMedienBaseIE(InfoExtractor):
def _kaltura_video(self, partner_id, entry_id):
return self.url_result(
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
video_id=entry_id)
class AZMedienIE(AZMedienBaseIE):
class AZMedienIE(InfoExtractor):
IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
(?P<host>
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
[0-9]+-show-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
(?:
/[0-9]+-segment-(?:[^/\#]+\#)?|
\#
)|
\#
[^/]+/
(?P<id>
[^/]+-(?P<article_id>\d+)
)
(?P<id>[^\#]+)
(?:
\#video=
(?P<kaltura_id>
[_0-9a-z]+
)
)?
'''
_TESTS = [{
# URL with 'segment'
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'info_dict': {
'id': '1_2444peh4',
'id': '1_anruz3wy',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',
'upload_date': '20161218',
'timestamp': 1482084490,
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
'uploader_id': 'TVOnline',
'upload_date': '20180930',
'timestamp': 1538328802,
},
'params': {
'skip_download': True,
},
}, {
# URL with 'segment' and fragment:
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
'only_matching': True
}, {
# URL with 'episode' and fragment:
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
'only_matching': True
}, {
# URL with 'show' and fragment:
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
webpage, 'kaltura partner id')
entry_id = self._html_search_regex(
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
return self._kaltura_video(partner_id, entry_id)
class AZMedienPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?P<id>[0-9]+-
(?:
show|
topic|
themen
)-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
)?
)$
'''
_TESTS = [{
# URL with 'episode'
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
'info_dict': {
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
'title': 'News - Donnerstag, 15. Dezember 2016',
},
'playlist_count': 9,
}, {
# URL with 'themen'
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
'info_dict': {
'id': '258-themen-tele-m1-classics',
'title': 'Tele M1 Classics',
},
'playlist_mincount': 15,
}, {
# URL with 'topic', contains nested playlists
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
'only_matching': True,
}, {
# URL with 'show' only
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
'only_matching': True
}]
_PARTNER_ID = '1719221'
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
video_id = mobj.group('id')
entry_id = mobj.group('kaltura_id')
entries = []
if not entry_id:
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
payload = {
'query': '''query VideoContext($articleId: ID!) {
article: node(id: $articleId) {
... on Article {
mainAssetRelation {
asset {
... on VideoAsset {
kalturaId
}
}
}
}
}
}''',
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
}
json_data = self._download_json(
api_url, video_id, headers={
'Content-Type': 'application/json',
},
data=json.dumps(payload).encode())
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id', default=None)
if partner_id:
entries = [
self._kaltura_video(partner_id, m.group('id'))
for m in re.finditer(
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
if not entries:
entries = [
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
for m in re.finditer(
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
if not entries:
entries = [
# May contain nested playlists (e.g. [1]) thus no explicit
# ie_key
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
self.url_result(urljoin(url, m.group('url')))
for m in re.finditer(
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
title = self._search_regex(
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
webpage, 'title',
default=strip_or_none(get_element_by_id(
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
class AZMedienShowPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien show playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?:
all-episodes|
alle-episoden
)/
(?P<id>[^/?#&]+)
'''
_TEST = {
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
'info_dict': {
'id': 'astrotalk',
'title': 'TeleZüri: AstroTalk - alle episoden',
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
},
'playlist_mincount': 13,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episodes = get_element_by_class('search-mobile-box', webpage)
entries = [self.url_result(
urljoin(url, m.group('url'))) for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)
return self.url_result(
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
ie=KalturaIE.ie_key(), video_id=entry_id)

View File

@@ -23,7 +23,7 @@ class BambuserIE(InfoExtractor):
_TEST = {
'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
# MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
'info_dict': {
'id': '4050584',
@@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor):
},
'params': {
# It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
# caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
'skip_download': True,
},
}

View File

@@ -1,8 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import itertools
import re
from .common import InfoExtractor
from ..utils import (
@@ -17,10 +17,12 @@ from ..utils import (
parse_iso8601,
try_get,
unescapeHTML,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..compat import (
compat_etree_Element,
compat_HTTPError,
compat_urlparse,
)
@@ -206,7 +208,7 @@ class BBCCoUkIE(InfoExtractor):
},
'skip': 'Now it\'s really geo-restricted',
}, {
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
# compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
'info_dict': {
'id': 'p028bfkj',
@@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor):
def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
cc_url = url_or_none(connection.get('href'))
if not cc_url:
continue
captions = self._download_xml(
cc_url, programme_id, 'Downloading captions', fatal=False)
if not isinstance(captions, compat_etree_Element):
continue
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
subtitles[lang] = [
{
@@ -795,6 +803,15 @@ class BBCIE(BBCCoUkIE):
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
}, {
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
'info_dict': {
'id': 'p06w9tws',
'ext': 'mp4',
'title': 'md5:2fabf12a726603193a2879a055f72514',
'description': 'Learn English words and phrases from this story',
},
'add_ie': [BBCCoUkIE.ie_key()],
}]
@classmethod
@@ -945,6 +962,15 @@ class BBCIE(BBCCoUkIE):
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
webpage, 'group id', default=None)
if playlist_id:
return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key())
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,

View File

@@ -1,21 +1,16 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
urljoin,
unified_timestamp,
)
class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://beeg.com/5416503',
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'info_dict': {
@@ -29,36 +24,22 @@ class BeegIE(InfoExtractor):
'tags': list,
'age_limit': 18,
}
}
}, {
'url': 'https://beeg.porn/video/5416503',
'only_matching': True,
}, {
'url': 'https://beeg.porn/5416503',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
cpl_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
webpage, 'cpl', default=None, group='url')
cpl_url = urljoin(url, cpl_url)
beeg_version, beeg_salt = [None] * 2
if cpl_url:
cpl = self._download_webpage(
self._proto_relative_url(cpl_url), video_id,
'Downloading cpl JS', fatal=False)
if cpl:
beeg_version = int_or_none(self._search_regex(
r'beeg_version\s*=\s*([^\b]+)', cpl,
'beeg version', default=None)) or self._search_regex(
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
beeg_salt = self._search_regex(
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt')
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
beeg_version = self._search_regex(
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
default='1546225636701')
for api_path in ('', 'api.'):
video = self._download_json(
@@ -68,37 +49,6 @@ class BeegIE(InfoExtractor):
if video:
break
def split(o, e):
def cut(s, x):
n.append(s[:x])
return s[x:]
n = []
r = len(o) % e
if r > 0:
o = cut(o, r)
while len(o) > e:
o = cut(o, e)
n.append(o)
return n
def decrypt_key(key):
# Reverse engineered from http://static.beeg.com/cpl/1738.js
a = beeg_salt
e = compat_urllib_parse_unquote(key)
o = ''.join([
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
for n in range(len(e))])
return ''.join(split(o, 3)[::-1])
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
key = self._search_regex(
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
if not key:
return encrypted_url
return encrypted_url.replace(key, decrypt_key(key))
formats = []
for format_id, video_url in video.items():
if not video_url:
@@ -108,18 +58,20 @@ class BeegIE(InfoExtractor):
if not height:
continue
formats.append({
'url': decrypt_url(video_url),
'url': self._proto_relative_url(
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
'format_id': format_id,
'height': int(height),
})
self._sort_formats(formats)
title = video['title']
video_id = video.get('id') or video_id
video_id = compat_str(video.get('id') or video_id)
display_id = video.get('code')
description = video.get('desc')
series = video.get('ps_name')
timestamp = parse_iso8601(video.get('date'), ' ')
timestamp = unified_timestamp(video.get('date'))
duration = int_or_none(video.get('duration'))
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
@@ -129,6 +81,7 @@ class BeegIE(InfoExtractor):
'display_id': display_id,
'title': title,
'description': description,
'series': series,
'timestamp': timestamp,
'duration': duration,
'tags': tags,

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import extract_attributes
class BFIPlayerIE(InfoExtractor):
IE_NAME = 'bfi:player'
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
_TEST = {
'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
'info_dict': {
'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
'ext': 'mp4',
'title': 'Computer Doctor',
'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
},
'skip': 'BFI Player films cannot be played outside of the UK',
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
entries = []
for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
player_attr = extract_attributes(player_el)
ooyala_id = player_attr.get('data-video-id')
if not ooyala_id:
continue
entries.append(self.url_result(
'ooyala:' + ooyala_id, 'Ooyala',
ooyala_id, player_attr.get('data-label')))
return self.playlist_result(entries)

View File

@@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor):
}]
}]
_APP_KEY = '84956560bc028eb7'
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
_APP_KEY = 'iVGUTjsxvpLeuDCf'
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
def _report_error(self, result):
if 'message' in result:

View File

@@ -2,39 +2,96 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .vk import VKIE
from ..utils import (
HEADRequest,
int_or_none,
)
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
'url': 'http://www.biqle.ru/watch/847655_160197695',
'md5': 'ad5f746a874ccded7b8f211aeea96637',
# Youtube embed
'url': 'https://biqle.ru/watch/-115995369_456239081',
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'info_dict': {
'id': '160197695',
'id': '8v4f-avW-VI',
'ext': 'mp4',
'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
'uploader': 'Andrey Rogozin',
'upload_date': '20110605',
}
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
'description': 'Passe-Partout',
'uploader_id': 'mrsimpsonstef3',
'uploader': 'Phanolito',
'upload_date': '20120822',
},
}, {
'url': 'https://biqle.org/watch/-44781847_168547604',
'url': 'http://biqle.org/watch/-44781847_168547604',
'md5': '7f24e72af1db0edf7c1aaba513174f97',
'info_dict': {
'id': '168547604',
'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
'upload_date': '20140404',
'uploader_id': '47850140',
},
'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>',
webpage, 'embed url'))
if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)
self._request_webpage(
HEADRequest(embed_url), video_id, headers={'Referer': url})
video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
item = self._download_json(
'https://api.vk.com/method/video.get', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={
'access_token': access_token,
'sig': sig,
'v': 5.44,
'videos': video_id,
})['response']['items'][0]
title = item['title']
formats = []
for f_id, f_url in item.get('files', {}).items():
if f_id == 'external':
return self.url_result(f_url)
ext, height = f_id.split('_')
formats.append({
'format_id': height + 'p',
'url': f_url,
'height': int_or_none(height),
'ext': ext,
})
self._sort_formats(formats)
thumbnails = []
for k, v in item.items():
if k.startswith('photo_') and v:
width = k.replace('photo_', '')
thumbnails.append({
'id': width,
'url': v,
'width': int_or_none(width),
})
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': title,
'formats': formats,
'comment_count': int_or_none(item.get('comments')),
'description': item.get('description'),
'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('date')),
'uploader': item.get('owner_id'),
'view_count': int_or_none(item.get('views')),
}

View File

@@ -5,7 +5,10 @@ import itertools
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
from ..utils import (
orderedSet,
urlencode_postdata,
)
class BitChuteIE(InfoExtractor):
@@ -37,16 +40,22 @@ class BitChuteIE(InfoExtractor):
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
})
title = self._search_regex(
title = self._html_search_regex(
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'description', webpage, 'title',
default=None) or self._og_search_description(webpage)
format_urls = []
for mobj in re.finditer(
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
format_urls.append(mobj.group('url'))
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
formats = [
{'url': mobj.group('url')}
for mobj in re.finditer(
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
{'url': format_url}
for format_url in orderedSet(format_urls)]
self._check_formats(formats, video_id)
self._sort_formats(formats)
description = self._html_search_regex(

View File

@@ -1,8 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import base64
import json
import re
import struct
from .common import InfoExtractor
from .adobepass import AdobePassIE
@@ -124,7 +126,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'playlist_mincount': 7,
},
{
# playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
'info_dict': {
'id': '1522758701001',
@@ -153,10 +155,10 @@ class BrightcoveLegacyIE(InfoExtractor):
<object class="BrightcoveExperience">{params}</object>
"""
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
# Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
# Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
# remove namespace to simplify extraction
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
expected=True)
def _brightcove_new_url_result(self, publisher_id, video_id):
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
def _get_video_info(self, video_id, query, referer=None):
headers = {}
linkBase = query.get('linkBaseURL')
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
'error message', default=None)
if error_msg is not None:
publisher_id = query.get('publisherId')
if publisher_id and publisher_id[0].isdigit():
publisher_id = publisher_id[0]
if not publisher_id:
player_key = query.get('playerKey')
if player_key and ',' in player_key[0]:
player_key = player_key[0]
else:
player_id = query.get('playerID')
if player_id and player_id[0].isdigit():
player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
if player_page:
player_key = self._search_regex(
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
player_page, 'player key', fatal=False)
if player_key:
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id:
return self._brightcove_new_url_result(publisher_id, video_id)
raise ExtractorError(
'brightcove said: %s' % error_msg, expected=True)
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
else:
return ad_info
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % video_id)
if not info.get('url') and not info.get('formats'):
uploader_id = info.get('uploader_id')
if uploader_id:
info.update(self._brightcove_new_url_result(uploader_id, video_id))
else:
raise ExtractorError('Unable to extract video url for %s' % video_id)
return info

View File

@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
'only_matching': True,
'age_limit': 18
}]
def _real_extract(self, url):
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
'title': self._live_title(user_id),
'is_live': True,
'formats': formats,
'age_limit': 18
}

View File

@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
'duration': 1274,
'timestamp': 1528018608,
'upload_date': '20180603',
'age_limit': 18
},
'params': {
'skip_download': True,
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
'like_count': like_count,
'creator': creator,
'formats': formats,
'age_limit': 18
}

View File

@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
'comment_count': int,
'uploader': 'MileenaK',
'upload_date': '20160322',
'age_limit': 18,
},
'params': {
'skip_download': True,
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
'comment_count': comment_count,
'uploader': uploader,
'upload_date': upload_date,
'age_limit': 18
}

View File

@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
videomore_url = VideomoreIE._extract_url(webpage)
if not videomore_url:
videomore_id = self._search_regex(
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
default=None)
if videomore_id:
videomore_url = 'videomore:%s' % videomore_id
if videomore_url:
title = self._og_search_title(webpage)
return {

View File

@@ -1,20 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
from ..utils import int_or_none
class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
'title': 'Starfire the Cat Lady',
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
'title': 'How to Draw Upgrade',
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
return self._extract_cvp_info(
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
},
}, {
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
metadata_re = ''
if content_re:
metadata_re = r'|video_metadata\.content_' + content_re
return self._search_regex(
r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
webpage, name, fatal=fatal)
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
info = self._extract_ngtv_info(
media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
'auth_required': self._search_regex(
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
webpage, 'auth required', default='false') == 'true',
'auth_required': find_field('authType', 'auth type') != 'unauth',
})
series = find_field(
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
info.update({
'id': media_id,
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage),
'series': series,
'episode': title,
})
for field in ('season', 'episode'):
field_name = field + 'Number'
info[field + '_number'] = int_or_none(find_field(
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
return info

View File

@@ -360,7 +360,7 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
class CBCWatchIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch'
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
@@ -386,6 +386,9 @@ class CBCWatchIE(CBCWatchBaseIE):
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
}, {
'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -155,7 +155,7 @@ class CeskaTelevizeIE(InfoExtractor):
stream_formats = self._extract_mpd_formats(
stream_url, playlist_id,
mpd_id='dash-%s' % format_id, fatal=False)
# See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
# See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
if format_id == 'audioDescription':
for f in stream_formats:
f['source_preference'] = -10

View File

@@ -0,0 +1,151 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
clean_html,
float_or_none,
int_or_none,
try_get,
urlencode_postdata,
)
class CiscoLiveBaseIE(InfoExtractor):
# These appear to be constant across all Cisco Live presentations
# and are not tied to any user session or event
RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
HEADERS = {
'Origin': 'https://ciscolive.cisco.com',
'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
'rfWidgetId': RAINFOCUS_WIDGET_ID,
}
def _call_api(self, ep, rf_id, query, referrer, note=None):
headers = self.HEADERS.copy()
headers['Referer'] = referrer
return self._download_json(
self.RAINFOCUS_API_URL % ep, rf_id, note=note,
data=urlencode_postdata(query), headers=headers)
def _parse_rf_item(self, rf_item):
event_name = rf_item.get('eventName')
title = rf_item['title']
description = clean_html(rf_item.get('abstract'))
presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
bc_id = rf_item['videos'][0]['url']
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
location = try_get(rf_item, lambda x: x['times'][0]['room'])
if duration:
duration = duration * 60
return {
'_type': 'url_transparent',
'url': bc_url,
'ie_key': 'BrightcoveNew',
'title': title,
'description': description,
'duration': duration,
'creator': presenter_name,
'location': location,
'series': event_name,
}
class CiscoLiveSessionIE(CiscoLiveBaseIE):
_VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)'
_TESTS = [{
'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
'md5': 'c98acf395ed9c9f766941c70f5352e22',
'info_dict': {
'id': '5803694304001',
'ext': 'mp4',
'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
'description': 'md5:ec4a436019e09a918dec17714803f7cc',
'timestamp': 1530305395,
'upload_date': '20180629',
'uploader_id': '5647924234001',
'location': '16B Mezz.',
},
}, {
'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU',
'only_matching': True,
}, {
'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS',
'only_matching': True,
}]
def _real_extract(self, url):
rf_id = self._match_id(url)
rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
return self._parse_rf_item(rf_result['items'][0])
class CiscoLiveSearchIE(CiscoLiveBaseIE):
_VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)'
_TESTS = [{
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
'info_dict': {
'title': 'Search query',
},
'playlist_count': 5,
}, {
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
'only_matching': True,
}, {
'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
@staticmethod
def _check_bc_id_exists(rf_item):
return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
def _entries(self, query, url):
query['size'] = 50
query['from'] = 0
for page_num in itertools.count(1):
results = self._call_api(
'search', None, query, url,
'Downloading search JSON page %d' % page_num)
sl = try_get(results, lambda x: x['sectionList'][0], dict)
if sl:
results = sl
items = results.get('items')
if not items or not isinstance(items, list):
break
for item in items:
if not isinstance(item, dict):
continue
if not self._check_bc_id_exists(item):
continue
yield self._parse_rf_item(item)
size = int_or_none(results.get('size'))
if size is not None:
query['size'] = size
total = int_or_none(results.get('total'))
if total is not None and query['from'] + query['size'] > total:
break
query['from'] += query['size']
def _real_extract(self, url):
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
query['type'] = 'session'
return self.playlist_result(
self._entries(query, url), playlist_title='Search query')

View File

@@ -1,19 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
_translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
}
def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
from ..utils import (
int_or_none,
url_or_none,
)
class CliphunterIE(InfoExtractor):
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
formats = []
for format_id, f in gexo_files.items():
video_url = f.get('url')
video_url = url_or_none(f.get('url'))
if not video_url:
continue
fmt = f.get('fmt')
height = f.get('h')
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
formats.append({
'url': _decode(video_url),
'url': video_url,
'format_id': format_id,
'width': int_or_none(f.get('w')),
'height': int_or_none(height),

View File

@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
{'force_smil_url': True}),
'id': video_id,
}
class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': {
'id': '7000031301',
'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates",
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
'timestamp': 1531958400,
'upload_date': '20180719',
'uploader': 'NBCU-CNBC',
},
'params': {
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
'video id')
return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id,
CNBCIE.ie_key())

View File

@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return {
'_type': 'url',
'url': cnn_url,
'ie_key': CNNIE.ie_key(),
}
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return {
'_type': 'url',
'url': 'http://cnn.com/video/?/video/' + cnn_url,
'ie_key': CNNIE.ie_key(),
}
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())

View File

@@ -17,6 +17,7 @@ import math
from ..compat import (
compat_cookiejar,
compat_cookies,
compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
compat_integer_types,
@@ -43,6 +44,7 @@ from ..utils import (
compiled_regex_type,
determine_ext,
determine_protocol,
dict_get,
error_to_compat_str,
ExtractorError,
extract_attributes,
@@ -55,13 +57,16 @@ from ..utils import (
JSON_LD_RE,
mimetype2ext,
orderedSet,
parse_bitrate,
parse_codecs,
parse_duration,
parse_iso8601,
parse_m3u8_attributes,
parse_resolution,
RegexNotFoundError,
sanitized_Request,
sanitize_filename,
str_or_none,
unescapeHTML,
unified_strdate,
unified_timestamp,
@@ -69,6 +74,7 @@ from ..utils import (
update_url_query,
urljoin,
url_basename,
url_or_none,
xpath_element,
xpath_text,
xpath_with_ns,
@@ -101,10 +107,26 @@ class InfoExtractor(object):
from worst to best quality.
Potential fields:
* url Mandatory. The URL of the video file
* url The mandatory URL representing the media:
for plain file media - HTTP URL of this file,
for RTMP - RTMP URL,
for HLS - URL of the M3U8 media playlist,
for HDS - URL of the F4M manifest,
for DASH
- HTTP URL to plain file media (in case of
unfragmented media)
- URL of the MPD manifest or base URL
representing the media if MPD manifest
is parsed froma string (in case of
fragmented media)
for MSS - URL of the ISM manifest.
* manifest_url
The URL of the manifest file in case of
fragmented media (DASH, hls, hds)
fragmented media:
for HLS - URL of the M3U8 master playlist,
for HDS - URL of the F4M manifest,
for DASH - URL of the MPD manifest,
for MSS - URL of the ISM manifest.
* ext Will be calculated from URL if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
@@ -605,6 +627,11 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
# effects of <https://bugs.python.org/issue15002>
# introduced in Python 3.4.1.
err.fp._error = err
return err.fp
if errnote is False:
@@ -782,7 +809,7 @@ class InfoExtractor(object):
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
Return a tuple (xml as an compat_etree_Element, URL handle).
See _download_webpage docstring for arguments specification.
"""
@@ -803,7 +830,7 @@ class InfoExtractor(object):
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}, expected_status=None):
"""
Return the xml as an xml.etree.ElementTree.Element.
Return the xml as an compat_etree_Element.
See _download_webpage docstring for arguments specification.
"""
@@ -1052,7 +1079,7 @@ class InfoExtractor(object):
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
% {'prop': re.escape(prop)})
template = r'<meta[^>]+?%s[^>]+?%s'
return [
@@ -1213,10 +1240,10 @@ class InfoExtractor(object):
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
info.update({
'url': e.get('contentUrl'),
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),
@@ -1233,17 +1260,30 @@ class InfoExtractor(object):
if expected_type is not None and expected_type != item_type:
return info
if item_type in ('TVEpisode', 'Episode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': unescapeHTML(e.get('name')),
'episode': episode_name,
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
if not info.get('title') and episode_name:
info['title'] = episode_name
part_of_season = e.get('partOfSeason')
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
info.update({
'season': unescapeHTML(part_of_season.get('name')),
'season_number': int_or_none(part_of_season.get('seasonNumber')),
})
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Movie':
info.update({
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('dateCreated')),
})
elif item_type in ('Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
@@ -1421,7 +1461,7 @@ class InfoExtractor(object):
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source,
fatal=fatal)
@@ -1435,6 +1475,9 @@ class InfoExtractor(object):
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None):
if not isinstance(manifest, compat_etree_Element) and not fatal:
return []
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text:
@@ -1449,7 +1492,7 @@ class InfoExtractor(object):
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
# Remove unsupported DRM protected media from final formats
# rendition (see https://github.com/rg3/youtube-dl/issues/8573).
# rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
@@ -1579,7 +1622,8 @@ class InfoExtractor(object):
# References:
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
# 2. https://github.com/rg3/youtube-dl/issues/12211
# 2. https://github.com/ytdl-org/youtube-dl/issues/12211
# 3. https://github.com/ytdl-org/youtube-dl/issues/18923
# We should try extracting formats only from master playlists [1, 4.3.4],
# i.e. playlists that describe available qualities. On the other hand
@@ -1651,11 +1695,16 @@ class InfoExtractor(object):
rendition = stream_group[0]
return rendition.get('NAME') or stream_group_id
# parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
# chance to detect video only formats when EXT-X-STREAM-INF tags
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-MEDIA:'):
extract_media(line)
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
last_stream_inf = parse_m3u8_attributes(line)
elif line.startswith('#EXT-X-MEDIA:'):
extract_media(line)
elif line.startswith('#') or not line.strip():
continue
else:
@@ -2095,7 +2144,6 @@ class InfoExtractor(object):
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url,
'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
@@ -2116,7 +2164,7 @@ class InfoExtractor(object):
# First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
# https://github.com/rg3/youtube-dl/issues/16867).
# https://github.com/ytdl-org/youtube-dl/issues/16867).
t = ''
in_template = False
for c in tmpl:
@@ -2135,7 +2183,7 @@ class InfoExtractor(object):
# @initialization is a regular template like @media one
# so it should be handled just the same way (see
# https://github.com/rg3/youtube-dl/issues/11605)
# https://github.com/ytdl-org/youtube-dl/issues/11605)
if 'initialization' in representation_ms_info:
initialization_template = prepare_template(
'initialization',
@@ -2221,7 +2269,7 @@ class InfoExtractor(object):
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/rg3/youtube-dl/pull/14844
# https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
@@ -2234,10 +2282,14 @@ class InfoExtractor(object):
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media.
# No fragments key is present in this case.
# If there is a fragments key available then we correctly recognized fragmented media.
# Otherwise we will assume unfragmented media with direct access. Technically, such
# assumption is not necessarily correct since we may simply have no support for
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
if 'fragments' in representation_ms_info:
f.update({
# NB: mpd_url may be empty when MPD manifest is parsed from a string
'url': mpd_url or base_url,
'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
@@ -2248,11 +2300,15 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
# is not necessarily unique within a Period thus formats with
# the same `format_id` are quite possible. There are numerous examples
# of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
# https://github.com/rg3/youtube-dl/issues/13919)
# of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
# https://github.com/ytdl-org/youtube-dl/issues/13919)
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
@@ -2413,7 +2469,7 @@ class InfoExtractor(object):
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
@@ -2429,18 +2485,43 @@ class InfoExtractor(object):
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content):
source_attributes = extract_attributes(source_tag)
src = source_attributes.get('src')
s_attr = extract_attributes(source_tag)
# data-video-src and data-src are non standard but seen
# several times in the wild
src = dict_get(s_attr, ('src', 'data-video-src', 'data-src'))
if not src:
continue
f = parse_content_type(source_attributes.get('type'))
f = parse_content_type(s_attr.get('type'))
is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
# res attribute is not standard but seen several times
# in the wild
# width, height, res, label and title attributes are
# all not standard but seen several times in the wild
labels = [
s_attr.get(lbl)
for lbl in ('label', 'title')
if str_or_none(s_attr.get(lbl))
]
width = int_or_none(s_attr.get('width'))
height = (int_or_none(s_attr.get('height')) or
int_or_none(s_attr.get('res')))
if not width or not height:
for lbl in labels:
resolution = parse_resolution(lbl)
if not resolution:
continue
width = width or resolution.get('width')
height = height or resolution.get('height')
for lbl in labels:
tbr = parse_bitrate(lbl)
if tbr:
break
else:
tbr = None
f.update({
'height': int_or_none(source_attributes.get('res')),
'format_id': source_attributes.get('label'),
'width': width,
'height': height,
'tbr': tbr,
'format_id': s_attr.get('label') or s_attr.get('title'),
})
f.update(formats[0])
media_info['formats'].append(f)
@@ -2608,7 +2689,7 @@ class InfoExtractor(object):
'id': this_video_id,
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
@@ -2635,12 +2716,9 @@ class InfoExtractor(object):
for source in jwplayer_sources_data:
if not isinstance(source, dict):
continue
source_url = self._proto_relative_url(source.get('file'))
if not source_url:
continue
if base_url:
source_url = compat_urlparse.urljoin(base_url, source_url)
if source_url in urls:
source_url = urljoin(
base_url, self._proto_relative_url(source.get('file')))
if not source_url or source_url in urls:
continue
urls.append(source_url)
source_type = source.get('type') or ''

View File

@@ -36,7 +36,7 @@ class UnicodeBOMIE(InfoExtractor):
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
# Disable test for python 3.2 since BOM is broken in re in this version
# (see https://github.com/rg3/youtube-dl/issues/9751)
# (see https://github.com/ytdl-org/youtube-dl/issues/9751)
_TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
'only_matching': True,

View File

@@ -13,9 +13,9 @@ class CorusIE(ThePlatformFeedIE):
(?:www\.)?
(?P<domain>
(?:globaltv|etcanada)\.com|
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
(?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca
)
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
/(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
(?P<id>\d+)
'''
_TESTS = [{
@@ -42,6 +42,12 @@ class CorusIE(ThePlatformFeedIE):
}, {
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
'only_matching': True,
}, {
'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
'only_matching': True
}, {
'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
'only_matching': True
}]
_TP_FEEDS = {
@@ -73,6 +79,10 @@ class CorusIE(ThePlatformFeedIE):
'feed_id': '9H6qyshBZU3E',
'account_id': 2414426607,
},
'bigbrothercanada': {
'feed_id': 'ChQqrem0lNUp',
'account_id': 2269680845,
},
}
def _real_extract(self, url):

View File

@@ -1,7 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals, division
import hashlib
import hmac
import re
import time
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -48,6 +51,21 @@ class CrackleIE(InfoExtractor):
'only_matching': True,
}]
_MEDIA_FILE_SLOTS = {
'360p.mp4': {
'width': 640,
'height': 360,
},
'480p.mp4': {
'width': 768,
'height': 432,
},
'480p_1mbps.mp4': {
'width': 852,
'height': 480,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -59,13 +77,16 @@ class CrackleIE(InfoExtractor):
for country in countries:
try:
# Authorization generation algorithm is reverse engineered from:
# https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
media = self._download_json(
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
% (video_id, country), video_id,
'Downloading media JSON as %s' % country,
'Unable to download media JSON', query={
'disableProtocols': 'true',
'format': 'json'
media_detail_url, video_id, 'Downloading media JSON as %s' % country,
'Unable to download media JSON', headers={
'Accept': 'application/json',
'Authorization': '|'.join([h, timestamp, '117', '1']),
})
except ExtractorError as e:
# 401 means geo restriction, trying next country
@@ -95,6 +116,20 @@ class CrackleIE(InfoExtractor):
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif format_url.endswith('.ism/Manifest'):
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
mfs_path = e.get('Type')
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
if not mfs_info:
continue
formats.append({
'url': format_url,
'format_id': 'http-' + mfs_path.split('.')[0],
'width': mfs_info['width'],
'height': mfs_info['height'],
})
self._sort_formats(formats)
description = media.get('Description')

View File

@@ -11,6 +11,7 @@ from .common import InfoExtractor
from .vrv import VRVIE
from ..compat import (
compat_b64decode,
compat_etree_Element,
compat_etree_fromstring,
compat_urllib_parse_urlencode,
compat_urllib_request,
@@ -45,7 +46,7 @@ class CrunchyrollBaseIE(InfoExtractor):
data['req'] = 'RpcApi' + method
data = compat_urllib_parse_urlencode(data).encode('utf-8')
return self._download_xml(
'http://www.crunchyroll.com/xml/',
'https://www.crunchyroll.com/xml/',
video_id, note, fatal=False, data=data, headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
@@ -55,22 +56,11 @@ class CrunchyrollBaseIE(InfoExtractor):
if username is None:
return
self._download_webpage(
'https://www.crunchyroll.com/?a=formhandler',
None, 'Logging in', 'Wrong login info',
data=urlencode_postdata({
'formname': 'RpcApiUser_Login',
'next_url': 'https://www.crunchyroll.com/acct/membership',
'name': username,
'password': password,
}))
'''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
def is_logged(webpage):
return '<title>Redirecting' in webpage
return 'href="/logout"' in webpage
# Already logged in
if is_logged(login_page):
@@ -109,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
'''
def _real_initialize(self):
self._login()
@@ -118,7 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
# should be imposed or not (from what I can see it just takes the first language
# ignoring the priority and requires it to correspond the IP). By the way this causes
@@ -135,7 +124,7 @@ class CrunchyrollBaseIE(InfoExtractor):
# > This content may be inappropriate for some people.
# > Are you sure you want to continue?
# since it's not disabled by default in crunchyroll account's settings.
# See https://github.com/rg3/youtube-dl/issues/7202.
# See https://github.com/ytdl-org/youtube-dl/issues/7202.
qs['skip_wall'] = ['1']
return compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
@@ -143,7 +132,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -268,6 +257,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
'only_matching': True,
}]
_FORMAT_IDS = {
@@ -398,7 +390,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id,
})
if sub_doc is None:
if not isinstance(sub_doc, compat_etree_Element):
continue
sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@@ -515,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_quality': stream_quality,
'current_page': url,
})
if streamdata is not None:
if isinstance(streamdata, compat_etree_Element):
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None:
stream_infos.append(stream_info)
@@ -526,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
if stream_info is not None:
if isinstance(stream_info, compat_etree_Element):
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
@@ -598,10 +590,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
series = self._html_search_regex(
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
season = episode = episode_number = duration = thumbnail = None
if isinstance(metadata, compat_etree_Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
duration = float_or_none(media_metadata.get('duration'), 1000)
thumbnail = xpath_text(metadata, 'episode_image_url')
if not episode:
episode = media_metadata.get('title')
if not episode_number:
episode_number = int_or_none(media_metadata.get('episode_number'))
if not thumbnail:
thumbnail = media_metadata.get('thumbnail', {}).get('url')
season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -611,8 +615,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'id': video_id,
'title': video_title,
'description': video_description,
'duration': float_or_none(media_metadata.get('duration'), 1000),
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
'duration': duration,
'thumbnail': thumbnail,
'uploader': video_uploader,
'upload_date': video_upload_date,
'series': series,

View File

@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
self._handle_errors(result)
self._auth_token = result['message']['auth_token']
def _extract_media_info(self, media):
video_id = compat_str(media['id'])
class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
media = self._call_api('media/' + video_id, video_id)
title = media['title']
formats = []
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
}
class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
media = self._call_api('media/' + video_id, video_id)
return self._extract_media_info(media)
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection'
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
'id': '2',
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 12,
}
'playlist_mincount': 17,
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
}]
def _real_extract(self, url):
collection_id = self._match_id(url)
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'collections/' + collection_id, collection_id)
entries = []
for media in collection.get('media', []):
entries.append(self._extract_media_info(media))
media_id = compat_str(media.get('id'))
entries.append(self.url_result(
'https://curiositystream.com/video/' + media_id,
CuriosityStreamIE.ie_key(), media_id))
return self.playlist_result(
entries, collection_id,
collection.get('title'), collection.get('description'))

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
@@ -66,16 +67,19 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
data = self._download_json(
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
video_id)['video']
video_id)
if data.get('result') != 'ok':
raise ExtractorError(data['msg'], expected=True)
video_data = data['video']
title = video_data['title']
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
episode = episode.lstrip(season)
episode = episode[len(season):]
return {
'_type': 'url_transparent',

View File

@@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
video_sources = self._download_json(sources_url, video_id)
body = video_sources.get('body')
if body:
video_sources = body
formats = []
for rendition in video_sources['renditions']:

View File

@@ -22,7 +22,10 @@ from ..utils import (
parse_iso8601,
sanitized_Request,
str_to_int,
try_get,
unescapeHTML,
update_url_query,
url_or_none,
urlencode_postdata,
)
@@ -163,18 +166,33 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
webpage, 'comment count', default=None))
player_v5 = self._search_regex(
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
r'var\s+config\s*=\s*({.+?});',
# New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
# New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580)
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id)
metadata = player['metadata']
player = self._parse_json(player_v5, video_id, fatal=False) or {}
metadata = try_get(player, lambda x: x['metadata'], dict)
if not metadata:
metadata_url = url_or_none(try_get(
player, lambda x: x['context']['metadata_template_url1']))
if metadata_url:
metadata_url = metadata_url.replace(':videoId', video_id)
else:
metadata_url = update_url_query(
'https://www.dailymotion.com/player/metadata/video/%s'
% video_id, {
'embedder': url,
'integration': 'inline',
'GK_PV5_NEON': '1',
})
metadata = self._download_json(
metadata_url, video_id, 'Downloading metadata JSON')
if metadata.get('error', {}).get('type') == 'password_protected':
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
password = self._downloader.params.get('videopassword')
if password:
r = int(metadata['id'][1:], 36)

View File

@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
discovery|
investigationdiscovery|
discoverylife|
animalplanet|
ahctv|
destinationamerica|
sciencechannel|
tlc|
velocity
_VALID_URL = r'''(?x)https?://
(?P<site>
(?:www\.)?
(?:
discovery|
investigationdiscovery|
discoverylife|
animalplanet|
ahctv|
destinationamerica|
sciencechannel|
tlc|
velocity
)|
watch\.
(?:
hgtv|
foodnetwork|
travelchannel|
diynetwork|
cookingchanneltv|
motortrend
)
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
_TESTS = [{
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if not access_token:
access_token = self._download_json(
'https://www.%s.com/anonymous' % site, display_id, query={
'https://%s.com/anonymous' % site, display_id, query={
'authRel': 'authorization',
'client_id': try_get(
react_data, lambda x: x['application']['apiClientId'],
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
})['access_token']
try:
headers = self.geo_verification_headers()
headers['Authorization'] = 'Bearer ' + access_token
stream = self._download_json(
'https://api.discovery.com/v1/streaming/video/' + video_id,
display_id, headers={
'Authorization': 'Bearer ' + access_token,
})
display_id, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(

View File

@@ -114,7 +114,7 @@ class DreiSatIE(InfoExtractor):
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
# https://github.com/rg3/youtube-dl/issues/8665)
# https://github.com/ytdl-org/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(

View File

@@ -4,7 +4,9 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
NO_DEFAULT,
parse_duration,
str_to_int,
)
@@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
})
self._sort_formats(formats)
duration = int_or_none(video_data.get('duration')) or parse_duration(
video_data.get('duration_format'))
title = self._html_search_regex(
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
r'<title>([^<]+)\s*@\s+DrTuber',
@@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
'comment_count': comment_count,
'categories': categories,
'age_limit': self._rta_search(webpage),
'duration': duration,
}

View File

@@ -1,15 +1,25 @@
# coding: utf-8
from __future__ import unicode_literals
import binascii
import hashlib
import re
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import compat_urllib_parse_unquote
from ..utils import (
bytes_to_intlist,
ExtractorError,
int_or_none,
intlist_to_bytes,
float_or_none,
mimetype2ext,
parse_iso8601,
remove_end,
str_or_none,
unified_timestamp,
update_url_query,
url_or_none,
)
@@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor):
IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
'md5': '7ae17b4e18eb5d29212f424a7511c184',
'md5': '25e659cccc9a2ed956110a299fdf5983',
'info_dict': {
'id': 'klassen-darlig-taber-10',
'ext': 'mp4',
'title': 'Klassen - Dårlig taber (10)',
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
'timestamp': 1471991907,
'upload_date': '20160823',
'timestamp': 1539085800,
'upload_date': '20181009',
'duration': 606.84,
'series': 'Klassen',
'season': 'Klassen I',
'season_number': 1,
'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
'episode': 'Episode 10',
'episode_number': 10,
'release_year': 2016,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'info_dict': {
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
'ext': 'mp4',
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
'title': 'christiania pusher street ryddes drdkrjpo',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
@@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
# with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
'info_dict': {
'id': 'historien-om-danmark-stenalder',
'ext': 'mp4',
'title': 'Historien om Danmark: Stenalder (1)',
'title': 'Historien om Danmark: Stenalder',
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
'timestamp': 1490401996,
'upload_date': '20170325',
'duration': 3502.04,
'timestamp': 1546628400,
'upload_date': '20190104',
'duration': 3502.56,
'formats': 'mincount:20',
},
'params': {
@@ -74,20 +93,26 @@ class DRTVIE(InfoExtractor):
video_id = self._search_regex(
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
webpage, 'video id')
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
webpage, 'video id', default=None)
programcard = self._download_json(
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
video_id, 'Downloading video JSON')
data = programcard['Data'][0]
if not video_id:
video_id = compat_urllib_parse_unquote(self._search_regex(
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
webpage, 'urn'))
title = remove_end(self._og_search_title(
webpage, default=None), ' | TV | DR') or data['Title']
data = self._download_json(
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
video_id, 'Downloading video JSON', query={'expanded': 'true'})
title = str_or_none(data.get('Title')) or re.sub(
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
self._og_search_title(webpage))
description = self._og_search_description(
webpage, default=None) or data.get('Description')
timestamp = parse_iso8601(data.get('CreatedTime'))
timestamp = unified_timestamp(
data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
thumbnail = None
duration = None
@@ -97,24 +122,62 @@ class DRTVIE(InfoExtractor):
formats = []
subtitles = {}
for asset in data['Assets']:
assets = []
primary_asset = data.get('PrimaryAsset')
if isinstance(primary_asset, dict):
assets.append(primary_asset)
secondary_assets = data.get('SecondaryAssets')
if isinstance(secondary_assets, list):
for secondary_asset in secondary_assets:
if isinstance(secondary_asset, dict):
assets.append(secondary_asset)
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
def decrypt_uri(e):
n = int(e[2:10], 16)
a = e[10 + n:]
data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
key = bytes_to_intlist(hashlib.sha256(
('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
iv = bytes_to_intlist(hex_to_bytes(a))
decrypted = aes_cbc_decrypt(data, key, iv)
return intlist_to_bytes(
decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
for asset in assets:
kind = asset.get('Kind')
if kind == 'Image':
thumbnail = asset.get('Uri')
thumbnail = url_or_none(asset.get('Uri'))
elif kind in ('VideoResource', 'AudioResource'):
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
restricted_to_denmark = asset.get('RestrictedToDenmark')
asset_target = asset.get('Target')
for link in asset.get('Links', []):
uri = link.get('Uri')
if not uri:
encrypted_uri = link.get('EncryptedUri')
if not encrypted_uri:
continue
try:
uri = decrypt_uri(encrypted_uri)
except Exception:
self.report_warning(
'Unable to decrypt EncryptedUri', video_id)
continue
uri = url_or_none(uri)
if not uri:
continue
target = link.get('Target')
format_id = target or ''
preference = None
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
preference = -1
format_id += '-%s' % asset_target
elif asset_target == 'Default':
preference = 1
else:
preference = None
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
@@ -140,19 +203,22 @@ class DRTVIE(InfoExtractor):
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
LANGS = {
'Danish': 'da',
}
for subs in subtitles_list:
if not subs.get('Uri'):
continue
lang = subs.get('Language') or 'da'
subtitles.setdefault(LANGS.get(lang, lang), []).append({
'url': subs['Uri'],
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
})
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
if isinstance(subtitles_list, list):
LANGS = {
'Danish': 'da',
}
for subs in subtitles_list:
if not isinstance(subs, dict):
continue
sub_uri = url_or_none(subs.get('Uri'))
if not sub_uri:
continue
lang = subs.get('Language') or 'da'
subtitles.setdefault(LANGS.get(lang, lang), []).append({
'url': sub_uri,
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
})
if not formats and restricted_to_denmark:
self.raise_geo_restricted(
@@ -170,6 +236,13 @@ class DRTVIE(InfoExtractor):
'duration': duration,
'formats': formats,
'subtitles': subtitles,
'series': str_or_none(data.get('SeriesTitle')),
'season': str_or_none(data.get('SeasonTitle')),
'season_number': int_or_none(data.get('SeasonNumber')),
'season_id': str_or_none(data.get('SeasonUrn')),
'episode': str_or_none(data.get('EpisodeTitle')),
'episode_number': int_or_none(data.get('EpisodeNumber')),
'release_year': int_or_none(data.get('ProductionYear')),
}

View File

@@ -15,16 +15,16 @@ from ..utils import (
class DTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
_TEST = {
'url': 'https://d.tube/#!/v/benswann/zqd630em',
'md5': 'a03eaa186618ffa7a3145945543a251e',
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
'md5': '9f29088fa08d699a7565ee983f56a06e',
'info_dict': {
'id': 'zqd630em',
'id': 'x380jtr1',
'ext': 'mp4',
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
'description': 'md5:700d164e066b87f9eac057949e4227c2',
'uploader_id': 'benswann',
'upload_date': '20180222',
'timestamp': 1519328958,
'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
'description': 'md5:60be222088183be3a42f196f34235776',
'uploader_id': 'broncnutz',
'upload_date': '20190107',
'timestamp': 1546854054,
},
'params': {
'format': '480p',
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
def canonical_url(h):
if not h:
return None
return 'https://ipfs.io/ipfs/' + h
return 'https://video.dtube.top/ipfs/' + h
formats = []
for q in ('240', '480', '720', '1080', ''):

View File

@@ -10,16 +10,16 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
try_get,
unescapeHTML,
parse_iso8601,
)
class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/'
_VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
_TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
'md5': '67cb83e4a955d36e1b5d31993134a0c2',
@@ -28,11 +28,13 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
'duration': 1484,
'upload_date': '20141217',
'timestamp': 1418792400,
}
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e',
},
'playlist': [{
@@ -84,6 +86,8 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
'duration': 1103,
'upload_date': '20170511',
'timestamp': 1494514200,
},
'params': {
'skip_download': True,
@@ -91,43 +95,59 @@ class DVTVIE(InfoExtractor):
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
}, {
# Test live stream video (liveStarter) parsing
'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
'md5': '2e552e483f2414851ca50467054f9d5d',
'info_dict': {
'id': '8d116360288011e98c840cc47ab5f122',
'ext': 'mp4',
'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
'upload_date': '20190204',
'timestamp': 1549289591,
},
'params': {
# Video content is no longer available
'skip_download': True,
},
}]
def _parse_video_metadata(self, js, video_id, live_js=None):
def _parse_video_metadata(self, js, video_id, timestamp):
data = self._parse_json(js, video_id, transform_source=js_to_json)
if live_js:
data.update(self._parse_json(
live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title'])
live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
if live_starter:
data.update(live_starter)
formats = []
for video in data['sources']:
video_url = video.get('file')
if not video_url:
continue
video_type = video.get('type')
ext = determine_ext(video_url, mimetype2ext(video_type))
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif video_type == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
label = video.get('label')
height = self._search_regex(
r'^(\d+)[pP]', label or '', 'height', default=None)
format_id = ['http']
for f in (ext, label):
if f:
format_id.append(f)
formats.append({
'url': video_url,
'format_id': '-'.join(format_id),
'height': int_or_none(height),
})
for tracks in data.get('tracks', {}).values():
for video in tracks:
video_url = video.get('src')
if not video_url:
continue
video_type = video.get('type')
ext = determine_ext(video_url, mimetype2ext(video_type))
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif video_type == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
label = video.get('label')
height = self._search_regex(
r'^(\d+)[pP]', label or '', 'height', default=None)
format_id = ['http']
for f in (ext, label):
if f:
format_id.append(f)
formats.append({
'url': video_url,
'format_id': '-'.join(format_id),
'height': int_or_none(height),
})
self._sort_formats(formats)
return {
@@ -136,41 +156,29 @@ class DVTVIE(InfoExtractor):
'description': data.get('description'),
'thumbnail': data.get('image'),
'duration': int_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('pubtime')),
'timestamp': int_or_none(timestamp),
'formats': formats
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
timestamp = parse_iso8601(self._html_search_meta(
'article:published_time', webpage, 'published time', default=None))
# live content
live_item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
# single video
item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
if item:
return self._parse_video_metadata(item, video_id, live_item)
# playlist
items = re.findall(
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
webpage)
if not items:
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
if items:
return {
'_type': 'playlist',
'id': video_id,
'title': self._og_search_title(webpage),
'entries': [self._parse_video_metadata(i, video_id) for i in items]
}
return self.playlist_result(
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
video_id, self._html_search_meta('twitter:title', webpage))
item = self._search_regex(
r'(?s)BBXPlayer\.setup\((.+?)\);',
webpage, 'video', default=None)
if item:
# remove function calls (ex. htmldeentitize)
# TODO this should be fixed in a general way in the js_to_json
item = re.sub(r'\w+?\((.+)\)', r'\1', item)
return self._parse_video_metadata(item, video_id, timestamp)
raise ExtractorError('Could not find neither video nor playlist')

View File

@@ -1,14 +1,11 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -36,7 +33,7 @@ def _decrypt_config(key, string):
class EscapistIE(InfoExtractor):
_VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -61,6 +58,12 @@ class EscapistIE(InfoExtractor):
'duration': 304,
'uploader': 'The Escapist',
}
}, {
'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
'only_matching': True,
}, {
'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -74,19 +77,20 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID']
key = ims_video['hash']
config_req = sanitized_Request(
'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url)
config = self._download_webpage(config_req, video_id, 'Downloading video config')
config = self._download_webpage(
'http://www.escapistmagazine.com/videos/vidconfig.php',
video_id, 'Downloading video config', headers={
'Referer': url,
}, query={
'videoID': video_id,
'hash': key,
})
data = json.loads(_decrypt_config(key, config))
data = self._parse_json(_decrypt_config(key, config), video_id)
video_data = data['videoData']
title = clean_html(video_data['title'])
duration = float_or_none(video_data.get('duration'), 1000)
uploader = video_data.get('publisher')
formats = [{
'url': video['src'],
@@ -99,8 +103,9 @@ class EscapistIE(InfoExtractor):
'id': video_id,
'formats': formats,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
'description': self._og_search_description(webpage),
'duration': duration,
'uploader': uploader,
'duration': float_or_none(video_data.get('duration'), 1000),
'uploader': video_data.get('publisher'),
'series': video_data.get('show'),
}

View File

@@ -29,7 +29,8 @@ class ESPNIE(OnceIE):
(?:
.*?\?.*?\bid=|
/_/id/
)
)|
[^/]+/video/
)
)|
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
@@ -94,6 +95,9 @@ class ESPNIE(OnceIE):
}, {
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
'only_matching': True,
}, {
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -20,6 +20,7 @@ from .acast import (
)
from .addanime import AddAnimeIE
from .adn import ADNIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (
AdobeTVIE,
AdobeTVShowIE,
@@ -38,9 +39,7 @@ from .alphaporno import AlphaPornoIE
from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anvato import AnvatoIE
from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
@@ -88,11 +87,7 @@ from .awaan import (
AWAANLiveIE,
AWAANSeasonIE,
)
from .azmedien import (
AZMedienIE,
AZMedienPlaylistIE,
AZMedienShowPlaylistIE,
)
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
@@ -112,6 +107,7 @@ from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
from .beatport import BeatportIE
from .bet import BetIE
from .bfi import BFIPlayerIE
from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import (
@@ -198,6 +194,10 @@ from .chirbit import (
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
from .ciscolive import (
CiscoLiveSessionIE,
CiscoLiveSearchIE,
)
from .cjsw import CJSWIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
@@ -209,7 +209,10 @@ from .cloudy import CloudyIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
from .cnbc import CNBCIE
from .cnbc import (
CNBCIE,
CNBCVideoIE,
)
from .cnn import (
CNNIE,
CNNBlogsIE,
@@ -408,6 +411,7 @@ from .funk import (
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
from .gaia import GaiaIE
from .gameinformer import GameInformerIE
from .gameone import (
GameOneIE,
@@ -438,16 +442,14 @@ from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hbo import (
HBOIE,
HBOEpisodeIE,
)
from .hbo import HBOIE
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
@@ -466,6 +468,10 @@ from .hrti import (
)
from .huajiao import HuajiaoIE
from .huffpost import HuffPostIE
from .hungama import (
HungamaIE,
HungamaSongIE,
)
from .hypem import HypemIE
from .iconosquare import IconosquareIE
from .ign import (
@@ -480,12 +486,17 @@ from .imdb import (
from .imgur import (
ImgurIE,
ImgurAlbumIE,
ImgurGalleryIE,
)
from .ina import InaIE
from .inc import IncIE
from .indavideo import IndavideoEmbedIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .instagram import (
InstagramIE,
InstagramUserIE,
InstagramTagIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
@@ -540,6 +551,7 @@ from .la7 import LA7IE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
EHFTVIE,
ITTFIE,
)
from .lci import LCIIE
@@ -549,6 +561,11 @@ from .lcp import (
)
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE
from .lecturio import (
LecturioIE,
LecturioCourseIE,
LecturioDeCourseIE,
)
from .leeco import (
LeIE,
LePlaylistIE,
@@ -569,6 +586,11 @@ from .limelight import (
LimelightChannelListIE,
)
from .line import LineTVIE
from .linkedin import (
LinkedInLearningIE,
LinkedInLearningCourseIE,
)
from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .liveleak import (
LiveLeakIE,
@@ -595,6 +617,7 @@ from .mailru import (
MailRuMusicSearchIE,
)
from .makertv import MakerTVIE
from .malltv import MallTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
@@ -608,7 +631,11 @@ from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
from .mediasite import MediasiteIE
from .mediasite import (
MediasiteIE,
MediasiteCatalogIE,
MediasiteNamedCatalogIE,
)
from .medici import MediciIE
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
@@ -670,8 +697,7 @@ from .myvi import (
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
NationalGeographicIE,
NationalGeographicEpisodeGuideIE,
NationalGeographicTVIE,
)
from .naver import NaverIE
from .nba import NBAIE
@@ -814,6 +840,7 @@ from .orf import (
ORFOE1IE,
ORFIPTVIE,
)
from .outsidetv import OutsideTVIE
from .packtpub import (
PacktPubIE,
PacktPubCourseIE,
@@ -841,7 +868,12 @@ from .picarto import (
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
from .platzi import (
PlatziIE,
PlatziCourseIE,
)
from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
@@ -874,7 +906,6 @@ from .puhutv import (
PuhuTVSerieIE,
)
from .presstv import PressTVIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
@@ -951,7 +982,6 @@ from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
from .rudo import RudoIE
from .ruhd import RUHDIE
from .ruleporn import RulePornIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
@@ -1033,7 +1063,10 @@ from .southpark import (
SouthParkEsIE,
SouthParkNlIE
)
from .spankbang import SpankBangIE
from .spankbang import (
SpankBangIE,
SpankBangPlaylistIE,
)
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
@@ -1043,7 +1076,7 @@ from .spike import (
)
from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
@@ -1078,12 +1111,17 @@ from .tass import TassIE
from .tastytrade import TastyTradeIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
TeachableCourseIE,
)
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele5 import Tele5IE
@@ -1116,6 +1154,10 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
TikTokUserIE,
)
from .tinypic import TinyPicIE
from .tmz import (
TMZIE,
@@ -1134,6 +1176,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
@@ -1153,7 +1196,6 @@ from .tv2 import (
TV2ArticleIE,
)
from .tv2hu import TV2HuIE
from .tv3 import TV3IE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
@@ -1172,13 +1214,15 @@ from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
TVNowListIE,
TVNowNewIE,
TVNowSeasonIE,
TVNowAnnualIE,
TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,
TVPIE,
TVPSeriesIE,
TVPWebsiteIE,
)
from .tvplay import (
TVPlayIE,
@@ -1190,6 +1234,7 @@ from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
from .twitcasting import TwitCastingIE
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -1223,10 +1268,6 @@ from .uplynk import (
UplynkIE,
UplynkPreplayIE,
)
from .upskill import (
UpskillIE,
UpskillCourseIE,
)
from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
@@ -1262,7 +1303,6 @@ from .viddler import ViddlerIE
from .videa import VideaIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videomore import (
VideomoreIE,
VideomoreVideoIE,
@@ -1295,6 +1335,7 @@ from .vimeo import (
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
VHXEmbedIE,
)
from .vimple import VimpleIE
from .vine import (
@@ -1330,7 +1371,6 @@ from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
)
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vrv import (
@@ -1344,6 +1384,7 @@ from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .wakanim import WakanimIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
@@ -1367,7 +1408,7 @@ from .webofstories import (
WebOfStoriesPlaylistIE,
)
from .weibo import (
WeiboIE,
WeiboIE,
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
@@ -1382,6 +1423,7 @@ from .wsj import (
WSJIE,
WSJArticleIE,
)
from .wwe import WWEIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
@@ -1410,12 +1452,13 @@ from .yahoo import (
YahooIE,
YahooSearchIE,
)
from .yandexdisk import YandexDiskIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
)
from .yandexdisk import YandexDiskIE
from .yandexvideo import YandexVideoIE
from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
@@ -1455,10 +1498,24 @@ from .youtube import (
from .zapiks import ZapiksIE
from .zaq1 import Zaq1IE
from .zattoo import (
BBVTVIE,
EinsUndEinsTVIE,
EWETVIE,
GlattvisionTVIE,
MNetTVIE,
MyVisionTVIE,
NetPlusIE,
OsnatelTVIE,
QuantumTVIE,
QuicklineIE,
QuicklineLiveIE,
SaltTVIE,
SAKTVIE,
VTXTVIE,
WalyTVIE,
ZattooIE,
ZattooLiveIE,
)
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE
from .zype import ZypeIE

View File

@@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
_TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor):
uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
fatal=False) or self._og_search_title(webpage, fatal=False)
default=None) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))

View File

@@ -1,17 +1,22 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import uuid
from .adobepass import AdobePassIE
from .uplynk import UplynkPreplayIE
from ..compat import compat_str
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
HEADRequest,
ExtractorError,
int_or_none,
parse_age_limit,
parse_duration,
try_get,
unified_timestamp,
update_url_query,
)
@@ -31,6 +36,7 @@ class FOXIE(AdobePassIE):
'upload_date': '20170901',
'creator': 'FOX',
'series': 'Gotham',
'age_limit': 14,
},
'params': {
'skip_download': True,
@@ -44,48 +50,76 @@ class FOXIE(AdobePassIE):
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
'only_matching': True,
}]
_GEO_BYPASS = False
_HOME_PAGE_URL = 'https://www.fox.com/'
_API_KEY = 'abdcbed02c124d393b39e818a4312055'
_access_token = None
def _call_api(self, path, video_id, data=None):
headers = {
'X-Api-Key': self._API_KEY,
}
if self._access_token:
headers['Authorization'] = 'Bearer ' + self._access_token
try:
return self._download_json(
'https://api2.fox.com/v2.0/' + path,
video_id, data=data, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403:
entitlement_issues = self._parse_json(
e.cause.read().decode(), video_id)['entitlementIssues']
for e in entitlement_issues:
if e.get('errorCode') == 1005:
raise ExtractorError(
'This video is only available via cable service provider '
'subscription. You may want to use --cookies.', expected=True)
messages = ', '.join([e['message'] for e in entitlement_issues])
raise ExtractorError(messages, expected=True)
raise
def _real_initialize(self):
if not self._access_token:
mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
if mvpd_auth:
self._access_token = (self._parse_json(compat_urllib_parse_unquote(
mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
if not self._access_token:
self._access_token = self._call_api(
'login', None, json.dumps({
'deviceId': compat_str(uuid.uuid4()),
}).encode())['accessToken']
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
video_id, headers={
'apikey': 'abdcbed02c124d393b39e818a4312055',
'Content-Type': 'application/json',
'Referer': url,
})
video = self._call_api('vodplayer/' + video_id, video_id)
title = video['name']
release_url = video['videoRelease']['url']
description = video.get('description')
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
video.get('duration')) or parse_duration(video.get('duration'))
timestamp = unified_timestamp(video.get('datePublished'))
rating = video.get('contentRating')
age_limit = parse_age_limit(rating)
release_url = video['url']
try:
m3u8_url = self._download_json(release_url, video_id)['playURL']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.read().decode(), video_id)
if error.get('exception') == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(error['description'], expected=True)
raise
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {}
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
video.get('duration')) or parse_duration(video.get('duration'))
timestamp = unified_timestamp(video.get('datePublished'))
creator = data.get('brand') or data.get('network') or video.get('network')
series = video.get('seriesName') or data.get(
'seriesName') or data.get('show')
season_number = int_or_none(video.get('seasonNumber'))
episode = video.get('name')
episode_number = int_or_none(video.get('episodeNumber'))
release_year = int_or_none(video.get('releaseYear'))
if data.get('authRequired'):
resource = self._get_mvpd_resource(
'fbc-fox', title, video.get('guid'), rating)
release_url = update_url_query(
release_url, {
'auth': self._extract_mvpd_auth(
url, video_id, 'fbc-fox', resource)
})
subtitles = {}
for doc_rel in video.get('documentReleases', []):
@@ -98,36 +132,19 @@ class FOXIE(AdobePassIE):
}]
break
info = {
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'description': video.get('description'),
'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit,
'age_limit': parse_age_limit(video.get('contentRating')),
'creator': creator,
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'release_year': release_year,
'season_number': int_or_none(video.get('seasonNumber')),
'episode': video.get('name'),
'episode_number': int_or_none(video.get('episodeNumber')),
'release_year': int_or_none(video.get('releaseYear')),
'subtitles': subtitles,
}
urlh = self._request_webpage(HEADRequest(release_url), video_id)
video_url = compat_str(urlh.geturl())
if UplynkPreplayIE.suitable(video_url):
info.update({
'_type': 'url_transparent',
'url': video_url,
'ie_key': UplynkPreplayIE.ie_key(),
})
else:
m3u8_url = self._download_json(release_url, video_id)['playURL']
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
info['formats'] = formats
return info

View File

@@ -1,43 +1,33 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
smuggle_url,
update_url_query,
)
class FoxSportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
'md5': 'b49050e955bebe32c301972e4012ac17',
'info_dict': {
'id': 'bwduI3X_TgUB',
'id': '432609859715',
'ext': 'mp4',
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
'description': 'Courtney Lee talks about Memphis being focused.',
'upload_date': '20150423',
'timestamp': 1429761109,
# TODO: fix timestamp
'upload_date': '19700101', # '20150423',
# 'timestamp': 1429761109,
'uploader': 'NEWA-FNG-FOXSPORTS',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
config = self._parse_json(
self._html_search_regex(
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
webpage, 'data player config'),
video_id)
return self.url_result(smuggle_url(update_url_query(
config['releaseURL'], {
'mbr': 'true',
'switch': 'http',
}), {'force_smil_url': True}))
return self.url_result(
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')

View File

@@ -143,7 +143,7 @@ class FranceTVIE(InfoExtractor):
ext = determine_ext(video_url)
if ext == 'f4m':
if georestricted:
# See https://github.com/rg3/youtube-dl/issues/3963
# See https://github.com/ytdl-org/youtube-dl/issues/3963
# m3u8 urls work fine
continue
formats.extend(self._extract_f4m_formats(
@@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': {
'id': '162311093',
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
catalogue = None
video_id = self._search_regex(
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id')
if not video_id:

View File

@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
class FreespeechIE(InfoExtractor):
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
r'data-video-url="([^"]+)"',
webpage, 'youtube url')
return {
'_type': 'url',
'url': youtube_url,
'ie_key': 'Youtube',
}
return self.url_result(youtube_url, YoutubeIE.ie_key())

View File

@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import string
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
video_id = title_data.get('id') or self._search_regex([
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
r'<iframe[^>]+src="/player/(\d+)"',
r'<iframe[^>]+src="/player/(\d+)',
], webpage, 'video_id', default=None)
if not video_id:
player_url = self._html_search_meta([
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
if self._TOKEN:
headers['Authorization'] = 'Token %s' % self._TOKEN
sources = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
video_id, headers=headers)['items']
'https://www.funimation.com/api/showexperience/%s/' % video_id,
video_id, headers=headers, query={
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
})['items']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read(), video_id)['errors'][0]

View File

@@ -0,0 +1,130 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
strip_or_none,
try_get,
urlencode_postdata,
)
class GaiaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
_TESTS = [{
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
'info_dict': {
'id': '89356',
'ext': 'mp4',
'title': 'Connecting with Universal Consciousness',
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
'upload_date': '20151116',
'timestamp': 1447707266,
'duration': 936,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
'info_dict': {
'id': '89351',
'ext': 'mp4',
'title': 'Connecting with Universal Consciousness',
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
'upload_date': '20151116',
'timestamp': 1447707266,
'duration': 53,
},
'params': {
# m3u8 download
'skip_download': True,
},
}]
_NETRC_MACHINE = 'gaia'
_jwt = None
def _real_initialize(self):
auth = self._get_cookies('https://www.gaia.com/').get('auth')
if auth:
auth = self._parse_json(
compat_urllib_parse_unquote(auth.value),
None, fatal=False)
if not auth:
username, password = self._get_login_info()
if username is None:
return
auth = self._download_json(
'https://auth.gaia.com/v1/login',
None, data=urlencode_postdata({
'username': username,
'password': password
}))
if auth.get('success') is False:
raise ExtractorError(', '.join(auth['messages']), expected=True)
if auth:
self._jwt = auth.get('jwt')
def _real_extract(self, url):
display_id, vtype = re.search(self._VALID_URL, url).groups()
node_id = self._download_json(
'https://brooklyn.gaia.com/pathinfo', display_id, query={
'path': 'video/' + display_id,
})['id']
node = self._download_json(
'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
vdata = node[vtype]
media_id = compat_str(vdata['nid'])
title = node['title']
headers = None
if self._jwt:
headers = {'Authorization': 'Bearer ' + self._jwt}
media = self._download_json(
'https://brooklyn.gaia.com/media/' + media_id,
media_id, headers=headers)
formats = self._extract_m3u8_formats(
media['mediaUrls']['bcHLS'], media_id, 'mp4')
self._sort_formats(formats)
subtitles = {}
text_tracks = media.get('textTracks', {})
for key in ('captions', 'subtitles'):
for lang, sub_url in text_tracks.get(key, {}).items():
subtitles.setdefault(lang, []).append({
'url': sub_url,
})
fivestar = node.get('fivestar', {})
fields = node.get('fields', {})
def get_field_value(key, value_key='value'):
return try_get(fields, lambda x: x[key][0][value_key])
return {
'id': media_id,
'display_id': display_id,
'title': title,
'formats': formats,
'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
'timestamp': int_or_none(node.get('created')),
'subtitles': subtitles,
'duration': int_or_none(vdata.get('duration')),
'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
'comment_count': int_or_none(node.get('comment_count')),
'series': try_get(node, lambda x: x['series']['title'], compat_str),
'season_number': int_or_none(get_field_value('season')),
'season_id': str_or_none(get_field_value('series_nid', 'nid')),
'episode_number': int_or_none(get_field_value('episode')),
}

View File

@@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
}, {
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
'only_matching': True,
}, {
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
from .sportbox import SportBoxEmbedIE
from .sportbox import SportBoxIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
@@ -109,11 +109,13 @@ from .vice import ViceIE
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .teachable import TeachableIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
from .zype import ZypeIE
class GenericIE(InfoExtractor):
@@ -428,7 +430,7 @@ class GenericIE(InfoExtractor):
},
},
{
# https://github.com/rg3/youtube-dl/issues/2253
# https://github.com/ytdl-org/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': {
@@ -453,7 +455,7 @@ class GenericIE(InfoExtractor):
},
},
{
# https://github.com/rg3/youtube-dl/issues/3541
# https://github.com/ytdl-org/youtube-dl/issues/3541
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
@@ -917,7 +919,7 @@ class GenericIE(InfoExtractor):
}
},
# Multiple brightcove videos
# https://github.com/rg3/youtube-dl/issues/2283
# https://github.com/ytdl-org/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
@@ -2070,6 +2072,20 @@ class GenericIE(InfoExtractor):
},
'playlist_count': 6,
},
{
# Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
'info_dict': {
'id': '5b400b834b32992a310622b9',
'ext': 'mp4',
'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g',
},
'add_ie': [ZypeIE.ie_key()],
'params': {
'skip_download': True,
},
},
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
@@ -2181,10 +2197,7 @@ class GenericIE(InfoExtractor):
def _real_extract(self, url):
if url.startswith('//'):
return {
'_type': 'url',
'url': self.http_scheme() + url,
}
return self.url_result(self.http_scheme() + url)
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
@@ -2358,7 +2371,7 @@ class GenericIE(InfoExtractor):
return camtasia_res
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
@@ -2636,9 +2649,9 @@ class GenericIE(InfoExtractor):
return self.url_result(tvc_url, 'TVC')
# Look for embedded SportBox player
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
sportbox_urls = SportBoxIE._extract_urls(webpage)
if sportbox_urls:
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
@@ -3023,7 +3036,7 @@ class GenericIE(InfoExtractor):
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
mediaset_urls = MediasetIE._extract_urls(webpage)
mediaset_urls = MediasetIE._extract_urls(self, webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
@@ -3097,6 +3110,10 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
teachable_url = TeachableIE._extract_url(webpage, url)
if teachable_url:
return self.url_result(teachable_url)
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:
return self.playlist_from_matches(
@@ -3129,6 +3146,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
zype_urls = ZypeIE._extract_urls(webpage)
if zype_urls:
return self.playlist_from_matches(
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@@ -3155,7 +3177,7 @@ class GenericIE(InfoExtractor):
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
except ExtractorError:
# See https://github.com/rg3/youtube-dl/pull/16735
# See https://github.com/ytdl-org/youtube-dl/pull/16735
pass
# Video.js embed

View File

@@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
video_id = self._match_id(url)
gfy = self._download_json(
'http://gfycat.com/cajax/get/%s' % video_id,
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
video_id, 'Downloading video info')
if 'error' in gfy:
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)

View File

@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
return
try:
self._download_json(
glb_id = (self._download_json(
'https://login.globo.com/api/authentication', None, data=json.dumps({
'payload': {
'email': email,
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
},
}).encode(), headers={
'Content-Type': 'application/json; charset=utf-8',
})
}) or {}).get('glbId')
if glb_id:
self._set_cookie('.globo.com', 'GLBID', glb_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
resp = self._parse_json(e.cause.read(), None)

View File

@@ -25,15 +25,15 @@ class GoIE(AdobePassIE):
},
'watchdisneychannel': {
'brand': '004',
'requestor_id': 'Disney',
'resource_id': 'Disney',
},
'watchdisneyjunior': {
'brand': '008',
'requestor_id': 'DisneyJunior',
'resource_id': 'DisneyJunior',
},
'watchdisneyxd': {
'brand': '009',
'requestor_id': 'DisneyXD',
'resource_id': 'DisneyXD',
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
@@ -130,8 +130,8 @@ class GoIE(AdobePassIE):
'device': '001',
}
if video_data.get('accesslevel') == '1':
requestor_id = site_info['requestor_id']
resource = self._get_mvpd_resource(
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
resource = site_info.get('resource_id') or self._get_mvpd_resource(
requestor_id, title, video_id, None)
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)

View File

@@ -36,7 +36,7 @@ class GoogleDriveIE(InfoExtractor):
}
}, {
# video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
'info_dict': {

Some files were not shown because too many files have changed in this diff Show More