Compare commits

...

157 Commits

Author SHA1 Message Date
bdf7f13954 release 2016.03.06 2016-03-06 10:08:02 +01:00
0f56a4b443 [vimeo] Don't pollute std_headers
Fixes #8778
2016-03-06 17:01:05 +08:00
1b5284b13f [downloader/fragment] Make speed smoother
At the beginning of every segment there was a drop to Unknown speed because the timeslice was too small to calculate speed.
Now the last speed from the previous fragment is used.
2016-03-06 05:36:52 +06:00
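A minimal sketch of the smoothing idea (simplified names, not the project's actual code; the real change is in the `youtube_dl/downloader/fragment.py` diff further down):

```python
def smoothed_speed(measured_speed, ctx):
    # Fall back to the previous fragment's speed when the current
    # timeslice was too short to produce a measurement.
    speed = measured_speed or ctx.get('speed')
    ctx['speed'] = speed  # remember it for the start of the next fragment
    return speed

ctx = {}
print(smoothed_speed(1024.0, ctx))  # 1024.0 -- normal measurement
print(smoothed_speed(None, ctx))    # 1024.0 -- fragment start, reuse last speed
```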
d1e4a464cd [YoutubeDL] Carry long lines and improve readability 2016-03-06 04:32:18 +06:00
ff059017c0 [YoutubeDL] Fix typo in m3u8_native fixup 2016-03-06 04:30:19 +06:00
f22ba4bd60 update tests related to the change in youtube http format sorting
the change was done in 82156fdbf0
2016-03-05 21:52:24 +01:00
1db772673e [cinemassacre] update tests 2016-03-05 21:34:34 +01:00
75313f2baa [cnet] fix info extraction 2016-03-05 21:10:00 +01:00
090eb8e25f Merge pull request #8718 from remitamine/m3u8-fixup
Add fixup for media files produced by HlsNative downloader (fixes #4776)
2016-03-05 18:37:28 +01:00
a9793f58a1 Merge pull request #8754 from remitamine/5min
update info extraction for 5min-related websites and add support for Aol features.
2016-03-05 18:35:48 +01:00
7177fd24f8 [vgtv] support ap.vgtv.no and fix extraction of old videos (fixes #8719) 2016-03-05 17:51:46 +01:00
1e501f6c40 [jeuxvideo] Fix config URL extraction (Closes #8774) 2016-03-05 21:01:43 +06:00
2629a3802c [revision3] fix video_id for --download-archive 2016-03-05 15:42:15 +01:00
51ce91174b [YoutubeDL] Fix resolution with missing height in output template dict 2016-03-05 19:38:58 +06:00
107d0c421a [revision3] add support for pages of type tag 2016-03-05 13:43:29 +01:00
18b0b23992 [revision3] add support for pages of type embed 2016-03-05 12:14:48 +01:00
d1b29d1342 [elpais] Add support for alternative layout (Closes #8744) 2016-03-05 16:43:29 +06:00
845817aadf [twitter] Provide more metadata 2016-03-05 18:14:58 +08:00
3233a68fbb [utils] update_url_query: Encode the strings in the query dict
The test case with {'test': '第二行тест'} was failing on python 2 (the non-ascii characters were replaced with '?').
2016-03-04 22:18:40 +01:00
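A standalone sketch of such a helper (standard library, Python 3; not the project's actual implementation — on Python 2 the values would additionally need encoding to UTF-8 bytes first, which is the bug described above):

```python
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def update_url_query(url, query):
    parsed = urlparse(url)
    qs = parse_qs(parsed.query)
    qs.update(query)  # values may be strings or lists of strings
    # doseq=True expands list values into repeated parameters
    return urlunparse(parsed._replace(query=urlencode(qs, doseq=True)))

print(update_url_query('http://example.com/path', {'test': '第二行тест'}))
# http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82
```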
cf074e5ddd [foxnews] update test 2016-03-04 21:42:04 +01:00
002c755248 [youporn] Fix sources regex 2016-03-05 01:51:27 +06:00
d627cec608 [youporn] Fix quality extraction (Closes #8758) 2016-03-05 01:50:12 +06:00
1315224cbb [bleacherreport] update tests 2016-03-04 20:14:09 +01:00
7760b9ff4d [audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 17:55:50 +01:00
28559564b2 [kusi] Correct test_KUSI 2016-03-05 00:04:29 +08:00
fa880d20ad [kusi] Two fixes
Thanks @dstftw for pointing those out
2016-03-04 23:59:58 +08:00
ae7d31af1c [yandexmusic] Capture and output API errors 2016-03-04 21:32:54 +06:00
9d303bf29b Merge branch 'mutantmonkey-kusi' 2016-03-04 23:21:24 +08:00
5f1688f271 [kusi] Simplify and improve 2016-03-04 23:08:47 +08:00
1d4c9ed90c [aol] improve extraction
- add support for aol features
- remove support for legacy urls
2016-03-04 10:42:58 +01:00
d48352fb5d [engadget] remove support for legacy urls 2016-03-04 10:40:39 +01:00
6d6536acb2 [fivemin] improve extraction
- skip m3u8 formats (404 error)
- skip unavailable test
- download embed page only when it's needed
- update _VALID_URL regex (joystiq.com redirects to engadget.com)
2016-03-04 10:25:16 +01:00
b6f94d81ea [kusi] Add a test for the alternative form of URL 2016-03-04 14:32:01 +08:00
8477a69283 Merge branch 'kusi' of https://github.com/mutantmonkey/youtube-dl into mutantmonkey-kusi 2016-03-04 14:21:23 +08:00
d58cb3ec7e [leeco] Skip an invalid test. test_LePlaylist_1 is sufficient 2016-03-04 13:46:38 +08:00
8a370aedac [leeco] format_id values should be strings 2016-03-04 13:38:45 +08:00
24ca0e9c0b [douyutv] Fix tests 2016-03-04 13:36:29 +08:00
e1dd521e49 [livestream] Fix FutureWarning (Closes #8742) 2016-03-04 01:16:58 +06:00
1255733945 Merge pull request #8739 from remitamine/update_url_params
[utils] add update_url_query function to create or update query string params
2016-03-03 19:24:04 +01:00
3201a67f61 [test/test_utils] add more tests for update_url_query 2016-03-03 19:18:57 +01:00
d0ff690d68 [indavideo:embed] Fix tags extraction (Closes #8738) 2016-03-04 00:09:40 +06:00
fb640d0a3d [test/test_utils] add tests for update_url_query 2016-03-03 18:40:05 +01:00
38f9ef31dc [utils] add update_url_query function 2016-03-03 18:34:52 +01:00
a8276b2680 [twitch:playlistbase] Fix all at once fetch 2016-03-03 22:18:32 +06:00
ececca6cde [twitch:playlistbase] Restore original _PAGE_LIMIT 2016-03-03 22:12:55 +06:00
8bbb4b56ee [twitch:playlistsbase] Use orderedSet 2016-03-03 22:11:26 +06:00
539a1641c6 [twitch] Workaround broken paging (Closes #8740) 2016-03-03 22:10:36 +06:00
1b0635aba3 [Makefile] Allow specifying the Python version in offline tests 2016-03-03 21:57:49 +08:00
429491f531 [test/http] Fix failure in Jython
`make offlinetest` now passes on the latest Jython hg version with a patched
lib-python/2.7/urllib2.py pulled from CPython 2.7.11
2016-03-03 21:55:17 +08:00
e9c0cdd389 [jython] Introduce compat_os_name
os.name is always 'java' on Jython
2016-03-03 19:24:24 +08:00
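The shim itself is a one-liner, visible in the `youtube_dl/compat.py` diff further down:

```python
import os

# Jython reports os.name == 'java'; the underlying platform name
# ('posix' or 'nt') is exposed as os._name there.
compat_os_name = os._name if os.name == 'java' else os.name
```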
0cae023b24 Merge branch 'jython-support'
Closes #8302
2016-03-03 18:49:32 +08:00
8ee239e921 [utils] Jython support - handle filenames correctly
Now test:youtube downloads
2016-03-03 18:47:54 +08:00
fa9e259fd9 [extractor/common] use compat_parse_qs in update_url_params 2016-03-03 10:54:39 +01:00
f3bdae76de [extractor/common] add update_url_params helper method to add or update query string params 2016-03-03 10:27:22 +01:00
03879ff054 [twitter] Media info is not always in the first entity
Fixes #8704
2016-03-03 14:42:49 +08:00
c8398a9b87 [twitter] Now Twitter serves the same file for Firefox and Chrome 2016-03-03 14:27:27 +08:00
b8972bd69d [twitter] Fix extraction of test_Twitter and test_Twitter_1 2016-03-03 14:24:24 +08:00
0ae937a798 [twitter] Support twitter.com/i/videos/tweet/ URLS
Closes #8737
2016-03-03 13:43:45 +08:00
4459bef203 [theplatform] detect other types of errors 2016-03-02 21:41:29 +01:00
e07237f640 [utils] remove check for val from find_xpath_attr 2016-03-02 21:40:21 +01:00
8c5a994424 [leeco] Letv renamed to LeEco
LeEco is the company name and Le is the domain name.

For more information see the Chinese news post
http://www.techorz.com/company-news/letv-renamed-to-leeco-and-new-logo/
2016-03-03 03:27:55 +08:00
2eb25b256b [letv] Merge LetvTvIE into LetvPlaylistIE
And
1. Add more URL examples
2. Improve the matching pattern
2016-03-03 03:27:55 +08:00
f3bc19a989 [letv] Correct regular expressions and fix a typo 2016-03-03 03:27:55 +08:00
7a8fef3173 [letv] Order imports alphabetically 2016-03-03 03:27:55 +08:00
7465e7e42d [letv] Keep videos' order in playlists 2016-03-03 03:27:55 +08:00
5e73a67d44 [letv] Domain name changed 2016-03-03 03:27:55 +08:00
2316dc2b9a [twitch:playlistbase] Mark broken
The Twitch paging mechanism is completely broken on the Twitch side, serving all videos all the time and making our Travis builds stall.
2016-03-03 00:41:36 +06:00
a2d7797cee [vimeo] Extract uploader_url (Closes #8727) 2016-03-03 00:00:11 +06:00
fd050249af [youtube] Extract uploader_url (Closes #8724) 2016-03-02 23:49:10 +06:00
7bcd2830dd [extractor/common] Document uploader_url 2016-03-02 23:31:24 +06:00
47462a125b [README.md] Document license field for output template 2016-03-02 23:10:01 +06:00
7caf9830b0 [youtube] Extract license (Closes #8725) 2016-03-02 23:07:25 +06:00
2bc0c46f98 [extractor/common] Document license metafield 2016-03-02 23:06:39 +06:00
3318832e9d [youtube] improve width and height extraction from fmt_list 2016-03-02 17:52:13 +01:00
e7d2084568 Merge branch 'master' of github.com:rg3/youtube-dl 2016-03-02 17:35:55 +01:00
c2d3cb4c63 Revert "[youtube] add tbr to _formats extracted from watch_as3.swf"
This reverts commit 4a5ba28a87.
2016-03-02 17:35:04 +01:00
c48dd4400f Revert "[youtube] add basic info for some unknown formats extracted from watch_as3.swf"
This reverts commit 85ca019d96.
2016-03-02 17:34:56 +01:00
e38cafe986 [YoutubeDL] Skip postprocessing and archive report when outputting to stdout (Closes #8729) 2016-03-02 21:11:18 +06:00
85ca019d96 [youtube] add basic info for some unknown formats extracted from watch_as3.swf 2016-03-02 16:05:05 +01:00
4a5ba28a87 [youtube] add tbr to _formats extracted from watch_as3.swf 2016-03-02 16:05:05 +01:00
82156fdbf0 [youtube] extract width and height from fmt_list 2016-03-02 16:05:05 +01:00
6114090418 [nrk:skole] Relax _VALID_URL 2016-03-02 20:57:04 +06:00
3099b31276 [nrk:skole] Add extractor (Closes #8728) 2016-03-02 20:52:06 +06:00
f17f86513e Add fixup for media files produced by HlsNative downloader (fixes #4776) 2016-03-01 21:10:41 +01:00
90f794c6c3 [options] Add --no-mark-watched (#5054) 2016-03-01 23:41:23 +06:00
66ca2cfddd [wistia] Fix extraction (Closes #8707) 2016-03-01 23:26:53 +06:00
269dd2c6a7 Merge pull request #8703 from dstftw/mark-watched
Add --mark-watched feature (Closes #5054)
2016-03-01 23:00:51 +06:00
e7998f59aa [lifenews] Fix extraction and improve (Closes #2482, closes #8714) 2016-03-01 22:59:11 +06:00
9fb556eef0 [iqiyi] SWF URLs are not used anymore
Since automatic detection of enc_key failed

Closes #8705
2016-03-01 08:42:33 +08:00
e781ab63db release 2016.03.01 2016-03-01 00:05:39 +01:00
3e76968220 [rtve.es:live] Fix extraction
* Update _VALID_URL to match the current URLs
* Use the m3u8 manifest since I haven't figured out how to use the rtmp stream
2016-02-29 20:57:26 +01:00
2812c24c16 [mdr] Fix extraction (Closes #8702) 2016-03-01 01:24:26 +06:00
d77ab8e255 Add --mark-watched feature (Closes #5054) 2016-03-01 01:01:33 +06:00
4b3cd7316c [tf1] Improve wat id regex (Closes #8691) 2016-02-29 03:28:21 +06:00
6dae56384a [screenwavemedia] Check formats' URLs 2016-02-28 21:46:36 +06:00
2b2dfae83e [screenwavemedia] Improve formats sorting 2016-02-28 20:16:31 +06:00
6c10dbeae9 [screenwavemedia] Improve formats extraction 2016-02-28 20:05:58 +06:00
9173202b84 [zdf] Ignore hls manifests that use https (closes #8665)
The certificates are misconfigured; you get the following error message:

    ssl.CertificateError: hostname u'zdf-hdios-none-i.zdf.de' doesn't match either of 'a248.e.akamai.net', '*.akamaihd.net', '*.akamaihd-staging.net', '*.akamaized.net', '*.akamaized-staging.net'
2016-02-28 14:06:26 +01:00
8870bb4653 [webofstories] Tolerate malformed og:title (Closes #8417) 2016-02-28 03:37:48 +06:00
7a0e7779fe [README.md] Use simple wording instead of env variable for home 2016-02-28 03:12:13 +06:00
a048ffc9b0 [README.md] Clarify configuration file options syntax 2016-02-28 03:04:06 +06:00
4587915b2a [README.md] Make configuration file example more diverse 2016-02-28 02:56:09 +06:00
da665ddc25 release 2016.02.27 2016-02-27 21:31:21 +01:00
5add979d91 [dplay] Add support for dplay.no 2016-02-27 21:42:08 +06:00
20afe8bd14 Credit @aidan- for more dplay sites support (#8463) 2016-02-27 21:31:43 +06:00
940b606a07 [dplay] Improve, extract all formats and metadata (Closes #8463) 2016-02-27 21:30:47 +06:00
9505053704 [dplay] add support for it.dplay.com and dplay.dk 2016-02-27 19:40:36 +06:00
2c9ca78281 [extractor/generic] Add support for tnaflix network embeds (Closes #7505) 2016-02-27 17:15:49 +06:00
63719a8ac3 [tnaflixnetwork:embed] Add _extract_urls 2016-02-27 17:15:06 +06:00
8fab62482a [tnaflixnetwork] Fallback age limit to 18 2016-02-27 16:59:10 +06:00
d6e9c2706f [tnaflixnetwork:embed] Add extractor 2016-02-27 16:58:11 +06:00
f7f2e53a0a [imdb] Recognize 1080p formats (Closes #8677) 2016-02-27 15:51:25 +06:00
9cdffeeb3f [extractor/common] Clarify rationale on media playlist detection 2016-02-27 07:01:11 +06:00
fbb6edd298 [extractor/common] Properly extract audio only formats in master m3u8 playlists 2016-02-27 06:48:13 +06:00
5eb6bdced4 [utils] Multiple changes to base_n()
1. Renamed to encode_base_n()
2. Allow tables longer than 62 characters
3. Raise ValueError instead of AssertionError for invalid input data
4. Return the first character in the table instead of '0' for number 0
5. Add tests
2016-02-27 03:22:52 +08:00
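A sketch consistent with the changes listed above and with the tests added to `test/test_utils.py` (the exact default table is an assumption based on those tests):

```python
def encode_base_n(num, n, table=None):
    # Default table covers bases up to 62 (digits + lowercase + uppercase).
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]
    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    if num == 0:
        return table[0]  # first table character, not a hard-coded '0'
    ret = ''
    while num:
        ret = table[num % n] + ret
        num //= n
    return ret

assert encode_base_n(0, 30) == '0'
assert encode_base_n(80, 30) == '2k'  # 80 == 2 * 30 + 20
```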
5633b4d39d [infoq] Use BokeCC extractor function 2016-02-27 02:55:11 +08:00
4435c6e98e [bokecc] Add new extractor (#2336) 2016-02-27 02:54:43 +08:00
2ebd2eac88 [letv] Speedup M3U8 decryption 2016-02-27 00:58:03 +08:00
b78b292f0c [youtube] Add alternative automatic captions extraction approach (Closes #8667) 2016-02-26 22:21:47 +06:00
efbd6fb8bb [vidzi] Use decode_packed_codes
JavaScript code found on Vidzi is slightly different from that found
on VideoMega and iQiyi. Nevertheless, the difference has no effect on
the final result.
2016-02-26 15:14:13 +08:00
680079be39 [utils] Relax regex in decode_packed_codes for vidzi 2016-02-26 15:13:03 +08:00
e4fc8d2ebe [videomega] Fix extraction (closes #7606) 2016-02-26 15:00:48 +08:00
f52354a889 [utils] Move code for handling eval() from iqiyi.py 2016-02-26 14:58:29 +08:00
59f898b7a7 [utils] Merge base_n functions 2016-02-26 14:37:20 +08:00
8f4a2124a9 [vidzi] Fix extraction 2016-02-26 14:26:26 +08:00
481888294d [utils] Add base36 for use in Vidzi 2016-02-26 14:26:26 +08:00
d1e440a4a1 [jwplatform] Separate code for parsing jwplayer data 2016-02-26 14:26:26 +08:00
81bdc8fdf6 [utils] Move base62 to utils 2016-02-26 14:26:26 +08:00
e048d87fc9 [kuwo] Fix a test 2016-02-26 14:26:26 +08:00
e26cde0927 [space] Remove extractor (Closes #8662)
Now uses ooyala embed
2016-02-25 21:46:43 +06:00
20108c6b90 [ustudio] Improve (Closes #8574) 2016-02-25 21:30:19 +06:00
9195ef745a [uStudio] Add new extractor 2016-02-25 21:29:49 +06:00
d0459c530d [motherless] Update tests 2016-02-25 00:54:41 +06:00
f160785c5c [utils] Remove AM/PM from unified_strdate patterns 2016-02-25 00:52:49 +06:00
5c0a57185c [motherless] Detect non-existing videos 2016-02-25 00:42:19 +06:00
43479d9e9d [motherless] Make categories optional (Closes #8654) 2016-02-25 00:36:14 +06:00
c0da50d2b2 [README.md] Turn references to issues to links 2016-02-24 23:05:23 +06:00
c24883a1c0 [facebook] Fix format sorting
'hd' formats should have higher priorities
2016-02-24 03:43:24 +08:00
1b77ee6248 [c56] Support videos hosted on Sohu (closes #8073) 2016-02-24 03:32:29 +08:00
bf4b3b6bd9 [vk] Extract video URL from extra_data (Closes #8646) 2016-02-23 18:47:13 +06:00
efbeddead3 [facebook] Support mobile URLs (closes #8638) 2016-02-23 13:17:24 +08:00
3cfeb1624a [nba] Support channels (#5362, #4167) 2016-02-23 13:11:20 +08:00
b95dc034ca [utils] Implement cache for OnDemandPagedList 2016-02-23 13:11:20 +08:00
86a7dbe66e [nba] Support non-video/ pages
Fixes #8589
2016-02-23 13:11:20 +08:00
b43a7a92cd [README.md] Fix typo 2016-02-23 05:41:09 +06:00
6563d31710 [README.md] Fix typo 2016-02-23 05:37:49 +06:00
cf89ba9eff [README.md] Clarify robustness and future-proof requirements for new extractors 2016-02-23 05:35:19 +06:00
9b01272832 [README.md] Update link to extractor metafields 2016-02-23 05:03:57 +06:00
58525c94d5 [README.md] Emphasize copyright infringement aspects in add-new-site-support tutorial 2016-02-23 04:58:51 +06:00
621bd0cda9 [README.md] Add tl;dr links to examples 2016-02-23 04:43:45 +06:00
1610f770d7 [README.md] Extract example subsections 2016-02-23 04:29:39 +06:00
0fc871d2f0 [README.md:output_template] Add example for channel/user playlists download 2016-02-23 04:16:47 +06:00
1ad6143061 [xfileshare] Add support for powerwatch (Closes #8628) 2016-02-22 17:37:00 +06:00
101067de12 Jython support - handle *.class files 2016-02-21 03:32:03 +08:00
c1c05c67ea [utils] Jython support - disable setproctitle() until ctypes is complete 2016-02-21 03:32:03 +08:00
399a76e67b [utils] Jython support: tolerate missing fcntl module 2016-02-21 03:32:03 +08:00
199e724291 [KUSI] Add new extractor 2016-02-16 19:55:46 -08:00
81 changed files with 1850 additions and 725 deletions

.gitignore

@ -1,5 +1,6 @@
*.pyc
*.pyo
*.class
*~
*.DS_Store
wine-py2exe/
@ -32,4 +33,4 @@ test/testdata
.tox
youtube-dl.zsh
.idea
.idea/*
.idea/*

AUTHORS

@ -160,3 +160,4 @@ Erwin de Haan
Jens Wille
Robin Houtevelts
Patrick Griffis
Aidan Rowe

CONTRIBUTING.md

@ -92,7 +92,9 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites, thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
@ -140,16 +142,17 @@ If you want to add support for a new site, you can follow this quick list (assum
```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in the info dict for a successful extraction are `id`, `title` and either `url` or `formats`, i.e. these are the critical data without which the extraction does not make sense. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from the aforementioned mandatory ones should be treated **as optional**, and extraction should be **tolerant** of situations where sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** so as not to break the extraction of the general-purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be ready for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex`/`_html_search_regex` (see the sketch after this list).
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
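A hypothetical extractor body illustrating step 8 (every name here is made up for illustration, not taken from a real site):

```python
def _real_extract(self, url):
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    meta = self._download_json(
        'http://api.example.com/video/%s' % video_id, video_id)
    return {
        'id': video_id,                      # mandatory
        'title': meta['title'],              # mandatory -- may fail hard
        'url': meta['videoUrl'],             # mandatory (or provide 'formats')
        'description': meta.get('summary'),  # optional -- .get(), never ['summary']
        'thumbnail': self._html_search_regex(
            r'<meta[^>]+property="og:image"[^>]+content="([^"]+)"',
            webpage, 'thumbnail', fatal=False),  # optional -- must not abort
    }
```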

Makefile

@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
@ -44,7 +45,7 @@ test:
ot: offlinetest
offlinetest: codetest
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
tar: youtube-dl.tar.gz

README.md

@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
--mark-watched Mark videos watched (YouTube only)
--no-mark-watched Do not mark videos watched (YouTube only)
--no-color Do not emit color codes in output
## Network Options:
@ -409,13 +411,18 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under a `Movies` directory in your home directory:
```
--extract-audio
-x
--no-mtime
--proxy 127.0.0.1:3128
-o ~/Movies/%(title)s.%(ext)s
```
Note that options in the configuration file are just the same options (a.k.a. switches) used in regular command line calls, thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
### Authentication with `.netrc` file
@ -440,7 +447,11 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
# OUTPUT TEMPLATE
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
- `id`: Video identifier
- `title`: Video title
@ -449,6 +460,7 @@ The `-o` option allows users to indicate a template for the output file names. T
- `alt_title`: A secondary title of the video
- `display_id`: An alternative identifier for the video
- `uploader`: Full name of the video uploader
- `license`: License name the video is licensed under
- `creator`: The main artist who created the video
- `release_date`: The date (YYYYMMDD) when the video was released
- `timestamp`: UNIX timestamp of the moment the video became available
@ -513,7 +525,9 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
Examples (note on Windows you may need to use double quotes instead of single):
#### Output template examples
Note on Windows you may need to use double quotes instead of single.
```bash
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@ -525,6 +539,9 @@ youtube-dl_test_video_.mp4 # A simple file name
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
@ -543,6 +560,8 @@ But sometimes you may want to download in a different format, for example when y
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
**tl;dr:** [navigate me to examples](#format-selection-examples).
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
@ -588,11 +607,14 @@ You can merge the video and audio of two formats into a single file using `-f <v
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
Examples (note on Windows you may need to use double quotes instead of single):
#### Format selection examples
Note on Windows you may need to use double quotes instead of single.
```bash
# Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
@ -733,7 +755,7 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
### What is this binary file? Where has the code gone?
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws a *Runtime error from Visual C++*
@ -816,7 +838,9 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites, thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
@ -864,16 +888,17 @@ If you want to add support for a new site, you can follow this quick list (assum
```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in the info dict for a successful extraction are `id`, `title` and either `url` or `formats`, i.e. these are the critical data without which the extraction does not make sense. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from the aforementioned mandatory ones should be treated **as optional**, and extraction should be **tolerant** of situations where sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** so as not to break the extraction of the general-purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be ready for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex`/`_html_search_regex`.
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!

docs/supportedsites.md

@ -77,6 +77,7 @@
- **BleacherReportCMS**
- **blinkx**
- **Bloomberg**
- **BokeCC**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **Break**
@ -193,6 +194,7 @@
- **faz.net**
- **fc2**
- **Fczenit**
- **features.aol.com**
- **fernsehkritik.tv**
- **Firstpost**
- **FiveTV**
@ -292,6 +294,7 @@
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
- **KUSI**
- **kuwo:album**: 酷我音乐 - 专辑
- **kuwo:category**: 酷我音乐 - 分类
- **kuwo:chart**: 酷我音乐 - 排行榜
@ -300,12 +303,11 @@
- **kuwo:song**: 酷我音乐
- **la7.tv**
- **Laola1Tv**
- **Le**: 乐视网
- **Lecture2Go**
- **Lemonde**
- **Letv**: 乐视网
- **LePlaylist**
- **LetvCloud**: 乐视云
- **LetvPlaylist**
- **LetvTv**
- **Libsyn**
- **life:embed**
- **lifenews**: LIFE | NEWS
@ -420,6 +422,7 @@
- **Npr**
- **NRK**
- **NRKPlaylist**
- **NRKSkole**: NRK Skole
- **NRKTV**: NRK TV and NRK Radio
- **ntv.ru**
- **Nuvid**
@ -560,7 +563,6 @@
- **southpark.de**
- **southpark.nl**
- **southparkstudios.dk**
- **Space**
- **SpankBang**
- **Spankwire**
- **Spiegel**
@ -620,6 +622,7 @@
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
- **toggle**
- **tou.tv**
- **Toypics**: Toypics user profile
@ -670,6 +673,7 @@
- **Urort**: NRK P3 Urørt
- **ustream**
- **ustream:channel**
- **Ustudio**
- **Varzesh3**
- **Vbox7**
- **VeeHD**
@ -685,7 +689,7 @@
- **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- **VideoMega** (Currently broken)
- **VideoMega**
- **videomore**
- **videomore:season**
- **videomore:video**

test/helper.py

@ -11,8 +11,11 @@ import sys
import youtube_dl.extractor
from youtube_dl import YoutubeDL
from youtube_dl.utils import (
from youtube_dl.compat import (
compat_os_name,
compat_str,
)
from youtube_dl.utils import (
preferredencoding,
write_string,
)
@ -42,7 +45,7 @@ def report_warning(message):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if sys.stderr.isatty() and os.name != 'nt':
if sys.stderr.isatty() and compat_os_name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'

test/test_http.py

@ -52,7 +52,12 @@ class TestHTTP(unittest.TestCase):
('localhost', 0), HTTPTestRequestHandler)
self.httpd.socket = ssl.wrap_socket(
self.httpd.socket, certfile=certfn, server_side=True)
self.port = self.httpd.socket.getsockname()[1]
if os.name == 'java':
# In Jython SSLSocket is not a subclass of socket.socket
sock = self.httpd.socket.sock
else:
sock = self.httpd.socket
self.port = sock.getsockname()[1]
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()

test/test_utils.py

@ -18,6 +18,7 @@ import xml.etree.ElementTree
from youtube_dl.utils import (
age_restricted,
args_to_str,
encode_base_n,
clean_html,
DateRange,
detect_exe_version,
@ -60,6 +61,7 @@ from youtube_dl.utils import (
lowercase_escape,
url_basename,
urlencode_postdata,
update_url_query,
version_tuple,
xpath_with_ns,
xpath_element,
@ -75,6 +77,8 @@ from youtube_dl.utils import (
)
from youtube_dl.compat import (
compat_etree_fromstring,
compat_urlparse,
compat_parse_qs,
)
@ -249,6 +253,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
@ -452,6 +457,40 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))
def test_update_url_query(self):
def query_dict(url):
return compat_parse_qs(compat_urlparse.urlparse(url).query)
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
query_dict('http://example.com/path?quality=HD&format=mp4'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
query_dict('http://example.com/path?fields=id,formats,subtitles'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path?manifest=f4m', {'manifest': []})),
query_dict('http://example.com/path'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
query_dict('http://example.com/path?system=LINUX'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
query_dict('http://example.com/path?fields=id,formats,subtitles'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'width': 1080, 'height': 720})),
query_dict('http://example.com/path?width=1080&height=720'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'bitrate': 5020.43})),
query_dict('http://example.com/path?bitrate=5020.43'))
self.assertEqual(query_dict(update_url_query(
'http://example.com/path', {'test': '第二行тест'})),
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_dict_get(self):
FALSE_VALUES = {
'none': None,
@ -801,5 +840,16 @@ The first line
ohdave_rsa_encrypt(b'aa111222', e, N),
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
def test_encode_base_n(self):
self.assertEqual(encode_base_n(0, 30), '0')
self.assertEqual(encode_base_n(80, 30), '2k')
custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
self.assertEqual(encode_base_n(0, 30, custom_table), '9')
self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
if __name__ == '__main__':
unittest.main()

youtube_dl/YoutubeDL.py

@ -24,9 +24,6 @@ import time
import tokenize
import traceback
if os.name == 'nt':
import ctypes
from .compat import (
compat_basestring,
compat_cookiejar,
@ -34,6 +31,7 @@ from .compat import (
compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_os_name,
compat_str,
compat_tokenize_tokenize,
compat_urllib_error,
@ -87,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegFixupStretchedPP,
FFmpegMergerPP,
@ -95,6 +94,9 @@ from .postprocessor import (
)
from .version import __version__
if compat_os_name == 'nt':
import ctypes
class YoutubeDL(object):
"""YoutubeDL class.
@ -450,7 +452,7 @@ class YoutubeDL(object):
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@ -521,7 +523,7 @@ class YoutubeDL(object):
else:
if self.params.get('no_warnings'):
return
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
@ -533,7 +535,7 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
@ -566,7 +568,7 @@ class YoutubeDL(object):
elif template_dict.get('height'):
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
template_dict['resolution'] = '?x%d' % template_dict['width']
template_dict['resolution'] = '%dx?' % template_dict['width']
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@ -1631,12 +1633,14 @@ class YoutubeDL(object):
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
if success:
if success and filename != '-':
# Fixup content
fixup_policy = self.params.get('fixup')
if fixup_policy is None:
fixup_policy = 'detect_or_warn'
INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
if fixup_policy == 'warn':
@ -1649,15 +1653,18 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(stretched_pp)
else:
self.report_warning(
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
info_dict['id'], stretched_ratio))
'%s: Non-uniform pixel ratio (%s). %s'
% (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
if (info_dict.get('requested_formats') is None and
info_dict.get('container') == 'm4a_dash'):
if fixup_policy == 'warn':
self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
info_dict['id']))
self.report_warning(
'%s: writing DASH m4a. '
'Only some players support this container.'
% info_dict['id'])
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM4aPP(self)
if fixup_pp.available:
@ -1665,8 +1672,27 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
'%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
info_dict['id']))
'%s: writing DASH m4a. '
'Only some players support this container. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
if (info_dict.get('protocol') == 'm3u8_native' or
info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
self.report_warning('%s: malformated aac bitstream.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
if fixup_pp.available:
info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
'%s: malformated aac bitstream. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')

youtube_dl/__init__.py

@ -355,6 +355,7 @@ def _real_main(argv=None):
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
'encoding': opts.encoding,
'extract_flat': opts.extract_flat,
'mark_watched': opts.mark_watched,
'merge_output_format': opts.merge_output_format,
'postprocessors': postprocessors,
'fixup': opts.fixup,

youtube_dl/compat.py

@ -326,6 +326,9 @@ def compat_ord(c):
return ord(c)
compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
@ -346,7 +349,7 @@ else:
# The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
# for different platforms with correct environment variables decoding.
if os.name == 'posix':
if compat_os_name == 'posix':
def compat_expanduser(path):
"""Expand ~ and ~user constructions. If user or $HOME is unknown,
do nothing."""
@ -370,7 +373,7 @@ else:
userhome = pwent.pw_dir
userhome = userhome.rstrip('/')
return (userhome + path[i:]) or '/'
elif os.name == 'nt' or os.name == 'ce':
elif compat_os_name == 'nt' or compat_os_name == 'ce':
def compat_expanduser(path):
"""Expand ~ and ~user constructs.
@ -556,6 +559,7 @@ __all__ = [
'compat_itertools_count',
'compat_kwargs',
'compat_ord',
'compat_os_name',
'compat_parse_qs',
'compat_print',
'compat_shlex_split',

youtube_dl/downloader/common.py

@ -5,6 +5,7 @@ import re
import sys
import time
from ..compat import compat_os_name
from ..utils import (
encodeFilename,
error_to_compat_str,
@ -219,7 +220,7 @@ class FileDownloader(object):
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
if compat_os_name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):

youtube_dl/downloader/fragment.py

@ -99,7 +99,8 @@ class FragmentFD(FileDownloader):
state['eta'] = self.calc_eta(
start, time_now, estimated_size,
state['downloaded_bytes'])
state['speed'] = s.get('speed')
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state)

youtube_dl/extractor/__init__.py

@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
from .aol import AolIE
from .aol import (
AolIE,
AolFeaturesIE,
)
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
@ -74,6 +77,7 @@ from .bleacherreport import (
)
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
@ -339,6 +343,7 @@ from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
KuwoAlbumIE,
@ -351,10 +356,9 @@ from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .lecture2go import Lecture2GoIE
from .lemonde import LemondeIE
from .letv import (
LetvIE,
LetvTvIE,
LetvPlaylistIE,
from .leeco import (
LeIE,
LePlaylistIE,
LetvCloudIE,
)
from .libsyn import LibsynIE
@ -505,6 +509,7 @@ from .npr import NprIE
from .nrk import (
NRKIE,
NRKPlaylistIE,
NRKSkoleIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
@ -669,7 +674,6 @@ from .southpark import (
SouthParkEsIE,
SouthParkNlIE
)
from .space import SpaceIE
from .spankbang import SpankBangIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
@ -737,6 +741,7 @@ from .tmz import (
TMZArticleIE,
)
from .tnaflix import (
TNAFlixNetworkEmbedIE,
TNAFlixIE,
EMPFlixIE,
MovieFapIE,
@ -813,6 +818,7 @@ from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import UstudioIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE

youtube_dl/extractor/aol.py

@ -1,24 +1,11 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
_VALID_URL = r'''(?x)
(?:
aol-video:|
http://on\.aol\.com/
(?:
video/.*-|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
)
)
(?P<id>[0-9]+)
(?:$|\?)
'''
_VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
_TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
},
'add_ie': ['FiveMin'],
}, {
'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
'info_dict': {
'id': '152147',
'title': 'Brace Yourself - Today\'s Weirdest News',
},
'playlist_mincount': 10,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
playlist_id = mobj.group('playlist_id')
if not playlist_id or self._downloader.params.get('noplaylist'):
return self.url_result('5min:%s' % video_id)
video_id = self._match_id(url)
return self.url_result('5min:%s' % video_id)
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
playlist_html = self._search_regex(
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
'playlist HTML')
entries = [{
'_type': 'url',
'url': 'aol-video:%s' % m.group('id'),
'ie_key': 'Aol',
} for m in re.finditer(
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
playlist_html)]
class AolFeaturesIE(InfoExtractor):
IE_NAME = 'features.aol.com'
_VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': mobj.group('playlist_display_id'),
'title': title,
'entries': entries,
}
_TESTS = [{
'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
'md5': '7db483bb0c09c85e241f84a34238cc75',
'info_dict': {
'id': '519507715',
'ext': 'mp4',
'title': 'What To Watch - February 17, 2016',
},
'add_ie': ['FiveMin'],
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.url_result(self._search_regex(
r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
webpage, '5min embed url'), 'FiveMin')

youtube_dl/extractor/audimedia.py

@ -10,9 +10,9 @@ from ..utils import (
class AudiMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
'id': '1565',
@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
raw_payload = self._search_regex([
r'class="amtv-embed"[^>]+id="([^"]+)"',
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
], webpage, 'raw payload')
_, stage_mode, video_id, lang = raw_payload.split('-')
# TODO: handle s and e stage_mode (live streams and ended live streams)
@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
if not video_version_url:
continue
formats.append({
f = {
'url': video_version_url,
'width': int_or_none(video_version.get('width')),
'height': int_or_none(video_version.get('height')),
'abr': int_or_none(video_version.get('audio_bitrate')),
'vbr': int_or_none(video_version.get('video_bitrate')),
})
}
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
if bitrate:
f.update({
'format_id': 'http-%s' % bitrate,
})
formats.append(f)
self._sort_formats(formats)
return {

youtube_dl/extractor/bleacherreport.py

@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'md5': '6a5cd403418c7b01719248ca97fb0692',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'ext': 'webm',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
'md5': '8c2c12e3af7805152675446c905d159b',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'ext': 'mp4',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ExtractorError
class BokeCCBaseIE(InfoExtractor):
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
player_params_str = self._html_search_regex(
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
webpage, 'player params')
player_params = compat_parse_qs(player_params_str)
info_xml = self._download_xml(
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
player_params['siteid'][0], player_params['vid'][0]), video_id)
formats = [{
'format_id': format_id,
'url': quality.find('./copy').attrib['playurl'],
'preference': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
self._sort_formats(formats)
return formats
class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频'
_VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
'info_dict': {
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
'ext': 'flv',
'title': 'BokeCC Video',
},
}]
def _real_extract(self, url):
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
if not qs.get('vid') or not qs.get('uid'):
raise ExtractorError('Invalid URL', expected=True)
video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
webpage = self._download_webpage(url, video_id)
return {
'id': video_id,
'title': 'BokeCC Video', # no title provided in the webpage
'formats': self._extract_bokecc_formats(webpage, video_id),
}
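For reference, a sketch of how compat_parse_qs splits the captured player query into siteid/vid (parse_qs is the stdlib equivalent; the IDs are borrowed from the test URL above for illustration):

try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2

player_params = parse_qs('siteid=CD0C5D3C8614B28B&vid=E44D40C15E65EA30')
print(player_params['siteid'][0], player_params['vid'][0])
# CD0C5D3C8614B28B E44D40C15E65EA30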

View File

@ -4,12 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import js_to_json
class C56IE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
IE_NAME = '56.com'
_TEST = {
_TESTS = [{
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
'md5': 'e59995ac63d0457783ea05f93f12a866',
'info_dict': {
@ -18,12 +19,29 @@ class C56IE(InfoExtractor):
'title': '网事知多少 第32期：车怒',
'duration': 283.813,
},
}
}, {
'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
'md5': '',
'info_dict': {
'id': '82247482',
'title': '爱的诅咒之杜鹃花开',
},
'playlist_count': 7,
'add_ie': ['Sohu'],
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid')
webpage = self._download_webpage(url, text_id)
sohu_video_info_str = self._search_regex(
r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
if sohu_video_info_str:
sohu_video_info = self._parse_json(
sohu_video_info_str, text_id, transform_source=js_to_json)
return self.url_result(sohu_video_info['url'], 'Sohu')
page = self._download_json(
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
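The sohuVideoInfo blob is JavaScript rather than strict JSON, hence the js_to_json transform above. A sketch with a hypothetical value (the vid matches the new test; the Sohu URL is invented):

import json
from youtube_dl.utils import js_to_json

sohu_video_info_str = "{url: 'http://my.tv.sohu.com/pl/9637148/82247482.shtml', vid: '82247482'}"  # hypothetical
print(json.loads(js_to_json(sohu_video_info_str)))
# {'url': 'http://my.tv.sohu.com/pl/9637148/82247482.shtml', 'vid': '82247482'}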

View File

@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
'info_dict': {
'id': 'OEVzPCY2T-g',
'ext': 'mp4',
'ext': 'webm',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
'uploader': 'Cinemassacre',
@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
{
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/09/01/mckids/',
'md5': '6eb30961fa795fedc750eac4881ad2e1',
'md5': '7393c4e0f54602ad110c793eb7a6513a',
'info_dict': {
'id': 'FnxsNhuikpo',
'ext': 'mp4',
'ext': 'webm',
'upload_date': '20060901',
'uploader': 'Cinemassacre Extras',
'uploader': 'Cinemassacre Extra',
'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
'uploader_id': 'Cinemassacre',
'title': 'AVGN: McKids',
@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
'description': 'Lets Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
'upload_date': '20150525',
}
},
'params': {
# m3u8 download
'skip_download': True,
},
}
]

View File

@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
uploader = None
uploader_id = None
mpx_account = data['config']['uvpConfig']['default']['mpx_account']
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)

View File

@ -15,13 +15,14 @@ import math
from ..compat import (
compat_cookiejar,
compat_cookies,
compat_etree_fromstring,
compat_getpass,
compat_http_client,
compat_os_name,
compat_str,
compat_urllib_error,
compat_urllib_parse,
compat_urlparse,
compat_str,
compat_etree_fromstring,
)
from ..utils import (
NO_DEFAULT,
@ -157,12 +158,14 @@ class InfoExtractor(object):
thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The main artist who created the video.
release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
uploader_url: Full URL to a personal webpage of the video uploader.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{language: subformats}. "subformats" is a list sorted from
@ -425,7 +428,7 @@ class InfoExtractor(object):
self.to_screen('Saving request to ' + filename)
# Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if os.name == 'nt':
if compat_os_name == 'nt':
absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259:
filename = '\\\\?\\' + absfilepath
@ -594,7 +597,7 @@ class InfoExtractor(object):
if mobj:
break
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
@ -1033,11 +1036,21 @@ class InfoExtractor(object):
return []
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
# A Media Playlist Tag MUST NOT appear in a Master Playlist
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlist
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
if '#EXT-X-TARGETDURATION' in m3u8_doc:
# We should try extracting formats only from master playlists [1], i.e.
# playlists that describe available qualities. On the other hand media
# playlists [2] should be returned as is since they contain just the media
# without qualities renditions.
# Fortunately, a master playlist can be easily distinguished from a media
# playlist based on particular tags availability. As of [1, 2] master
# playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [3] the #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
# and MUST NOT appear in a master playlist, thus we can clearly detect a media
# playlist with this criterion.
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
return [{
'url': m3u8_url,
'format_id': m3u8_id,
@ -1084,19 +1097,29 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
}
codecs = last_info.get('CODECS')
if codecs:
# TODO: looks like video codec is not always necessarily goes first
va_codecs = codecs.split(',')
if va_codecs[0]:
f['vcodec'] = va_codecs[0]
if len(va_codecs) > 1 and va_codecs[1]:
f['acodec'] = va_codecs[1]
resolution = last_info.get('RESOLUTION')
if resolution:
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
codecs = last_info.get('CODECS')
if codecs:
vcodec, acodec = [None] * 2
va_codecs = codecs.split(',')
if len(va_codecs) == 1:
# Audio-only entries usually come with a single codec and
# no resolution. For more robustness we also check that it
# is mp4 audio.
if not resolution and va_codecs[0].startswith('mp4a'):
vcodec, acodec = 'none', va_codecs[0]
else:
vcodec = va_codecs[0]
else:
vcodec, acodec = va_codecs[:2]
f.update({
'acodec': acodec,
'vcodec': vcodec,
})
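# Worked examples for the CODECS handling above:
#   CODECS="mp4a.40.2", no RESOLUTION   -> vcodec='none', acodec='mp4a.40.2'
#   CODECS="avc1.64001f,mp4a.40.2"      -> vcodec='avc1.64001f', acodec='mp4a.40.2'
#   CODECS="avc1.64001f", RESOLUTION set -> vcodec='avc1.64001f', acodec=None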
if last_media is not None:
f['m3u8_media'] = last_media
last_media = None
@ -1600,6 +1623,15 @@ class InfoExtractor(object):
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
if (self._downloader.params.get('mark_watched', False) and
(self._get_login_info()[0] is not None or
self._downloader.params.get('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
class SearchInfoExtractor(InfoExtractor):
"""

View File

@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
}, {
'url': 'http://www.douyutv.com/85982',
'info_dict': {
@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Room not found',
}, {
'url': 'http://www.douyutv.com/17732',
'info_dict': {
'id': '17732',
'display_id': '17732',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
'is_live': True,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):

View File

@ -1,6 +1,8 @@
# encoding: utf-8
# coding: utf-8
from __future__ import unicode_literals
import json
import re
import time
from .common import InfoExtractor
@ -8,44 +10,125 @@ from ..utils import int_or_none
class DPlayIE(InfoExtractor):
_VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
_TESTS = [{
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
'info_dict': {
'id': '1255600',
'display_id': 'stagione-1-episodio-25',
'ext': 'mp4',
'title': 'Episodio 25',
'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
'duration': 2761,
'timestamp': 1454701800,
'upload_date': '20160205',
'creator': 'RTIT',
'series': 'Take me out',
'season_number': 1,
'episode_number': 25,
'age_limit': 0,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
'info_dict': {
'id': '3172',
'ext': 'mp4',
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
'ext': 'flv',
'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
'duration': 2650,
'timestamp': 1365454320,
'upload_date': '20130408',
'creator': 'Kanal 5 (Home)',
'series': 'Nugammalt - 77 händelser som format Sverige',
'season_number': 1,
'episode_number': 1,
'age_limit': 0,
},
}
}, {
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
'info_dict': {
'id': '70816',
'display_id': 'season-6-episode-12',
'ext': 'flv',
'title': 'Episode 12',
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
'duration': 2563,
'timestamp': 1429696800,
'upload_date': '20150422',
'creator': 'Kanal 4',
'series': 'Mig og min mor',
'season_number': 6,
'episode_number': 12,
'age_limit': 0,
},
}, {
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
domain = mobj.group('domain')
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-video-id="(\d+)"', webpage, 'video id')
r'data-video-id=["\'](\d+)', webpage, 'video id')
info = self._download_json(
'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
video_id)['data'][0]
self._set_cookie(
'secure.dplay.se', 'dsc-geo',
'{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
# TODO: consider adding support for 'stream_type=hds', it seems to
# require setting some cookies
manifest_url = self._download_json(
'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
video_id, 'Getting manifest url for hls stream')['hls']
formats = self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
title = info['title']
PROTOCOLS = ('hls', 'hds')
formats = []
def extract_formats(protocol, manifest_url):
if protocol == 'hls':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False))
elif protocol == 'hds':
formats.extend(self._extract_f4m_formats(
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
video_id, f4m_id=protocol, fatal=False))
domain_tld = domain.split('.')[-1]
if domain_tld in ('se', 'dk'):
for protocol in PROTOCOLS:
self._set_cookie(
'secure.dplay.%s' % domain_tld, 'dsc-geo',
json.dumps({
'countryCode': domain_tld.upper(),
'expiry': (time.time() + 20 * 60) * 1000,
}))
stream = self._download_json(
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
% (domain_tld, video_id, protocol), video_id,
'Downloading %s stream JSON' % protocol, fatal=False)
if stream and stream.get(protocol):
extract_formats(protocol, stream[protocol])
else:
for protocol in PROTOCOLS:
if info.get(protocol):
extract_formats(protocol, info[protocol])
return {
'id': video_id,
'display_id': display_id,
'title': info['title'],
'formats': formats,
'title': title,
'description': info.get('video_metadata_longDescription'),
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
'timestamp': int_or_none(info.get('video_publish_date')),
'creator': info.get('video_metadata_homeChannel'),
'series': info.get('video_metadata_show'),
'season_number': int_or_none(info.get('season')),
'episode_number': int_or_none(info.get('episode')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
}
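For the Swedish and Danish sites the stream API is geo-fenced; the dsc-geo cookie set above is just a small JSON blob with a country code and a millisecond expiry. A sketch of its value:

import json
import time

print(json.dumps({
    'countryCode': 'SE',
    'expiry': (time.time() + 20 * 60) * 1000,  # 20 minutes from now, in ms
}))
# e.g. {"countryCode": "SE", "expiry": 1457222400000.0}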

View File

@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País'
_TEST = {
_TESTS = [{
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55',
'info_dict': {
@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206',
}
}
}, {
'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
'md5': '3bd5b09509f3519d7d9e763179b013de',
'info_dict': {
'id': '1456340311_668921',
'ext': 'mp4',
'title': 'Cómo hacer el mejor café con cafetera italiana',
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
'upload_date': '20160303',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex(
r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex(
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
fatal=False)
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
webpage, 'thumbnail URL', fatal=False)
thumbnail = (
None if thumbnail_suffix is None
else prefix + thumbnail_suffix)
title = self._html_search_regex(
'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
(r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
webpage, 'title')
date_str = self._search_regex(
upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
webpage, 'upload date', fatal=False)
upload_date = (None if date_str is None else unified_strdate(date_str))
webpage, 'upload date', default=None) or self._html_search_meta(
'datePublished', webpage, 'timestamp'))
return {
'id': video_id,

View File

@ -1,21 +1,13 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
url_basename,
)
class EngadgetIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www.engadget.com/
(?:video(?:/5min)?/(?P<id>\d+)|
[\d/]+/.*?)
'''
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.engadget.com/video/5min/518153925/',
'url': 'http://www.engadget.com/video/518153925/',
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
'info_dict': {
'id': '518153925',
@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id is not None:
return self.url_result('5min:%s' % video_id)
else:
title = url_basename(url)
webpage = self._download_webpage(url, title)
ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
return {
'_type': 'playlist',
'title': title,
'entries': [self.url_result('5min:%s' % vid) for vid in ids]
}
return self.url_result('5min:%s' % video_id)

View File

@ -34,8 +34,9 @@ class FacebookIE(InfoExtractor):
video/video\.php|
photo\.php|
video\.php|
video/embed
)\?(?:.*?)(?:v|video_id)=|
video/embed|
story\.php
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?
)|
facebook:
@ -92,6 +93,9 @@ class FacebookIE(InfoExtractor):
}, {
'url': 'facebook:544765982287235',
'only_matching': True,
}, {
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
'only_matching': True,
}]
def _login(self):
@ -208,10 +212,13 @@ class FacebookIE(InfoExtractor):
for src_type in ('src', 'src_no_ratelimit'):
src = f[0].get('%s_%s' % (quality, src_type))
if src:
preference = -10 if format_id == 'progressive' else 0
if quality == 'hd':
preference += 5
formats.append({
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
'url': src,
'preference': -10 if format_id == 'progressive' else 0,
'preference': preference,
})
dash_manifest = f[0].get('dash_manifest')
if dash_manifest:

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
@ -16,12 +18,7 @@ from ..utils import (
class FiveMinIE(InfoExtractor):
IE_NAME = '5min'
_VALID_URL = r'''(?x)
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''
_VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
_TESTS = [
{
@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
'title': 'How to Make a Next-Level Fruit Salad',
'duration': 184,
},
'skip': 'no longer available',
},
]
_ERRORS = {
@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
}
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
sid = mobj.group('sid')
if mobj.group('query'):
qs = compat_parse_qs(mobj.group('query'))
if not qs.get('playList'):
raise ExtractorError('Invalid URL', expected=True)
video_id = qs['playList'][0]
if qs.get('sid'):
sid = qs['sid'][0]
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page')
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
query = compat_urllib_parse.urlencode({
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
})
if not sid:
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page')
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
response = self._download_json(
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
'https://syn.5min.com/handlers/SenseHandler.ashx?' +
compat_urllib_parse.urlencode({
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
}),
video_id)
if not response['success']:
raise ExtractorError(
@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
for rendition in info['Renditions']:
if rendition['RenditionType'] == 'm3u8':
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
elif rendition['RenditionType'] == 'aac':
if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
continue
else:
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
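How playList and sid fall out of a PlayerSeed.js query string in the branch above (the playList value is reused from the Engadget test; the sid is hypothetical):

try:
    from urllib.parse import parse_qs  # compat_parse_qs equivalent
except ImportError:
    from urlparse import parse_qs

qs = parse_qs('playList=518153925&sid=577&autoStart=true')
video_id = qs['playList'][0]
sid = qs['sid'][0] if qs.get('sid') else None
print(video_id, sid)  # 518153925 577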

View File

@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
# 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',

View File

@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
'info_dict': {
'id': 'poKsVCZ64uU',
'ext': 'mp4',
'ext': 'webm',
'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'uploader': 'freespeechtv',

View File

@ -47,6 +47,7 @@ from .senateisvp import SenateISVPIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE
@ -1633,6 +1634,11 @@ class GenericIE(InfoExtractor):
if xhamster_urls:
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
# Look for embedded TNAFlixNetwork player
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)

View File

@ -42,7 +42,7 @@ class ImdbIE(InfoExtractor):
for f_url, f_name in extra_formats]
format_pages.append(player_page)
quality = qualities(['SD', '480p', '720p'])
quality = qualities(('SD', '480p', '720p', '1080p'))
formats = []
for format_page in format_pages:
json_data = self._search_regex(

View File

@ -73,7 +73,7 @@ class IndavideoEmbedIE(InfoExtractor):
'url': self._proto_relative_url(thumbnail)
} for thumbnail in video.get('thumbnails', [])]
tags = [tag['title'] for tag in video.get('tags', [])]
tags = [tag['title'] for tag in video.get('tags') or []]
return {
'id': video.get('id') or video_id,

View File

@ -4,15 +4,12 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_parse_qs,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import determine_ext
from .bokecc import BokeCCBaseIE
class InfoQIE(InfoExtractor):
class InfoQIE(BokeCCBaseIE):
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
_TESTS = [{
@ -38,26 +35,6 @@ class InfoQIE(InfoExtractor):
},
}]
def _extract_bokecc_videos(self, webpage, video_id):
# TODO: bokecc.com is a Chinese video cloud platform
# It should have an independent extractor but I don't have other
# examples using bokecc
player_params_str = self._html_search_regex(
r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
webpage, 'player params', default=None)
player_params = compat_parse_qs(player_params_str)
info_xml = self._download_xml(
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
player_params['siteid'][0], player_params['vid'][0]), video_id)
return [{
'format_id': 'bokecc',
'url': quality.find('./copy').attrib['playurl'],
'preference': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
def _extract_rtmp_videos(self, webpage):
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
@ -101,7 +78,7 @@ class InfoQIE(InfoExtractor):
if '/cn/' in url:
# for China videos, HTTP video URL exists but always fails with 403
formats = self._extract_bokecc_videos(webpage, video_id)
formats = self._extract_bokecc_formats(webpage, video_id)
else:
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)

View File

@ -18,6 +18,7 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
decode_packed_codes,
ExtractorError,
ohdave_rsa_encrypt,
remove_start,
@ -126,43 +127,11 @@ class IqiyiSDK(object):
class IqiyiSDKInterpreter(object):
BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
def __init__(self, sdk_code):
self.sdk_code = sdk_code
@classmethod
def base62(cls, num):
if num == 0:
return '0'
ret = ''
while num:
ret = cls.BASE62_TABLE[num % 62] + ret
num = num // 62
return ret
def decode_eval_codes(self):
self.sdk_code = self.sdk_code[5:-3]
mobj = re.search(
r"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}",
self.sdk_code)
obfucasted_code, count, symbols = mobj.groups()
count = int(count)
symbols = symbols.split('|')
symbol_table = {}
while count:
count -= 1
b62count = self.base62(count)
symbol_table[b62count] = symbols[count] or b62count
self.sdk_code = re.sub(
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
obfucasted_code)
def run(self, target, ip, timestamp):
self.decode_eval_codes()
self.sdk_code = decode_packed_codes(self.sdk_code)
functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
@ -529,7 +498,7 @@ class IqiyiIE(InfoExtractor):
raw_data = self._download_json(api_url, video_id)
return raw_data
def get_enc_key(self, swf_url, video_id):
def get_enc_key(self, video_id):
# TODO: automatic key extraction
# last update at 2016-01-22 for Zombie::bite
enc_key = '6ab6d0280511493ba85594779759d4ed'
@ -582,11 +551,9 @@ class IqiyiIE(InfoExtractor):
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
video_id = self._search_regex(
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
swf_url = self._search_regex(
r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
_uuid = uuid.uuid4().hex
enc_key = self.get_enc_key(swf_url, video_id)
enc_key = self.get_enc_key(video_id)
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
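The removed decode_eval_codes() unpacked P.A.C.K.E.R.-obfuscated JavaScript; that logic now lives in utils.decode_packed_codes, as the import change above shows. A sketch of the base62 helper the deleted method was built on:

BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

def base62(num):
    if num == 0:
        return '0'
    ret = ''
    while num:
        ret = BASE62_TABLE[num % 62] + ret
        num //= 62
    return ret

print(base62(61), base62(62), base62(125))  # Z 10 21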

View File

@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
config_url = self._html_search_regex(
r'data-src="(/contenu/medias/video.php.*?)"',
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
webpage, 'config URL')
config_url = 'http://www.jeuxvideo.com' + config_url

View File

@ -7,7 +7,46 @@ from .common import InfoExtractor
from ..utils import int_or_none
class JWPlatformIE(InfoExtractor):
class JWPlatformBaseIE(InfoExtractor):
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
video_data = jwplayer_data['playlist'][0]
subtitles = {}
for track in video_data['tracks']:
if track['kind'] == 'captions':
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
'vcodec': 'none',
})
else:
formats.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'] if require_title else video_data.get('title'),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'subtitles': subtitles,
'formats': formats,
}
class JWPlatformIE(JWPlatformBaseIE):
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TEST = {
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
@ -33,38 +72,4 @@ class JWPlatformIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
video_data = json_data['playlist'][0]
subtitles = {}
for track in video_data['tracks']:
if track['kind'] == 'captions':
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
'vcodec': 'none',
})
else:
formats.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'subtitles': subtitles,
'formats': formats,
}
return self._parse_jwplayer_data(json_data, video_id)
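The minimal shape of the dict the new _parse_jwplayer_data() helper expects, as a sketch (keys taken from the parsing code above; field values hypothetical):

jwplayer_data = {
    'playlist': [{
        'title': 'Sample clip',
        'image': '//content.jwplatform.com/thumbs/nPripu9l.jpg',
        'pubdate': 1400000000,
        'tracks': [{'kind': 'captions', 'label': 'en',
                    'file': '//content.jwplatform.com/tracks/nPripu9l.srt'}],
        'sources': [
            {'file': '//example.com/master.m3u8',
             'type': 'application/vnd.apple.mpegurl'},
            {'file': '//example.com/video-720.mp4', 'width': 1280, 'height': 720},
            {'file': '//example.com/audio.m4a', 'type': 'audio/mp4'},
        ],
    }],
}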

View File

@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.khanacademy.org/video/one-time-pad',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'md5': '7b391cce85e758fb94f763ddc1bbb979',
'info_dict': {
'id': 'one-time-pad',
'ext': 'mp4',
'ext': 'webm',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
int_or_none,
float_or_none,
timeconvert,
update_url_query,
xpath_text,
)
class KUSIIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
_TESTS = [{
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
'info_dict': {
'id': '12203019',
'ext': 'mp4',
'title': 'Turko Files: Case Closed! & Put On Hold!',
'duration': 231.0,
'upload_date': '20160210',
'timestamp': 1455087571,
'thumbnail': 're:^https?://.*\.jpg$'
},
}, {
'url': 'http://kusi.com/video?clipId=12203019',
'info_dict': {
'id': '12203019',
'ext': 'mp4',
'title': 'Turko Files: Case Closed! & Put On Hold!',
'duration': 231.0,
'upload_date': '20160210',
'timestamp': 1455087571,
'thumbnail': 're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True, # Same as previous one
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
clip_id = mobj.group('clipId')
video_id = clip_id or mobj.group('path')
webpage = self._download_webpage(url, video_id)
if clip_id is None:
video_id = clip_id = self._html_search_regex(
r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
affiliate_id = self._search_regex(
r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
# See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
xml_url = update_url_query('http://www.kusi.com/build.asp', {
'buildtype': 'buildfeaturexmlrequest',
'featureType': 'Clip',
'featureid': clip_id,
'affiliateno': affiliate_id,
'clientgroupid': '1',
'rnd': int(round(random.random() * 1000000)),
})
doc = self._download_xml(xml_url, video_id)
video_title = xpath_text(doc, 'HEADLINE', fatal=True)
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
description = xpath_text(doc, 'ABSTRACT')
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
formats = []
for quality in quality_options:
formats.append({
'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
'height': int_or_none(quality.attrib.get('height')),
'width': int_or_none(quality.attrib.get('width')),
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
'description': description,
'duration': duration,
'formats': formats,
'thumbnail': thumbnail,
'timestamp': creation_time,
}
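A sketch of the build.asp request URL assembled above (the clip ID matches the test; the affiliate number is hypothetical):

import random
try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

print('http://www.kusi.com/build.asp?' + urlencode({
    'buildtype': 'buildfeaturexmlrequest',
    'featureType': 'Clip',
    'featureid': '12203019',
    'affiliateno': '710',  # hypothetical affiliate id
    'clientgroupid': '1',
    'rnd': int(round(random.random() * 1000000)),
}))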

View File

@ -68,6 +68,7 @@ class KuwoIE(KuwoBaseIE):
'id': '6446136',
'ext': 'mp3',
'title': '心',
'description': 'md5:b2ab6295d014005bfc607525bfc1e38a',
'creator': 'IU',
'upload_date': '20150518',
},

View File

@ -1,36 +1,39 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import datetime
import hashlib
import re
import time
import base64
import hashlib
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_ord,
compat_str,
compat_urllib_parse,
)
from ..utils import (
determine_ext,
encode_data_uri,
ExtractorError,
int_or_none,
orderedSet,
parse_iso8601,
sanitized_Request,
int_or_none,
str_or_none,
encode_data_uri,
url_basename,
)
class LetvIE(InfoExtractor):
class LeIE(InfoExtractor):
IE_DESC = '乐视网'
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
_VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
_TESTS = [{
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
'url': 'http://www.le.com/ptv/vplay/22005890.html',
'md5': 'edadcfe5406976f42f9f266057ee5e40',
'info_dict': {
'id': '22005890',
@ -42,7 +45,7 @@ class LetvIE(InfoExtractor):
'hls_prefer_native': True,
},
}, {
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
'url': 'http://www.le.com/ptv/vplay/1415246.html',
'info_dict': {
'id': '1415246',
'ext': 'mp4',
@ -54,7 +57,7 @@ class LetvIE(InfoExtractor):
},
}, {
'note': 'This video is available only in Mainland China, thus a proxy is needed',
'url': 'http://www.letv.com/ptv/vplay/1118082.html',
'url': 'http://www.le.com/ptv/vplay/1118082.html',
'md5': '2424c74948a62e5f31988438979c5ad1',
'info_dict': {
'id': '1118082',
@ -94,17 +97,16 @@ class LetvIE(InfoExtractor):
return encrypted_data
encrypted_data = encrypted_data[5:]
_loc4_ = bytearray()
while encrypted_data:
b = compat_ord(encrypted_data[0])
_loc4_.extend([b // 16, b & 0x0f])
encrypted_data = encrypted_data[1:]
_loc4_ = bytearray(2 * len(encrypted_data))
for idx, val in enumerate(encrypted_data):
b = compat_ord(val)
_loc4_[2 * idx] = b // 16
_loc4_[2 * idx + 1] = b % 16
idx = len(_loc4_) - 11
_loc4_ = _loc4_[idx:] + _loc4_[:idx]
_loc7_ = bytearray()
while _loc4_:
_loc7_.append(_loc4_[0] * 16 + _loc4_[1])
_loc4_ = _loc4_[2:]
_loc7_ = bytearray(len(encrypted_data))
for i in range(len(encrypted_data)):
_loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
return bytes(_loc7_)
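Rewritten as a standalone sketch, the decryption above splits each byte into high/low nibbles, rotates the nibble array so the last 11 nibbles move to the front, and re-packs pairs into bytes:

def decrypt_body(encrypted_data):
    # encrypted_data: the bytes following the 5-byte header stripped above
    data = bytearray(encrypted_data)
    nibbles = bytearray(2 * len(data))
    for i, b in enumerate(data):
        nibbles[2 * i] = b // 16
        nibbles[2 * i + 1] = b % 16
    idx = len(nibbles) - 11
    nibbles = nibbles[idx:] + nibbles[:idx]
    return bytes(bytearray(
        nibbles[2 * i] * 16 + nibbles[2 * i + 1] for i in range(len(data))))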
@ -117,10 +119,10 @@ class LetvIE(InfoExtractor):
'splatid': 101,
'format': 1,
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
'domain': 'www.le.com'
}
play_json_req = sanitized_Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
@ -193,26 +195,51 @@ class LetvIE(InfoExtractor):
}
class LetvTvIE(InfoExtractor):
_VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
class LePlaylistIE(InfoExtractor):
_VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
_TESTS = [{
'url': 'http://www.letv.com/tv/46177.html',
'url': 'http://www.le.com/tv/46177.html',
'info_dict': {
'id': '46177',
'title': '美人天下',
'description': 'md5:395666ff41b44080396e59570dbac01c'
},
'playlist_count': 35
}, {
'url': 'http://tv.le.com/izt/wuzetian/index.html',
'info_dict': {
'id': 'wuzetian',
'title': '武媚娘传奇',
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
},
# This playlist contains some extra videos other than the drama itself
'playlist_mincount': 96
}, {
'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
# This series is moved to http://www.le.com/tv/10005297.html
'only_matching': True,
}, {
'url': 'http://www.le.com/comic/92063.html',
'only_matching': True,
}, {
'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url)
def _real_extract(self, url):
playlist_id = self._match_id(url)
page = self._download_webpage(url, playlist_id)
media_urls = list(set(re.findall(
r'http://www.letv.com/ptv/vplay/\d+.html', page)))
entries = [self.url_result(media_url, ie='Letv')
for media_url in media_urls]
# Currently old domain names are still used in playlists
media_ids = orderedSet(re.findall(
r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
for media_id in media_ids]
title = self._html_search_meta('keywords', page,
fatal=False).split('，')[0]
@ -222,31 +249,9 @@ class LetvTvIE(InfoExtractor):
playlist_description=description)
class LetvPlaylistIE(LetvTvIE):
_VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
_TESTS = [{
'url': 'http://tv.letv.com/izt/wuzetian/index.html',
'info_dict': {
'id': 'wuzetian',
'title': '武媚娘传奇',
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
},
# This playlist contains some extra videos other than the drama itself
'playlist_mincount': 96
}, {
'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
'info_dict': {
'id': 'lswjzzjc',
# The title should be "劲舞青春", but I can't find a simple way to
# determine the playlist title
'title': '乐视午间自制剧场',
'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
},
'playlist_mincount': 7
}]
class LetvCloudIE(InfoExtractor):
# Most of *.letv.com was changed to *.le.com on 2016/01/02,
# but yuntv.letv.com is kept, so also keep the extractor name
IE_DESC = '乐视云'
_VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
@ -327,7 +332,7 @@ class LetvCloudIE(InfoExtractor):
formats.append({
'url': url,
'ext': determine_ext(decoded_url),
'format_id': int_or_none(play_url.get('vtype')),
'format_id': str_or_none(play_url.get('vtype')),
'format_note': str_or_none(play_url.get('definition')),
'width': int_or_none(play_url.get('vwidth')),
'height': int_or_none(play_url.get('vheight')),

View File

@ -20,18 +20,18 @@ class LifeNewsIE(InfoExtractor):
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://lifenews.ru/news/126342',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
# single video embedded via video/source
'url': 'http://lifenews.ru/news/98736',
'md5': '77c95eaefaca216e32a76a343ad89d23',
'info_dict': {
'id': '126342',
'id': '98736',
'ext': 'mp4',
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
'thumbnail': 're:http://.*\.jpg',
'upload_date': '20140130',
'title': 'Мужчина нашел дома архив оборонного завода',
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
'upload_date': '20120805',
}
}, {
# video in <iframe>
# single video embedded via iframe
'url': 'http://lifenews.ru/news/152125',
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
'info_dict': {
@ -42,15 +42,33 @@ class LifeNewsIE(InfoExtractor):
'upload_date': '20150402',
}
}, {
# two videos embedded via iframe
'url': 'http://lifenews.ru/news/153461',
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
'info_dict': {
'id': '153461',
'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505',
}
},
'playlist': [{
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
'info_dict': {
'id': '153461-video1',
'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505',
},
}, {
'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
'info_dict': {
'id': '153461-video2',
'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505',
},
}],
}, {
'url': 'http://lifenews.ru/video/13035',
'only_matching': True,
@ -65,10 +83,14 @@ class LifeNewsIE(InfoExtractor):
'http://lifenews.ru/%s/%s' % (section, video_id),
video_id, 'Downloading page')
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
iframe_link = self._html_search_regex(
'<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
if not videos and not iframe_link:
video_urls = re.findall(
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
iframe_links = re.findall(
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
webpage)
if not video_urls and not iframe_links:
raise ExtractorError('No media links available for %s' % video_id)
title = remove_end(
@ -95,31 +117,44 @@ class LifeNewsIE(InfoExtractor):
'upload_date': upload_date,
}
def make_entry(video_id, media, video_number=None):
def make_entry(video_id, video_url, index=None):
cur_info = dict(common_info)
cur_info.update({
'id': video_id,
'url': media[1],
'thumbnail': media[0],
'title': title if video_number is None else '%s-video%s' % (title, video_number),
'id': video_id if not index else '%s-video%s' % (video_id, index),
'url': video_url,
'title': title if not index else '%s (Видео %s)' % (title, index),
})
return cur_info
if iframe_link:
iframe_link = self._proto_relative_url(iframe_link, 'http:')
cur_info = dict(common_info)
cur_info.update({
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': iframe_link,
})
def make_video_entry(video_id, video_url, index=None):
video_url = compat_urlparse.urljoin(url, video_url)
return make_entry(video_id, video_url, index)
def make_iframe_entry(video_id, video_url, index=None):
video_url = self._proto_relative_url(video_url, 'http:')
cur_info = make_entry(video_id, video_url, index)
cur_info['_type'] = 'url_transparent'
return cur_info
if len(videos) == 1:
return make_entry(video_id, videos[0])
else:
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
if len(video_urls) == 1 and not iframe_links:
return make_video_entry(video_id, video_urls[0])
if len(iframe_links) == 1 and not video_urls:
return make_iframe_entry(video_id, iframe_links[0])
entries = []
if video_urls:
for num, video_url in enumerate(video_urls, 1):
entries.append(make_video_entry(video_id, video_url, num))
if iframe_links:
for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
entries.append(make_iframe_entry(video_id, iframe_link, num))
playlist = common_info.copy()
playlist.update(self.playlist_result(entries, video_id, title, description))
return playlist
class LifeEmbedIE(InfoExtractor):

View File

@ -64,7 +64,7 @@ class LivestreamIE(InfoExtractor):
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base_ele = find_xpath_attr(
smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/'
base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
formats = []
video_nodes = smil.findall(self._xpath_ns('.//video', namespace))

View File

@ -14,7 +14,7 @@ from ..utils import (
class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
_TESTS = [{
# MDR regularly deletes its videos
@ -60,6 +60,9 @@ class MDRIE(InfoExtractor):
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
}, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True,
}]
def _real_extract(self, url):
@ -68,8 +71,8 @@ class MDRIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_url = self._search_regex(
r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
webpage, 'data url', group='url')
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
webpage, 'data url', default=None, group='url').replace('\/', '/')
doc = self._download_xml(
compat_urlparse.urljoin(url, data_url), video_id)

View File

@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
'info_dict': {
'id': 'EObHWIEKGjA',
'ext': 'mp4',
'ext': 'webm',
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
'upload_date': '20121109',

View File

@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
str_to_int,
unified_strdate,
)
@ -12,55 +13,62 @@ from ..utils import (
class MotherlessIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
_TESTS = [
{
'url': 'http://motherless.com/AC3FFE1',
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
'info_dict': {
'id': 'AC3FFE1',
'ext': 'mp4',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
},
{
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
'info_dict': {
'id': '532291B',
'ext': 'mp4',
'title': 'Amazing girl playing the omegle game, PERFECT!',
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
},
{
'url': 'http://motherless.com/g/cosplay/633979F',
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
'info_dict': {
'id': '633979F',
'ext': 'mp4',
'title': 'Turtlette',
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
_TESTS = [{
'url': 'http://motherless.com/AC3FFE1',
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
'info_dict': {
'id': 'AC3FFE1',
'ext': 'mp4',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
]
}, {
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
'info_dict': {
'id': '532291B',
'ext': 'mp4',
'title': 'Amazing girl playing the omegle game, PERFECT!',
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
},
'skip': '404',
}, {
'url': 'http://motherless.com/g/cosplay/633979F',
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
'info_dict': {
'id': '633979F',
'ext': 'mp4',
'title': 'Turtlette',
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
}, {
# no keywords
'url': 'http://motherless.com/8B4BBC1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if any(p in webpage for p in (
'<title>404 - MOTHERLESS.COM<',
">The page you're looking for cannot be found.<")):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
title = self._html_search_regex(
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
video_url = self._html_search_regex(
@ -86,7 +94,7 @@ class MotherlessIE(InfoExtractor):
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
webpage, 'uploader_id')
categories = self._html_search_meta('keywords', webpage)
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]

View File

@ -1,18 +1,26 @@
from __future__ import unicode_literals
import functools
import os.path
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
parse_duration,
int_or_none,
OnDemandPagedList,
parse_duration,
remove_start,
xpath_text,
xpath_attr,
)
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
@ -44,14 +52,101 @@ class NBAIE(InfoExtractor):
'timestamp': 1432134543,
'upload_date': '20150520',
}
}, {
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
'info_dict': {
'id': '1455672027478-Doc_Feb16_720',
'ext': 'mp4',
'title': 'Practice: Doc Rivers - 2/16/16',
'description': 'Head Coach Doc Rivers addresses the media following practice.',
'upload_date': '20160217',
'timestamp': 1455672000,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
'info_dict': {
'id': 'timberwolves',
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
},
'playlist_count': 30,
'params': {
# Downloading the whole playlist takes too long
'playlist_items': '1-30',
},
}, {
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
'info_dict': {
'id': 'Wigginsmp4',
'ext': 'mp4',
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
'upload_date': '20141212',
'timestamp': 1418418600,
},
'params': {
'noplaylist': True,
# m3u8 download
'skip_download': True,
},
}]
_PAGE_SIZE = 30
def _fetch_page(self, team, video_id, page):
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse.urlencode({
'type': 'teamvideo',
'start': page * self._PAGE_SIZE + 1,
'npp': (page + 1) * self._PAGE_SIZE + 1,
'sort': 'recent',
'output': 'json',
'site': team,
})
results = self._download_json(
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
for item in results:
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
def _extract_playlist(self, orig_path, video_id, webpage):
team = orig_path.split('/')[0]
if self._downloader.params.get('noplaylist'):
self.to_screen('Downloading just video because of --no-playlist')
video_path = self._search_regex(
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
return self.url_result(video_url)
self.to_screen('Downloading playlist - add --no-playlist to just download video')
playlist_title = self._og_search_title(webpage, fatal=False)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, team, video_id),
self._PAGE_SIZE, use_cache=True)
return self.playlist_result(entries, team, playlist_title)
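OnDemandPagedList lazily calls the page function with 0-based page numbers and flattens whatever it yields; a toy sketch of the pattern used above (assuming utils.OnDemandPagedList's getslice interface):

import functools
from youtube_dl.utils import OnDemandPagedList

def fetch_page(prefix, page):
    for i in range(3):  # pretend each API page returns 3 entries
        yield '%s-%d' % (prefix, page * 3 + i)

pages = OnDemandPagedList(
    functools.partial(fetch_page, 'video'), 3, use_cache=True)
print(pages.getslice(0, 4))  # ['video-0', 'video-1', 'video-2', 'video-3']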
def _real_extract(self, url):
path, video_id = re.match(self._VALID_URL, url).groups()
orig_path = path
if path.startswith('nba/'):
path = path[3:]
if 'video/' not in path:
webpage = self._download_webpage(url, video_id)
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
if path == '{{id}}':
return self._extract_playlist(orig_path, video_id, webpage)
# See prepareContentId() of pkgCvp.js
if path.startswith('video/teams'):
path = 'video/channels/proxy/' + path[6:]
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
video_id = xpath_text(video_info, 'slug')
video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
title = xpath_text(video_info, 'headline')
description = xpath_text(video_info, 'description')
duration = parse_duration(xpath_text(video_info, 'length'))

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_urlparse,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
ExtractorError,
@ -87,7 +90,7 @@ class NRKIE(InfoExtractor):
class NRKPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
@ -126,6 +129,37 @@ class NRKPlaylistIE(InfoExtractor):
entries, playlist_id, playlist_title, playlist_description)
class NRKSkoleIE(InfoExtractor):
IE_DESC = 'NRK Skole'
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532',
'md5': '04cd85877cc1913bce73c5d28a47e00f',
'info_dict': {
'id': '6021',
'ext': 'flv',
'title': 'Genetikk og eneggede tvillinger',
'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
'duration': 399,
},
}, {
'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed',
'only_matching': True,
}, {
'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = compat_urllib_parse_unquote(self._match_id(url))
webpage = self._download_webpage(url, video_id)
nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
return self.url_result('nrk:%s' % nrk_id)
class NRKTVIE(InfoExtractor):
IE_DESC = 'NRK TV and NRK Radio'
_VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
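The new NRK Skole extractor is a thin resolver: it pulls the numeric clip id out of the page and hands off to the generic nrk: extractor via url_result. A small sketch of that lookup step (fabricated markup, hypothetical helper name):

import re

def extract_nrk_id(webpage):
    # Same regex as above: grab the id from data-nrk-id="..."
    m = re.search(r'data-nrk-id=["\'](\d+)', webpage)
    return m.group(1) if m else None

print(extract_nrk_id('<div data-nrk-id="6021"></div>'))  # 6021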


@@ -12,14 +12,14 @@ class PyvideoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
'md5': '520915673e53a5c5d487c36e0c4d85b5',
'info_dict': {
'id': '24_4WWkSmNo',
'ext': 'mp4',
'ext': 'webm',
'title': 'Become a logging expert in 30 minutes',
'description': 'md5:9665350d466c67fb5b1598de379021f7',
'upload_date': '20130320',
'uploader': 'NextDayVideo',
'uploader': 'Next Day Video',
'uploader_id': 'NextDayVideo',
},
'add_ie': ['Youtube'],


@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
'info_dict': {
'id': '73034',
'id': '71089',
'display_id': 'technobuffalo/5-google-predictions-for-2016',
'ext': 'webm',
'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
'uploader_id': 'technobuffalo',
}
}, {
# Show
'url': 'http://testtube.com/brainstuff',
'info_dict': {
'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
}, {
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
'info_dict': {
'id': '60163',
'id': '58227',
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
'duration': 275,
'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}, {
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'info_dict': {
'id': '71618',
'ext': 'mp4',
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
'uploader': 'Editors\' Picks',
'uploader_id': 'tt-editors-picks',
'timestamp': 1453309200,
'upload_date': '20160120',
},
'add_ie': ['Youtube'],
}, {
# Tag
'url': 'http://testtube.com/tech-news',
'info_dict': {
'id': '21018',
'title': 'tech news',
},
'playlist_mincount': 9,
}]
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url):
domain, display_id = re.match(self._VALID_URL, url).groups()
site = domain.split('.')[0]
page_info = self._download_json(
self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
if page_info['data']['type'] == 'episode':
episode_data = page_info['data']
video_id = compat_str(episode_data['video']['data']['id'])
page_data = page_info['data']
page_type = page_data['type']
if page_type in ('episode', 'embed'):
show_data = page_data['show']['data']
page_id = compat_str(page_data['id'])
video_id = compat_str(page_data['video']['data']['id'])
preference = qualities(['mini', 'small', 'medium', 'large'])
thumbnails = [{
'url': image_url,
'id': image_id,
'preference': preference(image_id)
} for image_id, image_url in page_data.get('images', {}).items()]
info = {
'id': page_id,
'display_id': display_id,
'title': unescapeHTML(page_data['name']),
'description': unescapeHTML(page_data.get('summary')),
'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
'author': page_data.get('author'),
'uploader': show_data.get('name'),
'uploader_id': show_data.get('slug'),
'thumbnails': thumbnails,
'extractor_key': site,
}
if page_type == 'embed':
info.update({
'_type': 'url_transparent',
'url': page_data['video']['data']['embed'],
})
return info
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
})
self._sort_formats(formats)
preference = qualities(['mini', 'small', 'medium', 'large'])
thumbnails = [{
'url': image_url,
'id': image_id,
'preference': preference(image_id)
} for image_id, image_url in video_data.get('images', {}).items()]
return {
'id': video_id,
'display_id': display_id,
info.update({
'title': unescapeHTML(video_data['title']),
'description': unescapeHTML(video_data.get('summary')),
'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
'author': episode_data.get('author'),
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
}
})
return info
else:
show_data = page_info['show']['data']
list_data = page_info[page_type]['data']
episodes_data = page_info['episodes']['data']
num_episodes = page_info['meta']['totalEpisodes']
processed_episodes = 0
entries = []
page_num = 1
while True:
entries.extend([self.url_result(
'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
entries.extend([{
'_type': 'url',
'url': 'http://%s%s' % (domain, episode['path']),
'id': compat_str(episode['id']),
'ie_key': 'Revision3',
'extractor_key': site,
} for episode in episodes_data])
processed_episodes += len(episodes_data)
if processed_episodes == num_episodes:
break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
display_id)['episodes']['data']
return self.playlist_result(
entries, compat_str(show_data['id']),
show_data.get('name'), show_data.get('summary'))
entries, compat_str(list_data['id']),
list_data.get('name'), list_data.get('summary'))
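The rewritten extractor dispatches on the page type reported by the API: 'episode' and 'embed' resolve to a single video (embed via url_transparent), while anything else ('show', 'tag', ...) is paged into a playlist keyed by the matching data block. A simplified sketch of that dispatch (payloads invented; the API shape is assumed only from the code above):

def handle_page(page_info):
    page_type = page_info['data']['type']
    if page_type in ('episode', 'embed'):
        return 'single video'
    # e.g. 'show' or 'tag': playlist metadata lives under page_info[page_type]
    list_data = page_info[page_type]['data']
    return 'playlist %s' % list_data['id']

print(handle_page({'data': {'type': 'embed'}}))  # single video
print(handle_page({'data': {'type': 'tag'},
                   'tag': {'data': {'id': '21018'}}}))  # playlist 21018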


@@ -10,6 +10,7 @@ from ..utils import (
ExtractorError,
float_or_none,
remove_end,
remove_start,
sanitized_Request,
std_headers,
struct_unpack,
@@ -178,14 +179,14 @@ class RTVEInfantilIE(InfoExtractor):
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
_VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
_TESTS = [{
'url': 'http://www.rtve.es/noticias/directo-la-1/',
'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': {
'id': 'directo-la-1',
'ext': 'flv',
'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
'id': 'la-1',
'ext': 'mp4',
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
},
'params': {
'skip_download': 'live stream',
@@ -198,23 +199,20 @@ class RTVELiveIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
title = remove_end(self._og_search_title(webpage), ' en directo')
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
vidplayer_id = self._search_regex(
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
r'playerId=player([0-9]+)', webpage, 'internal video ID')
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
m3u8_url = _decrypt_url(png)
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
return {
'id': video_id,
'ext': 'flv',
'title': title,
'url': video_url,
'app': 'rtve-live-live?ovpfv=2.1.2',
'player_url': player_url,
'rtmp_live': True,
'formats': formats,
'is_live': True,
}
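The live extractor now goes through the same PNG-based indirection as regular videos: the internal id comes from the playerId=player<ID> markup, the PNG at the amonet endpoint is fetched, and _decrypt_url() recovers an m3u8 manifest that feeds _extract_m3u8_formats. A sketch of just the URL construction (page snippet made up):

import re

def build_png_url(webpage):
    vidplayer_id = re.search(r'playerId=player([0-9]+)', webpage).group(1)
    return ('http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png'
            % vidplayer_id)

print(build_png_url('<script>var cfg = "playerId=player1694255";</script>'))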


@@ -70,25 +70,27 @@ class ScreenwaveMediaIE(InfoExtractor):
formats = []
for source in sources:
if source['type'] == 'hls':
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
file_ = source.get('file')
if not file_:
continue
if source.get('type') == 'hls':
formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
else:
file_ = source.get('file')
if not file_:
continue
format_label = source.get('label')
format_id = self._search_regex(
r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
if not self._is_valid_url(file_, video_id, format_id or 'video'):
continue
format_label = source.get('label')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_label, 'height', default=None))
formats.append({
'url': source['file'],
'url': file_,
'format_id': format_id,
'format': format_label,
'ext': source.get('type'),
'height': height,
})
self._sort_formats(formats)
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
return {
'id': video_id,
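Two small regexes recover the format id and the height from each source entry; a worked example with fabricated inputs:

import re

file_url = 'http://example.com/videos/episode_720p.mp4'
format_label = '720p HD'

format_id = re.search(r'_(.+?)\.[^.]+$', file_url).group(1)
height = int(re.search(r'^(\d+)[pP]', format_label).group(1))
print(format_id, height)  # 720p 720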


@@ -1,38 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
'info_dict': {
'id': '2780937028001',
'ext': 'mp4',
'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61',
'uploader': 'TechMedia Networks',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
try:
# Some videos require the playerKey field, which isn't defined in
# the BrightcoveExperience object
brightcove_url = self._og_search_video_url(webpage)
except RegexNotFoundError:
# Other videos work fine with the info from the object
brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_url is None:
raise ExtractorError(
'The webpage does not contain a video', expected=True)
return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())


@@ -73,7 +73,7 @@ class TEDIE(InfoExtractor):
'add_ie': ['Youtube'],
'info_dict': {
'id': '_ZG8HBuDjgc',
'ext': 'mp4',
'ext': 'webm',
'title': 'Douglas Adams: Parrots the Universe and Everything',
'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
'uploader': 'University of California Television (UCTV)',


@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat')


@@ -22,6 +22,7 @@ from ..utils import (
unsmuggle_url,
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -31,15 +32,11 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(InfoExtractor):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(smil_url, video_id, note=note)
try:
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
raise ExtractorError(error_msg, expected=True)
error_element = find_xpath_attr(
meta, _x('.//smil:ref'), 'src',
'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
if error_element is not None:
raise ExtractorError(error_element.attrib['abstract'], expected=True)
formats = self._parse_smil_formats(
meta, smil_url, video_id, namespace=default_ns,


@@ -71,7 +71,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
webpage = self._download_webpage(url, display_id)
@@ -117,7 +117,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
title = self._html_search_regex(
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
age_limit = self._rta_search(webpage)
age_limit = self._rta_search(webpage) or 18
duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration', default=None))
@@ -152,6 +152,36 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
}
class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
_VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
_TITLE_REGEX = r'<title>([^<]+)</title>'
_TESTS = [{
'url': 'https://player.tnaflix.com/video/6538',
'info_dict': {
'id': '6538',
'display_id': '6538',
'ext': 'mp4',
'title': 'Educational xxx video',
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://player.empflix.com/video/33051',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
webpage)]
class TNAFlixIE(TNAFlixNetworkBaseIE):
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'


@@ -17,6 +17,7 @@ from ..utils import (
encode_dict,
ExtractorError,
int_or_none,
orderedSet,
parse_duration,
parse_iso8601,
sanitized_Request,
@@ -281,17 +282,36 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
entries = []
offset = 0
limit = self._PAGE_LIMIT
broken_paging_detected = False
counter_override = None
for counter in itertools.count(1):
response = self._download_json(
self._PLAYLIST_URL % (channel_id, offset, limit),
channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
channel_id,
'Downloading %s videos JSON page %s'
% (self._PLAYLIST_TYPE, counter_override or counter))
page_entries = self._extract_playlist_page(response)
if not page_entries:
break
total = int_or_none(response.get('_total'))
# Since the beginning of March 2016 Twitch's paging mechanism has been
# completely broken on the Twitch side: it simply ignores the limit and
# returns the whole offset number of videos.
# Work around this by requesting all videos at once.
if not broken_paging_detected and total and len(page_entries) > limit:
self.report_warning(
'Twitch paging is broken on twitch side, requesting all videos at once',
channel_id)
broken_paging_detected = True
offset = total
counter_override = '(all at once)'
continue
entries.extend(page_entries)
if broken_paging_detected or total and len(page_entries) >= total:
break
offset += limit
return self.playlist_result(
[self.url_result(entry) for entry in set(entries)],
[self.url_result(entry) for entry in orderedSet(entries)],
channel_id, channel_name)
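Switching the final dedup from set() to orderedSet is what keeps the playlist in its original order; set() would dedupe but scramble it. A standalone order-preserving dedup equivalent to what utils.orderedSet provides:

def ordered_set(iterable):
    seen = set()
    result = []
    for item in iterable:
        if item not in seen:  # keep only the first occurrence
            seen.add(item)
            result.append(item)
    return result

print(ordered_set(['v2', 'v1', 'v2', 'v3']))  # ['v2', 'v1', 'v3']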
def _extract_playlist_page(self, response):


@@ -10,7 +10,6 @@ from ..utils import (
remove_end,
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -22,7 +21,7 @@ class TwitterBaseIE(InfoExtractor):
class TwitterCardIE(TwitterBaseIE):
IE_NAME = 'twitter:card'
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -30,7 +29,7 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': {
'id': '560070183650213889',
'ext': 'mp4',
'title': 'TwitterCard',
'title': 'Twitter Card',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 30.033,
}
@@ -41,7 +40,7 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': {
'id': '623160978427936768',
'ext': 'mp4',
'title': 'TwitterCard',
'title': 'Twitter Card',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 80.155,
},
@@ -72,63 +71,102 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Vine by ArsenalTerje',
},
'add_ie': ['Vine'],
}
}, {
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
'md5': '3846d0a07109b5ab622425449b59049d',
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
'title': 'Twitter web player',
'thumbnail': 're:^https?://.*\.jpg',
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
# Different formats are served for different User-Agents
USER_AGENTS = [
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', # mp4
'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0', # webm
]
config = None
formats = []
for user_agent in USER_AGENTS:
request = sanitized_Request(url)
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(request, video_id)
duration = None
iframe_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
webpage, 'video iframe', default=None)
if iframe_url:
return self.url_result(iframe_url)
webpage = self._download_webpage(url, video_id)
config = self._parse_json(self._html_search_regex(
r'data-player-config="([^"]+)"', webpage, 'data player config'),
video_id)
if 'playlist' not in config:
if 'vmapUrl' in config:
formats.append({
'url': self._get_vmap_video_url(config['vmapUrl'], video_id),
})
break # same video regardless of UA
continue
iframe_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
webpage, 'video iframe', default=None)
if iframe_url:
return self.url_result(iframe_url)
video_url = config['playlist'][0]['source']
config = self._parse_json(self._html_search_regex(
r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
video_id)
def _search_dimensions_in_video_url(a_format, video_url):
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
a_format.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
playlist = config.get('playlist')
if playlist:
video_url = playlist[0]['source']
f = {
'url': video_url,
}
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
f.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
_search_dimensions_in_video_url(f, video_url)
formats.append(f)
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
if vmap_url:
formats.append({
'url': self._get_vmap_video_url(vmap_url, video_id),
})
media_info = None
for entity in config.get('status', {}).get('entities', []):
if 'mediaInfo' in entity:
media_info = entity['mediaInfo']
if media_info:
for media_variant in media_info['variants']:
media_url = media_variant['url']
if media_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
elif media_url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
else:
vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
a_format = {
'url': media_url,
'format_id': 'http-%d' % vbr if vbr else 'http',
'vbr': vbr,
}
# Reported bitRate may be zero
if not a_format['vbr']:
del a_format['vbr']
_search_dimensions_in_video_url(a_format, media_url)
formats.append(a_format)
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
self._sort_formats(formats)
thumbnail = config.get('posterImageUrl')
duration = float_or_none(config.get('duration'))
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
thumbnail = config.get('posterImageUrl') or config.get('image_src')
duration = float_or_none(config.get('duration')) or duration
return {
'id': video_id,
'title': 'TwitterCard',
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
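The mediaInfo variants above are dispatched purely on URL suffix: .m3u8 goes to the HLS extractor, .mpd to DASH, and everything else is treated as a progressive HTTP download whose reported bitRate (scaled by 1000) may be zero; the duration arrives in nanoseconds and is scaled by 1e9. A trimmed sketch of the branching with invented URLs:

def classify_variant(media_url):
    if media_url.endswith('.m3u8'):
        return 'hls'
    if media_url.endswith('.mpd'):
        return 'dash'
    return 'http'

for u in ('https://video.twimg.com/a.m3u8',
          'https://video.twimg.com/b.mpd',
          'https://video.twimg.com/720x1280/c.mp4'):
    print(u.rsplit('/', 1)[-1], '->', classify_variant(u))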
@@ -142,7 +180,6 @@ class TwitterIE(InfoExtractor):
_TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
# MD5 checksums are different in different places
'info_dict': {
'id': '643211948184596480',
'ext': 'mp4',
@@ -153,6 +190,9 @@ class TwitterIE(InfoExtractor):
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
},
'params': {
'skip_download': True, # requires ffmpeg
},
}, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@@ -177,6 +217,36 @@ class TwitterIE(InfoExtractor):
'uploader_id': 'starwars',
'uploader': 'Star Wars',
},
}, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
'uploader_id': 'BTNBrentYarina',
'uploader': 'Brent Yarina',
},
'params': {
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
# Test case of TwitterCardIE
'skip_download': True,
},
}, {
'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
'md5': '',
'info_dict': {
'id': '700207533655363584',
'ext': 'mp4',
'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'jay',
'uploader_id': 'jaydingeer',
},
'params': {
'skip_download': True, # requires ffmpeg
},
}]
def _real_extract(self, url):
@@ -234,6 +304,15 @@ class TwitterIE(InfoExtractor):
})
return info
if 'class="PlayableMedia' in webpage:
info.update({
'_type': 'url_transparent',
'ie_key': 'TwitterCard',
'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
})
return info
raise ExtractorError('There\'s no video in this tweet.')


@@ -0,0 +1,67 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
)
class UstudioIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
_TEST = {
'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
'md5': '58bbfca62125378742df01fc2abbdef6',
'info_dict': {
'id': 'Uxu2my9bgSph',
'display_id': 'san_francisco_golden_gate_bridge',
'ext': 'mp4',
'title': 'San Francisco: Golden Gate Bridge',
'description': 'md5:23925500697f2c6d4830e387ba51a9be',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20111107',
'uploader': 'Tony Farley',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
config = self._download_xml(
'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
display_id)
def extract(kind):
return [{
'url': item.attrib['url'],
'width': int_or_none(item.get('width')),
'height': int_or_none(item.get('height')),
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
formats = extract('video')
self._sort_formats(formats)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
upload_date = unified_strdate(self._search_regex(
r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
webpage, 'upload date', fatal=False))
uploader = self._search_regex(
r'Uploaded by\s*<a[^>]*>([^<]+)<',
webpage, 'uploader', fatal=False)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnails': extract('image'),
'upload_date': upload_date,
'uploader': uploader,
'formats': formats,
}


@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
'aftenbladet.no/tv': 'satv',
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
'ap.vgtv.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
(?P<host>
%s
)
/
/?
(?:
\#!/(?:video|live)/|
embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'ext': 'mp4',
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'duration': 66,
'timestamp': 1417002452,
'upload_date': '20141126',
'view_count': int,
}
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
},
{
'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
'only_matching': True,
},
]
def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
if len(video_id) == 5:
if appname == 'bttv':
info = self._extract_video_info('btno', video_id)
elif appname == 'aptv':
info = self._extract_video_info('ap', video_id)
streams = data['streamUrls']
stream_type = data.get('streamType')


@@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import sanitized_Request
from ..utils import (
decode_packed_codes,
sanitized_Request,
)
class VideoMegaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
@@ -42,8 +44,10 @@ class VideoMegaIE(InfoExtractor):
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
thumbnail = self._search_regex(
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
real_codes = decode_packed_codes(webpage)
video_url = self._search_regex(
r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
r'"src"\s*,\s*"([^"]+)"', real_codes, 'video URL')
return {
'id': video_id,


@@ -1,11 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
from .jwplatform import JWPlatformBaseIE
from ..utils import (
decode_packed_codes,
js_to_json,
)
class VidziIE(InfoExtractor):
class VidziIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
_TEST = {
'url': 'http://vidzi.tv/cghql9yq6emu.html',
@@ -14,7 +17,6 @@ class VidziIE(InfoExtractor):
'id': 'cghql9yq6emu',
'ext': 'mp4',
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
'uploader': 'vidzi.tv',
},
'params': {
# m3u8 download
@@ -29,11 +31,12 @@ class VidziIE(InfoExtractor):
title = self._html_search_regex(
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
# Vidzi now uses jwplayer, which can be handled by GenericIE
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': smuggle_url(url, {'to_generic': True}),
'ie_key': 'Generic',
}
code = decode_packed_codes(webpage).replace('\\\'', '\'')
jwplayer_data = self._parse_json(
self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
info_dict['title'] = title
return info_dict
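Vidzi hides its jwplayer config inside P.A.C.K.E.R.-style packed JavaScript, so the extractor first unpacks it with decode_packed_codes() and then lifts the literal passed to setup(). A minimal sketch of that final step (the unpacked snippet is fabricated):

import re

unpacked_js = "player.setup({file: 'http://example.com/v.m3u8', image: 'p.jpg'})"
config_literal = re.search(r'setup\(([^)]+)\)', unpacked_js).group(1)
print(config_literal)  # {file: 'http://example.com/v.m3u8', image: 'p.jpg'}

js_to_json() then turns that JavaScript object literal into strict JSON before _parse_json and _parse_jwplayer_data take over.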


@@ -176,13 +176,13 @@ class VikiIE(VikiBaseIE):
}, {
# youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
'md5': '63f8600c1da6f01b7640eee7eca4f1da',
'info_dict': {
'id': '50562v',
'ext': 'mp4',
'ext': 'webm',
'title': 'Poor Nastya [COMPLETE] - Episode 1',
'description': '',
'duration': 607,
'duration': 606,
'timestamp': 1274949505,
'upload_date': '20101213',
'uploader': 'ad14065n',


@@ -93,6 +93,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
'description': 'md5:2d3305bad981a06ff79f027f19865021',
'upload_date': '20121220',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user7108434',
'uploader_id': 'user7108434',
'uploader': 'Filippo Valsorda',
'duration': 10,
@@ -105,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '68093876',
'ext': 'mp4',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
@@ -121,6 +123,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
'uploader': 'The BLN & Business of Software',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
'uploader_id': 'theblnbusinessofsoftware',
'duration': 3610,
'description': None,
@@ -135,6 +138,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
'upload_date': '20130614',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
@@ -154,6 +158,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Key & Peele: Terrorist Interrogation',
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/atencio',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
'upload_date': '20130927',
@@ -169,6 +174,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'upload_date': '20131015',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'duration': 62,
@@ -183,6 +189,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'Pier Solar OUYA Official Trailer',
'uploader': 'Tulio Gonçalves',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user28849593',
'uploader_id': 'user28849593',
},
},
@@ -195,6 +202,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
'uploader': 'The DMCI',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/dmci',
'uploader_id': 'dmci',
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
@@ -269,9 +277,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
headers = std_headers
headers = std_headers.copy()
if 'http_headers' in data:
headers = headers.copy()
headers.update(data['http_headers'])
if 'Referer' not in headers:
headers['Referer'] = url
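The added copy() is the entire fix: std_headers is a module-level dict shared by every request, so writing a per-video Referer into it leaked that header into all later downloads. A toy reproduction of the aliasing bug (standalone sketch):

std_headers = {'User-Agent': 'Mozilla/5.0'}

def fetch_headers_buggy(url):
    headers = std_headers        # alias, not a copy
    headers['Referer'] = url     # pollutes the shared default
    return headers

def fetch_headers_fixed(url):
    headers = std_headers.copy() # request-local copy
    headers['Referer'] = url
    return headers

fetch_headers_buggy('https://vimeo.com/56015672')
print('Referer' in std_headers)  # True - the global was polluted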
@@ -286,7 +293,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, headers=headers)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
@@ -370,9 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Extract title
video_title = config['video']['title']
# Extract uploader and uploader_id
video_uploader = config['video']['owner']['name']
video_uploader_id = config['video']['owner']['url'].split('/')[-1] if config['video']['owner']['url'] else None
# Extract uploader, uploader_url and uploader_id
video_uploader = config['video'].get('owner', {}).get('name')
video_uploader_url = config['video'].get('owner', {}).get('url')
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
# Extract video thumbnail
video_thumbnail = config['video'].get('thumbnail')
@@ -473,6 +481,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
return {
'id': video_id,
'uploader': video_uploader,
'uploader_url': video_uploader_url,
'uploader_id': video_uploader_id,
'upload_date': video_upload_date,
'title': video_title,


@@ -11,6 +11,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
int_or_none,
orderedSet,
sanitized_Request,
str_to_int,
@@ -141,16 +142,29 @@ class VKIE(InfoExtractor):
'url': 'https://vk.com/video276849682_170681728',
'info_dict': {
'id': 'V3K4mi0SYkc',
'ext': 'mp4',
'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 179,
'duration': 178,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation",
'uploader_id': 'thecjf',
'view_count': int,
},
},
{
# video key is extra_data not url\d+
'url': 'http://vk.com/video-110305615_171782105',
'md5': 'e13fcda136f99764872e739d13fac1d1',
'info_dict': {
'id': '171782105',
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
'upload_date': '20160207',
'view_count': int,
},
},
{
# removed video, just testing that we match the pattern
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -298,12 +312,17 @@ class VKIE(InfoExtractor):
view_count = str_to_int(self._search_regex(
r'([\d,.]+)', views, 'view count', fatal=False))
formats = [{
'format_id': k,
'url': v,
'width': int(k[len('url'):]),
} for k, v in data.items()
if k.startswith('url')]
formats = []
for k, v in data.items():
if not k.startswith('url') and k != 'extra_data' or not v:
continue
height = int_or_none(self._search_regex(
r'^url(\d+)', k, 'height', default=None))
formats.append({
'format_id': k,
'url': v,
'height': height,
})
self._sort_formats(formats)
return {
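Note the guard's operator precedence: "not k.startswith('url') and k != 'extra_data' or not v" groups as ((not A) and B) or (not v), i.e. skip keys that are neither url<height> nor extra_data, and skip empty values. A standalone sketch with explicit parentheses (payload invented):

import re

data = {
    'url240': 'http://cs.vk.me/240.mp4',
    'url720': 'http://cs.vk.me/720.mp4',
    'extra_data': 'http://cs.vk.me/extra.mp4',  # special key, no height
    'hash': 'abc',   # skipped: not a video key
    'url480': None,  # skipped: empty value
}

formats = []
for k, v in data.items():
    if (not k.startswith('url') and k != 'extra_data') or not v:
        continue
    m = re.match(r'url(\d+)', k)
    formats.append({'format_id': k, 'url': v,
                    'height': int(m.group(1)) if m else None})

print(sorted(f['format_id'] for f in formats))  # ['extra_data', 'url240', 'url720']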


@@ -12,38 +12,52 @@ class WebOfStoriesIE(InfoExtractor):
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
_TESTS = [
{
'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': {
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
_TESTS = [{
'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': {
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
}, {
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
},
{
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
}
'skip': 'notfound',
}, {
# malformed og:title meta
'url': 'http://www.webofstories.com/play/54215?o=MS',
'info_dict': {
'id': '54215',
'ext': 'mp4',
'title': '"A Leg to Stand On"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Oliver Sacks talks about the death and resurrection of a limb',
'duration': 97,
},
]
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
# Sometimes og:title meta is malformed
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)


@@ -20,7 +20,7 @@ class WimpIE(InfoExtractor):
'md5': '4e2986c793694b55b37cf92521d12bb4',
'info_dict': {
'id': 'clowncar',
'ext': 'mp4',
'ext': 'webm',
'title': 'It\'s like a clown car.',
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
},


@@ -35,7 +35,8 @@ class WistiaIE(InfoExtractor):
formats = []
thumbnails = []
for atype, a in data['assets'].items():
for a in data['assets']:
atype = a.get('type')
if atype == 'still':
thumbnails.append({
'url': a['url'],


@@ -17,7 +17,7 @@ class XFileShareIE(InfoExtractor):
IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me))/
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
@@ -81,6 +81,13 @@ class XFileShareIE(InfoExtractor):
'ext': 'mp4',
'title': 'test'
}
}, {
'url': 'http://powerwatch.pw/duecjibvicbu',
'info_dict': {
'id': 'duecjibvicbu',
'ext': 'mp4',
'title': 'Big Buck Bunny trailer',
},
}]
def _real_extract(self, url):
@@ -112,6 +119,7 @@ class XFileShareIE(InfoExtractor):
title = (self._search_regex(
[r'style="z-index: [0-9]+;">([^<]+)</span>',
r'<td nowrap>([^<]+)</td>',
r'h4-fine[^>]*>([^<]+)<',
r'>Watch (.+) ',
r'<h2 class="video-page-head">([^<]+)</h2>'],
webpage, 'title', default=None) or self._og_search_title(webpage)).strip()


@@ -10,13 +10,27 @@ from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
)
class YandexMusicTrackIE(InfoExtractor):
class YandexMusicBaseIE(InfoExtractor):
@staticmethod
def _handle_error(response):
error = response.get('error')
if error:
raise ExtractorError(error, expected=True)
def _download_json(self, *args, **kwargs):
response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
self._handle_error(response)
return response
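Overriding _download_json in a base class means every API call in the track and playlist extractors gets the same in-band error check for free. A self-contained sketch of the pattern (generic names; the error shape is taken from the code above):

class ApiError(Exception):
    pass

def checked(response):
    error = response.get('error')
    if error:
        raise ApiError(error)
    return response

print(checked({'track': {'id': 4878838}})['track']['id'])  # 4878838
# checked({'error': 'session-expired'}) would raise ApiError('session-expired')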
class YandexMusicTrackIE(YandexMusicBaseIE):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
@@ -73,7 +87,7 @@ class YandexMusicTrackIE(InfoExtractor):
return self._get_track_info(track)
class YandexMusicPlaylistBaseIE(InfoExtractor):
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
def _build_playlist(self, tracks):
return [
self.url_result(

View File

@@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):
links = []
sources = self._search_regex(
r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
if sources:
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
links.append(link)
@@ -101,8 +101,9 @@
}
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# We take advantage of this to extract some metadata
mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
if mobj:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
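Dropping the leading slash from the regex lets the vl_-prefixed variant of the path match as well; a worked example against both path shapes shown in the comment:

import re

for video_url in ('/201012/17/505835/720p_1500k_505835/clip.mp4',
                  '/201012/17/505835/vl_240p_240k_505835/clip.mp4'):
    mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
    print(int(mobj.group('height')), int(mobj.group('bitrate')))
# 720 1500
# 240 240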


@@ -6,6 +6,7 @@ from __future__ import unicode_literals
import itertools
import json
import os.path
import random
import re
import time
import traceback
@@ -382,7 +383,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
@@ -401,12 +404,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773',
'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
'license': 'Standard YouTube License',
'creator': 'Icona Pop',
}
},
@@ -422,6 +427,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
'license': 'Standard YouTube License',
'creator': 'Justin Timberlake',
'age_limit': 18,
}
@@ -437,6 +444,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
'uploader': 'SET India',
'uploader_id': 'setindia',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
'license': 'Standard YouTube License',
'age_limit': 18,
}
},
@@ -449,7 +458,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
@@ -468,8 +479,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'm4a',
'upload_date': '20121002',
'uploader_id': '8KVIDEO',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
'description': '',
'uploader': '8KVIDEO',
'license': 'Standard YouTube License',
'title': 'UHDTV TEST 8K VIDEO.mp4'
},
'params': {
@@ -488,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
'license': 'Standard YouTube License',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -506,6 +520,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
'license': 'Standard YouTube License',
'creator': 'Taylor Swift',
},
'params': {
@@ -522,6 +537,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20100909',
'uploader': 'The Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
'license': 'Standard YouTube License',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
@@ -536,7 +553,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
'license': 'Standard YouTube License',
'age_limit': 18,
},
},
@@ -550,7 +569,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
'upload_date': '20110629',
'license': 'Standard YouTube License',
'age_limit': 18,
},
},
@@ -562,9 +583,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'license': 'Standard YouTube License',
'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
},
@@ -580,6 +603,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
@@ -597,8 +622,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'stretched_ratio': 16 / 9.,
'upload_date': '20110310',
'uploader_id': 'AllenMeow',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫艾倫',
'license': 'Standard YouTube License',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
},
@@ -629,7 +656,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:116377fd2963b81ec4ce64b542173306',
'upload_date': '20150625',
'uploader_id': 'dorappi2000',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:33',
},
},
@@ -644,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Airtek',
'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
'license': 'Standard YouTube License',
'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
},
'params': {
@@ -668,6 +698,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
'info_dict': {
@@ -678,6 +710,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
'info_dict': {
@ -688,6 +722,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
'info_dict': {
@ -698,6 +734,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}],
'params': {
@@ -731,7 +769,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
'license': 'Standard YouTube License',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
},
'params': {
@@ -759,6 +799,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True,
},
},
{
# Video licensed under Creative Commons
'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
'info_dict': {
'id': 'M4gD1WSo5mA',
'ext': 'mp4',
'title': 'md5:e41008789470fc2533a3252216f1c1d1',
'description': 'md5:a677553cf0840649b731a3024aeff4cc',
'upload_date': '20150127',
'uploader_id': 'BerkmanCenter',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
'uploader': 'BerkmanCenter',
'license': 'Creative Commons Attribution license (reuse allowed)',
},
'params': {
'skip_download': True,
},
},
{
# Channel-like uploader_url
'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
'info_dict': {
'id': 'eQcmzGIKrzg',
'ext': 'mp4',
'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
'description': 'md5:dda0d780d5a6e120758d1711d062a867',
'upload_date': '20151119',
'uploader': 'Bernie 2016',
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'license': 'Creative Commons Attribution license (reuse allowed)',
},
'params': {
'skip_download': True,
},
},
{
'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
'only_matching': True,
@@ -975,40 +1051,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return {}
try:
args = player_config['args']
caption_url = args['ttsurl']
if not caption_url:
self._downloader.report_warning(err_msg)
return {}
timestamp = args['timestamp']
# We get the available subtitles
list_params = compat_urllib_parse.urlencode({
'type': 'list',
'tlangs': 1,
'asrs': 1,
})
list_url = caption_url + '&' + list_params
caption_list = self._download_xml(list_url, video_id)
original_lang_node = caption_list.find('track')
if original_lang_node is None:
self._downloader.report_warning('Video doesn\'t have automatic captions')
return {}
original_lang = original_lang_node.attrib['lang_code']
caption_kind = original_lang_node.attrib.get('kind', '')
caption_url = args.get('ttsurl')
if caption_url:
timestamp = args['timestamp']
# We get the available subtitles
list_params = compat_urllib_parse.urlencode({
'type': 'list',
'tlangs': 1,
'asrs': 1,
})
list_url = caption_url + '&' + list_params
caption_list = self._download_xml(list_url, video_id)
original_lang_node = caption_list.find('track')
if original_lang_node is None:
self._downloader.report_warning('Video doesn\'t have automatic captions')
return {}
original_lang = original_lang_node.attrib['lang_code']
caption_kind = original_lang_node.attrib.get('kind', '')
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({
'lang': original_lang,
'tlang': sub_lang,
'fmt': ext,
'ts': timestamp,
'kind': caption_kind,
})
sub_formats.append({
'url': caption_url + '&' + params,
'ext': ext,
})
sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
parsed_caption_url = compat_urlparse.urlparse(caption_url)
caption_qs = compat_parse_qs(parsed_caption_url.query)
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
if not sub_lang:
continue
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({
'lang': original_lang,
'tlang': sub_lang,
'fmt': ext,
'ts': timestamp,
'kind': caption_kind,
caption_qs.update({
'tlang': [sub_lang],
'fmt': [ext],
})
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
query=compat_urllib_parse.urlencode(caption_qs, True)))
sub_formats.append({
'url': caption_url + '&' + params,
'url': sub_url,
'ext': ext,
})
sub_lang_list[sub_lang] = sub_formats
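In the caption_tracks fallback, every subtitle URL is derived from the first track's signed URL by swapping only the tlang and fmt query parameters and leaving the signature parameters untouched. A compact sketch of that rewrite (Python 3 urllib used for brevity where the code above uses the compat_* shims; sample URL fabricated):

from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def caption_url_for(base_url, sub_lang, ext):
    parsed = urlparse(base_url)
    qs = parse_qs(parsed.query)
    qs.update({'tlang': [sub_lang], 'fmt': [ext]})
    return urlunparse(parsed._replace(query=urlencode(qs, True)))

base = 'https://www.youtube.com/api/timedtext?v=20LmZk1hakA&lang=en&signature=SIG'
print(caption_url_for(base, 'de', 'vtt'))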
@@ -1019,6 +1122,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
def _mark_watched(self, video_id, video_info):
playback_url = video_info.get('videostats_playback_base_url', [None])[0]
if not playback_url:
return
parsed_playback_url = compat_urlparse.urlparse(playback_url)
qs = compat_urlparse.parse_qs(parsed_playback_url.query)
# The cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with a dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
qs.update({
'ver': ['2'],
'cpn': [cpn],
})
playback_url = compat_urlparse.urlunparse(
parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
self._download_webpage(
playback_url, video_id, 'Marking watched',
'Unable to mark watched', fatal=False)
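A note on the cpn construction: randint(0, 256) & 63 masks the random value down to 0..63, a valid index into the 64-character base64url alphabet, so the result is 16 URL-safe characters. Standalone:

import random

CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
assert len(CPN_ALPHABET) == 64

cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
print(cpn)  # 16 random URL-safe characters, e.g. 'qzEcPMoSdqzAYZ-_'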
@classmethod
def extract_id(cls, url):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
@@ -1245,9 +1371,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# uploader_id
video_uploader_id = None
mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
video_uploader_url = None
mobj = re.search(
r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
video_webpage)
if mobj is not None:
video_uploader_id = mobj.group(1)
video_uploader_id = mobj.group('uploader_id')
video_uploader_url = mobj.group('uploader_url')
else:
self._downloader.report_warning('unable to extract uploader nickname')
@@ -1275,6 +1405,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
video_license = self._html_search_regex(
r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
video_webpage, 'license', default=None)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
video_webpage)
@@ -1348,6 +1482,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list:
for fmt in fmt_list.split(','):
spec = fmt.split('/')
if len(spec) > 1:
width_height = spec[1].split('x')
if len(width_height) == 2:
formats_spec[spec[0]] = {
'resolution': spec[1],
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
}
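fmt_list pairs each itag with its resolution ('itag/WxH/...'), which supplies the width and height that itags missing from the DASH manifest would otherwise lack. A worked parse of a typical value (sample string; field layout assumed from the code above):

def parse_fmt_list(fmt_list):
    formats_spec = {}
    for fmt in fmt_list.split(','):
        spec = fmt.split('/')
        if len(spec) > 1:
            width_height = spec[1].split('x')
            if len(width_height) == 2:
                formats_spec[spec[0]] = {
                    'resolution': spec[1],
                    'width': int(width_height[0]),
                    'height': int(width_height[1]),
                }
    return formats_spec

print(parse_fmt_list('22/1280x720/9/0/115,43/640x360/99/0/0')['22'])
# {'resolution': '1280x720', 'width': 1280, 'height': 720}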
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
@@ -1416,6 +1563,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
if format_id in self._formats:
dct.update(self._formats[format_id])
if format_id in formats_spec:
dct.update(formats_spec[format_id])
# Some itags are not included in the DASH manifest, so the corresponding
# formats will lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
@@ -1528,11 +1677,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._sort_formats(formats)
self.mark_watched(video_id, video_info)
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'uploader_url': video_uploader_url,
'upload_date': upload_date,
'license': video_license,
'creator': video_creator,
'title': video_title,
'alt_title': video_alt_title,


@@ -137,6 +137,10 @@ class ZDFIE(InfoExtractor):
formats.extend(self._extract_smil_formats(
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
# https://github.com/rg3/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
elif ext == 'f4m':


@@ -170,6 +170,14 @@ def parseOpts(overrideArguments=None):
action='store_const', dest='extract_flat', const='in_playlist',
default=False,
help='Do not extract the videos of a playlist, only list them.')
general.add_option(
'--mark-watched',
action='store_true', dest='mark_watched', default=False,
help='Mark videos watched (YouTube only)')
general.add_option(
'--no-mark-watched',
action='store_false', dest='mark_watched', default=False,
help='Do not mark videos watched (YouTube only)')
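Both options deliberately write to the same dest, so whichever flag appears last on the command line wins. A minimal optparse reproduction of the pattern:

import optparse

parser = optparse.OptionParser()
parser.add_option('--mark-watched', action='store_true',
                  dest='mark_watched', default=False)
parser.add_option('--no-mark-watched', action='store_false',
                  dest='mark_watched')

opts, _ = parser.parse_args(['--mark-watched', '--no-mark-watched'])
print(opts.mark_watched)  # False - the last flag wins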
general.add_option(
'--no-color', '--no-colors',
action='store_true', dest='no_color',


@@ -6,6 +6,7 @@ from .ffmpeg import (
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupStretchedPP,
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegMergerPP,
FFmpegMetadataPP,
@@ -26,6 +27,7 @@ __all__ = [
'ExecAfterDownloadPP',
'FFmpegEmbedSubtitlePP',
'FFmpegExtractAudioPP',
'FFmpegFixupM3u8PP',
'FFmpegFixupM4aPP',
'FFmpegFixupStretchedPP',
'FFmpegMergerPP',

View File

@@ -391,10 +391,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
-# https://github.com/rg3/youtube-dl/issues/8350
-if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
-    options.extend(['-bsf:a', 'aac_adtstoasc'])
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
@@ -467,6 +463,21 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
return [], info
class FFmpegFixupM3u8PP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
self._downloader.to_screen('[ffmpeg] Fixing malformed aac bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return [], info
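The options list amounts to a lossless remux plus the ADTS-to-ASC audio bitstream filter. Assuming run_ffmpeg builds the usual 'ffmpeg -y -i IN <options> OUT' command line, the post-processor is roughly equivalent to this standalone sketch (file names hypothetical):

    import subprocess

    filename = 'video.mp4'            # hypothetical HlsNative output
    temp_filename = 'video.temp.mp4'  # prepend_extension(filename, 'temp')
    subprocess.check_call([
        'ffmpeg', '-y', '-i', filename,
        '-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc',
        temp_filename,
    ])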
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)

View File

@@ -6,6 +6,7 @@ import sys
import errno
from .common import PostProcessor
from ..compat import compat_os_name
from ..utils import (
check_executable,
hyphenate_date,
@@ -73,7 +74,7 @@ class XAttrMetadataPP(PostProcessor):
raise XAttrMetadataError(e.errno, e.strerror)
except ImportError:
-if os.name == 'nt':
+if compat_os_name == 'nt':
# Write xattrs to NTFS Alternate Data Streams:
# http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
def write_xattr(path, key, value):
@@ -168,7 +169,7 @@ class XAttrMetadataPP(PostProcessor):
'Unable to write extended attributes because the values are too long.')
else:
msg = 'This filesystem doesn\'t support extended attributes. '
-if os.name == 'nt':
+if compat_os_name == 'nt':
msg += 'You need to use NTFS.'
else:
msg += '(You may have to enable them in your /etc/fstab)'
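For context, compat_os_name exists because Jython reports os.name as 'java'; in youtube_dl/compat.py it is defined roughly as:

    import os

    # On Jython os.name is 'java' and the real platform name lives in
    # os._name, so the 'nt' checks above keep working there.
    compat_os_name = os._name if os.name == 'java' else os.name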

View File

@ -160,8 +160,6 @@ if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z_-]+$', key)
-if val:
-    assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
return node.find(expr)
else:
@@ -467,6 +465,10 @@ def encodeFilename(s, for_subprocess=False):
if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
return s
# Jython assumes filenames are Unicode strings even though it reports itself as Python 2.x compatible
if sys.platform.startswith('java'):
return s
return s.encode(get_subprocess_encoding(), 'ignore')
@@ -905,9 +907,9 @@ def unified_strdate(date_str, day_first=True):
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
-'%b %dst %Y %I:%M%p',
-'%b %dnd %Y %I:%M%p',
-'%b %dth %Y %I:%M%p',
+'%b %dst %Y %I:%M',
+'%b %dnd %Y %I:%M',
+'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
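Dropping the trailing %p lets timestamps without an AM/PM marker parse; the literal 'st'/'nd'/'th' after %d consumes the ordinal suffix. A quick check with a made-up date string:

    from datetime import datetime

    print(datetime.strptime('Dec 2nd 2015 11:05', '%b %dnd %Y %I:%M'))
    # 2015-12-02 11:05:00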
@@ -1217,13 +1219,23 @@ if sys.platform == 'win32':
raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
else:
-import fcntl
+# Some platforms, such as Jython, are missing fcntl
+try:
+    import fcntl

-def _lock_file(f, exclusive):
-    fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+    def _lock_file(f, exclusive):
+        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

-def _unlock_file(f):
-    fcntl.flock(f, fcntl.LOCK_UN)
+    def _unlock_file(f):
+        fcntl.flock(f, fcntl.LOCK_UN)
+except ImportError:
+    UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+    def _lock_file(f, exclusive):
+        raise IOError(UNSUPPORTED_MSG)
+
+    def _unlock_file(f):
+        raise IOError(UNSUPPORTED_MSG)
class locked_file(object):
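A sketch of how callers now degrade on platforms without fcntl, using the _lock_file/_unlock_file helpers defined above (the file name is hypothetical):

    with open('state.txt', 'w') as f:
        try:
            _lock_file(f, True)   # exclusive lock where supported
            f.write('...')
            _unlock_file(f)
        except IOError:
            pass  # 'file locking is not supported on this platform'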
@@ -1387,6 +1399,12 @@ def fix_xml_ampersands(xml_str):
def setproctitle(title):
assert isinstance(title, compat_str)
# ctypes in Jython is not complete
# http://bugs.jython.org/issue2148
if sys.platform.startswith('java'):
return
try:
libc = ctypes.cdll.LoadLibrary('libc.so.6')
except OSError:
@@ -1570,9 +1588,12 @@ class PagedList(object):
class OnDemandPagedList(PagedList):
-def __init__(self, pagefunc, pagesize):
+def __init__(self, pagefunc, pagesize, use_cache=False):
self._pagefunc = pagefunc
self._pagesize = pagesize
self._use_cache = use_cache
if use_cache:
self._cache = {}
def getslice(self, start=0, end=None):
res = []
@@ -1582,7 +1603,13 @@ class OnDemandPagedList(PagedList):
if start >= nextfirstid:
continue
-page_results = list(self._pagefunc(pagenum))
+page_results = None
+if self._use_cache:
+    page_results = self._cache.get(pagenum)
+if page_results is None:
+    page_results = list(self._pagefunc(pagenum))
+if self._use_cache:
+    self._cache[pagenum] = page_results
startv = (
start % self._pagesize
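A sketch of the effect of the new use_cache flag, assuming the surrounding PagedList plumbing: each page is fetched at most once across repeated slices.

    calls = []

    def pagefunc(pagenum):
        calls.append(pagenum)
        return range(pagenum * 2, pagenum * 2 + 2)

    pl = OnDemandPagedList(pagefunc, 2, use_cache=True)
    pl.getslice(0, 4)
    pl.getslice(0, 4)
    print(calls)  # [0, 1] -- the second slice is served from self._cache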
@@ -1712,6 +1739,15 @@ def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
parsed_url = compat_urlparse.urlparse(url)
qs = compat_parse_qs(parsed_url.query)
qs.update(query)
qs = encode_dict(qs)
return compat_urlparse.urlunparse(parsed_url._replace(
query=compat_urllib_parse.urlencode(qs, True)))
def encode_dict(d, encoding='utf-8'):
def encode(v):
return v.encode(encoding) if isinstance(v, compat_basestring) else v
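Usage sketch for the new update_url_query: parameters already in the URL are overridden by the query dict, and values pass through encode_dict before being re-serialized (parameter order may vary):

    print(update_url_query('http://example.com/path?a=1', {'a': '2', 'b': 'c'}))
    # http://example.com/path?a=2&b=c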
@@ -2610,3 +2646,41 @@ def ohdave_rsa_encrypt(data, exponent, modulus):
payload = int(binascii.hexlify(data[::-1]), 16)
encrypted = pow(payload, exponent, modulus)
return '%x' % encrypted
def encode_base_n(num, n, table=None):
FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
if not table:
table = FULL_TABLE[:n]
if n > len(table):
raise ValueError('base %d exceeds table length %d' % (n, len(table)))
if num == 0:
return table[0]
ret = ''
while num:
ret = table[num % n] + ret
num = num // n
return ret
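Quick examples of encode_base_n (the default table covers bases up to 62; a custom alphabet may be passed):

    print(encode_base_n(255, 16))           # ff
    print(encode_base_n(0, 2))              # 0
    print(encode_base_n(5, 2, table='ab'))  # bab (5 is 101 in binary)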
def decode_packed_codes(code):
mobj = re.search(
r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
code)
obfuscated_code, base, count, symbols = mobj.groups()
base = int(base)
count = int(count)
symbols = symbols.split('|')
symbol_table = {}
while count:
count -= 1
base_n_count = encode_base_n(count, base)
symbol_table[base_n_count] = symbols[count] or base_n_count
return re.sub(
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
obfuscated_code)
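A toy example of the P.A.C.K.E.R.-style input the regex above expects (the snippet is contrived; real packed players are much longer):

    packed = "}('0 1',2,2,'hello|world'.split('|')"
    print(decode_packed_codes(packed))  # hello world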

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.02.22'
+__version__ = '2016.03.06'