Compare commits

..

128 Commits

Author SHA1 Message Date
Philipp Hagemeister
d0f2ab6969 release 2014.04.13 2014-04-13 03:22:30 +02:00
Philipp Hagemeister
de906ef543 [aol] Add support for playlists (Fixes #2730) 2014-04-13 03:22:24 +02:00
Sergey M․
2fb3deeca1 [tube8] Fix extraction and modernize 2014-04-13 03:56:32 +07:00
Philipp Hagemeister
66398056f1 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-12 17:15:16 +02:00
Jaime Marquínez Ferrándiz
77477fa4c9 Merge branch 'atomicparsley' (closes #2436) 2014-04-12 15:52:42 +02:00
Jaime Marquínez Ferrándiz
a169e18ce1 [atomicparsley] Remove unneeded __init__ method 2014-04-12 15:51:40 +02:00
Jaime Marquínez Ferrándiz
381640e3ac [brightcove] Only use url from meta element if it has the 'playerKey' field (fixes #2738) 2014-04-12 12:53:48 +02:00
Sergey M․
37e3410137 [prosiebensat1] Add one more clip id pattern (Closes #2737) 2014-04-12 02:53:55 +07:00
Jaime Marquínez Ferrándiz
97b5196960 [weibo] Modernize 2014-04-11 16:02:34 +02:00
Sergey M․
6a4f3528c8 [firstpost] Fix extraction 2014-04-11 20:40:42 +07:00
Philipp Hagemeister
b9c76aa1a9 [youtube] Add support for cleanvideosearch.com (Fixes #2734) 2014-04-11 13:53:05 +02:00
Philipp Hagemeister
0d3070d364 release 2014.04.11.2 2014-04-11 09:44:33 +02:00
Philipp Hagemeister
7753cadbfa [comedycentral:shows] Add support for TDS special editions (Fixes #2733) 2014-04-11 09:30:07 +02:00
Philipp Hagemeister
3950450342 [pyvideo] Fix title 2014-04-11 02:20:50 +02:00
Philipp Hagemeister
c82b1fdad6 [slideshare] Fix description 2014-04-11 02:19:15 +02:00
Philipp Hagemeister
b0fb63abe8 [dailymotion:playlist] Fix title 2014-04-11 02:16:46 +02:00
Philipp Hagemeister
3ab34c603e [comedycentral] Fix test md5sum 2014-04-11 02:14:31 +02:00
Philipp Hagemeister
7d6413341a release 2014.04.11.1 2014-04-11 01:29:54 +02:00
Philipp Hagemeister
140012d0f6 release 2014.04.11 2014-04-11 01:28:30 +02:00
Philipp Hagemeister
4be9f8c814 [ninegag] Add support for p/ URLs 2014-04-11 01:25:24 +02:00
Sergey M․
5c802bac37 [byutv] Fix test 2014-04-10 19:37:55 +07:00
Sergey M․
6c30ff756a [mpora] Fix test 2014-04-10 19:10:03 +07:00
Jaime Marquínez Ferrándiz
62749e4708 [morningstar] Also support 'Cover' (#2729) 2014-04-09 20:51:28 +02:00
Jaime Marquínez Ferrándiz
6b7dee4b38 [morningstar] Recognize urls that use 'videoCenter' (fixes #2729) 2014-04-09 20:45:49 +02:00
Sergey M․
ef2041eb4e [br] Add audio extraction and support more URLs (Closes #2728) 2014-04-09 20:19:27 +07:00
Philipp Hagemeister
29e3e682af [comedycentral] Match more URLs
Looks like they only offer clips instead of full episodes now. We'll need to add new parsing code as well.
2014-04-09 11:43:15 +02:00
Philipp Hagemeister
f983c44199 Merge pull request #2725 from foolscap/subtitles-error-fix
Fix subtitle download error reporting (Fixes #2724)
2014-04-09 10:16:06 +02:00
robbie
e4db19511a Fix subtitle download error reporting (Fixes #2724) 2014-04-08 15:59:27 +01:00
Sergey M․
c47d21da80 [ntv] Update test 2014-04-08 19:11:40 +07:00
Philipp Hagemeister
269aecd0c0 [ffmpeg] Do not pass in bytes to subprocess (Fixes #2717) 2014-04-07 23:33:05 +02:00
Philipp Hagemeister
aafddb2b0a Merge remote-tracking branch 'anisse/fix-content-encoding-charset' 2014-04-07 23:27:03 +02:00
Philipp Hagemeister
6262ac8ac5 release 2014.04.07.4 2014-04-07 23:23:54 +02:00
Philipp Hagemeister
89938c719e Fix Windows output for non-BMP unicode characters 2014-04-07 23:23:48 +02:00
Anisse Astier
ec0fafbb19 [extractor/common] fallback on utf-8 when charset is not found
fixes #2721
2014-04-07 23:10:16 +02:00
Philipp Hagemeister
a5863bdf33 release 2014.04.07.3 2014-04-07 22:48:45 +02:00
Philipp Hagemeister
b58ddb32ba [utils] Completely rewrite Windows output (Fixes #2672) 2014-04-07 22:48:13 +02:00
Philipp Hagemeister
b9e12a8140 release 2014.04.07.2 2014-04-07 21:41:20 +02:00
Philipp Hagemeister
104aa7388a Use our own encoding when writing strings 2014-04-07 21:40:34 +02:00
Philipp Hagemeister
c3855d28b0 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-07 19:57:51 +02:00
Philipp Hagemeister
734f90bb41 Use --encoding when outputting 2014-04-07 19:57:42 +02:00
Jaime Marquínez Ferrándiz
91a6addeeb Add support for rtve.es/alacarta 2014-04-07 17:30:32 +02:00
Philipp Hagemeister
9afb76c5ad release 2014.04.07.1 2014-04-07 15:28:55 +02:00
Philipp Hagemeister
dfb2cb5cfd [teamcoco] Simplify ID management (Closes #2715) 2014-04-07 15:25:35 +02:00
Philipp Hagemeister
650d688d10 release 2014.04.07 2014-04-07 13:11:37 +02:00
Philipp Hagemeister
0ba77818f3 [ted] Add width and height (Fixes #2716) 2014-04-07 13:11:30 +02:00
Sergey M․
09baa7da7e [rts] Update test 2014-04-07 00:34:23 +07:00
Sergey M․
85e787f51d [cbsnews] Add support for cbsnews.com (Closes #2691) 2014-04-06 06:03:58 +07:00
Philipp Hagemeister
2a9e1e453a Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-05 20:05:47 +02:00
Philipp Hagemeister
ee1e199685 [justin.tv] Modernize (Fixes #2705) 2014-04-05 17:56:36 +02:00
Sergey M․
17c5a00774 [novamov] Simplify 2014-04-05 19:36:22 +07:00
Sergey M․
15c0e8e7b2 [generic] Generalize novamov based embeds 2014-04-05 17:20:05 +07:00
Sergey M․
cca37fba48 [divxstage] Fix typo in IE_NAME 2014-04-05 17:15:43 +07:00
Sergey M․
9d0993ec4a [movshare] Support more domains 2014-04-05 17:00:18 +07:00
Sergey M․
342f33bf9e [divxstage] Support more domains 2014-04-05 16:50:05 +07:00
Sergey M․
7cd3bc5f99 [nowvideo] Support more domains 2014-04-05 16:38:57 +07:00
Sergey M․
931055e6cb [videoweed] Revert _FILE_DELETED_REGEX 2014-04-05 16:32:14 +07:00
Sergey M․
d0e4cf82f1 [movshare] Add _FILE_DELETED_REGEX 2014-04-05 16:31:38 +07:00
Sergey M․
6f88df2c57 [divxstage] Add support for divxstage.eu 2014-04-05 16:29:44 +07:00
Sergey M․
4479bf2762 [videoweed] Simplify 2014-04-05 16:09:28 +07:00
Sergey M․
1ff7c0f7d8 [movshare] Add support for movshare.net 2014-04-05 16:09:03 +07:00
Sergey M․
610e47c87e Credit @sainyamkapoor for videoweed extractor 2014-04-05 15:53:50 +07:00
Sergey M․
50f566076f [generic] Add support for videoweed embeds 2014-04-05 15:49:45 +07:00
Sergey M․
92810ff497 [nowvideo] Improve _VALID_URL 2014-04-05 15:35:21 +07:00
Sergey M․
60ccc59a1c [novamov] Improve _VALID_URL 2014-04-05 15:34:54 +07:00
Sergey M․
91745595d3 [videoweed] Simplify 2014-04-05 15:32:55 +07:00
Sainyam Kapoor
d6e40507d0 [videoweed]Cleanup 2014-04-05 10:53:22 +05:30
Sainyam Kapoor
deed48b472 [Videoweed] Added support for videoweed. 2014-04-05 10:40:03 +05:30
Philipp Hagemeister
e4d41bfca5 Merge pull request #2696 from anovicecodemonkey/support-ustream-embeds
[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)
2014-04-04 23:33:08 +02:00
Philipp Hagemeister
a355b70f27 [cspan] Do not test number of playlist entries
Apparently, CSpan switches between single-file and multiple-file results. Either one is fine as long as we get the full four hours.
2014-04-04 23:16:22 +02:00
Philipp Hagemeister
f8514f6186 [rts] Use visible id in file names
Maybe the internal ID is more precise, but it's totally confusing, and the obvious ID still allows a google search.
2014-04-04 23:13:55 +02:00
Philipp Hagemeister
e09b8fcd9d [ro220] Make test case more flexible
Either one or two spaces is fine here.
2014-04-04 23:08:33 +02:00
Philipp Hagemeister
7d1b527ff9 [motorsport] Fix on Python 3 2014-04-04 23:06:27 +02:00
Philipp Hagemeister
f943c7b622 release 2014.04.04.7 2014-04-04 23:01:45 +02:00
Philipp Hagemeister
676eb3f2dd Fix unicode_escape (Fixes #2695) 2014-04-04 23:00:51 +02:00
Philipp Hagemeister
98b7cf1ace release 2014.04.04.6 2014-04-04 22:48:35 +02:00
Philipp Hagemeister
c465afd736 [teamcoco] Fix regex in 2.6 (#2700)
The re engine does not want to repeat an empty string, for fear that something like

    (.*)*

could be matching the tokens ...

    ""
    "" ""
    "" "" ""

Of course, that's harmless with a question mark, although still somewhat strange.
2014-04-04 22:46:47 +02:00
Philipp Hagemeister
b84d6e7fc4 Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix' 2014-04-04 22:44:49 +02:00
Philipp Hagemeister
2efd5d78c1 release 2014.04.04.5 2014-04-04 22:24:45 +02:00
Philipp Hagemeister
c8edf47b3a [yahoo] Support https and -uploader URLs (Fixes #2701) 2014-04-04 22:23:59 +02:00
Philipp Hagemeister
3b4c26a428 [pornhd] Avoid shadowing variable url 2014-04-04 22:22:30 +02:00
Philipp Hagemeister
1525148114 Remove unused imports 2014-04-04 22:22:11 +02:00
Philipp Hagemeister
9e0c5791c1 release 2014.04.04.4 2014-04-04 22:15:32 +02:00
Philipp Hagemeister
29a1ab2afc Add alternative --prefer-unsecure spelling (Closes #2697) 2014-04-04 22:15:21 +02:00
AGSPhoenix
fa387d2d99 Revert "Workaround for regex engine limitation"
This reverts commit 6d0d573eca.
2014-04-04 15:37:49 -04:00
AGSPhoenix
6d0d573eca Workaround for regex engine limitation 2014-04-04 15:25:28 -04:00
AGSPhoenix
bb799e811b Add a test for the new URL pages
Add a test for the pages with the video_id in the URL.
2014-04-04 13:52:35 -04:00
AGSPhoenix
04ee53eca1 Support TeamCoco URLs with video_id in the title
If the URL has the video_id in it, use that since the current method of
finding the id breaks on those pages.

Fixes 2698.
2014-04-04 13:42:34 -04:00
Jaime Marquínez Ferrándiz
659eb98a53 [breakcom] Fix YouTube videos extraction (fixes #2699) 2014-04-04 19:01:18 +02:00
anovicecodemonkey
ca6aada48e Fix _TEST for Ustream embed URLs 2014-04-05 03:26:29 +10:30
Jaime Marquínez Ferrándiz
43df5a7e71 [keezmovies] Modernize 2014-04-04 18:52:43 +02:00
Jaime Marquínez Ferrándiz
88f1c6de7b [yahoo] Modernize 2014-04-04 18:52:43 +02:00
Sergey M․
65a40ab82b [pornhd] Update test checksum 2014-04-04 22:47:38 +07:00
Sergey M․
4b9cced103 [pornhd] Fix extraction (Closes #2693) 2014-04-04 22:45:39 +07:00
anovicecodemonkey
5c38625259 [UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694) 2014-04-05 00:53:09 +10:30
Sergey M․
6344fa04bb [rts] Add more formats and audio support (Closes #2689) 2014-04-04 20:42:06 +07:00
Jaime Marquínez Ferrándiz
e3ced9ed61 [downloader/common] Use compat_str with the error in try_rename (appeared in #2389)
Otherwise on python 2.x we get `UnicodeDecodeError` because it may contain non ascii characters.
2014-04-04 14:59:11 +02:00
Philipp Hagemeister
5075d598bc release 2014.04.04.2 2014-04-04 02:24:21 +02:00
Philipp Hagemeister
68eb8e90e6 [youtube:playlist] Fix playlists for logged-in users (Fixes #2690) 2014-04-04 02:23:36 +02:00
Philipp Hagemeister
d3a96346c4 release 2014.04.04.3 2014-04-04 02:09:16 +02:00
Philipp Hagemeister
0e518e2fea [cnet] Fall back to "videos" key 2014-04-04 02:09:04 +02:00
Philipp Hagemeister
1e0a235f39 [dailymotion] Fix playlist+user 2014-04-04 02:04:16 +02:00
Philipp Hagemeister
9ad400f75e [generic] Remove test case that has become a 404 2014-04-04 01:47:17 +02:00
Philipp Hagemeister
3537b93d8a [tests] Fix YoutubeDL tests
Since bec1fad, the id, title, and url (also in formats) keys are mandatory. Change the tests to reflect that.
2014-04-04 01:45:49 +02:00
Philipp Hagemeister
56eca2e956 release 2014.04.04.1 2014-04-04 00:25:43 +02:00
Philipp Hagemeister
2ad4d1ba07 [morningstar] Add new extractor (Fixes #2687) 2014-04-04 00:25:35 +02:00
Philipp Hagemeister
4853de808b release 2014.04.04 2014-04-04 00:06:06 +02:00
Philipp Hagemeister
6ff5f12218 [motorsport] Add extractor (Fixes #2688) 2014-04-04 00:05:43 +02:00
Philipp Hagemeister
52a180684f [README] Fix VALID_URL in extractor example 2014-04-03 23:25:23 +02:00
Philipp Hagemeister
b21e25702f Merge pull request #2681 from phihag/readme-dev-instructions
[README] Improve developer instructions
2014-04-03 23:06:15 +02:00
Jaime Marquínez Ferrándiz
983af2600f [wimp] Detect youtube videos (fixes #2686) 2014-04-03 20:44:51 +02:00
Philipp Hagemeister
f34e6a2cd6 [comedycentral:shows] Do not include 6-digit identifier in display ID 2014-04-03 18:39:00 +02:00
Philipp Hagemeister
a9f304031b release 2014.04.03.3 2014-04-03 16:21:54 +02:00
Philipp Hagemeister
9271bc8355 [cnet] Add new extractor (Fixes #2679) 2014-04-03 16:21:21 +02:00
Philipp Hagemeister
d1b3e3dd75 [README] Add md5 to code example 2014-04-03 15:59:04 +02:00
Philipp Hagemeister
968ed2a777 [comedycentral] Add test for #2677 2014-04-03 15:31:04 +02:00
Philipp Hagemeister
24de5d2556 release 2014.04.03.2 2014-04-03 15:28:56 +02:00
Philipp Hagemeister
d26e981df4 Correct check for empty dirname (Fixes #2683) 2014-04-03 15:28:41 +02:00
Jaime Marquínez Ferrándiz
e45d40b171 [youtube:subscriptions] Add space to the description 2014-04-03 15:13:52 +02:00
Sergey M․
4a419b8851 [c56] Modernize and add duration extraction 2014-04-03 19:53:11 +07:00
Philipp Hagemeister
5fbd672c38 [README] Improve developer instructions
Add a longer tutorial that should cover everything needed to start developing IEs.

Fixes #2676
2014-04-03 14:46:24 +02:00
Philipp Hagemeister
bec1fad223 [YouTubeDL] Throw an early error if the info_dict result is invalid 2014-04-03 14:38:16 +02:00
Philipp Hagemeister
177fed41bc [comedycentral:shows] Support guest/ URLs (Fixes #2677) 2014-04-03 14:38:16 +02:00
Jaime Marquínez Ferrándiz
b900e7cba4 [downloader/f4m] Close the final video 2014-04-03 13:35:07 +02:00
Jaime Marquínez Ferrándiz
14cb4979f0 MANIFEST.in: Only list the files from the docs folder that will be included (closes #2623)
Pruning the _build folder produced the message `no previously-included directories found matching 'docs/_build'` when installing from the source distribution.
2014-04-03 13:26:27 +02:00
pulpe
784763c565 we don't need to run ffmpeg more times 2014-03-26 15:22:52 +01:00
pulpe
39c68260c0 fix ffmpeg metadatapp 2014-03-26 15:22:52 +01:00
pulpe
149254d0d5 fix ffmpeg error, if youtube-dl runs more than once with --embed-thumbnail with same video 2014-03-26 15:22:52 +01:00
pulpe
0c14e2fbe3 add post processor 2014-03-26 15:22:51 +01:00
56 changed files with 1451 additions and 413 deletions

View File

@@ -3,5 +3,4 @@ include test/*.py
include test/*.json
include youtube-dl.bash-completion
include youtube-dl.1
recursive-include docs *
prune docs/_build
recursive-include docs Makefile conf.py *.rst

View File

@@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
default
--embed-subs embed subtitles in the video (only for mp4
videos)
--embed-thumbnail embed thumbnail in the audio as cover art
--add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs
(using dublin core and xdg standards)
@@ -371,7 +372,67 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
'info_dict': {
'id': '42',
'ext': 'mp4',
'title': 'Video title goes here',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
# TODO more code goes here, for example ...
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
# TODO more properties (see youtube_dl/extractor/common.py)
}
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files, [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them, and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
# BUGS

View File

@@ -26,16 +26,27 @@ class YDL(FakeYDL):
self.msgs.append(msg)
def _make_result(formats, **kwargs):
res = {
'formats': formats,
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
}
res.update(**kwargs)
return res
class TestFormatSelection(unittest.TestCase):
def test_prefer_free_formats(self):
# Same resolution => download webm
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{'ext': 'webm', 'height': 460},
{'ext': 'mp4', 'height': 460},
{'ext': 'webm', 'height': 460, 'url': 'x'},
{'ext': 'mp4', 'height': 460, 'url': 'y'},
]
info_dict = {'formats': formats, 'extractor': 'test'}
info_dict = _make_result(formats)
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
@@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{'ext': 'webm', 'height': 720},
{'ext': 'mp4', 'height': 1080},
{'ext': 'webm', 'height': 720, 'url': 'a'},
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{'ext': 'webm', 'height': 720},
{'ext': 'mp4', 'height': 720},
{'ext': 'flv', 'height': 720},
{'ext': 'webm', 'height': 720, 'url': '_'},
{'ext': 'mp4', 'height': 720, 'url': '_'},
{'ext': 'flv', 'height': 720, 'url': '_'},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{'ext': 'flv', 'height': 720},
{'ext': 'webm', 'height': 720},
{'ext': 'flv', 'height': 720, 'url': '_'},
{'ext': 'webm', 'height': 720, 'url': '_'},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase):
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
]
info_dict = {
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
info_dict = _make_result(formats)
ydl = YDL()
ydl.process_ie_result(info_dict)
@@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self):
formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1},
{'format_id': '45', 'ext': 'webm', 'preference': 2},
{'format_id': '47', 'ext': 'webm', 'preference': 3},
{'format_id': '2', 'ext': 'flv', 'preference': 4},
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
]
info_dict = {'formats': formats, 'extractor': 'test'}
info_dict = _make_result(formats)
ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy())
@@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_audio(self):
formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
]
info_dict = {'formats': formats, 'extractor': 'test'}
info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio'})
ydl.process_ie_result(info_dict.copy())
@@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
]
info_dict = {'formats': formats, 'extractor': 'test'}
info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio/worstaudio/best'})
ydl.process_ie_result(info_dict.copy())
@@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_video(self):
formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
]
info_dict = {'formats': formats, 'extractor': 'test'}
info_dict = _make_result(formats)
ydl = YDL({'format': 'bestvideo'})
ydl.process_ie_result(info_dict.copy())
@@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase):
for f1id, f2id in zip(order, order[1:]):
f1 = YoutubeIE._formats[f1id].copy()
f1['format_id'] = f1id
f1['url'] = 'url:' + f1id
f2 = YoutubeIE._formats[f2id].copy()
f2['format_id'] = f2id
f2['url'] = 'url:' + f2id
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
@@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])

View File

@@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@@ -153,6 +154,27 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
['ComedyCentralShows'])
self.assertMatch(
'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
['ComedyCentralShows'])
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701
self.assertMatch(
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
if __name__ == '__main__':
unittest.main()

View File

@@ -43,6 +43,7 @@ from youtube_dl.extractor import (
XTubeUserIE,
InstagramUserIE,
CSpanIE,
AolIE,
)
@@ -324,10 +325,19 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '342759')
self.assertEqual(
result['title'], 'General Motors Ignition Switch Recall')
self.assertEqual(len(result['entries']), 9)
whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855)
def test_aol_playlist(self):
dl = FakeYDL()
ie = AolIE(dl)
result = ie.extract(
'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '152147')
self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10)
if __name__ == '__main__':
unittest.main()

View File

@@ -38,6 +38,7 @@ from youtube_dl.utils import (
xpath_with_ns,
parse_iso8601,
strip_jsonp,
uppercase_escape,
)
if sys.version_info < (3, 0):
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escpae(self):
self.assertEqual(uppercase_escape(u''), u'')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
if __name__ == '__main__':
unittest.main()

44
youtube_dl/YoutubeDL.py Normal file → Executable file
View File

@@ -286,6 +286,9 @@ class YoutubeDL(object):
"""Print message to stdout if not in quiet mode."""
return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
"""Print message to stdout if not in quiet mode."""
if self.params.get('logger'):
@@ -295,7 +298,7 @@ class YoutubeDL(object):
terminator = ['\n', ''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
self._write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
@@ -305,7 +308,7 @@ class YoutubeDL(object):
else:
message = self._bidi_workaround(message)
output = message + '\n'
write_string(output, self._err_file)
self._write_string(output, self._err_file)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -315,21 +318,21 @@ class YoutubeDL(object):
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
write_string('\033]0;%s\007' % message, self._screen_file)
self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Save the title on stack
write_string('\033[22;0t', self._screen_file)
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Restore the title from stack
write_string('\033[23;0t', self._screen_file)
self._write_string('\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
@@ -702,6 +705,11 @@ class YoutubeDL(object):
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
if 'id' not in info_dict:
raise ExtractorError('Missing "id" field in extractor result')
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
if 'playlist' not in info_dict:
# It isn't part of a playlist
info_dict['playlist'] = None
@@ -733,6 +741,9 @@ class YoutubeDL(object):
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
if format.get('format') is None:
@@ -868,7 +879,7 @@ class YoutubeDL(object):
try:
dn = os.path.dirname(encodeFilename(filename))
if dn != '' and not os.path.exists(dn):
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err))
@@ -925,7 +936,7 @@ class YoutubeDL(object):
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + descfn)
self.report_error('Cannot write subtitles file ' + sub_filename)
return
if self.params.get('writeinfojson', False):
@@ -1203,9 +1214,16 @@ class YoutubeDL(object):
if not self.params.get('verbose'):
return
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
write_string('[debug] youtube-dl version ' + __version__ + '\n')
write_string(
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
sys.stdout.encoding,
self.get_encoding()),
encoding=None
)
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
@@ -1214,20 +1232,20 @@ class YoutubeDL(object):
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string('[debug] Git HEAD: ' + out + '\n')
self._write_string('[debug] Git HEAD: ' + out + '\n')
except:
try:
sys.exc_clear()
except:
pass
write_string('[debug] Python version %s - %s' %
self._write_string('[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + '\n')
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')

View File

@@ -52,6 +52,7 @@ __authors__ = (
'Juan C. Olivares',
'Mattias Harrysson',
'phaer',
'Sainyam Kapoor',
)
__license__ = 'Public Domain'
@@ -91,6 +92,8 @@ from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL
from .postprocessor import (
AtomicParsleyPP,
FFmpegAudioFixPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
FFmpegExtractAudioPP,
@@ -242,7 +245,7 @@ def parseOpts(overrideArguments=None):
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure',
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -502,6 +505,8 @@ def parseOpts(overrideArguments=None):
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)')
postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
help='embed thumbnail in the audio as cover art')
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
help='write metadata to the video file')
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@@ -807,6 +812,10 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP())
if opts.embedthumbnail:
if not opts.addmetadata:
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
# Update version
if opts.update_self:

View File

@@ -4,9 +4,10 @@ import sys
import time
from ..utils import (
compat_str,
encodeFilename,
timeconvert,
format_bytes,
timeconvert,
)
@@ -173,7 +174,7 @@ class FileDownloader(object):
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
self.report_error(u'unable to rename file: %s' % str(err))
self.report_error(u'unable to rename file: %s' % compat_str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""

View File

@@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
break
frags_filenames.append(frag_filename)
dest_stream.close()
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
self.try_rename(tmpfilename, filename)

View File

@@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
@@ -40,6 +41,7 @@ from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE
from .cnet import CNETIE
from .cnn import (
CNNIE,
CNNBlogsIE,
@@ -61,6 +63,7 @@ from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
@@ -153,6 +156,9 @@ from .mixcloud import MixcloudIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motorsport import MotorsportIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVIggyIE,
@@ -202,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
@@ -273,6 +280,7 @@ from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .videoweed import VideoWeedIE
from .vimeo import (
VimeoIE,
VimeoChannelIE,

View File

@@ -8,7 +8,18 @@ from .fivemin import FiveMinIE
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
_VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
_VALID_URL = r'''(?x)
(?:
aol-video:|
http://on\.aol\.com/
(?:
video/.*-|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
)
)
(?P<id>[0-9]+)
(?:$|\?)
'''
_TEST = {
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
self.to_screen('Downloading 5min.com video %s' % video_id)
playlist_id = mobj.group('playlist_id')
if playlist_id and not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
playlist_html = self._search_regex(
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
'playlist HTML')
entries = [{
'_type': 'url',
'url': 'aol-video:%s' % m.group('id'),
'ie_key': 'Aol',
} for m in re.finditer(
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
playlist_html)]
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': mobj.group('playlist_display_id'),
'title': title,
'entries': entries,
}
return FiveMinIE._build_result(video_id)

View File

@@ -4,39 +4,72 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
int_or_none,
)
class BRIE(InfoExtractor):
IE_DESC = "Bayerischer Rundfunk Mediathek"
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
_BASE_URL = "http://www.br.de"
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
_BASE_URL = 'http://www.br.de'
_TESTS = [
{
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
"info_dict": {
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
"ext": "mp4",
"title": "Feiern und Verzichten",
"description": "Anselm Grün: Feiern und Verzichten",
"uploader": "BR/Birgit Baier",
"upload_date": "20140301"
'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
'info_dict': {
'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
'ext': 'mp4',
'title': 'Feiern und Verzichten',
'description': 'Anselm Grün: Feiern und Verzichten',
'uploader': 'BR/Birgit Baier',
'upload_date': '20140301',
}
},
{
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
"info_dict": {
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
"ext": "mp4",
"title": "Über den Pass",
"description": "Die Eroberung der Alpen: Über den Pass",
"uploader": None,
"upload_date": None
'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
'info_dict': {
'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
'ext': 'mp4',
'title': 'Über den Pass',
'description': 'Die Eroberung der Alpen: Über den Pass',
}
}
},
{
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
'info_dict': {
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
'ext': 'aac',
'title': '"Keine neuen Schulden im nächsten Jahr"',
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
}
},
{
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
'info_dict': {
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
}
},
{
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
'md5': '23bca295f1650d698f94fc570977dae3',
'info_dict': {
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
}
},
]
def _real_extract(self, url):
@@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
display_id = mobj.group('id')
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None)
videos = []
for xml_video in xml.findall("video"):
video = {
"id": xml_video.get("externalId"),
"title": xml_video.find("title").text,
"formats": self._extract_formats(xml_video.find("assets")),
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
"webpage_url": xml_video.find("permalink").text
}
if xml_video.find("author").text:
video["uploader"] = xml_video.find("author").text
if xml_video.find("broadcastDate").text:
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
videos.append(video)
medias = []
if len(videos) > 1:
for xml_media in xml.findall('video') + xml.findall('audio'):
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
'webpage_url': xml_media.find('permalink').text
}
if xml_media.find('author').text:
media['uploader'] = xml_media.find('author').text
if xml_media.find('broadcastDate').text:
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
medias.append(media)
if len(medias) > 1:
self._downloader.report_warning(
'found multiple videos; please '
'found multiple medias; please '
'report this with the video URL to http://yt-dl.org/bug')
if not videos:
raise ExtractorError('No video entries found')
return videos[0]
if not medias:
raise ExtractorError('No media entries found')
return medias[0]
def _extract_formats(self, assets):
def text_or_none(asset, tag):
elem = asset.find(tag)
return None if elem is None else elem.text
formats = [{
"url": asset.find("downloadUrl").text,
"ext": asset.find("mediaType").text,
"format_id": asset.get("type"),
"width": int(asset.find("frameWidth").text),
"height": int(asset.find("frameHeight").text),
"tbr": int(asset.find("bitrateVideo").text),
"abr": int(asset.find("bitrateAudio").text),
"vcodec": asset.find("codecVideo").text,
"container": asset.find("mediaType").text,
"filesize": int(asset.find("size").text),
} for asset in assets.findall("asset")
if asset.find("downloadUrl") is not None]
'url': text_or_none(asset, 'downloadUrl'),
'ext': text_or_none(asset, 'mediaType'),
'format_id': asset.get('type'),
'width': int_or_none(text_or_none(asset, 'frameWidth')),
'height': int_or_none(text_or_none(asset, 'frameHeight')),
'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
'vcodec': text_or_none(asset, 'codecVideo'),
'acodec': text_or_none(asset, 'codecAudio'),
'container': text_or_none(asset, 'mediaType'),
'filesize': int_or_none(text_or_none(asset, 'size')),
} for asset in assets.findall('asset')
if asset.find('downloadUrl') is not None]
self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants):
thumbnails = [{
"url": self._BASE_URL + variant.find("url").text,
"width": int(variant.find("width").text),
"height": int(variant.find("height").text),
} for variant in variants.findall("variant")]
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
'url': self._BASE_URL + variant.find('url').text,
'width': int_or_none(variant.find('width').text),
'height': int_or_none(variant.find('height').text),
} for variant in variants.findall('variant')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails

View File

@@ -27,9 +27,10 @@ class BreakIE(InfoExtractor):
webpage, 'info json', flags=re.DOTALL)
info = json.loads(info_json)
video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube')
youtube_id = info.get('youtubeId')
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
final_url = video_url + '?' + info['AuthToken']
return {
'id': video_id,

View File

@@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
return [unescapeHTML(url_m.group(1))]
url = unescapeHTML(url_m.group(1))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url:
return [url]
matches = re.findall(
r'''(?sx)<object

View File

@@ -4,9 +4,7 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
@@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
'info_dict': {
'id': 'granite-flats-talking',
'ext': 'mp4',
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
'title': 'Talking',
'thumbnail': 're:^https?://.*promo.*'
},

View File

@@ -2,39 +2,46 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class C56IE(InfoExtractor):
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
IE_NAME = '56.com'
_TEST = {
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
'file': '93440716.flv',
'md5': 'e59995ac63d0457783ea05f93f12a866',
'info_dict': {
'id': '93440716',
'ext': 'flv',
'title': '网事知多少 第32期车怒',
'duration': 283.813,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid')
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
text_id, 'Downloading video info')
info = json.loads(info_page)['info']
formats = [{
'format_id': f['type'],
'filesize': int(f['filesize']),
'url': f['url']
} for f in info['rfiles']]
page = self._download_json(
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
info = page['info']
formats = [
{
'format_id': f['type'],
'filesize': int(f['filesize']),
'url': f['url']
} for f in info['rfiles']
]
self._sort_formats(formats)
return {
'id': info['vid'],
'title': info['Subject'],
'duration': int(info['duration']) / 1000.0,
'formats': formats,
'thumbnail': info.get('bimg') or info.get('img'),
}

View File

@@ -0,0 +1,87 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class CBSNewsIE(InfoExtractor):
    """Information extractor for article and video pages on cbsnews.com."""

    IE_DESC = 'CBS News'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'

    _TESTS = [
        {
            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
            'info_dict': {
                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
                'ext': 'flv',
                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
                'duration': 791,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
                'ext': 'flv',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
                'duration': 205,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the page at *url*.

        The page embeds a JSON blob (either on the related-items list or on
        the player container); the media URIs, title, thumbnail and duration
        are all read from that blob.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        raw_info = self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            webpage, 'video JSON info')
        video_info = json.loads(raw_info)

        # The payload is sometimes wrapped in an 'item' object and sometimes
        # is the item itself.
        if 'item' in video_info:
            item = video_info['item']
        else:
            item = video_info

        formats = []
        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
            uri = item.get('media' + format_id + 'URI')
            if uri:
                fmt = {
                    'url': uri,
                    'format_id': format_id,
                }
                if uri.startswith('rtmp'):
                    # RTMP streams need the connection parameters spelled out;
                    # the play path is whatever follows the '<break>' marker.
                    fmt.update({
                        'app': 'ondemand?auth=cbs',
                        'play_path': 'mp4:' + uri.split('<break>')[-1],
                        'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
                        'page_url': 'http://www.cbsnews.com',
                        'ext': 'flv',
                    })
                elif uri.endswith('.m3u8'):
                    fmt['ext'] = 'mp4'
                formats.append(fmt)

        return {
            'id': video_id,
            'title': item.get('articleTitle') or item.get('hed'),
            'thumbnail': item.get('mediaImage') or item.get('thumbnail'),
            'duration': item.get('duration'),
            'formats': formats,
        }

View File

@@ -0,0 +1,75 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class CNETIE(InfoExtractor):
    """Information extractor for video pages on cnet.com."""

    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the CNET video page at *url*.

        The player options JSON embedded in the page supplies the video
        record; raises ExtractorError when that JSON holds no video record.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)

        # Prefer the single 'video' record and fall back to the first entry
        # of 'videos'. Lookups go through .get() so that a missing key or an
        # empty list reaches the ExtractorError below instead of escaping as
        # a bare KeyError/IndexError.
        vdata = data.get('video')
        if not vdata:
            videos = data.get('videos')
            vdata = videos[0] if videos else None
        if not vdata:
            raise ExtractorError('Cannot find video data')

        video_id = vdata['id']
        title = vdata['headline']
        description = vdata.get('dek')
        # 'image' may be present but null in the JSON; treat that as absent.
        thumbnail = (vdata.get('image') or {}).get('path')

        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('email')
        else:
            uploader = None
            uploader_id = None

        formats = [{
            'format_id': '%s-%s-%s' % (
                f['type'], f['format'],
                int_or_none(f.get('bitrate'), 1000, default='')),
            'url': f['uri'],
            'tbr': int_or_none(f.get('bitrate'), 1000),
        } for f in vdata['files']['data']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }

View File

@@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
_TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': '4167875aae411f903b751a21f357f1ee',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'ext': 'mp4',
@@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
(full-episodes/(?P<episode>.*)|
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip>
(?:videos/[^/]+/(?P<videotitle>[^/?#]+))
(?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)|

View File

@@ -251,7 +251,10 @@ class InfoExtractor(object):
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace')
try:
content = webpage_bytes.decode(encoding, 'replace')
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]):

View File

@@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
compat_str,
get_element_by_attribute,
get_element_by_id,
orderedSet,
str_to_int,
@@ -180,7 +179,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
def _extract_entries(self, id):
@@ -190,10 +189,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in orderedSet(video_ids)]
@@ -203,17 +201,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
'entries': self._extract_entries(playlist_id),
}
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
def _real_extract(self, url):

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class DivxStageIE(NovaMovIE):
    """DivxStage extractor; all extraction logic is inherited from NovaMovIE.

    Only the host pattern and the page-scraping regexes are overridden here.
    """

    IE_NAME = 'divxstage'
    IE_DESC = 'DivxStage'

    # The host pattern is a raw string: '\.' is not a valid escape sequence in
    # a regular string literal and triggers a warning on modern Python. The
    # resulting pattern text is unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'divxstage\.(?:eu|net|ch|co|at|ag)'}

    _HOST = 'www.divxstage.eu'

    # Regexes applied to the downloaded page.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'

    _TEST = {
        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
        'md5': '63969f6eb26533a1968c4d325be63e72',
        'info_dict': {
            'id': '57f238e2e5e01',
            'ext': 'flv',
            'title': 'youtubedl test video',
            'description': 'This is a test video for youtubedl.',
        }
    }

View File

@@ -6,7 +6,6 @@ from .common import InfoExtractor
class FirstpostIE(InfoExtractor):
IE_NAME = 'Firstpost.com'
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = {
@@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
}
}
@@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<div.*?name="div_video".*?flashvars="([^"]+)">',
webpage, 'video URL')
data = self._download_xml(
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
'Downloading video XML')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
title = item.find('./title').text
formats = [
{
'url': details.find('./file').text,
'format_id': details.find('./label').text.strip(),
'width': int(details.find('./width').text.strip()),
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -114,20 +114,6 @@ class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get
},
},
# second style of embedded ooyala videos
{
'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
'info_dict': {
'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
'ext': 'mp4',
'title': 'Behind-the-scenes: Financial Review Sunday ',
'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -198,6 +184,17 @@ class GenericIE(InfoExtractor):
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
},
# Embedded Ustream video
{
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
'md5': '27b99cdb639c9b12a79bca876a073417',
'info_dict': {
'id': '45734260',
'ext': 'flv',
'uploader': 'AU SPA: The NSA and Privacy',
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
}
},
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -514,17 +511,18 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded NovaMov player
# Look for embedded NovaMov-based player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
r'''(?x)<iframe[^>]+?src=(["\'])
(?P<url>http://(?:(?:embed|www)\.)?
(?:novamov\.com|
nowvideo\.(?:ch|sx|eu|at|ag|co)|
videoweed\.(?:es|com)|
movshare\.(?:net|sx|ag)|
divxstage\.(?:eu|net|ch|co|at|ag))
/embed\.php.+?)\1''', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'NovaMov')
# Look for embedded NowVideo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'NowVideo')
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
mobj = re.search(
@@ -570,6 +568,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Ustream')
# Look for embedded arte.tv player
mobj = re.search(
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',

View File

@@ -1,9 +1,12 @@
from __future__ import unicode_literals
import json
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_str,
ExtractorError,
formatSeconds,
)
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
/?(?:\#.*)?$
"""
_JUSTIN_PAGE_LIMIT = 100
IE_NAME = u'justin.tv'
IE_NAME = 'justin.tv'
IE_DESC = 'justin.tv and twitch.tv'
_TEST = {
u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
u'file': u'296128360.flv',
u'md5': u'ecaa8a790c22a40770901460af191c9a',
u'info_dict': {
u"upload_date": u"20110927",
u"uploader_id": 25114803,
u"uploader": u"thegamedevhub",
u"title": u"Beginner Series - Scripting With Python Pt.1"
'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
'md5': 'ecaa8a790c22a40770901460af191c9a',
'info_dict': {
'id': '296128360',
'ext': 'flv',
'upload_date': '20110927',
'uploader_id': 25114803,
'uploader': 'thegamedevhub',
'title': 'Beginner Series - Scripting With Python Pt.1'
}
}
def report_download_page(self, channel, offset):
"""Report attempt to download a single page of videos."""
self.to_screen(u'%s: Downloading video information from %d to %d' %
(channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
# Return count of items, list of *valid* items
def _parse_page(self, url, video_id):
info_json = self._download_webpage(url, video_id,
u'Downloading video info JSON',
u'unable to download video info JSON')
'Downloading video info JSON',
'unable to download video info JSON')
response = json.loads(info_json)
if type(response) != list:
error_text = response.get('error', 'unknown error')
raise ExtractorError(u'Justin.tv API: %s' % error_text)
raise ExtractorError('Justin.tv API: %s' % error_text)
info = []
for clip in response:
video_url = clip['video_file_url']
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({
'id': video_id,
'id': compat_str(video_id),
'url': video_url,
'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id),
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)
api_base = 'http://api.justin.tv'
paged = False
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
webpage = self._download_webpage(url, chapter_id)
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
if not m:
raise ExtractorError(u'Cannot find archive of a chapter')
raise ExtractorError('Cannot find archive of a chapter')
archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information',
errnote=u'Chapter information download failed')
doc = self._download_xml(
api, chapter_id,
note='Downloading chapter information',
errnote='Chapter information download failed')
for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text:
break
else:
raise ExtractorError(u'Could not find chapter in chapter information')
raise ExtractorError('Could not find chapter in chapter information')
video_url = a.find('./video_file_url').text
video_ext = video_url.rpartition('.')[2] or u'flv'
video_ext = video_url.rpartition('.')[2] or 'flv'
chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
note='Downloading chapter metadata',
errnote='Download of chapter metadata failed')
chapter_info = json.loads(chapter_info_json)
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
chapter_info = self._download_json(
chapter_api_url, 'c' + chapter_id,
note='Downloading chapter metadata',
errnote='Download of chapter metadata failed')
bracket_start = int(doc.find('.//bracket_start').text)
bracket_end = int(doc.find('.//bracket_end').text)
# TODO determine start (and probably fix up file)
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
#video_url += u'?start=' + TODO:start_timestamp
#video_url += '?start=' + TODO:start_timestamp
# bracket_start is 13290, but we want 51670615
self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
info = {
'id': u'c' + chapter_id,
'id': 'c' + chapter_id,
'url': video_url,
'ext': video_ext,
'title': chapter_info['title'],
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
'uploader': chapter_info['channel']['display_name'],
'uploader_id': chapter_info['channel']['name'],
}
return [info]
return info
else:
video_id = mobj.group('videoid')
api = api_base + '/broadcast/by_archive/%s.json' % video_id
self.report_extraction(video_id)
info = []
entries = []
offset = 0
limit = self._JUSTIN_PAGE_LIMIT
while True:
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
self.report_download_page(video_id, offset)
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
page_count, page_info = self._parse_page(page_url, video_id)
info.extend(page_info)
entries.extend(page_info)
if not paged or page_count != limit:
break
offset += limit
return info
return {
'_type': 'playlist',
'id': video_id,
'entries': entries,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import os
import re
@@ -11,22 +13,22 @@ from ..aes import (
aes_decrypt_text
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
u'file': u'1214711.mp4',
u'md5': u'6e297b7e789329923fcf83abb67c9289',
u'info_dict': {
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
u"age_limit": 18,
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'file': '1214711.mp4',
'md5': '6e297b7e789329923fcf83abb67c9289',
'info_dict': {
'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor):
embedded_url = mobj.group(1)
return self.url_result(embedded_url)
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
if webpage.find('encrypted=true')!=-1:
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
if 'encrypted=true' in webpage:
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]

View File

@@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MorningstarIE(InfoExtractor):
    """Information extractor for morningstar.com video center pages."""

    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
        'info_dict': {
            'id': '615869',
            'ext': 'mp4',
            'title': 'Get Ahead of the Curve on 2013 Taxes',
            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Scrape the video page and return the info dict for the video.

        The numeric id comes from the URL; title, media URL, thumbnail and
        description are all read from the downloaded HTML page.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        def hidden_input(element_id):
            # Regex capturing the value of <input type="hidden" id=...>.
            return r'<input type="hidden" id="%s" value="([^"]+)"' % element_id

        # Title and video URL are mandatory; a miss on either is fatal.
        video_title = self._html_search_regex(
            r'<h1 id="titleLink">(.*?)</h1>', page, 'title')
        media_url = self._html_search_regex(
            hidden_input('hidVideoUrl'), page, 'video URL')

        # Thumbnail and description are optional metadata.
        snapshot = self._html_search_regex(
            hidden_input('hidSnapshot'), page, 'thumbnail', fatal=False)
        deck = self._html_search_regex(
            r'<div id="mstarDeck".*?>(.*?)</div>',
            page, 'description', fatal=False)

        return {
            'id': video_id,
            'title': video_title,
            'url': media_url,
            'thumbnail': snapshot,
            'description': deck,
        }

View File

@@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_str,
int_or_none,
)
class MotorsportIE(InfoExtractor):
    """Information extractor for motorsport.com video pages."""

    IE_DESC = 'motorsport.com'
    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
        'info_dict': {
            'id': '7063',
            'ext': 'mp4',
            'title': 'Red Bull Racing: 2014 Rules Explained',
            'duration': 207,
            'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations which are arguably the most complex the sport has ever seen.',
            'uploader': 'rainiere',
            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the given page.

        Metadata is read from the JSON stored in the ``parameters`` entry of
        the player's flashvars; the media URL is that JSON's ``location``
        with an expiry timestamp and an MD5 signature appended.
        """
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)

        raw_flashvars = self._html_search_regex(
            r'<embed id="player".*?flashvars="([^"]+)"', page, 'flashvars')
        # flashvars is a query string; 'parameters' holds a JSON document.
        player_params = json.loads(
            compat_parse_qs(raw_flashvars)['parameters'][0])

        # The link is stamped with an expiry one day from now ...
        one_day = 24 * 60 * 60
        expiry = compat_str(int(time.time()) + one_day)
        unsigned_url = player_params['location'] + '?e=' + expiry
        # ... and signed with md5(key + unsigned URL).
        signing_key = 'h3hg713fh32'
        to_sign = (signing_key + unsigned_url).encode('utf-8')
        signature = hashlib.md5(to_sign).hexdigest()
        signed_url = unsigned_url + '&h=' + signature

        video_author = self._html_search_regex(
            r'(?s)<span class="label">Video by: </span>(.*?)</a>', page,
            'uploader', fatal=False)

        return {
            'id': player_params['video_id'],
            'display_id': display_id,
            'title': player_params['title'],
            'url': signed_url,
            'description': player_params.get('description'),
            'thumbnail': player_params.get('main_thumb'),
            'duration': int_or_none(player_params.get('duration')),
            'uploader': video_author,
        }

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class MovShareIE(NovaMovIE):
    """MovShare extractor; reuses NovaMovIE with site-specific settings.

    Only the host pattern, canonical host and page-scraping regexes are
    overridden here; the extraction logic itself is inherited.
    """

    IE_NAME = 'movshare'
    IE_DESC = 'MovShare'

    # Raw string: '\.' is not a valid escape sequence in a normal literal
    # and triggers a warning on recent Python versions.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'movshare\.(?:net|sx|ag)'}

    _HOST = 'www.movshare.net'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'

    _TEST = {
        'url': 'http://www.movshare.net/video/559e28be54d96',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': '559e28be54d96',
            'ext': 'flv',
            'title': 'dissapeared image',
            'description': 'optical illusion dissapeared image magic illusion',
        }
    }

View File

@@ -4,9 +4,7 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
)
from ..utils import int_or_none
class MporaIE(InfoExtractor):
@@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
'info_dict': {
'title': 'Katy Curd - Winter in the Forest',
'duration': 416,
'uploader': 'petenewman',
'uploader': 'Peter Newman Media',
},
}

View File

@@ -7,9 +7,14 @@ from .common import InfoExtractor
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
(?:
v/(?P<numid>[0-9]+)|
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
)
'''
_TEST = {
_TESTS = [{
"url": "http://9gag.tv/v/1912",
"info_dict": {
"id": "1912",
@@ -20,17 +25,33 @@ class NineGagIE(InfoExtractor):
"thumbnail": "re:^https?://",
},
'add_ie': ['Youtube']
}
},
{
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': {
'id': 'KklwM',
'ext': 'mp4',
'display_id': 'alternate-banned-opening-scene-of-gravity',
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.group('numid') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id)
youtube_id = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
webpage, 'video ID')
title = self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
webpage, 'title', default=None)
if not title:
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
'description', fatal=False)
@@ -46,7 +67,8 @@ class NineGagIE(InfoExtractor):
'url': youtube_id,
'ie_key': 'Youtube',
'id': video_id,
'title': self._og_search_title(webpage),
'display_id': display_id,
'title': title,
'description': description,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
_HOST = 'www.novamov.com'
@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
video_id = mobj.group('id')
page = self._download_webpage(
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, page) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
api_response = self._download_webpage(

View File

@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
_HOST = 'www.nowvideo.ch'

View File

@@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
{
'url': 'http://www.ntv.ru/kino/Koma_film',
'info_dict': {
'id': '750783',
'id': '758100',
'ext': 'flv',
'title': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ',
'description': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ',
'duration': 28,
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
'duration': 5592,
},
'params': {
# rtmp download

View File

@@ -1,44 +1,81 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import compat_urllib_parse
from ..utils import int_or_none
class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
_VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
_TEST = {
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'file': '1962.flv',
'md5': '35272469887dca97abd30abecc6cdf75',
'md5': '956b8ca569f7f4d8ec563e2c41598441',
'info_dict': {
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
"age_limit": 18,
'id': '1962',
'ext': 'mp4',
'title': 'Sierra loves doing laundry',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
video_title = mobj.group('video_title')
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
next_url = self._html_search_regex(
r'&hd=(http.+?)&', webpage, 'video URL')
next_url = compat_urllib_parse.unquote(next_url)
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' porn HD Video | PornHD.com '
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
video_url = self._download_webpage(
next_url, video_id, note='Retrieving video URL',
errnote='Could not retrieve video URL')
age_limit = 18
description = self._html_search_regex(
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
view_count = int_or_none(self._html_search_regex(
r'(\d+) views </span>', webpage, 'view count', fatal=False))
formats = [
{
'url': format_url,
'ext': format.lower(),
'format_id': '%s-%s' % (format.lower(), quality.lower()),
'quality': 1 if quality.lower() == 'high' else 0,
} for format, quality, format_url in re.findall(
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
]
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
if mobj:
flashvars = json.loads(mobj.group('flashvars'))
formats.extend([
{
'url': flashvars['hashlink'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-low',
'quality': 0,
},
{
'url': flashvars['hd'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-high',
'quality': 1,
}
])
thumbnail = flashvars['urlWallpaper']
else:
thumbnail = self._og_search_thumbnail(webpage)
self._sort_formats(formats)
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
'age_limit': age_limit,
'title': title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}

View File

@@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clipId=(\d+)',
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',

View File

@@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex(
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
webpage, 'video url', flags=re.DOTALL)

View File

@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
'md5': '03af18b73a07b4088753930db7a34add',
'info_dict': {
"title": "Luati-le Banii sez 4 ep 1",
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
"description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
}
}

View File

@@ -9,46 +9,136 @@ from ..utils import (
parse_duration,
parse_iso8601,
unescapeHTML,
compat_str,
)
class RTSIE(InfoExtractor):
IE_DESC = 'RTS.ch'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
_TEST = {
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': {
'id': '3449373',
'ext': 'mp4',
'duration': 1488,
'title': 'Les Enfants Terribles',
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image'
_TESTS = [
{
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': {
'id': '3449373',
'ext': 'mp4',
'duration': 1488,
'title': 'Les Enfants Terribles',
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image'
},
},
}
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
'info_dict': {
'id': '5624067',
'ext': 'mp4',
'duration': 3720,
'title': 'Les yeux dans les cieux - Mon homard au Canada',
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'ext': 'mp4',
'duration': 48,
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
'description': 'Hockey - Playoff',
'uploader': 'Hockey',
'upload_date': '20140403',
'timestamp': 1396556882,
'thumbnail': 're:^https?://.*\.image'
},
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': {
'id': '5745356',
'ext': 'mp4',
'duration': 33,
'title': 'Londres cachée par un épais smog',
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
'uploader': 'Le Journal en continu',
'upload_date': '20140403',
'timestamp': 1396537322,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
'info_dict': {
'id': '5706148',
'ext': 'mp3',
'duration': 123,
'title': '"Urban Hippie", de Damien Krisl',
'description': 'Des Hippies super glam.',
'upload_date': '20140403',
'timestamp': 1396551600,
},
},
]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
all_info = self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
info = all_info['video']['JSONinfo']
def download_json(internal_id):
return self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
video_id)
all_info = download_json(video_id)
# video_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, video_id)
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
'internal video id')
all_info = download_json(internal_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
duration = parse_duration(info.get('duration'))
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
duration = parse_duration(duration)
view_count = info.get('plays')
thumbnail = unescapeHTML(info.get('preview_image_url'))
def extract_bitrate(url):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
formats = [{
'format_id': fid,
'url': furl,
'tbr': int_or_none(self._search_regex(
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
'tbr': extract_bitrate(furl),
} for fid, furl in info['streams'].items()]
if 'media' in info:
formats.extend([{
'format_id': '%s-%sk' % (media['ext'], media['rate']),
'url': 'http://download-video.rts.ch/%s' % media['url'],
'tbr': media['rate'] or extract_bitrate(media['url']),
} for media in info['media'] if media.get('rate')])
self._sort_formats(formats)
return {
@@ -57,6 +147,7 @@ class RTSIE(InfoExtractor):
'title': info['title'],
'description': info.get('intro'),
'duration': duration,
'view_count': view_count,
'uploader': info.get('programName'),
'timestamp': upload_timestamp,
'thumbnail': thumbnail,

View File

@@ -0,0 +1,84 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..utils import (
struct_unpack,
)
class RTVEALaCartaIE(InfoExtractor):
    """Information extractor for videos from RTVE "a la carta"."""

    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        """Decode the video URL hidden in a base64-encoded PNG.

        The payload is taken from the first ``tEXt`` marker found in the
        decoded data: the 4 bytes before the marker give the payload length
        and the payload follows the marker. With NUL bytes dropped, the
        payload has the form ``<alphabet data>#<url data>``; the URL data is
        a stream of two-digit indexes into the alphabet, interleaved with
        filler characters that are skipped.
        """
        raw = base64.b64decode(png)
        marker_pos = raw.find(b'tEXt')
        # Back up 4 bytes so the chunk starts at its big-endian length field.
        chunk = raw[marker_pos - 4:]
        payload_len = struct_unpack('!I', chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        payload = bytearray(chunk[8:8 + payload_len])
        chars = [chr(byte) for byte in payload if byte != 0]
        separator = chars.index('#')
        alphabet_chars = chars[:separator]
        url_chars = chars[separator + 1:]

        # Build the alphabet: keep one character, then skip a number of
        # filler characters that cycles through 1, 2, 3, 0.
        alphabet = []
        cycle = 0
        to_skip = 0
        for char in alphabet_chars:
            if to_skip:
                to_skip -= 1
                continue
            alphabet.append(char)
            cycle = (cycle + 1) % 4
            to_skip = cycle

        # Decode the URL: each output character is alphabet[tens + units].
        # The tens digit is read first, then some filler characters are
        # skipped (3 before the first units digit), then the units digit.
        url = ''
        have_tens = False
        to_skip = 3
        decoded = 1
        for char in url_chars:
            if not have_tens:
                index = int(char) * 10
                have_tens = True
            elif to_skip:
                to_skip -= 1
            else:
                index += int(char)
                url += alphabet[index]
                to_skip = (decoded + 3) % 4
                have_tens = False
                decoded += 1

        return url

    def _real_extract(self, url):
        """Return the info dict for the video identified by the URL."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Title and thumbnail come from the first item of the JSON config.
        config = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)
        item = config['page']['items'][0]

        # The media URL is encoded in the text of this ".png" resource.
        encoded_png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id,
            video_id, 'Downloading url information')
        media_url = self._decrypt_url(encoded_png)

        return {
            'id': video_id,
            'title': item['title'],
            'url': media_url,
            'thumbnail': item['image'],
        }

View File

@@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return {
'_type': 'video',

View File

@@ -9,8 +9,18 @@ from ..utils import (
class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
_TEST = {
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
_TESTS = [
{
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': {
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
},
{
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@@ -19,22 +29,23 @@ class TeamcocoIE(InfoExtractor):
"title": "Louis C.K. Interview Pt. 1 11/3/11"
}
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
video_id = self._html_search_regex(
r'<article class="video" data-id="(\d+?)"',
webpage, 'video id')
self.report_extraction(video_id)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
r'<article class="video" data-id="(\d+?)"',
webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
data = self._download_xml(
data_url, display_id, 'Downloading data webpage')
qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
@@ -69,6 +80,7 @@ class TeamcocoIE(InfoExtractor):
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
'consciousness, but that half the time our brains are '
'actively fooling us.'),
'uploader': 'Dan Dennett',
'width': 854,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -50,10 +51,10 @@ class TEDIE(SubtitlesInfoExtractor):
}
}]
_FORMATS_PREFERENCE = {
'low': 1,
'medium': 2,
'high': 3,
_NATIVE_FORMATS = {
'low': {'preference': 1, 'width': 320, 'height': 180},
'medium': {'preference': 2, 'width': 512, 'height': 288},
'high': {'preference': 3, 'width': 854, 'height': 480},
}
def _extract_info(self, webpage):
@@ -98,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0]
formats = [{
'ext': 'mp4',
'url': format_url,
'format_id': format_id,
'format': format_id,
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])

View File

@@ -1,63 +1,83 @@
import os
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
int_or_none,
str_to_int,
)
from ..aes import (
aes_decrypt_text
)
from ..aes import aes_decrypt_text
class Tube8IE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4',
u'md5': u'e9e0b0c86734e5e3766e653509475db0',
u'info_dict': {
u"description": u"hot teen Kasia grinding",
u"uploader": u"unknown",
u"title": u"Kasia music video",
u"age_limit": 18,
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
'file': '229795.mp4',
'md5': 'e9e0b0c86734e5e3766e653509475db0',
'info_dict': {
'description': 'hot teen Kasia grinding',
'uploader': 'unknown',
'title': 'Kasia music video',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
video_id = mobj.group('id')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
if thumbnail:
thumbnail = thumbnail.replace('\\/', '/')
flashvars = json.loads(self._html_search_regex(
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
if webpage.find('"encrypted":true')!=-1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
video_url = flashvars['video_url']
if flashvars.get('encrypted') is True:
video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
format_id = '-'.join(path.split('/')[4].split('_')[:2])
thumbnail = flashvars.get('image_url')
title = self._html_search_regex(
r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
webpage, 'uploader', fatal=False)
like_count = int_or_none(self._html_search_regex(
r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
dislike_count = int_or_none(self._html_search_regex(
r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
view_count = self._html_search_regex(
r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
comment_count = self._html_search_regex(
r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
if comment_count:
comment_count = str_to_int(comment_count)
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'format_id': format_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
}

View File

@@ -11,7 +11,7 @@ from ..utils import (
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
IE_NAME = 'ustream'
_TEST = {
'url': 'http://www.ustream.tv/recorded/20274954',
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
if m.group('type') == 'embed':
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
return self.url_result(desktop_url, 'Ustream')
video_id = m.group('videoID')
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id

View File

@@ -0,0 +1,26 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class VideoWeedIE(NovaMovIE):
    """VideoWeed variant of the NovaMov extractor.

    Only class-level settings (host, URL pattern, page regexes and the test
    case) are overridden here; no methods are redefined, so all behaviour
    comes from NovaMovIE.
    """
    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'

    # Raw string: '\.' is a regex escape, not a Python string escape. In a
    # non-raw literal it is an invalid escape sequence that newer Python
    # versions warn about; the resulting pattern text is unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}

    _HOST = 'www.videoweed.es'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'

    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion dissapeared image magic illusion',
            'description': ''
        },
    }

View File

@@ -1,10 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class WeiboIE(InfoExtractor):
"""
The videos in Weibo come from different sites, this IE just finds the link
@@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = {
u'add_ie': ['Sina'],
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'file': u'98322879.flv',
u'info_dict': {
u'title': u'魔声耳机最新广告“All Eyes On Us”',
'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
'info_dict': {
'id': '98322879',
'ext': 'flv',
'title': '魔声耳机最新广告“All Eyes On Us”',
},
u'note': u'Sina video',
u'params': {
u'skip_download': True,
'params': {
'skip_download': True,
},
'add_ie': ['Sina'],
}
# Additional example videos from different sites
@@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
video_id = mobj.group('id')
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
info_page = self._download_webpage(info_url, video_id)
info = json.loads(info_page)
info = self._download_json(info_url, video_id)
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
#Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
# Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
player_url = videos_urls[-1]
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
player_url)
if m_sina is not None:
self.to_screen('Sina video detected')
sina_id = m_sina.group(1)
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
return self.url_result(player_url)

View File

@@ -3,11 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_TEST = {
_TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': {
@@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}
}, {
# youtube video
'url': 'http://www.wimp.com/clowncar/',
'info_dict': {
'id': 'cG4CEr2aiSg',
'ext': 'mp4',
'title': 'Basset hound clown car...incredible!',
'description': 'md5:8d228485e0719898c017203f900b3a35',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
'upload_date': '20140303',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
'_type': 'url',
'url': video_url,
'ie_key': YoutubeIE.ie_key(),
}
return {
'id': video_id,
@@ -31,4 +52,4 @@ class WimpIE(InfoExtractor):
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}
}

View File

@@ -15,22 +15,24 @@ from ..utils import (
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'file': '214727115.mp4',
'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': {
'id': '214727115',
'ext': 'mp4',
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith',
},
},
{
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
'file': '103000935.mp4',
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
'info_dict': {
'id': '103000935',
'ext': 'mp4',
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
@@ -60,10 +62,9 @@ class YahooIE(InfoExtractor):
'env': 'prod',
'format': 'json',
})
query_result_json = self._download_webpage(
query_result = self._download_json(
'http://video.query.yahoo.com/v1/public/yql?' + data,
video_id, 'Downloading video info')
query_result = json.loads(query_result_json)
info = query_result['query']['results']['mediaObj'][0]
meta = info['meta']
@@ -86,7 +87,6 @@ class YahooIE(InfoExtractor):
else:
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
formats.append(format_info)
self._sort_formats(formats)
@@ -134,27 +134,25 @@ class YahooSearchIE(SearchInfoExtractor):
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
res = {
'_type': 'playlist',
'id': query,
'entries': []
}
for pagenum in itertools.count(0):
entries = []
for pagenum in itertools.count(0):
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
webpage = self._download_webpage(result_url, query,
note='Downloading results page '+str(pagenum+1))
info = json.loads(webpage)
info = self._download_json(result_url, query,
note='Downloading results page '+str(pagenum+1))
m = info['m']
results = info['results']
for (i, r) in enumerate(results):
if (pagenum * 30) +i >= n:
if (pagenum * 30) + i >= n:
break
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
res['entries'].append(e)
if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):
entries.append(e)
if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
break
return res
return {
'_type': 'playlist',
'id': query,
'entries': entries,
}

View File

@@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
)
))
|youtu\.be/ # just youtu.be/xxxx
|https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
@@ -1453,7 +1454,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
page, u'title')
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1753,7 +1755,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions'

View File

@@ -1,5 +1,7 @@
from .atomicparsley import AtomicParsleyPP
from .ffmpeg import (
FFmpegAudioFixPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
@@ -9,6 +11,8 @@ from .ffmpeg import (
from .xattrpp import XAttrMetadataPP
__all__ = [
'AtomicParsleyPP',
'FFmpegAudioFixPP',
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegVideoConvertor',

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import subprocess
from .common import PostProcessor
from ..utils import (
check_executable,
compat_urlretrieve,
encodeFilename,
PostProcessingError,
prepend_extension,
shell_quote
)
class AtomicParsleyPPError(PostProcessingError):
    """Error raised by AtomicParsleyPP when the thumbnail cannot be embedded."""
class AtomicParsleyPP(PostProcessor):
    """Post-processor that adds the video thumbnail as artwork via AtomicParsley."""

    def run(self, info):
        """Embed info['thumbnail'] into the file at info['filepath'].

        The thumbnail is downloaded next to the media file, AtomicParsley
        writes a new copy of the media file with the artwork attached, and
        that copy then replaces the original.

        Returns the tuple (True, info).

        Raises AtomicParsleyPPError if the AtomicParsley executable is
        missing, if info has no thumbnail, or if AtomicParsley exits with a
        non-zero status (the message is its stderr output).
        """
        if not check_executable('AtomicParsley', ['-v']):
            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        temp_thumbnail = prepend_extension(filename, 'thumb')

        if not info.get('thumbnail'):
            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')

        compat_urlretrieve(info['thumbnail'], temp_thumbnail)

        succeeded = False
        try:
            cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]

            self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)

            if self._downloader.params.get('verbose', False):
                self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()

            if p.returncode != 0:
                msg = stderr.decode('utf-8', 'replace').strip()
                raise AtomicParsleyPPError(msg)
            succeeded = True
        finally:
            # The downloaded thumbnail is only needed while AtomicParsley
            # runs; remove it on failure as well so it is never left behind.
            os.remove(encodeFilename(temp_thumbnail))
            # A failed run may have produced a partial output file.
            if not succeeded and os.path.exists(encodeFilename(temp_filename)):
                os.remove(encodeFilename(temp_filename))

        # Replace the original download with the copy that carries the artwork.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, info

View File

@@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
bcmd = [self._downloader.encode(c) for c in cmd]
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
@@ -465,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy']
if info['ext'] == u'm4a':
options = ['-vn', '-acodec', 'copy']
else:
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
@@ -484,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
class FFmpegAudioFixPP(FFmpegPostProcessor):
    """Post-processor that rewrites a file through ffmpeg with '-vn -acodec copy'."""

    def run(self, info):
        """Re-run info['filepath'] through ffmpeg and replace it with the result.

        Returns the tuple (True, info).
        """
        source_path = info['filepath']
        fixed_path = prepend_extension(source_path, 'temp')

        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % source_path)
        self.run_ffmpeg(source_path, fixed_path, ['-vn', '-acodec', 'copy'])

        # Swap the freshly written copy into the place of the original file.
        encoded_source = encodeFilename(source_path)
        os.remove(encoded_source)
        os.rename(encodeFilename(fixed_path), encoded_source)

        return True, info

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import calendar
import codecs
import contextlib
import ctypes
import datetime
@@ -909,25 +910,84 @@ def platform_name():
return res
def write_string(s, out=None):
def _windows_write_string(s, out):
    """Write the text ``s`` to the Windows console attached to ``out``.

    Returns True if the string was written using WriteConsoleW, and False
    if it has yet to be written out by the caller (``out`` is not file
    descriptor 1 or 2, or its handle is not a console).

    Raises OSError if WriteConsoleW reports failure or makes no progress.
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    fileno = out.fileno()
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(text):
        """Return the index of the first character above U+FFFF, or len(text)."""
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    # Write in chunks of at most 1024 characters and advance through the
    # string after every call. Characters above U+FFFF take two UTF-16 code
    # units, so they are written on their own: that keeps the number of units
    # reported by WriteConsoleW equal to the number of characters consumed
    # for every other chunk.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if written.value == 0:
            # Success without progress would otherwise loop forever.
            raise OSError('Failed to write string')
        if not count:
            # We just wrote a single character above U+FFFF (two code units).
            s = s[1:]
        else:
            s = s[written.value:]
    return True
def write_string(s, out=None, encoding=None):
if out is None:
out = sys.stderr
assert type(s) == compat_str
if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
if _windows_write_string(s, out):
return
if ('b' in getattr(out, 'mode', '') or
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
s = s.encode(preferredencoding(), 'ignore')
try:
byt = s.encode(encoding or preferredencoding(), 'ignore')
out.write(byt)
elif hasattr(out, 'buffer'):
enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
byt = s.encode(enc, 'ignore')
out.buffer.write(byt)
else:
out.write(s)
except UnicodeEncodeError:
# In Windows shells, this can fail even when the codec is just charmap!?
# See https://wiki.python.org/moin/PrintFails#Issue
if sys.platform == 'win32' and hasattr(out, 'encoding'):
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
out.write(s)
else:
raise
out.flush()
@@ -1176,12 +1236,12 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD"
def int_or_none(v, scale=1):
return v if v is None else (int(v) // scale)
def int_or_none(v, scale=1, default=None):
return default if v is None else (int(v) // scale)
def float_or_none(v, scale=1):
return v if v is None else (float(v) / scale)
def float_or_none(v, scale=1, default=None):
return default if v is None else (float(v) / scale)
def parse_duration(s):
@@ -1263,9 +1323,11 @@ class PagedList(object):
def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub(
r'\\U[0-9a-fA-F]{8}',
lambda m: m.group(0).decode('unicode-escape'), s)
lambda m: unicode_escape(m.group(0))[0],
s)
try:
struct.pack(u'!I', 0)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.04.03.1'
__version__ = '2014.04.13'