Compare commits

...

18 Commits

Author SHA1 Message Date
a21420389e release 2015.02.19.3 2015-02-19 19:28:17 +01:00
6140baf4e1 [nationalgeographic] Add extractor (closes #4960) 2015-02-19 18:17:31 +01:00
8fc642eb5b [pornhub] Fix uploader regex 2015-02-19 22:15:49 +06:00
e66e1a0046 [pornhub] Add support for playlists (Closes #4995) 2015-02-19 22:15:19 +06:00
d5c69f1da4 [5min] Cover joystiq.com URLs (Closes #4962) 2015-02-19 21:47:11 +06:00
5c8a3f862a [nbc] Use a test video that works outside the US 2015-02-19 15:00:39 +01:00
a3b9157f49 [cbssports] Add extractor (closes #4996) 2015-02-19 13:06:53 +01:00
b88ba05356 [imgur] Simplify 2015-02-19 05:53:09 +01:00
b74d505577 Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements' 2015-02-19 05:16:11 +01:00
9e2d7dca87 [imgur] improve error check for non-video URLs 2015-02-18 19:47:54 -08:00
d236b37ac9 [imgur] improve regex #4998 2015-02-18 19:28:19 -08:00
e880c66bd8 [theonion] Modernize 2015-02-19 04:12:40 +01:00
383456aa29 [Makefile] Also delete *.avi files in clean 2015-02-19 04:09:52 +01:00
1a13940c8d [imgur] support regular URL 2015-02-18 18:12:48 -08:00
3d54788495 [webofstories] Fix extraction 2015-02-19 02:12:08 +01:00
71d53ace2f [sockshare] Do not require thumbnail anymore
Thumbnail is not present on the website anymore.
2015-02-19 02:04:30 +01:00
f37e3f99f0 [generic] Correct test case
Video has been reuploaded / edited
2015-02-19 02:00:52 +01:00
bd03ffc16e [netzkino] Skip download in test case
Works fine from Germany, but fails from everywhere else
2015-02-19 01:58:54 +01:00
16 changed files with 155 additions and 40 deletions

Makefile

@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin

docs/supportedsites.md

@@ -68,6 +68,7 @@
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 - **CBS**
 - **CBSNews**: CBS News
+- **CBSSports**
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chilloutzone**
@@ -264,6 +265,7 @@
 - **myvideo**
 - **MyVidster**
 - **n-tv.de**
+- **NationalGeographic**
 - **Naver**
 - **NBA**
 - **NBC**
@@ -321,6 +323,7 @@
 - **podomatic**
 - **PornHd**
 - **PornHub**
+- **PornHubPlaylist**
 - **Pornotube**
 - **PornoXO**
 - **PromptFile**

youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
+from .cbssports import CBSSportsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
@@ -284,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
+from .nationalgeographic import NationalGeographicIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -352,7 +354,10 @@ from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
-from .pornhub import PornHubIE
+from .pornhub import (
+    PornHubIE,
+    PornHubPlaylistIE,
+)
 from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE

youtube_dl/extractor/cbssports.py

@@ -0,0 +1,30 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CBSSportsIE(InfoExtractor):
+    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+        'info_dict': {
+            'id': '_d5_GbO8p1sT',
+            'ext': 'flv',
+            'title': 'US Open flashbacks: 1990s',
+            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        section = mobj.group('section')
+        video_id = mobj.group('id')
+        all_videos = self._download_json(
+            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
+            video_id)
+        # The json file contains the info of all the videos in the section
+        video_info = next(v for v in all_videos if v['pcid'] == video_id)
+        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
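
Note: the new extractor never downloads media itself; it looks the video up in the section's JSON listing and hands the thePlatform release ID off via url_result(). As a rough sketch (an assumption about the helper's shape, not something shown in this diff), the value returned above boils down to a "url"-type result that the core re-dispatches to the named extractor:

    # Approximate shape of url_result('theplatform:%s' % video_info['pid'], 'ThePlatform').
    # The 'theplatform:<pid>' shorthand is then handled by ThePlatformIE.
    result = {
        '_type': 'url',
        'url': 'theplatform:%s' % video_info['pid'],
        'ie_key': 'ThePlatform',
    }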

youtube_dl/extractor/fivemin.py

@@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
     _VALID_URL = r'''(?x)
         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
+            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
             5min:)
         (?P<id>\d+)
         '''

youtube_dl/extractor/generic.py

@@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'Mrj4DVp2zeA',
                 'ext': 'mp4',
-                'upload_date': '20150204',
+                'upload_date': '20150212',
                 'uploader': 'The National Archives UK',
                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                 'uploader_id': 'NationalArchives08',

youtube_dl/extractor/imgur.py

@@ -7,11 +7,12 @@ from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
+    ExtractorError,
 )
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,6 +22,14 @@ class ImgurIE(InfoExtractor):
             'title': 'MRW gifv is up and running without any bugs',
             'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
         },
+    }, {
+        'url': 'https://imgur.com/A61SaA1',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 'MRW gifv is up and running without any bugs',
+            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+        },
     }]
 
     def _real_extract(self, url):
@@ -34,10 +43,14 @@ class ImgurIE(InfoExtractor):
             r'<param name="height" value="([0-9]+)"',
             webpage, 'height', fatal=False))
 
-        formats = []
         video_elements = self._search_regex(
             r'(?s)<div class="video-elements">(.*?)</div>',
-            webpage, 'video elements')
+            webpage, 'video elements', default=None)
+        if not video_elements:
+            raise ExtractorError(
+                'No sources found for video %s. Maybe an image?' % video_id,
+                expected=True)
+
         formats = []
         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
             formats.append({

youtube_dl/extractor/nationalgeographic.py

@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    url_basename,
+)
+
+
+class NationalGeographicIE(InfoExtractor):
+    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
+
+    _TEST = {
+        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+        'info_dict': {
+            'id': '4DmDACA6Qtk_',
+            'ext': 'flv',
+            'title': 'Mating Crabs Busted by Sharks',
+            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+        },
+        'add_ie': ['ThePlatform'],
+    }
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+
+        webpage = self._download_webpage(url, name)
+        feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
+        guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+
+        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
+        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
+        theplatform_id = url_basename(content.attrib.get('url'))
+
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+            # For some reason, the normal links don't work and we must force the use of f4m
+            {'force_smil_url': True}))
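
Note: smuggle_url() is how this extractor forwards the force_smil_url flag to ThePlatformIE (see the theplatform.py hunk further down, which reads it back out of smuggled_data). A simplified sketch of the idea, assuming the real helpers in youtube_dl/utils.py behave roughly like this (the real ones urlencode the payload; the fragment key here is from memory, not this diff):

    import json

    def smuggle_url(url, data):
        # Append youtube-dl-internal data to the URL fragment so it survives
        # being passed around as a plain string.
        return url + '#__youtubedl_smuggle=' + json.dumps(data)

    def unsmuggle_url(smug_url, default=None):
        # Split the smuggled data back off; plain URLs pass through untouched.
        if '#__youtubedl_smuggle=' not in smug_url:
            return smug_url, default
        url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
        return url, json.loads(payload)

    url, smuggled_data = unsmuggle_url(
        smuggle_url('http://example.com/x', {'force_smil_url': True}), {})
    # -> ('http://example.com/x', {'force_smil_url': True})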

youtube_dl/extractor/nbc.py

@@ -18,13 +18,13 @@
     _TESTS = [
         {
-            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
             # md5 checksum is not stable
             'info_dict': {
-                'id': 'bTmnLCvIbaaH',
+                'id': 'c9xnCo0YPOPH',
                 'ext': 'flv',
-                'title': 'I Am a Firefighter',
-                'description': 'An emergency puts Dawson\'s firefighter skills to the ultimate test in this four-part digital series.',
+                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
             },
         },
         {

youtube_dl/extractor/netzkino.py

@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
             'timestamp': 1344858571,
             'age_limit': 12,
         },
+        'params': {
+            'skip_download': 'Download only works from Germany',
+        }
     }
 
     def _real_extract(self, url):

youtube_dl/extractor/pornhub.py

@@ -56,7 +56,7 @@
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
@@ -110,3 +110,33 @@
             'formats': formats,
             'age_limit': 18,
         }
+
+
+class PornHubPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
+        ]
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+            playlist_id)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist.get('title'), playlist.get('description'))
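
Note: the playlist page is scraped twice here: the entries come from every view_video.php link on the page, while the title and description come from the embedded playlistObject JSON. Judging from the call above (and as an assumption about the helper, not something shown in this diff), playlist_result() wraps both into a playlist-typed dict, roughly:

    # Approximate shape of the value returned by playlist_result() above.
    result = {
        '_type': 'playlist',
        'entries': entries,                          # url_result() dicts pointing at PornHub
        'id': playlist_id,                           # e.g. '6201671'
        'title': playlist.get('title'),              # e.g. 'P0p4'
        'description': playlist.get('description'),
    }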

youtube_dl/extractor/sockshare.py

@@ -25,7 +25,6 @@
             'id': '437BE28B89D799D7',
             'title': 'big_buck_bunny_720p_surround.avi',
             'ext': 'avi',
-            'thumbnail': 're:^http://.*\.jpg$',
         }
     }
 
@@ -45,7 +44,7 @@
             ''', webpage, 'hash')
 
         fields = {
-            "hash": confirm_hash,
+            "hash": confirm_hash.encode('utf-8'),
             "confirm": "Continue as Free User"
         }
 
@@ -68,7 +67,7 @@
             webpage, 'title', default=None)
         thumbnail = self._html_search_regex(
             r'<img\s+src="([^"]*)".+?name="bg"',
-            webpage, 'thumbnail')
+            webpage, 'thumbnail', default=None)
 
         formats = [{
             'format_id': 'sd',

youtube_dl/extractor/theonion.py

@@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
     _TEST = {
         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
         'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        article_id = mobj.group('article_id')
-
-        webpage = self._download_webpage(url, article_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
             r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        if not sources:
-            raise ExtractorError(
-                'No sources found for video %s' % video_id, expected=True)
-
         formats = []
         for src, type_ in sources:
             if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                 })
             elif type_ == 'application/x-mpegURL':
                 formats.extend(
-                    self._extract_m3u8_formats(src, video_id, preference=-1))
+                    self._extract_m3u8_formats(src, display_id, preference=-1))
             else:
                 self.report_warning(
                     'Encountered unexpected format: %s' % type_)
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,

youtube_dl/extractor/theplatform.py

@@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
         if not provider_id:
             provider_id = 'dJ5BDC'
 
-        if mobj.group('config'):
+        if smuggled_data.get('force_smil_url', False):
+            smil_url = url
+        elif mobj.group('config'):
             config_url = url + '&form=json'
             config_url = config_url.replace('swf/', 'config/')
             config_url = config_url.replace('onsite/', 'onsite/config/')

youtube_dl/extractor/webofstories.py

@@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
-        story_filename = self._search_regex(
-            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
-        speaker_id = self._search_regex(
-            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
-        story_id = self._search_regex(
-            r'\.storyId\((\d+)\)', webpage, 'story ID')
-        speaker_type = self._search_regex(
-            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
-        great_life = self._search_regex(
-            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+        embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+            r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+            webpage, 'embed params').split(',')]
+
+        (
+            _, speaker_id, story_id, story_duration,
+            speaker_type, great_life, _thumbnail, _has_subtitles,
+            story_filename, _story_order) = embed_params
+
         is_great_life_series = great_life == 'true'
-        duration = int_or_none(self._search_regex(
-            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+        duration = int_or_none(story_duration)
 
         # URL building, see: http://www.webofstories.com/scripts/player.js
         ms_prefix = ''
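
Note: instead of five separate regex lookups, the page is now parsed once: the arguments of the getEmbedCode(...) call are captured, split on commas, and unpacked positionally. A small standalone illustration with a made-up snippet (the markup and values are hypothetical; only the regex and the split/strip logic come from the diff):

    import re

    snippet = """$("#embedCode").html(getEmbedCode('', '9339', '53267', '622',
        'Single', 'true', 'thumb.jpg', 'false', 'lives.are.short.flv', '1'))"""

    embed_params = [s.strip(" \r\n\t'") for s in re.search(
        r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', snippet
    ).group(1).split(',')]
    # -> ['', '9339', '53267', '622', 'Single', 'true', 'thumb.jpg', 'false',
    #     'lives.are.short.flv', '1'] -- ten values, matching the tuple unpacking above.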

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.19.2'
+__version__ = '2015.02.19.3'