Compare commits

...

18 Commits

Author SHA1 Message Date
a21420389e release 2015.02.19.3 2015-02-19 19:28:17 +01:00
6140baf4e1 [nationalgeographic] Add extractor (closes #4960) 2015-02-19 18:17:31 +01:00
8fc642eb5b [pornhub] Fix uploader regex 2015-02-19 22:15:49 +06:00
e66e1a0046 [pornhub] Add support for playlists (Closes #4995) 2015-02-19 22:15:19 +06:00
d5c69f1da4 [5min] Cover joystiq.com URLs (Closes #4962) 2015-02-19 21:47:11 +06:00
5c8a3f862a [nbc] Use a test video that works outside the US 2015-02-19 15:00:39 +01:00
a3b9157f49 [cbssports] Add extractor (closes #4996) 2015-02-19 13:06:53 +01:00
b88ba05356 [imgur] Simplify 2015-02-19 05:53:09 +01:00
b74d505577 Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements' 2015-02-19 05:16:11 +01:00
9e2d7dca87 [imgur] improve error check for non-video URLs 2015-02-18 19:47:54 -08:00
d236b37ac9 [imgur] improve regex #4998 2015-02-18 19:28:19 -08:00
e880c66bd8 [theonion] Modernize 2015-02-19 04:12:40 +01:00
383456aa29 [Makefile] Also delete *.avi files in clean 2015-02-19 04:09:52 +01:00
1a13940c8d [imgur] support regular URL 2015-02-18 18:12:48 -08:00
3d54788495 [webofstories] Fix extraction 2015-02-19 02:12:08 +01:00
71d53ace2f [sockshare] Do not require thumbnail anymore
Thumbnail is not present on the website anymore.
2015-02-19 02:04:30 +01:00
f37e3f99f0 [generic] Correct test case
Video has been reuploaded / edited
2015-02-19 02:00:52 +01:00
bd03ffc16e [netzkino] Skip download in test case
Works fine from Germany, but fails from everywhere else
2015-02-19 01:58:54 +01:00
16 changed files with 155 additions and 40 deletions

Makefile

@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin

docs/supportedsites.md

@@ -68,6 +68,7 @@
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 - **CBS**
 - **CBSNews**: CBS News
+- **CBSSports**
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chilloutzone**
@@ -264,6 +265,7 @@
 - **myvideo**
 - **MyVidster**
 - **n-tv.de**
+- **NationalGeographic**
 - **Naver**
 - **NBA**
 - **NBC**
@@ -321,6 +323,7 @@
 - **podomatic**
 - **PornHd**
 - **PornHub**
+- **PornHubPlaylist**
 - **Pornotube**
 - **PornoXO**
 - **PromptFile**

youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
+from .cbssports import CBSSportsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
@@ -284,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
+from .nationalgeographic import NationalGeographicIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -352,7 +354,10 @@ from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
-from .pornhub import PornHubIE
+from .pornhub import (
+    PornHubIE,
+    PornHubPlaylistIE,
+)
 from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE

youtube_dl/extractor/cbssports.py

@@ -0,0 +1,30 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CBSSportsIE(InfoExtractor):
+    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+        'info_dict': {
+            'id': '_d5_GbO8p1sT',
+            'ext': 'flv',
+            'title': 'US Open flashbacks: 1990s',
+            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        section = mobj.group('section')
+        video_id = mobj.group('id')
+        all_videos = self._download_json(
+            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
+            video_id)
+        # The json file contains the info of all the videos in the section
+        video_info = next(v for v in all_videos if v['pcid'] == video_id)
+        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
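
Note: the new extractor never downloads media itself; it looks the video up in the section's JSON listing and hands the thePlatform release ID off via url_result(). As a rough sketch (an assumption about the helper's shape, not something shown in this diff), the value returned above boils down to a "url"-type result that the core re-dispatches to the named extractor:

    # Approximate shape of url_result('theplatform:%s' % video_info['pid'], 'ThePlatform').
    # The 'theplatform:<pid>' shorthand is then handled by ThePlatformIE.
    result = {
        '_type': 'url',
        'url': 'theplatform:%s' % video_info['pid'],
        'ie_key': 'ThePlatform',
    }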

youtube_dl/extractor/fivemin.py

@@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
     _VALID_URL = r'''(?x)
         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
+            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
             5min:)
         (?P<id>\d+)
         '''

youtube_dl/extractor/generic.py

@@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'Mrj4DVp2zeA',
                 'ext': 'mp4',
-                'upload_date': '20150204',
+                'upload_date': '20150212',
                 'uploader': 'The National Archives UK',
                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                 'uploader_id': 'NationalArchives08',

youtube_dl/extractor/imgur.py

@@ -7,11 +7,12 @@ from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
+    ExtractorError,
 )
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,6 +22,14 @@ class ImgurIE(InfoExtractor):
             'title': 'MRW gifv is up and running without any bugs',
             'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
         },
+    }, {
+        'url': 'https://imgur.com/A61SaA1',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 'MRW gifv is up and running without any bugs',
+            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+        },
     }]
 
     def _real_extract(self, url):
@@ -34,10 +43,14 @@ class ImgurIE(InfoExtractor):
             r'<param name="height" value="([0-9]+)"',
             webpage, 'height', fatal=False))
 
-        formats = []
         video_elements = self._search_regex(
             r'(?s)<div class="video-elements">(.*?)</div>',
-            webpage, 'video elements')
+            webpage, 'video elements', default=None)
+        if not video_elements:
+            raise ExtractorError(
+                'No sources found for video %s. Maybe an image?' % video_id,
+                expected=True)
+
         formats = []
         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
             formats.append({

youtube_dl/extractor/nationalgeographic.py

@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    url_basename,
+)
+
+
+class NationalGeographicIE(InfoExtractor):
+    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
+
+    _TEST = {
+        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+        'info_dict': {
+            'id': '4DmDACA6Qtk_',
+            'ext': 'flv',
+            'title': 'Mating Crabs Busted by Sharks',
+            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+        },
+        'add_ie': ['ThePlatform'],
+    }
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+
+        webpage = self._download_webpage(url, name)
+        feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
+        guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+
+        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
+        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
+        theplatform_id = url_basename(content.attrib.get('url'))
+
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+            # For some reason, the normal links don't work and we must force the use of f4m
+            {'force_smil_url': True}))
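
Note: smuggle_url() is how this extractor forwards the force_smil_url flag to ThePlatformIE (see the theplatform.py hunk further down, which reads it back out of smuggled_data). A simplified sketch of the idea, assuming the real helpers in youtube_dl/utils.py behave roughly like this (the real ones urlencode the payload; the fragment key here is from memory, not this diff):

    import json

    def smuggle_url(url, data):
        # Append youtube-dl-internal data to the URL fragment so it survives
        # being passed around as a plain string.
        return url + '#__youtubedl_smuggle=' + json.dumps(data)

    def unsmuggle_url(smug_url, default=None):
        # Split the smuggled data back off; plain URLs pass through untouched.
        if '#__youtubedl_smuggle=' not in smug_url:
            return smug_url, default
        url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
        return url, json.loads(payload)

    url, smuggled_data = unsmuggle_url(
        smuggle_url('http://example.com/x', {'force_smil_url': True}), {})
    # -> ('http://example.com/x', {'force_smil_url': True})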

youtube_dl/extractor/nbc.py

@@ -18,13 +18,13 @@
     _TESTS = [
         {
-            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
             # md5 checksum is not stable
             'info_dict': {
-                'id': 'bTmnLCvIbaaH',
+                'id': 'c9xnCo0YPOPH',
                 'ext': 'flv',
-                'title': 'I Am a Firefighter',
-                'description': 'An emergency puts Dawson\'s firefighter skills to the ultimate test in this four-part digital series.',
+                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
             },
         },
         {

youtube_dl/extractor/netzkino.py

@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
             'timestamp': 1344858571,
             'age_limit': 12,
         },
+        'params': {
+            'skip_download': 'Download only works from Germany',
+        }
     }
 
     def _real_extract(self, url):

youtube_dl/extractor/pornhub.py

@@ -56,7 +56,7 @@
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
@@ -110,3 +110,33 @@
             'formats': formats,
             'age_limit': 18,
         }
+
+
+class PornHubPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
+        ]
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+            playlist_id)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist.get('title'), playlist.get('description'))
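
Note: the playlist page is scraped twice here: the entries come from every view_video.php link on the page, while the title and description come from the embedded playlistObject JSON. Judging from the call above (and as an assumption about the helper, not something shown in this diff), playlist_result() wraps both into a playlist-typed dict, roughly:

    # Approximate shape of the value returned by playlist_result() above.
    result = {
        '_type': 'playlist',
        'entries': entries,                          # url_result() dicts pointing at PornHub
        'id': playlist_id,                           # e.g. '6201671'
        'title': playlist.get('title'),              # e.g. 'P0p4'
        'description': playlist.get('description'),
    }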

youtube_dl/extractor/sockshare.py

@@ -25,7 +25,6 @@
             'id': '437BE28B89D799D7',
             'title': 'big_buck_bunny_720p_surround.avi',
             'ext': 'avi',
-            'thumbnail': 're:^http://.*\.jpg$',
         }
     }
 
@@ -45,7 +44,7 @@
             ''', webpage, 'hash')
 
         fields = {
-            "hash": confirm_hash,
+            "hash": confirm_hash.encode('utf-8'),
             "confirm": "Continue as Free User"
         }
 
@@ -68,7 +67,7 @@
             webpage, 'title', default=None)
         thumbnail = self._html_search_regex(
             r'<img\s+src="([^"]*)".+?name="bg"',
-            webpage, 'thumbnail')
+            webpage, 'thumbnail', default=None)
 
         formats = [{
             'format_id': 'sd',

youtube_dl/extractor/theonion.py

@@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
     _TEST = {
         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
         'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        article_id = mobj.group('article_id')
-
-        webpage = self._download_webpage(url, article_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
             r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        if not sources:
-            raise ExtractorError(
-                'No sources found for video %s' % video_id, expected=True)
-
         formats = []
         for src, type_ in sources:
             if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                 })
             elif type_ == 'application/x-mpegURL':
                 formats.extend(
-                    self._extract_m3u8_formats(src, video_id, preference=-1))
+                    self._extract_m3u8_formats(src, display_id, preference=-1))
             else:
                 self.report_warning(
                     'Encountered unexpected format: %s' % type_)
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,

youtube_dl/extractor/theplatform.py

@@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
         if not provider_id:
             provider_id = 'dJ5BDC'
 
-        if mobj.group('config'):
+        if smuggled_data.get('force_smil_url', False):
+            smil_url = url
+        elif mobj.group('config'):
             config_url = url + '&form=json'
             config_url = config_url.replace('swf/', 'config/')
             config_url = config_url.replace('onsite/', 'onsite/config/')

youtube_dl/extractor/webofstories.py

@@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
-        story_filename = self._search_regex(
-            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
-        speaker_id = self._search_regex(
-            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
-        story_id = self._search_regex(
-            r'\.storyId\((\d+)\)', webpage, 'story ID')
-        speaker_type = self._search_regex(
-            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
-        great_life = self._search_regex(
-            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+        embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+            r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+            webpage, 'embed params').split(',')]
+
+        (
+            _, speaker_id, story_id, story_duration,
+            speaker_type, great_life, _thumbnail, _has_subtitles,
+            story_filename, _story_order) = embed_params
+
         is_great_life_series = great_life == 'true'
-        duration = int_or_none(self._search_regex(
-            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+        duration = int_or_none(story_duration)
 
         # URL building, see: http://www.webofstories.com/scripts/player.js
         ms_prefix = ''
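
Note: instead of five separate regex lookups, the page is now parsed once: the arguments of the getEmbedCode(...) call are captured, split on commas, and unpacked positionally. A small standalone illustration with a made-up snippet (the markup and values are hypothetical; only the regex and the split/strip logic come from the diff):

    import re

    snippet = """$("#embedCode").html(getEmbedCode('', '9339', '53267', '622',
        'Single', 'true', 'thumb.jpg', 'false', 'lives.are.short.flv', '1'))"""

    embed_params = [s.strip(" \r\n\t'") for s in re.search(
        r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', snippet
    ).group(1).split(',')]
    # -> ['', '9339', '53267', '622', 'Single', 'true', 'thumb.jpg', 'false',
    #     'lives.are.short.flv', '1'] -- ten values, matching the tuple unpacking above.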

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.19.2'
+__version__ = '2015.02.19.3'