Compare commits

...

42 Commits

SHA1 Message Date
7a1e71575e release 2016.07.02 2016-07-02 02:47:42 +07:00
ac2d8f54d1 [vine] Remove superfluous whitespace 2016-07-02 02:45:00 +07:00
14ff6baa0e [fusion] Improve 2016-07-02 02:44:37 +07:00
bb08101ec4 [Fusion] Add new extractor 2016-07-02 02:37:28 +07:00
bc4b2d75ba [pornhub] Add support for thumbzilla (Closes #8696) 2016-07-02 02:11:07 +07:00
35fc3021ba [periscope] Add another fallback source 2016-07-02 01:35:57 +07:00
347227237b [periscope] fix playlist extraction (#9967)
The JSON response changed and the extractor needed to be updated in order to gather the video IDs.
2016-07-02 01:29:11 +07:00
564dc3c6e8 [vine] Fix extraction (Closes #9970) 2016-07-02 01:24:57 +07:00
9f4576a7eb [twitch] Update usher URL (Closes #9975) 2016-07-01 23:16:43 +07:00
f11315e8d4 release 2016.07.01 2016-07-01 03:59:57 +07:00
0c2ac64bb8 [sixplay] Rename preference key to quality in format dict 2016-07-01 03:57:59 +07:00
a9eede3913 [test/compat] compat_shlex_split: test with newlines 2016-07-01 03:30:35 +07:00
9e29ef13a3 [options] Accept quoted string across multiple lines (#9940)
Like:

    -f "
    bestvideo+bestaudio/
    best
    "
2016-07-01 03:30:31 +07:00
eaaaaec042 [pornhub] Add more tests with removed videos 2016-07-01 03:18:27 +07:00
3cb3b60064 [pornhub] Relax removed message regex (Closes #9964) 2016-07-01 03:14:23 +07:00
044e3d91b5 [Pornhub] Fix error detection 2016-07-01 02:59:50 +07:00
c9e538a3b1 [ctvnews] use orderedSet, increase the number of items for playlists and use smaller bin list for test 2016-06-30 19:52:32 +01:00
76dad392f5 [meta] Clarify the source of uppod st decryption algorithm 2016-06-30 18:27:57 +01:00
9617b557aa [ctv] Add new extractor(closes #4077) 2016-06-30 18:22:35 +01:00
bf4fa24414 [ctvnews] Add new extractor(closes #2156) 2016-06-30 18:22:35 +01:00
20361b4f25 [rds] extract 9c9media formats 2016-06-30 18:22:35 +01:00
05a0068a76 [9c9media] Add new extractor 2016-06-30 18:22:35 +01:00
66a42309fa release 2016.06.30 2016-06-30 23:56:55 +07:00
fd94e2671a [meta] Add support for pladform embeds 2016-06-30 23:20:44 +07:00
8ff6697861 [pladform] Improve embed detection 2016-06-30 23:19:29 +07:00
eafa643715 [meta] Make duration and description optional
For iframe URLs
2016-06-30 23:06:13 +07:00
049da7cb6c [meta] Extend _VALID_URL 2016-06-30 23:04:18 +07:00
7dbeee7e22 [generic] make twitter:player extraction non fatal 2016-06-30 14:11:55 +01:00
93ad6c6bfa [sixplay] Add new extractor(closes #2183) 2016-06-30 13:50:49 +01:00
329179073b [generic] add generic support for twitter:player embeds 2016-06-30 12:01:30 +01:00
4d86d2008e [urplay] fix typo and check with flake8 2016-06-30 11:30:42 +01:00
ab47b6e881 [theatlantic] Add new extractor(closes #6611) 2016-06-30 04:08:56 +01:00
df43389ade [skysports] Add new extractor(closes #7066) 2016-06-30 02:54:21 +01:00
397b305cfe [meta] Add new extractor(closes #8789) 2016-06-30 00:21:03 +01:00
e496fa50cd [urplay] Add new extractor(closes #9332) 2016-06-29 20:19:31 +01:00
06a96da15b [eagleplatform] Improve embed detection and extract in separate routine (Closes #9926) 2016-06-29 23:01:34 +07:00
70157c2c43 [aenetworks] add support for movie pages 2016-06-29 16:55:17 +01:00
c58ed8563d [aenetworks] extract history topic playlist title 2016-06-29 16:18:16 +01:00
4c7821227c [aenetworks:historytopic] fix topic video url 2016-06-29 16:03:32 +01:00
42362fdb5e [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) 2016-06-29 15:49:17 +01:00
97124e572d [arte:playlist] Fix test 2016-06-28 22:39:53 +07:00
32616c14cc [vrt] extract all formats 2016-06-28 14:02:03 +01:00
26 changed files with 720 additions and 124 deletions

View File

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.27**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.02**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.06.27
+[debug] youtube-dl version 2016.07.02
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -152,6 +152,8 @@
 - **CSNNE**
 - **CSpan**: C-SPAN
 - **CtsNews**: 華視新聞
+- **CTV**
+- **CTVNews**
 - **culturebox.francetvinfo.fr**
 - **CultureUnplugged**
 - **CWTV**
@@ -240,6 +242,7 @@
 - **FreeVideo**
 - **Funimation**
 - **FunnyOrDie**
+- **Fusion**
 - **GameInformer**
 - **Gamekings**
 - **GameOne**
@@ -272,6 +275,7 @@
 - **Helsinki**: helsinki.fi
 - **HentaiStigma**
 - **HistoricFilms**
+- **history:topic**: History.com Topic
 - **hitbox**
 - **hitbox:live**
 - **HornBunny**
@@ -358,6 +362,7 @@
 - **MatchTV**
 - **MDR**: MDR.DE and KiKA
 - **media.ccc.de**
+- **META**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
@@ -438,6 +443,7 @@
 - **nick.de**
 - **niconico**: ニコニコ動画
 - **NiconicoPlaylist**
+- **NineCNineMedia**
 - **njoy**: N-JOY
 - **njoy:embed**
 - **Noco**
@@ -503,7 +509,7 @@
 - **podomatic**
 - **PolskieRadio**
 - **PornHd**
-- **PornHub**
+- **PornHub**: PornHub and Thumbzilla
 - **PornHubPlaylist**
 - **PornHubUserVideos**
 - **Pornotube**
@@ -587,8 +593,10 @@
 - **Shared**: shared.sx and vivo.sx
 - **ShareSix**
 - **Sina**
+- **SixPlay**
 - **skynewsarabia:article**
 - **skynewsarabia:video**
+- **SkySports**
 - **Slideshare**
 - **Slutload**
 - **smotri**: Smotri.com
@@ -721,6 +729,7 @@
 - **UDNEmbed**: 聯合影音
 - **Unistra**
 - **Urort**: NRK P3 Urørt
+- **URPlay**
 - **USAToday**
 - **ustream**
 - **ustream:channel**

View File

@@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase):
     def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
+        self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])

     def test_compat_etree_fromstring(self):
         xml = '''

View File

@@ -7,18 +7,123 @@ from ..utils import (
     smuggle_url,
     update_url_query,
     unescapeHTML,
+    extract_attributes,
+    get_element_by_attribute,
+)
+from ..compat import (
+    compat_urlparse,
 )


-class AENetworksIE(InfoExtractor):
+class AENetworksBaseIE(InfoExtractor):
+    def theplatform_url_result(self, theplatform_url, video_id, query):
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': smuggle_url(
+                update_url_query(theplatform_url, query),
+                {
+                    'sig': {
+                        'key': 'crazyjava',
+                        'secret': 's3cr3t'
+                    },
+                    'force_smil_url': True
+                }),
+            'ie_key': 'ThePlatform',
+        }
+
+
+class AENetworksIE(AENetworksBaseIE):
     IE_NAME = 'aenetworks'
     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
+    _TESTS = [{
+        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+        'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
+        'info_dict': {
+            'id': '22253814',
+            'ext': 'mp4',
+            'title': 'Winter Is Coming',
+            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
+            'timestamp': 1338306241,
+            'upload_date': '20120529',
+            'uploader': 'AENE-NEW',
+        },
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'http://www.history.com/shows/ancient-aliens/season-1',
+        'info_dict': {
+            'id': '71889446852',
+        },
+        'playlist_mincount': 5,
+    }, {
+        'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
+        'info_dict': {
+            'id': 'SERIES4317',
+            'title': 'Atlanta Plastic',
+        },
+        'playlist_mincount': 2,
+    }, {
+        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
+        'only_matching': True
+    }, {
+        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
+        'only_matching': True
+    }, {
+        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
+        'only_matching': True
+    }, {
+        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
+        display_id = show_path or movie_display_id
+        webpage = self._download_webpage(url, display_id)
+        if show_path:
+            url_parts = show_path.split('/')
+            url_parts_len = len(url_parts)
+            if url_parts_len == 1:
+                entries = []
+                for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
+                    entries.append(self.url_result(
+                        compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
+                return self.playlist_result(
+                    entries, self._html_search_meta('aetn:SeriesId', webpage),
+                    self._html_search_meta('aetn:SeriesTitle', webpage))
+            elif url_parts_len == 2:
+                entries = []
+                for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
+                    episode_attributes = extract_attributes(episode_item)
+                    episode_url = compat_urlparse.urljoin(
+                        url, episode_attributes['data-canonical'])
+                    entries.append(self.url_result(
+                        episode_url, 'AENetworks',
+                        episode_attributes['data-videoid']))
+                return self.playlist_result(
+                    entries, self._html_search_meta('aetn:SeasonId', webpage))
+
+        video_id = self._html_search_meta('aetn:VideoID', webpage)
+        media_url = self._search_regex(
+            r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
+
+        info = self._search_json_ld(webpage, video_id, fatal=False)
+        info.update(self.theplatform_url_result(
+            media_url, video_id, {
+                'mbr': 'true',
+                'assetTypes': 'medium_video_s3'
+            }))
+        return info
+
+
+class HistoryTopicIE(AENetworksBaseIE):
+    IE_NAME = 'history:topic'
+    IE_DESC = 'History.com Topic'
+    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)/videos(?:/(?P<video_display_id>[^/?#]+))?'
     _TESTS = [{
         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
         'info_dict': {
-            'id': 'g12m5Gyt3fdR',
+            'id': '40700995724',
             'ext': 'mp4',
             'title': "Bet You Didn't Know: Valentine's Day",
             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
@@ -31,57 +136,39 @@ class AENetworksIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'expected_warnings': ['JSON-LD'],
     }, {
-        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
-        'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
-        'info_dict': {
-            'id': 'eg47EERs_JsZ',
-            'ext': 'mp4',
-            'title': 'Winter Is Coming',
-            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
-            'timestamp': 1338306241,
-            'upload_date': '20120529',
-            'uploader': 'AENE-NEW',
-        },
-        'add_ie': ['ThePlatform'],
-    }, {
-        'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
-        'only_matching': True
-    }, {
-        'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
-        'only_matching': True
-    }, {
-        'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
-        'only_matching': True
+        'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
+        'info_dict':
+        {
+            'id': 'world-war-i-history',
+            'title': 'World War I History',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'http://www.history.com/topics/world-war-i-history/videos',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
-        page_type, video_id = re.match(self._VALID_URL, url).groups()
-
-        webpage = self._download_webpage(url, video_id)
-
-        video_url_re = [
-            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
-            r"media_url\s*=\s*'([^']+)'"
-        ]
-        video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
-        query = {'mbr': 'true'}
-        if page_type == 'shows':
-            query['assetTypes'] = 'medium_video_s3'
-        if 'switch=hds' in video_url:
-            query['switch'] = 'hls'
-
-        info = self._search_json_ld(webpage, video_id, fatal=False)
-        info.update({
-            '_type': 'url_transparent',
-            'url': smuggle_url(
-                update_url_query(video_url, query),
-                {
-                    'sig': {
-                        'key': 'crazyjava',
-                        'secret': 's3cr3t'},
-                    'force_smil_url': True
-                }),
-        })
-        return info
+        topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
+        if video_display_id:
+            webpage = self._download_webpage(url, video_display_id)
+            release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
+            release_url = unescapeHTML(release_url)

+            return self.theplatform_url_result(
+                release_url, video_id, {
+                    'mbr': 'true',
+                    'switch': 'hls'
+                })
+        else:
+            webpage = self._download_webpage(url, topic_id)
+            entries = []
+            for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
+                video_attributes = extract_attributes(episode_item)
+                entries.append(self.theplatform_url_result(
+                    video_attributes['data-release-url'], video_attributes['data-id'], {
+                        'mbr': 'true',
+                        'switch': 'hls'
+                    }))
+            return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
         'info_dict': {
             'id': 'PL-013263',
             'title': 'Areva & Uramin',
+            'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
         },
         'playlist_mincount': 6,
     }, {

View File

@@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
},
'expected_warnings': ['HTTP Error 404'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:ctv_web:%s' % video_id,
'ie_key': 'NineCNineMedia',
}

View File

@@ -0,0 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
'upload_date': '20160630',
}
}, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
'info_dict':
{
'id': '1.2966224',
},
'playlist_mincount': 19,
}, {
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
'info_dict':
{
'id': '1.2876780',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True,
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}]
def _real_extract(self, url):
page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id):
return {
'_type': 'url_transparent',
'id': clip_id,
'url': '9c9media:ctvnews_web:%s' % clip_id,
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit():
return ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000,
})
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
return self.playlist_result(entries, page_id)

View File

@@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
         'skip': 'Georestricted',
     }]

+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
+            webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
     @staticmethod
     def _handle_error(response):
         status = int_or_none(response.get('status', 200))

View File

@@ -20,7 +20,10 @@ from .adobetv import (
     AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
-from .aenetworks import AENetworksIE
+from .aenetworks import (
+    AENetworksIE,
+    HistoryTopicIE,
+)
 from .afreecatv import AfreecaTVIE
 from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
@@ -168,6 +171,8 @@ from .crunchyroll import (
 )
 from .cspan import CSpanIE
 from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
 from .cultureunplugged import CultureUnpluggedIE
 from .cwtv import CWTVIE
 from .dailymail import DailyMailIE
@@ -276,6 +281,7 @@ from .freespeech import FreespeechIE
 from .freevideo import FreeVideoIE
 from .funimation import FunimationIE
 from .funnyordie import FunnyOrDieIE
+from .fusion import FusionIE
 from .gameinformer import GameInformerIE
 from .gamekings import GamekingsIE
 from .gameone import (
@@ -422,6 +428,7 @@ from .makerschannel import MakersChannelIE
 from .makertv import MakerTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
+from .meta import METAIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
@@ -522,6 +529,7 @@ from .nick import (
     NickDeIE,
 )
 from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
@@ -706,10 +714,12 @@ from .shahid import ShahidIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
+from .sixplay import SixPlayIE
 from .skynewsarabia import (
     SkyNewsArabiaIE,
     SkyNewsArabiaArticleIE,
 )
+from .skysports import SkySportsIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
 from .smotri import (
@@ -891,6 +901,7 @@ from .udn import UDNEmbedIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
+from .urplay import URPlayIE
 from .usatoday import USATodayIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .ustudio import (

View File

@@ -0,0 +1,35 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class FusionIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
'info_dict': {
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
'ext': 'mp4',
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
'duration': 140.0,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://fusion.net/video/201781',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
r'data-video-id=(["\'])(?P<code>.+?)\1',
webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code)

View File

@@ -65,6 +65,7 @@ from .threeqsdn import ThreeQSDNIE
 from .theplatform import ThePlatformIE
 from .vessel import VesselIE
 from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE


 class GenericIE(InfoExtractor):
@@ -1244,6 +1245,22 @@
                 'uploader': 'www.hudl.com',
             },
         },
+        # twitter:player embed
+        {
+            'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+            'md5': 'a3e0df96369831de324f0778e126653c',
+            'info_dict': {
+                'id': '4909620399001',
+                'ext': 'mp4',
+                'title': 'What Do Black Holes Sound Like?',
+                'description': 'what do black holes sound like',
+                'upload_date': '20160524',
+                'uploader_id': '29913724001',
+                'timestamp': 1464107587,
+                'uploader': 'TheAtlantic',
+            },
+            'add_ie': ['BrightcoveLegacy'],
+        }
     ]

     def report_following_redirect(self, new_url):
@@ -1932,10 +1949,9 @@
             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())

         # Look for Eagle.Platform embeds
-        mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'EaglePlatform')
+        eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+        if eagleplatform_url:
+            return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())

         # Look for ClipYou (uses Eagle.Platform) embeds
         mobj = re.search(
@@ -2081,6 +2097,11 @@
                 'uploader': video_uploader,
             }

+        # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
+        embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+        if embed_url:
+            return self.url_result(embed_url)
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True

View File

@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
@@ -23,34 +21,5 @@ class M6IE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
-                                 'Downloading video RSS')
-
-        title = rss.find('./channel/item/title').text
-        description = rss.find('./channel/item/description').text
-        thumbnail = rss.find('./channel/item/visuel_clip_big').text
-        duration = int(rss.find('./channel/item/duration').text)
-        view_count = int(rss.find('./channel/item/nombre_vues').text)
-
-        formats = []
-        for format_id in ['lq', 'sd', 'hq', 'hd']:
-            video_url = rss.find('./channel/item/url_video_%s' % format_id)
-            if video_url is None:
-                continue
-            formats.append({
-                'url': video_url.text,
-                'format_id': format_id,
-            })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'view_count': view_count,
-            'formats': formats,
-        }
+        video_id = self._match_id(url)
+        return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)

View File

@@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
unescapeHTML,
int_or_none,
ExtractorError,
)
class METAIE(InfoExtractor):
_VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.meta.ua/5502115.video',
'md5': '71b6f3ee274bef16f1ab410f7f56b476',
'info_dict': {
'id': '5502115',
'ext': 'mp4',
'title': 'Sony Xperia Z camera test [HQ]',
'description': 'Xperia Z shoots video in FullHD HDR.',
'uploader_id': 'nomobile',
'uploader': 'CHЁZA.TV',
'upload_date': '20130211',
},
'add_ie': ['Youtube'],
}, {
'url': 'http://video.meta.ua/iframe/5502115',
'only_matching': True,
}, {
# pladform embed
'url': 'http://video.meta.ua/7121015.video',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
st_html5 = self._search_regex(
r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
if st_html5:
# uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
json_str = ''
for i in range(0, len(st_html5), 3):
json_str += '&#x0%s;' % st_html5[i:i + 3]
uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
error = uppod_data.get('customnotfound')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
video_url = uppod_data['file']
info = {
'id': video_id,
'url': video_url,
'title': uppod_data.get('comment') or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
'duration': int_or_none(self._og_search_property(
'video:duration', webpage, default=None)),
}
if 'youtube.com/' in video_url:
info.update({
'_type': 'url_transparent',
'ie_key': 'Youtube',
})
return info
pladform_url = PladformIE._extract_url(webpage)
if pladform_url:
return self.url_result(pladform_url)
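
The decryption loop above is compact, so here is a small standalone sketch of what it does outside the extractor: each 3-hex-digit group in st_html5 becomes an HTML numeric character reference that unescapes to one character of a JSON string. The st_html5 value below is made up for illustration (it encodes {"file":"x"}), and Python 3's html.unescape and json stand in for youtube-dl's unescapeHTML and _parse_json.

    import html
    import json

    # Made-up st_html5 value: 3-hex-digit groups, one per character of the
    # JSON string {"file":"x"} ("07b" -> U+007B '{', "022" -> '"', ...)
    st_html5 = '07b02206606906c06502203a02207802207d'

    json_str = ''
    for i in range(0, len(st_html5), 3):
        # e.g. "07b" becomes "&#x007b;", which unescapes to "{"
        json_str += '&#x0%s;' % st_html5[i:i + 3]

    print(html.unescape(json_str))              # {"file":"x"}
    print(json.loads(html.unescape(json_str)))  # {'file': 'x'}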

View File

@@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
parse_duration,
ExtractorError
)
class NineCNineMediaIE(InfoExtractor):
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
def _real_extract(self, url):
destination_code, video_id = re.match(self._VALID_URL, url).groups()
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
content = self._download_json(api_base_url, video_id, query={
'$include': '[contentpackages]',
})
title = content['Name']
if len(content['ContentPackages']) > 1:
raise ExtractorError('multiple content packages')
content_package = content['ContentPackages'][0]
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
stacks = self._download_json(stacks_base_url, video_id)['Items']
if len(stacks) > 1:
raise ExtractorError('multiple stacks')
stack = stacks[0]
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
formats = []
formats.extend(self._extract_m3u8_formats(
stack_base_url + 'm3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', video_id,
f4m_id='hds', fatal=False))
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'duration': parse_duration(content.get('BroadcastTime')),
'formats': formats,
}
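
As a reading aid, the three API requests above nest as follows. The IDs are made-up placeholders; the URL templates are simply the ones used in the extractor code above.

    destination_code, video_id = 'ctv_web', '706966'  # made-up IDs
    content_package_id, stack_id = '1234', '5678'      # made-up IDs

    api_base_url = ('http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
                    % (destination_code, video_id))
    stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package_id
    stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack_id)

    print(stack_base_url + 'm3u8')  # HLS manifest
    print(stack_base_url + 'f4m')   # HDS manifest
    print(stack_base_url + 'pd')    # plain-text response containing a progressive MP4 URL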

View File

@@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor):
         title = user.get('display_name') or user.get('username')
         description = user.get('description')

+        broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
+                         data_store.get('BroadcastCache', {}).get('broadcastIds', []))
+
         entries = [
             self.url_result(
-                'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
-            for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
+                'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
+            for broadcast_id in broadcast_ids]

         return self.playlist_result(entries, user_id, title, description)

View File

@@ -49,7 +49,7 @@ class PladformIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
         if mobj:
             return mobj.group('url')

View File

@@ -25,7 +25,15 @@ from ..aes import (

 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
+    IE_DESC = 'PornHub and Thumbzilla'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
+                            (?:www\.)?thumbzilla\.com/video/
+                        )
+                        (?P<id>[0-9a-z]+)
+                    '''
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'md5': '1e19b41231a02eba417839222ac9d58e',
@@ -63,8 +71,20 @@ class PornHubIE(InfoExtractor):
         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
         'only_matching': True,
     }, {
+        # removed at the request of cam4.com
         'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
         'only_matching': True,
+    }, {
+        # removed at the request of the copyright owner
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+        'only_matching': True,
+    }, {
+        # removed by uploader
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+        'only_matching': True,
     }]

     @classmethod
@@ -87,8 +107,8 @@ class PornHubIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)

         error_msg = self._html_search_regex(
-            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
-            webpage, 'error message', default=None)
+            r'(?s)<div[^>]+class=(["\']).*?\bremoved\b.*?\1[^>]*>(?P<error>.+?)</div>',
+            webpage, 'error message', default=None, group='error')
         if error_msg:
             error_msg = re.sub(r'\s+', ' ', error_msg)
             raise ExtractorError(

View File

@@ -1,23 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
     parse_iso8601,
+    js_to_json,
 )
+from ..compat import compat_str


 class RDSIE(InfoExtractor):
     IE_DESC = 'RDS.ca'
-    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'

     _TESTS = [{
         'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
         'info_dict': {
-            'id': '3.1132799',
+            'id': '604333',
             'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
             'ext': 'mp4',
             'title': 'Fowler Jr. prend la direction de Jacksonville',
@@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
     }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)

         webpage = self._download_webpage(url, display_id)

-        # TODO: extract f4m from 9c9media.com
-        video_url = self._search_regex(
-            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
-            webpage, 'video url')
-
-        title = self._og_search_title(webpage) or self._html_search_meta(
+        item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+        video_id = compat_str(item['id'])
+        title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
             'title', webpage, 'title', fatal=True)
         description = self._og_search_description(webpage) or self._html_search_meta(
             'description', webpage, 'description')
-        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+        thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
            webpage, 'thumbnail', fatal=False)
@@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
         age_limit = self._family_friendly_search(webpage)

         return {
+            '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'url': video_url,
+            'url': '9c9media:rds_web:%s' % video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
             'age_limit': age_limit,
+            'ie_key': 'NineCNineMedia',
         }

View File

@@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
qualities,
int_or_none,
)
class SixPlayIE(InfoExtractor):
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
'md5': '42310bffe4ba3982db112b9cd3467328',
'info_dict': {
'id': '11495320',
'ext': 'mp4',
'title': 'Jamel et ses amis au Marrakech du rire 2015',
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
clip_data = self._download_json(
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
video_id)
video_data = clip_data['videoInfo']
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
for source in clip_data['sources']:
source_type, source_url = source.get('type'), source.get('src')
if not source_url or source_type == 'hls/primetime':
continue
if source_type == 'application/vnd.apple.mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
source_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
elif source_type == 'video/mp4':
quality = source.get('quality')
formats.append({
'url': source_url,
'format_id': quality,
'quality': quality_key(quality),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'].strip(),
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'series': video_data.get('titlePgm'),
'formats': formats,
}

View File

@@ -0,0 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class SkySportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
'md5': 'c44a1db29f27daf9a0003e010af82100',
'info_dict': {
'id': '10328419',
'ext': 'flv',
'title': 'Bale: Its our time to shine',
'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'ie_key': 'Ooyala',
}

View File

@@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor):
     _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'

     _API_BASE = 'https://api.twitch.tv'
-    _USHER_BASE = 'http://usher.twitch.tv'
+    _USHER_BASE = 'https://usher.ttvnw.net'
     _LOGIN_URL = 'http://www.twitch.tv/login'
     _NETRC_MACHINE = 'twitch'

View File

@@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._search_regex(
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
if file_rtmp:
formats.append({
'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
'format_id': quality + '-rtmp',
'ext': 'flv',
'preference': preference,
})
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
if file_http:
file_http_base_url = 'http://%s/%s' % (host, file_http)
formats.extend(self._extract_f4m_formats(
file_http_base_url + 'manifest.f4m', video_id,
preference, '%s-hds' % quality, fatal=False))
formats.extend(self._extract_m3u8_formats(
file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
'm3u8_native', preference, '%s-hls' % quality, fatal=False))
self._sort_formats(formats)
subtitles = {}
for subtitle in urplayer_data.get('subtitles', []):
subtitle_url = subtitle.get('file')
kind = subtitle.get('kind')
if subtitle_url or kind and kind != 'captions':
continue
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
'url': subtitle_url,
})
return {
'id': video_id,
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
'thumbnail': urplayer_data.get('image'),
'series': urplayer_data.get('series_title'),
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -90,10 +90,12 @@ class VineIE(InfoExtractor):
         data = self._parse_json(
             self._search_regex(
-                r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id,
+                r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
                 webpage, 'vine data'),
             video_id)

+        data = data[list(data.keys())[0]]
+
         formats = [{
             'format_id': '%(format)s-%(rate)s' % f,
             'vcodec': f.get('format'),

View File

@@ -25,7 +25,8 @@ class VRTIE(InfoExtractor):
                 'timestamp': 1414271750.949,
                 'upload_date': '20141025',
                 'duration': 929,
-            }
+            },
+            'skip': 'HTTP Error 404: Not Found',
         },
         # sporza.be
         {
@@ -39,7 +40,8 @@ class VRTIE(InfoExtractor):
                 'timestamp': 1413835980.560,
                 'upload_date': '20141020',
                 'duration': 3238,
-            }
+            },
+            'skip': 'HTTP Error 404: Not Found',
         },
         # cobra.be
         {
@@ -53,16 +55,39 @@ class VRTIE(InfoExtractor):
                 'timestamp': 1413967500.494,
                 'upload_date': '20141022',
                 'duration': 661,
-            }
+            },
+            'skip': 'HTTP Error 404: Not Found',
         },
         {
             # YouTube video
             'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
-            'only_matching': True,
+            'md5': 'b8b93da1df1cea6c8556255a796b7d61',
+            'info_dict': {
+                'id': 'Wji-BZ0oCwg',
+                'ext': 'mp4',
+                'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
+                'description': 'md5:8e468944dce15567a786a67f74262583',
+                'uploader': 'Star Wars',
+                'uploader_id': 'starwars',
+                'upload_date': '20160407',
+            },
+            'add_ie': ['Youtube'],
         },
         {
             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
-            'only_matching': True,
+            'md5': '',
+            'info_dict': {
+                'id': '2377055',
+                'ext': 'mp4',
+                'title': 'Cafe Derby',
+                'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
+                'upload_date': '20150626',
+                'timestamp': 1435305240.769,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            }
         }
     ]
@@ -98,6 +123,32 @@ class VRTIE(InfoExtractor):
                 formats.extend(self._extract_m3u8_formats(
                     src, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
+                formats.extend(self._extract_f4m_formats(
+                    src.replace('playlist.m3u8', 'manifest.f4m'),
+                    video_id, f4m_id='hds', fatal=False))
+                if 'data-video-geoblocking="true"' not in webpage:
+                    rtmp_formats = self._extract_smil_formats(
+                        src.replace('playlist.m3u8', 'jwplayer.smil'),
+                        video_id, fatal=False)
+                    formats.extend(rtmp_formats)
+                    for rtmp_format in rtmp_formats:
+                        rtmp_format_c = rtmp_format.copy()
+                        rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+                        del rtmp_format_c['play_path']
+                        del rtmp_format_c['ext']
+                        http_format = rtmp_format_c.copy()
+                        http_format.update({
+                            'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
+                            'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
+                            'protocol': 'http',
+                        })
+                        rtsp_format = rtmp_format_c.copy()
+                        rtsp_format.update({
+                            'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+                            'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+                            'protocol': 'rtsp',
+                        })
+                        formats.extend([http_format, rtsp_format])
             else:
                 formats.extend(self._extract_f4m_formats(
                     '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
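
To make the URL rewriting in the added RTMP handling easier to follow, here is a standalone sketch of the same replace chain; the host and play path are invented placeholders, not real VRT values.

    # Hypothetical RTMP format values as a SMIL parser might return them (made up):
    rtmp_url = 'rtmp://vod.example-cdn.be/_definst_'
    play_path = 'mp4:2016/07/some_clip.mp4'

    full_url = '%s/%s' % (rtmp_url, play_path)
    # rtmp://vod.example-cdn.be/_definst_/mp4:2016/07/some_clip.mp4

    http_url = (full_url.replace('rtmp://', 'http://')
                        .replace('vod.', 'download.')
                        .replace('/_definst_/', '/')
                        .replace('mp4:', ''))
    # http://download.example-cdn.be/2016/07/some_clip.mp4

    rtsp_url = full_url.replace('rtmp://', 'rtsp://')
    # rtsp://vod.example-cdn.be/_definst_/mp4:2016/07/some_clip.mp4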

View File

@@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None):
         except IOError:
             return default  # silently skip if file is not present
         try:
-            res = []
-            for l in optionf:
-                res += compat_shlex_split(l, comments=True)
+            res = compat_shlex_split(optionf.read(), comments=True)
         finally:
             optionf.close()
         return res
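
This is the change behind the "[options] Accept quoted string across multiple lines" commit above: the whole configuration file is now fed to compat_shlex_split in one call, so a quoted argument may span several lines. A quick illustration with the standard-library shlex (the configuration text here is made up):

    import shlex

    config_text = '-f "\nbestvideo+bestaudio/\nbest\n"\n# a comment\n--no-mtime\n'
    print(shlex.split(config_text, comments=True))
    # ['-f', '\nbestvideo+bestaudio/\nbest\n', '--no-mtime']

The added test in test/test_compat.py exercises the same behaviour with embedded newlines.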

View File

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.06.27'
+__version__ = '2016.07.02'