mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-03 02:50:01 -05:00
Compare commits
62 Commits
2014.01.27
...
2014.01.30
Author | SHA1 | Date | |
---|---|---|---|
![]() |
ed9cc2f1e0 | ||
![]() |
975fa541c2 | ||
![]() |
251974e44c | ||
![]() |
57b6288358 | ||
![]() |
c3f51436bf | ||
![]() |
0c708f11cb | ||
![]() |
fb2a706d11 | ||
![]() |
0b76600deb | ||
![]() |
245b612a36 | ||
![]() |
d882161d5a | ||
![]() |
d4a21e0b49 | ||
![]() |
26a78d4bbf | ||
![]() |
8db69786c2 | ||
![]() |
b11cec4162 | ||
![]() |
7eeb5bef24 | ||
![]() |
9d2032932c | ||
![]() |
6490306017 | ||
![]() |
ceb2b7d257 | ||
![]() |
459a53c2c2 | ||
![]() |
adc267eebf | ||
![]() |
ffe8f62d27 | ||
![]() |
ed85007039 | ||
![]() |
5aaca50d60 | ||
![]() |
869baf3565 | ||
![]() |
e299f6d27f | ||
![]() |
4a192f817e | ||
![]() |
bc1d1a5a71 | ||
![]() |
456895d9cf | ||
![]() |
218c15ab59 | ||
![]() |
17ab4d3b5e | ||
![]() |
31ef0ff038 | ||
![]() |
37e3b90d59 | ||
![]() |
00ff8f92a5 | ||
![]() |
4857beba3a | ||
![]() |
c1e60cc2bf | ||
![]() |
98669ed79c | ||
![]() |
a3978a6159 | ||
![]() |
e3a9f32f52 | ||
![]() |
87fac3238d | ||
![]() |
a2fb2a2134 | ||
![]() |
9e8ee54553 | ||
![]() |
117bec936c | ||
![]() |
1547c8cc88 | ||
![]() |
075911d48e | ||
![]() |
b21a918984 | ||
![]() |
f9b8549609 | ||
![]() |
e2ba07024f | ||
![]() |
9b05bd42e5 | ||
![]() |
b6d3a99678 | ||
![]() |
96d7b8873a | ||
![]() |
efc867775e | ||
![]() |
5ab772f09c | ||
![]() |
2a89386232 | ||
![]() |
4d9be98dbc | ||
![]() |
6737907826 | ||
![]() |
c060b77446 | ||
![]() |
7e8caf30c0 | ||
![]() |
ca3e054750 | ||
![]() |
1da1558f46 | ||
![]() |
53bfd6b24c | ||
![]() |
bacb5e4f44 | ||
![]() |
008af8660b |
@@ -120,5 +120,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_soundcloud_not_matching_sets(self):
|
||||
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
|
||||
|
||||
def test_tumblr(self):
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -33,6 +33,7 @@ from youtube_dl.extractor import (
|
||||
ImdbListIE,
|
||||
KhanAcademyIE,
|
||||
EveryonesMixtapeIE,
|
||||
RutubeChannelIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -195,11 +196,11 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_imdb_list(self):
|
||||
dl = FakeYDL()
|
||||
ie = ImdbListIE(dl)
|
||||
result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
|
||||
result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'sMjedvGDd8U')
|
||||
self.assertEqual(result['title'], 'Animated and Family Films')
|
||||
self.assertTrue(len(result['entries']) >= 48)
|
||||
self.assertEqual(result['id'], 'JFs9NWw6XI0')
|
||||
self.assertEqual(result['title'], 'March 23, 2012 Releases')
|
||||
self.assertEqual(len(result['entries']), 7)
|
||||
|
||||
def test_khanacademy_topic(self):
|
||||
dl = FakeYDL()
|
||||
@@ -219,6 +220,14 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], 'm7m0jJAbMQi')
|
||||
self.assertEqual(result['title'], 'Driving')
|
||||
self.assertEqual(len(result['entries']), 24)
|
||||
|
||||
def test_rutube_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = RutubeChannelIE(dl)
|
||||
result = ie.extract('http://rutube.ru/tags/video/1409')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '1409')
|
||||
self.assertTrue(len(result['entries']) >= 34)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -40,6 +40,7 @@ __authors__ = (
|
||||
'Michael Orlitzky',
|
||||
'Chris Gahan',
|
||||
'Saimadhav Heblikar',
|
||||
'Mike Col',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -27,6 +27,7 @@ from .cbs import CBSIE
|
||||
from .channel9 import Channel9IE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cmt import CMTIE
|
||||
from .cnn import CNNIE
|
||||
@@ -47,6 +48,7 @@ from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .dropbox import DropboxIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
@@ -115,6 +117,7 @@ from .lynda import (
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
@@ -158,7 +161,12 @@ from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rutube import RutubeIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
|
@@ -1,22 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
|
||||
_TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
|
||||
_MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
|
||||
u'file': u'14077640.mp4',
|
||||
u'md5': u'6ca8824255460c787376353f9e20bbd8',
|
||||
u'info_dict': {
|
||||
u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
|
||||
'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
|
||||
'file': '19288786.mp4',
|
||||
'md5': '515bf47ce209fb3f5a61b7aad364634c',
|
||||
'info_dict': {
|
||||
'title': 'Edward Snowden im Interview - Held oder Verräter?',
|
||||
'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
|
||||
'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
|
||||
},
|
||||
u'skip': u'Requires rtmpdump'
|
||||
'skip': 'Blocked outside of Germany',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -29,26 +35,49 @@ class ARDIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
# determine title and media streams from webpage
|
||||
html = self._download_webpage(url, video_id)
|
||||
title = re.search(self._TITLE, html).group('title')
|
||||
streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
streams = [
|
||||
mo.groupdict()
|
||||
for mo in re.finditer(
|
||||
r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
|
||||
if not streams:
|
||||
assert '"fsk"' in html
|
||||
raise ExtractorError(u'This video is only available after 8:00 pm')
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError('This video is only available after 20:00')
|
||||
|
||||
# choose default media type and highest quality for now
|
||||
stream = max([s for s in streams if int(s["media_type"]) == 0],
|
||||
key=lambda s: int(s["quality"]))
|
||||
formats = []
|
||||
for s in streams:
|
||||
format = {
|
||||
'quality': int(s['quality']),
|
||||
}
|
||||
if s.get('rtmp_url'):
|
||||
format['protocol'] = 'rtmp'
|
||||
format['url'] = s['rtmp_url']
|
||||
format['playpath'] = s['video_url']
|
||||
else:
|
||||
format['url'] = s['video_url']
|
||||
|
||||
# there's two possibilities: RTMP stream or HTTP download
|
||||
info = {'id': video_id, 'title': title, 'ext': 'mp4'}
|
||||
if stream['rtmp_url']:
|
||||
self.to_screen(u'RTMP download detected')
|
||||
assert stream['video_url'].startswith('mp4:')
|
||||
info["url"] = stream["rtmp_url"]
|
||||
info["play_path"] = stream['video_url']
|
||||
else:
|
||||
assert stream["video_url"].endswith('.mp4')
|
||||
info["url"] = stream["video_url"]
|
||||
return [info]
|
||||
quality_name = self._search_regex(
|
||||
r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
|
||||
'quality name', default='NA')
|
||||
format['format_id'] = '%s-%s-%s-%s' % (
|
||||
determine_ext(format['url']), quality_name, s['media_type'],
|
||||
s['quality'])
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -24,5 +24,5 @@ class BloombergIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
||||
ooyala_url = self._twitter_search_player(webpage)
|
||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
||||
|
@@ -23,7 +23,6 @@ from ..utils import (
|
||||
class BrightcoveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -70,7 +69,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@@ -131,6 +130,11 @@ class BrightcoveIE(InfoExtractor):
|
||||
"""Try to extract the brightcove url from the wepbage, returns None
|
||||
if it can't be found
|
||||
"""
|
||||
|
||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||
if url_m:
|
||||
return url_m.group(1)
|
||||
|
||||
m_brightcove = re.search(
|
||||
r'''(?sx)<object
|
||||
(?:
|
||||
@@ -183,8 +187,9 @@ class BrightcoveIE(InfoExtractor):
|
||||
return self._extract_video_info(video_info)
|
||||
|
||||
def _get_playlist_info(self, player_key):
|
||||
playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
|
||||
player_key, 'Downloading playlist information')
|
||||
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
|
||||
playlist_info = self._download_webpage(
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' not in json_data:
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
@@ -11,38 +11,38 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
The type of provided URL (video or playlist) is determined according to
|
||||
meta Search.PageType from web page HTML rather than URL itself, as it is
|
||||
not always possible to do.
|
||||
not always possible to do.
|
||||
'''
|
||||
IE_DESC = u'Channel 9'
|
||||
IE_NAME = u'channel9'
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||
u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
u'info_dict': {
|
||||
u'title': u'Developer Kick-Off Session: Stuff We Love',
|
||||
u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
u'duration': 4576,
|
||||
u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||
u'session_code': u'KOS002',
|
||||
u'session_day': u'Day 1',
|
||||
u'session_room': u'Arena 1A',
|
||||
u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||
u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
u'info_dict': {
|
||||
u'title': u'Self-service BI with Power BI - nuclear testing',
|
||||
u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
u'duration': 1540,
|
||||
u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
u'authors': [ u'Mike Wilmot' ],
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
'authors': [ 'Mike Wilmot' ],
|
||||
},
|
||||
}
|
||||
]
|
||||
@@ -60,7 +60,7 @@ class Channel9IE(InfoExtractor):
|
||||
return 0
|
||||
units = m.group('units')
|
||||
try:
|
||||
exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
|
||||
exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
|
||||
except ValueError:
|
||||
return 0
|
||||
size = float(m.group('size'))
|
||||
@@ -80,7 +80,7 @@ class Channel9IE(InfoExtractor):
|
||||
'url': x.group('url'),
|
||||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
'preference': self._known_formats.index(x.group('quality')),
|
||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
||||
@@ -91,10 +91,10 @@ class Channel9IE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
def _extract_title(self, html):
|
||||
title = self._html_search_meta(u'title', html, u'title')
|
||||
title = self._html_search_meta('title', html, 'title')
|
||||
if title is None:
|
||||
title = self._og_search_title(html)
|
||||
TITLE_SUFFIX = u' (Channel 9)'
|
||||
TITLE_SUFFIX = ' (Channel 9)'
|
||||
if title is not None and title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
return title
|
||||
@@ -110,7 +110,7 @@ class Channel9IE(InfoExtractor):
|
||||
m = re.search(DESCRIPTION_REGEX, html)
|
||||
if m is not None:
|
||||
return m.group('description')
|
||||
return self._html_search_meta(u'description', html, u'description')
|
||||
return self._html_search_meta('description', html, 'description')
|
||||
|
||||
def _extract_duration(self, html):
|
||||
m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
|
||||
@@ -172,7 +172,7 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
# Nothing to download
|
||||
if len(formats) == 0 and slides is None and zip_ is None:
|
||||
self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
|
||||
self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
|
||||
return
|
||||
|
||||
# Extract meta
|
||||
@@ -244,7 +244,7 @@ class Channel9IE(InfoExtractor):
|
||||
return contents
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
entries = [self.url_result(session_url.text, 'Channel9')
|
||||
for session_url in rss.findall('./channel/item/link')]
|
||||
title_text = rss.find('./channel/title').text
|
||||
@@ -254,11 +254,11 @@ class Channel9IE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
content_path = mobj.group('contentpath')
|
||||
|
||||
webpage = self._download_webpage(url, content_path, u'Downloading web page')
|
||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
||||
|
||||
page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
|
||||
if page_type_m is None:
|
||||
raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
|
||||
raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)
|
||||
|
||||
page_type = page_type_m.group('pagetype')
|
||||
if page_type == 'List': # List page, may contain list of 'item'-like objects
|
||||
@@ -268,4 +268,4 @@ class Channel9IE(InfoExtractor):
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
return self._extract_session(webpage, content_path)
|
||||
else:
|
||||
raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
|
||||
raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
|
59
youtube_dl/extractor/cliphunter.py
Normal file
59
youtube_dl/extractor/cliphunter.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
||||
}
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
IE_NAME = 'cliphunter'
|
||||
|
||||
_VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'file': '1012420.flv',
|
||||
'md5': '15e7740f30428abf70f4223478dc1225',
|
||||
'info_dict': {
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
pl_fiji = self._search_regex(
|
||||
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
|
||||
pl_c_qual = self._search_regex(
|
||||
r'pl_c_qual = "(.)"', webpage, 'video quality')
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': pl_c_qual,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections)
|
||||
/(?P<title>.*)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
@@ -86,7 +86,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
|
||||
if not m:
|
||||
raise ExtractorError('Cannot transform RTMP url')
|
||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||
|
@@ -465,7 +465,14 @@ class InfoExtractor(object):
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
def _twitter_search_player(self, html):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _sort_formats(self, formats):
|
||||
if not formats:
|
||||
raise ExtractorError(u'No video formats found')
|
||||
|
||||
def _formats_key(f):
|
||||
# TODO remove the following workaround
|
||||
from ..utils import determine_ext
|
||||
|
46
youtube_dl/extractor/discovery.py
Normal file
46
youtube_dl/extractor/discovery.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
_TEST = {
|
||||
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'file': '614784.mp4',
|
||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||
'info_dict': {
|
||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||
' back.'),
|
||||
'duration': 156,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
||||
webpage, 'video list', flags=re.DOTALL)
|
||||
video_list = json.loads(video_list_json)
|
||||
info = video_list['clips'][0]
|
||||
formats = []
|
||||
for f in info['mp4']:
|
||||
formats.append(
|
||||
{'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
|
||||
|
||||
return {
|
||||
'id': info['contentId'],
|
||||
'title': video_list['name'],
|
||||
'formats': formats,
|
||||
'description': info['videoCaption'],
|
||||
'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
|
||||
'duration': info['duration'],
|
||||
}
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -6,13 +8,16 @@ from .common import InfoExtractor
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
u'file': u'0732f586d7.mp4',
|
||||
u'md5': u'f647e9e90064b53b6e046e75d0241fbd',
|
||||
u'info_dict': {
|
||||
u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
|
||||
u"title": u"Heart-Shaped Box: Literal Video Version"
|
||||
}
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'file': '0732f586d7.mp4',
|
||||
'md5': 'f647e9e90064b53b6e046e75d0241fbd',
|
||||
'info_dict': {
|
||||
'description': ('Lyrics changed to match the video. Spoken cameo '
|
||||
'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a '
|
||||
'concept by Dustin McLean (DustFilms.com). Performed, edited, '
|
||||
'and written by David A. Scott.'),
|
||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -23,13 +28,12 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||
webpage, u'video URL', flags=re.DOTALL)
|
||||
webpage, 'video URL', flags=re.DOTALL)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
return [info]
|
||||
|
@@ -78,6 +78,18 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/issues/2253
|
||||
'url': 'http://bcove.me/i6nfkrc3',
|
||||
'file': '3101154703001.mp4',
|
||||
'md5': '0ba9446db037002366bab3b3eb30c88c',
|
||||
'info_dict': {
|
||||
'title': 'Still no power',
|
||||
'uploader': 'thestar.com',
|
||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||
},
|
||||
'add_ie': ['Brightcove'],
|
||||
},
|
||||
# Direct link to a video
|
||||
{
|
||||
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||
@@ -242,7 +254,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
|
||||
r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group(1))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
@@ -250,7 +262,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
|
||||
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'Vimeo')
|
||||
|
||||
@@ -320,7 +332,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group(1), 'Aparat')
|
||||
|
||||
# Look for MPORA videos
|
||||
mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
|
||||
mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Mpora')
|
||||
|
||||
@@ -338,7 +350,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded Huffington Post player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||
|
||||
|
@@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):
|
||||
list_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
list_code = self._search_regex(
|
||||
r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
|
||||
webpage, 'list code')
|
||||
entries = [
|
||||
self.url_result('http://www.imdb.com' + m, 'Imdb')
|
||||
for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]
|
||||
for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
|
||||
|
||||
list_title = self._html_search_regex(
|
||||
r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
|
||||
|
@@ -1,27 +1,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
|
||||
_TEST = {
|
||||
u"name": u"InfoQ",
|
||||
u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||
u"file": u"12-jan-pythonthings.mp4",
|
||||
u"info_dict": {
|
||||
u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
|
||||
u"title": u"A Few of My Favorite [Python] Things"
|
||||
"name": "InfoQ",
|
||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||
"file": "12-jan-pythonthings.mp4",
|
||||
"info_dict": {
|
||||
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
|
||||
"title": "A Few of My Favorite [Python] Things",
|
||||
},
|
||||
"params": {
|
||||
"skip_download": True,
|
||||
},
|
||||
u"params": {
|
||||
u"skip_download": True
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -31,32 +31,25 @@ class InfoQIE(InfoExtractor):
|
||||
self.report_extraction(url)
|
||||
|
||||
# Extract video URL
|
||||
mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
|
||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
||||
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
|
||||
|
||||
# Extract title
|
||||
video_title = self._search_regex(r'contentTitle = "(.*?)";',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
# Extract description
|
||||
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
|
||||
webpage, u'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
video_filename = video_url.split('/')[-1]
|
||||
video_id, extension = video_filename.split('.')
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
||||
'thumbnail': None,
|
||||
'description': video_description,
|
||||
}
|
||||
|
||||
return [info]
|
@@ -1,4 +1,5 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
@@ -11,38 +12,38 @@ from ..utils import (
|
||||
|
||||
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = u'ivi.ru'
|
||||
IE_NAME = u'ivi'
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
{
|
||||
u'url': u'http://www.ivi.ru/watch/53141',
|
||||
u'file': u'53141.mp4',
|
||||
u'md5': u'6ff5be2254e796ed346251d117196cf4',
|
||||
u'info_dict': {
|
||||
u'title': u'Иван Васильевич меняет профессию',
|
||||
u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
|
||||
u'duration': 5498,
|
||||
u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
|
||||
'url': 'http://www.ivi.ru/watch/53141',
|
||||
'file': '53141.mp4',
|
||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||
'info_dict': {
|
||||
'title': 'Иван Васильевич меняет профессию',
|
||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||
'duration': 5498,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
|
||||
},
|
||||
u'skip': u'Only works from Russia',
|
||||
'skip': 'Only works from Russia',
|
||||
},
|
||||
# Serial's serie
|
||||
{
|
||||
u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
u'file': u'74791.mp4',
|
||||
u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
u'info_dict': {
|
||||
u'title': u'Дежурный ангел - 1 серия',
|
||||
u'duration': 2490,
|
||||
u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'file': '74791.mp4',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'info_dict': {
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
},
|
||||
u'skip': u'Only works from Russia',
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
# Sorted by quality
|
||||
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
|
||||
|
||||
@@ -54,7 +55,7 @@ class IviIE(InfoExtractor):
|
||||
return m.group('description') if m is not None else None
|
||||
|
||||
def _extract_comment_count(self, html):
|
||||
m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
|
||||
m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -63,49 +64,49 @@ class IviIE(InfoExtractor):
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {u'method': u'da.content.get',
|
||||
u'params': [video_id, {u'site': u's183',
|
||||
u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
|
||||
u'contentid': video_id
|
||||
}
|
||||
]
|
||||
data = {'method': 'da.content.get',
|
||||
'params': [video_id, {'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
|
||||
video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if u'error' in video_json:
|
||||
error = video_json[u'error']
|
||||
if error[u'origin'] == u'NoRedisValidData':
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
|
||||
if 'error' in video_json:
|
||||
error = video_json['error']
|
||||
if error['origin'] == 'NoRedisValidData':
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
|
||||
|
||||
result = video_json[u'result']
|
||||
result = video_json['result']
|
||||
|
||||
formats = [{
|
||||
'url': x[u'url'],
|
||||
'format_id': x[u'content_format'],
|
||||
'preference': self._known_formats.index(x[u'content_format']),
|
||||
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
|
||||
'url': x['url'],
|
||||
'format_id': x['content_format'],
|
||||
'preference': self._known_formats.index(x['content_format']),
|
||||
} for x in result['files'] if x['content_format'] in self._known_formats]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError(u'No media links available for %s' % video_id)
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
duration = result[u'duration']
|
||||
compilation = result[u'compilation']
|
||||
title = result[u'title']
|
||||
duration = result['duration']
|
||||
compilation = result['compilation']
|
||||
title = result['title']
|
||||
|
||||
title = '%s - %s' % (compilation, title) if compilation is not None else title
|
||||
|
||||
previews = result[u'preview']
|
||||
previews = result['preview']
|
||||
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
|
||||
thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
|
||||
thumbnail = previews[-1]['url'] if len(previews) > 0 else None
|
||||
|
||||
video_page = self._download_webpage(url, video_id, u'Downloading video page')
|
||||
video_page = self._download_webpage(url, video_id, 'Downloading video page')
|
||||
description = self._extract_description(video_page)
|
||||
comment_count = self._extract_comment_count(video_page)
|
||||
|
||||
@@ -121,8 +122,8 @@ class IviIE(InfoExtractor):
|
||||
|
||||
|
||||
class IviCompilationIE(InfoExtractor):
|
||||
IE_DESC = u'ivi.ru compilations'
|
||||
IE_NAME = u'ivi:compilation'
|
||||
IE_DESC = 'ivi.ru compilations'
|
||||
IE_NAME = 'ivi:compilation'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
@@ -135,22 +136,23 @@ class IviCompilationIE(InfoExtractor):
|
||||
season_id = mobj.group('seasonid')
|
||||
|
||||
if season_id is not None: # Season link
|
||||
season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
|
||||
season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
|
||||
playlist_id = '%s/season%s' % (compilation_id, season_id)
|
||||
playlist_title = self._html_search_meta(u'title', season_page, u'title')
|
||||
playlist_title = self._html_search_meta('title', season_page, 'title')
|
||||
entries = self._extract_entries(season_page, compilation_id)
|
||||
else: # Compilation link
|
||||
compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
|
||||
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
|
||||
playlist_id = compilation_id
|
||||
playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
|
||||
playlist_title = self._html_search_meta('title', compilation_page, 'title')
|
||||
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
|
||||
if len(seasons) == 0: # No seasons in this compilation
|
||||
entries = self._extract_entries(compilation_page, compilation_id)
|
||||
else:
|
||||
entries = []
|
||||
for season_id in seasons:
|
||||
season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
|
||||
compilation_id, u'Downloading season %s web page' % season_id)
|
||||
season_page = self._download_webpage(
|
||||
'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
|
||||
compilation_id, 'Downloading season %s web page' % season_id)
|
||||
entries.extend(self._extract_entries(season_page, compilation_id))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -5,36 +7,34 @@ from .common import InfoExtractor
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
IE_NAME = u'keek'
|
||||
IE_NAME = 'keek'
|
||||
_TEST = {
|
||||
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'file': u'NODfbab.mp4',
|
||||
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
u'info_dict': {
|
||||
u"uploader": u"ytdl",
|
||||
u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
|
||||
}
|
||||
'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
'file': 'NODfbab.mp4',
|
||||
'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
'info_dict': {
|
||||
'uploader': 'ytdl',
|
||||
'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('videoID')
|
||||
|
||||
video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
|
||||
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||
video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
|
||||
thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._og_search_title(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
||||
webpage, u'uploader', fatal=False)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader
|
||||
}
|
||||
return [info]
|
||||
|
@@ -10,7 +10,13 @@ from ..utils import (
|
||||
|
||||
class LA7IE(InfoExtractor):
|
||||
IE_NAME = 'la7.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?la7\.tv/richplayer/\?assetid=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?la7\.tv/
|
||||
(?:
|
||||
richplayer/\?assetid=|
|
||||
\?contentId=
|
||||
)
|
||||
(?P<id>[0-9]+)'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
|
||||
@@ -20,7 +26,8 @@ class LA7IE(InfoExtractor):
|
||||
'title': 'IL DIVO',
|
||||
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
|
||||
'duration': 6254,
|
||||
}
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,3 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -7,46 +10,43 @@ from ..utils import (
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'liveleak'
|
||||
_TEST = {
|
||||
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
|
||||
u'file': u'757_1364311680.mp4',
|
||||
u'md5': u'0813c2430bea7a46bf13acf3406992f4',
|
||||
u'info_dict': {
|
||||
u"description": u"extremely bad day for this guy..!",
|
||||
u"uploader": u"ljfriel2",
|
||||
u"title": u"Most unlucky car accident"
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'file': '757_1364311680.mp4',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'info_dict': {
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
sources_raw = self._search_regex(
|
||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
|
||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
||||
sources = json.loads(sources_json)
|
||||
|
||||
video_url = self._search_regex(r'file: "(.*?)",',
|
||||
webpage, u'video URL')
|
||||
formats = [{
|
||||
'format_note': s.get('label'),
|
||||
'url': s['file'],
|
||||
} for s in sources]
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
|
||||
webpage, u'uploader', fatal=False)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader
|
||||
'uploader': video_uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
return [info]
|
||||
|
58
youtube_dl/extractor/malemotion.py
Normal file
58
youtube_dl/extractor/malemotion.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class MalemotionIE(InfoExtractor):
    """Information extractor for video pages on malemotion.com.

    URLs have the form ``malemotion.com/video/<slug>.<id>``; the part after
    the dot is used as the video id.
    """

    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
    _TEST = {
        'url': 'http://malemotion.com/video/bien-dur.10ew',
        'file': '10ew.mp4',
        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
        'info_dict': {
            "title": "Bien dur",
            "age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page and build the info dictionary.

        The media URL is read from the ``<source type="video/mp4">`` tag and
        percent-decoded; the title comes from the ``<title>`` element and the
        thumbnail from the ``poster`` attribute of the ``<video>`` tag.

        Returns a dict with a single mp4 entry in ``formats``. A missing
        thumbnail is tolerated (``fatal=False``); a missing media URL or
        title is not.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group("id")

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # The src attribute is percent-encoded in the page markup, so it is
        # unquoted before being handed to the downloader.
        video_url = compat_urllib_parse.unquote(
            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))

        video_title = self._html_search_regex(
            r'<title>(.*?)</title', webpage, 'title')

        # Optional: extraction continues without a thumbnail.
        video_thumbnail = self._search_regex(
            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'format_id': 'mp4',
            'preference': 1,
        }]

        return {
            'id': video_id,
            'formats': formats,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': None,
            'age_limit': 18,
        }
|
@@ -1,3 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,13 +12,13 @@ from ..utils import (
|
||||
class MySpassIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.myspass\.de/.*'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
u'file': u'11741.mp4',
|
||||
u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
|
||||
u'info_dict': {
|
||||
u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
|
||||
u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
|
||||
}
|
||||
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
'file': '11741.mp4',
|
||||
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
||||
'info_dict': {
|
||||
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
|
||||
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -37,12 +38,11 @@ class MySpassIE(InfoExtractor):
|
||||
# extract values from metadata
|
||||
url_flv_el = metadata.find('url_flv')
|
||||
if url_flv_el is None:
|
||||
raise ExtractorError(u'Unable to extract download url')
|
||||
raise ExtractorError('Unable to extract download url')
|
||||
video_url = url_flv_el.text
|
||||
extension = os.path.splitext(video_url)[1][1:]
|
||||
title_el = metadata.find('title')
|
||||
if title_el is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
raise ExtractorError('Unable to extract title')
|
||||
title = title_el.text
|
||||
format_id_el = metadata.find('format_id')
|
||||
if format_id_el is None:
|
||||
@@ -59,13 +59,12 @@ class MySpassIE(InfoExtractor):
|
||||
thumbnail = imagePreview_el.text
|
||||
else:
|
||||
thumbnail = None
|
||||
info = {
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description
|
||||
'description': description,
|
||||
}
|
||||
return [info]
|
||||
|
@@ -1,48 +1,39 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
u'file': u'0021200253-okc-bkn-recap.nba.mp4',
|
||||
u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
u'info_dict': {
|
||||
u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
|
||||
u"title": u"Thunder vs. Nets"
|
||||
}
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'file': u'0021200253-okc-bkn-recap.nba.mp4',
|
||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'title': 'Thunder vs. Nets',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
|
||||
shortened_video_id = video_id.rpartition('/')[2]
|
||||
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||
|
||||
# It isn't there in the HTML it returns to us
|
||||
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
|
||||
|
||||
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
# 'uploader_date': uploader_date,
|
||||
'description': description,
|
||||
}
|
||||
return [info]
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -9,13 +11,13 @@ class NineGagIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
u"url": u"http://9gag.tv/v/1912",
|
||||
u"file": u"1912.mp4",
|
||||
u"info_dict": {
|
||||
u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||
u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
|
||||
"url": "http://9gag.tv/v/1912",
|
||||
"file": "1912.mp4",
|
||||
"info_dict": {
|
||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
|
||||
},
|
||||
u'add_ie': [u'Youtube']
|
||||
'add_ie': ['Youtube']
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -25,7 +27,7 @@ class NineGagIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._html_search_regex(r'''(?x)
|
||||
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
|
||||
data-video-meta="([^"]+)"''', webpage, u'video metadata')
|
||||
data-video-meta="([^"]+)"''', webpage, 'video metadata')
|
||||
|
||||
data = json.loads(data_json)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
class OoyalaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
|
||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -7,12 +9,12 @@ from ..utils import compat_urllib_parse
|
||||
class PornHdIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||
u'file': u'1962.flv',
|
||||
u'md5': u'35272469887dca97abd30abecc6cdf75',
|
||||
u'info_dict': {
|
||||
u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||
'file': '1962.flv',
|
||||
'md5': '35272469887dca97abd30abecc6cdf75',
|
||||
'info_dict': {
|
||||
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,9 +26,13 @@ class PornHdIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'&hd=(http.+?)&', webpage, u'video URL')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
next_url = self._html_search_regex(
|
||||
r'&hd=(http.+?)&', webpage, 'video URL')
|
||||
next_url = compat_urllib_parse.unquote(next_url)
|
||||
|
||||
video_url = self._download_webpage(
|
||||
next_url, video_id, note='Retrieving video URL',
|
||||
errnote='Could not retrieve video URL')
|
||||
age_limit = 18
|
||||
|
||||
return {
|
||||
|
@@ -1,3 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -12,16 +15,16 @@ from ..utils import (
|
||||
class RBMARadioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||
u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
||||
u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||
u'info_dict': {
|
||||
u"uploader_id": u"ford-lopatin",
|
||||
u"location": u"Spain",
|
||||
u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||
u"uploader": u"Ford & Lopatin",
|
||||
u"title": u"Live at Primavera Sound 2011"
|
||||
}
|
||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||
'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||
'info_dict': {
|
||||
"uploader_id": "ford-lopatin",
|
||||
"location": "Spain",
|
||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||
"uploader": "Ford & Lopatin",
|
||||
"title": "Live at Primavera Sound 2011",
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -31,26 +34,24 @@ class RBMARadioIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
|
||||
webpage, u'json data', flags=re.MULTILINE)
|
||||
webpage, 'json data', flags=re.MULTILINE)
|
||||
|
||||
try:
|
||||
data = json.loads(json_data)
|
||||
except ValueError as e:
|
||||
raise ExtractorError(u'Invalid JSON: ' + str(e))
|
||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||
|
||||
video_url = data['akamai_url'] + '&cbr=256'
|
||||
url_parts = compat_urllib_parse_urlparse(video_url)
|
||||
video_ext = url_parts.path.rpartition('.')[2]
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
'title': data['title'],
|
||||
'description': data.get('teaser_text'),
|
||||
'location': data.get('country_of_origin'),
|
||||
'uploader': data.get('host', {}).get('name'),
|
||||
'uploader_id': data.get('host', {}).get('slug'),
|
||||
'thumbnail': data.get('image', {}).get('large_url_2x'),
|
||||
'duration': data.get('duration'),
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': data['title'],
|
||||
'description': data.get('teaser_text'),
|
||||
'location': data.get('country_of_origin'),
|
||||
'uploader': data.get('host', {}).get('name'),
|
||||
'uploader_id': data.get('host', {}).get('slug'),
|
||||
'thumbnail': data.get('image', {}).get('large_url_2x'),
|
||||
'duration': data.get('duration'),
|
||||
}
|
||||
return [info]
|
||||
|
@@ -1,58 +1,124 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RutubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
|
||||
IE_NAME = 'rutube'
|
||||
IE_DESC = 'Rutube videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Раненный кенгуру забежал в аптеку',
|
||||
u'uploader': u'NTDRussian',
|
||||
u'uploader_id': u'29790',
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
|
||||
'info_dict': {
|
||||
'title': 'Раненный кенгуру забежал в аптеку',
|
||||
'description': 'http://www.ntdtv.ru ',
|
||||
'duration': 80,
|
||||
'uploader': 'NTDRussian',
|
||||
'uploader_id': '29790',
|
||||
'upload_date': '20131016',
|
||||
},
|
||||
u'params': {
|
||||
'params': {
|
||||
# It requires ffmpeg (m3u8 download)
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _get_api_response(self, short_id, subpath):
|
||||
api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
|
||||
response_json = self._download_webpage(api_url, short_id,
|
||||
u'Downloading %s json' % subpath)
|
||||
return json.loads(response_json)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
long_id = mobj.group('long_id')
|
||||
webpage = self._download_webpage(url, long_id)
|
||||
og_video = self._og_search_video_url(webpage)
|
||||
short_id = compat_urlparse.urlparse(og_video).path[1:]
|
||||
options = self._get_api_response(short_id, 'options')
|
||||
trackinfo = self._get_api_response(short_id, 'trackinfo')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
video = json.loads(api_response)
|
||||
|
||||
api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading trackinfo JSON')
|
||||
trackinfo = json.loads(api_response)
|
||||
|
||||
# Some videos don't have the author field
|
||||
author = trackinfo.get('author') or {}
|
||||
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
||||
if m3u8_url is None:
|
||||
raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
|
||||
raise ExtractorError('Couldn\'t find m3u8 manifest url')
|
||||
|
||||
return {
|
||||
'id': trackinfo['id'],
|
||||
'title': trackinfo['title'],
|
||||
'id': video['id'],
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'view_count': video['hits'],
|
||||
'url': m3u8_url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': options['thumbnail_url'],
|
||||
'thumbnail': video['thumbnail_url'],
|
||||
'uploader': author.get('name'),
|
||||
'uploader_id': compat_str(author['id']) if author else None,
|
||||
'upload_date': unified_strdate(video['created_ts']),
|
||||
'age_limit': 18 if video['is_adult'] else 0,
|
||||
}
|
||||
|
||||
|
||||
class RutubeChannelIE(InfoExtractor):
    """Playlist extractor for Rutube channel (tag) pages.

    Subclasses reuse ``_extract_videos`` and only override ``_VALID_URL`` and
    ``_PAGE_TEMPLATE`` (and ``_real_extract`` when a title must be fetched).
    """

    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    # Accept both schemes, in line with RutubeIE's `https?://` pattern.
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'

    # Filled with (channel id, 1-based page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

    def _extract_videos(self, channel_id, channel_title=None):
        """Collect every video of a listing into a playlist result.

        Pages are requested through ``_PAGE_TEMPLATE`` starting at page 1.
        Paging stops when a page has an empty ``results`` list or reports a
        falsy ``has_next``.

        channel_id    -- id substituted into ``_PAGE_TEMPLATE``; also used as
                         the playlist id
        channel_title -- optional playlist title
        """
        entries = []
        for pagenum in itertools.count(1):
            api_response = self._download_webpage(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
            # Each entry is delegated to the 'Rutube' extractor.
            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
            if not page['has_next']:
                break
        return self.playlist_result(entries, channel_id, channel_title)

    def _real_extract(self, url):
        """Extract the channel id from the URL and return its playlist."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        return self._extract_videos(channel_id)
|
||||
|
||||
|
||||
class RutubeMovieIE(RutubeChannelIE):
    """Playlist extractor for rutube.ru ``/metainfo/tv/<id>`` pages.

    Paging is inherited from ``RutubeChannelIE``; this class adds one extra
    request that reads the movie's 'name' for use as the playlist title.
    """
    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'

    # Metadata endpoint for the movie itself; formatted with the movie id.
    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    # Video listing endpoint; formatted with (movie id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
        """Fetch the movie's name, then return the playlist of its videos."""
        movie_id = re.match(self._VALID_URL, url).group('id')
        movie_info = json.loads(self._download_webpage(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON'))
        # The 'name' field is passed on as the playlist title.
        return self._extract_videos(movie_id, movie_info['name'])
|
||||
|
||||
|
||||
class RutubePersonIE(RutubeChannelIE):
    """Playlist extractor for rutube.ru ``/video/person/<id>`` pages.

    All extraction logic is inherited from ``RutubeChannelIE``; only the URL
    pattern and the listing endpoint differ.
    """
    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'

    # Video listing endpoint; formatted with (person id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
|
@@ -1,4 +1,5 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
@@ -16,76 +17,76 @@ from ..utils import (
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = u'Smotri.com'
|
||||
IE_NAME = u'smotri'
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
u'url': u'http://smotri.com/video/view/?id=v261036632ab',
|
||||
u'file': u'v261036632ab.mp4',
|
||||
u'md5': u'2a7b08249e6f5636557579c368040eb9',
|
||||
u'info_dict': {
|
||||
u'title': u'катастрофа с камер видеонаблюдения',
|
||||
u'uploader': u'rbc2008',
|
||||
u'uploader_id': u'rbc08',
|
||||
u'upload_date': u'20131118',
|
||||
u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
|
||||
u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'file': 'v261036632ab.mp4',
|
||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
||||
'info_dict': {
|
||||
'title': 'катастрофа с камер видеонаблюдения',
|
||||
'uploader': 'rbc2008',
|
||||
'uploader_id': 'rbc08',
|
||||
'upload_date': '20131118',
|
||||
'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
|
||||
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
|
||||
},
|
||||
},
|
||||
# real video id 57591
|
||||
{
|
||||
u'url': u'http://smotri.com/video/view/?id=v57591cb20',
|
||||
u'file': u'v57591cb20.flv',
|
||||
u'md5': u'830266dfc21f077eac5afd1883091bcd',
|
||||
u'info_dict': {
|
||||
u'title': u'test',
|
||||
u'uploader': u'Support Photofile@photofile',
|
||||
u'uploader_id': u'support-photofile',
|
||||
u'upload_date': u'20070704',
|
||||
u'description': u'test, видео test',
|
||||
u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
||||
'file': 'v57591cb20.flv',
|
||||
'md5': '830266dfc21f077eac5afd1883091bcd',
|
||||
'info_dict': {
|
||||
'title': 'test',
|
||||
'uploader': 'Support Photofile@photofile',
|
||||
'uploader_id': 'support-photofile',
|
||||
'upload_date': '20070704',
|
||||
'description': 'test, видео test',
|
||||
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
},
|
||||
},
|
||||
# video-password
|
||||
{
|
||||
u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
u'file': u'v1390466a13c.mp4',
|
||||
u'md5': u'f6331cef33cad65a0815ee482a54440b',
|
||||
u'info_dict': {
|
||||
u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
u'uploader': u'timoxa40',
|
||||
u'uploader_id': u'timoxa40',
|
||||
u'upload_date': u'20100404',
|
||||
u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
|
||||
u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
'file': 'v1390466a13c.mp4',
|
||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||
'info_dict': {
|
||||
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
'uploader': 'timoxa40',
|
||||
'uploader_id': 'timoxa40',
|
||||
'upload_date': '20100404',
|
||||
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
|
||||
'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
},
|
||||
u'params': {
|
||||
u'videopassword': u'qwerty',
|
||||
'params': {
|
||||
'videopassword': 'qwerty',
|
||||
},
|
||||
},
|
||||
# age limit + video-password
|
||||
{
|
||||
u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
u'file': u'v15408898bcf.flv',
|
||||
u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
|
||||
u'info_dict': {
|
||||
u'title': u'этот ролик не покажут по ТВ',
|
||||
u'uploader': u'zzxxx',
|
||||
u'uploader_id': u'ueggb',
|
||||
u'upload_date': u'20101001',
|
||||
u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
|
||||
u'age_limit': 18,
|
||||
u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
|
||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
'file': 'v15408898bcf.flv',
|
||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||
'info_dict': {
|
||||
'title': 'этот ролик не покажут по ТВ',
|
||||
'uploader': 'zzxxx',
|
||||
'uploader_id': 'ueggb',
|
||||
'upload_date': '20101001',
|
||||
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
|
||||
'age_limit': 18,
|
||||
'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
|
||||
},
|
||||
u'params': {
|
||||
u'videopassword': u'333'
|
||||
'params': {
|
||||
'videopassword': '333'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
_SUCCESS = 0
|
||||
_PASSWORD_NOT_VERIFIED = 1
|
||||
_PASSWORD_DETECTED = 2
|
||||
@@ -106,71 +107,71 @@ class SmotriIE(InfoExtractor):
|
||||
|
||||
# Download video JSON data
|
||||
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
|
||||
status = video_json['status']
|
||||
if status == self._VIDEO_NOT_FOUND:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
|
||||
# video-password set
|
||||
video_password = self._downloader.params.get('videopassword', None)
|
||||
if not video_password:
|
||||
raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
|
||||
video_json = json.loads(video_json_page)
|
||||
status = video_json['status']
|
||||
if status == self._PASSWORD_NOT_VERIFIED:
|
||||
raise ExtractorError(u'Video password is invalid', expected=True)
|
||||
|
||||
raise ExtractorError('Video password is invalid', expected=True)
|
||||
|
||||
if status != self._SUCCESS:
|
||||
raise ExtractorError(u'Unexpected status value %s' % status)
|
||||
|
||||
raise ExtractorError('Unexpected status value %s' % status)
|
||||
|
||||
# Extract the URL of the video
|
||||
video_url = video_json['file_data']
|
||||
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
video_page_url = 'http://' + mobj.group('url')
|
||||
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
|
||||
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', video_page,
|
||||
u'warning message', default=None)
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
u'Video %s may not be available; smotri said: %s ' %
|
||||
'Video %s may not be available; smotri said: %s ' %
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if re.search(u'EroConfirmText">', video_page) is not None:
|
||||
if re.search('EroConfirmText">', video_page) is not None:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
|
||||
video_page, u'confirm string')
|
||||
video_page, 'confirm string')
|
||||
confirm_url = video_page_url + '&confirm=%s' % confirm_string
|
||||
video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
|
||||
video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
|
||||
# Extract the rest of meta data
|
||||
video_title = self._search_meta(u'name', video_page, u'title')
|
||||
video_title = self._search_meta('name', video_page, 'title')
|
||||
if not video_title:
|
||||
video_title = os.path.splitext(url_basename(video_url))[0]
|
||||
|
||||
video_description = self._search_meta(u'description', video_page)
|
||||
END_TEXT = u' на сайте Smotri.com'
|
||||
video_description = self._search_meta('description', video_page)
|
||||
END_TEXT = ' на сайте Smotri.com'
|
||||
if video_description and video_description.endswith(END_TEXT):
|
||||
video_description = video_description[:-len(END_TEXT)]
|
||||
START_TEXT = u'Смотреть онлайн ролик '
|
||||
START_TEXT = 'Смотреть онлайн ролик '
|
||||
if video_description and video_description.startswith(START_TEXT):
|
||||
video_description = video_description[len(START_TEXT):]
|
||||
video_thumbnail = self._search_meta(u'thumbnail', video_page)
|
||||
video_thumbnail = self._search_meta('thumbnail', video_page)
|
||||
|
||||
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
|
||||
upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
|
||||
if upload_date_str:
|
||||
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
|
||||
video_upload_date = (
|
||||
@@ -183,8 +184,8 @@ class SmotriIE(InfoExtractor):
|
||||
)
|
||||
else:
|
||||
video_upload_date = None
|
||||
|
||||
duration_str = self._search_meta(u'duration', video_page)
|
||||
|
||||
duration_str = self._search_meta('duration', video_page)
|
||||
if duration_str:
|
||||
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
|
||||
video_duration = (
|
||||
@@ -197,19 +198,19 @@ class SmotriIE(InfoExtractor):
|
||||
)
|
||||
else:
|
||||
video_duration = None
|
||||
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
|
||||
video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
|
||||
video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
video_uploader_id = self._html_search_regex(
|
||||
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
|
||||
video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
|
||||
video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
video_view_count = self._html_search_regex(
|
||||
u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
||||
video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
||||
video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
@@ -227,8 +228,8 @@ class SmotriIE(InfoExtractor):
|
||||
|
||||
|
||||
class SmotriCommunityIE(InfoExtractor):
|
||||
IE_DESC = u'Smotri.com community videos'
|
||||
IE_NAME = u'smotri:community'
|
||||
IE_DESC = 'Smotri.com community videos'
|
||||
IE_NAME = 'smotri:community'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -236,21 +237,21 @@ class SmotriCommunityIE(InfoExtractor):
|
||||
community_id = mobj.group('communityid')
|
||||
|
||||
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
|
||||
rss = self._download_xml(url, community_id, u'Downloading community RSS')
|
||||
rss = self._download_xml(url, community_id, 'Downloading community RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
community_title = self._html_search_regex(
|
||||
u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
|
||||
'^Видео сообщества "([^"]+)"$', description_text, 'community title')
|
||||
|
||||
return self.playlist_result(entries, community_id, community_title)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
|
||||
IE_DESC = u'Smotri.com user videos'
|
||||
IE_NAME = u'smotri:user'
|
||||
IE_DESC = 'Smotri.com user videos'
|
||||
IE_NAME = 'smotri:user'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -258,22 +259,22 @@ class SmotriUserIE(InfoExtractor):
|
||||
user_id = mobj.group('userid')
|
||||
|
||||
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
|
||||
rss = self._download_xml(url, user_id, u'Downloading user RSS')
|
||||
rss = self._download_xml(url, user_id, 'Downloading user RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
user_nickname = self._html_search_regex(
|
||||
u'^Видео режиссера (.*)$', description_text,
|
||||
u'user nickname')
|
||||
'^Видео режиссера (.*)$', description_text,
|
||||
'user nickname')
|
||||
|
||||
return self.playlist_result(entries, user_id, user_nickname)
|
||||
|
||||
|
||||
class SmotriBroadcastIE(InfoExtractor):
|
||||
IE_DESC = u'Smotri.com broadcasts'
|
||||
IE_NAME = u'smotri:broadcast'
|
||||
IE_DESC = 'Smotri.com broadcasts'
|
||||
IE_NAME = 'smotri:broadcast'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -281,46 +282,40 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
broadcast_id = mobj.group('broadcastid')
|
||||
|
||||
broadcast_url = 'http://' + mobj.group('url')
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page')
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||
|
||||
if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
|
||||
raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True)
|
||||
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
|
||||
raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
|
||||
|
||||
# Adult content
|
||||
if re.search(u'EroConfirmText">', broadcast_page) is not None:
|
||||
if re.search('EroConfirmText">', broadcast_page) is not None:
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(u'Erotic broadcasts allowed only for registered users, '
|
||||
u'use --username and --password options to provide account credentials.', expected=True)
|
||||
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
|
||||
'use --username and --password options to provide account credentials.', expected=True)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
u'login-hint53': '1',
|
||||
u'confirm_erotic': '1',
|
||||
u'login': username,
|
||||
u'password': password,
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
'confirm_erotic': '1',
|
||||
'login': username,
|
||||
'password': password,
|
||||
}
|
||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||
# chokes on unicode
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||
login_url = broadcast_url + '/?no_redirect=1'
|
||||
request = compat_urllib_request.Request(login_url, login_data)
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
broadcast_page = self._download_webpage(
|
||||
request, broadcast_id, note=u'Logging in and confirming age')
|
||||
|
||||
if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None:
|
||||
raise ExtractorError(u'Unable to log in: bad username or password', expected=True)
|
||||
request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
|
||||
|
||||
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
|
||||
raise ExtractorError('Unable to log in: bad username or password', expected=True)
|
||||
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
ticket = self._html_search_regex(
|
||||
u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
|
||||
broadcast_page, u'broadcast ticket')
|
||||
'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
|
||||
broadcast_page, 'broadcast ticket')
|
||||
|
||||
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
|
||||
|
||||
@@ -328,22 +323,22 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
if broadcast_password:
|
||||
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
|
||||
|
||||
broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON')
|
||||
broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
|
||||
|
||||
try:
|
||||
broadcast_json = json.loads(broadcast_json_page)
|
||||
|
||||
protected_broadcast = broadcast_json['_pass_protected'] == 1
|
||||
if protected_broadcast and not broadcast_password:
|
||||
raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True)
|
||||
raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
|
||||
|
||||
broadcast_offline = broadcast_json['is_play'] == 0
|
||||
if broadcast_offline:
|
||||
raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True)
|
||||
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
|
||||
|
||||
rtmp_url = broadcast_json['_server']
|
||||
if not rtmp_url.startswith('rtmp://'):
|
||||
raise ExtractorError(u'Unexpected broadcast rtmp URL')
|
||||
raise ExtractorError('Unexpected broadcast rtmp URL')
|
||||
|
||||
broadcast_playpath = broadcast_json['_streamName']
|
||||
broadcast_thumbnail = broadcast_json['_imgURL']
|
||||
@@ -354,8 +349,8 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
||||
except KeyError:
|
||||
if protected_broadcast:
|
||||
raise ExtractorError(u'Bad broadcast password', expected=True)
|
||||
raise ExtractorError(u'Unexpected broadcast JSON')
|
||||
raise ExtractorError('Bad broadcast password', expected=True)
|
||||
raise ExtractorError('Unexpected broadcast JSON')
|
||||
|
||||
return {
|
||||
'id': broadcast_id,
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -7,13 +9,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
|
||||
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
|
||||
_TEST = {
|
||||
u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
|
||||
u'file': u'54196191430.mp4',
|
||||
u'md5': u'479bb068e5b16462f5176a6828829767',
|
||||
u'info_dict': {
|
||||
u"title": u"tatiana maslany news"
|
||||
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
|
||||
'file': '54196191430.mp4',
|
||||
'md5': '479bb068e5b16462f5176a6828829767',
|
||||
'info_dict': {
|
||||
"title": "tatiana maslany news"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor):
|
||||
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
|
||||
video = re.search(re_video, webpage)
|
||||
if video is None:
|
||||
raise ExtractorError(u'Unable to extract video')
|
||||
raise ExtractorError('Unable to extract video')
|
||||
video_url = video.group('video_url')
|
||||
ext = video.group('ext')
|
||||
|
||||
video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
|
||||
webpage, u'thumbnail', fatal=False) # We pick the first poster
|
||||
if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'posters.*?\[\\x22(.*?)\\x22',
|
||||
webpage, 'thumbnail', fatal=False) # We pick the first poster
|
||||
if video_thumbnail:
|
||||
video_thumbnail = video_thumbnail.replace('\\\\/', '/')
|
||||
|
||||
# The only place where you can get a title, it's not complete,
|
||||
# but searching in other places doesn't work for all videos
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
|
||||
webpage, u'title', flags=re.DOTALL)
|
||||
webpage, 'title', flags=re.DOTALL)
|
||||
|
||||
return [{'id': video_id,
|
||||
'url': video_url,
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
import base64
|
||||
import re
|
||||
|
||||
@@ -6,15 +7,16 @@ from ..utils import (
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class TutvIE(InfoExtractor):
|
||||
_VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
|
||||
u'file': u'2742556.flv',
|
||||
u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
|
||||
u'info_dict': {
|
||||
u"title": u"Noah en pabellon cuahutemoc"
|
||||
}
|
||||
'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
|
||||
'file': '2742556.flv',
|
||||
'md5': '5eb766671f69b82e528dc1e7769c5cb2',
|
||||
'info_dict': {
|
||||
'title': 'Noah en pabellon cuahutemoc',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -22,18 +24,15 @@ class TutvIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
|
||||
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
|
||||
|
||||
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
||||
data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
|
||||
data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
||||
data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
|
||||
data = compat_parse_qs(data_content)
|
||||
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
|
||||
ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': internal_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': self._og_search_title(webpage),
|
||||
}
|
||||
return [info]
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,12 +11,12 @@ from ..utils import (
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
u'file': u'2189178.flv',
|
||||
u'md5': u'07e15fa469ba384c7693fd246905547c',
|
||||
u'info_dict': {
|
||||
u"title": u"Zeichentrick 1",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
'file': '2189178.flv',
|
||||
'md5': '07e15fa469ba384c7693fd246905547c',
|
||||
'info_dict': {
|
||||
"title": "Zeichentrick 1",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,12 +32,12 @@ class YouJizzIE(InfoExtractor):
|
||||
|
||||
# Get the video title
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
|
||||
webpage, u'title').strip()
|
||||
webpage, 'title').strip()
|
||||
|
||||
# Get the embed page
|
||||
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
|
||||
if result is None:
|
||||
raise ExtractorError(u'ERROR: unable to extract embed page')
|
||||
raise ExtractorError('ERROR: unable to extract embed page')
|
||||
|
||||
embed_page_url = result.group(0).strip()
|
||||
video_id = result.group('videoid')
|
||||
@@ -47,23 +49,23 @@ class YouJizzIE(InfoExtractor):
|
||||
if m_playlist is not None:
|
||||
playlist_url = m_playlist.group('playlist')
|
||||
playlist_page = self._download_webpage(playlist_url, video_id,
|
||||
u'Downloading playlist page')
|
||||
'Downloading playlist page')
|
||||
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
|
||||
if len(m_levels) == 0:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
raise ExtractorError('Unable to extract video url')
|
||||
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
|
||||
(_, video_url) = sorted(videos)[0]
|
||||
video_url = video_url.replace('%252F', '%2F')
|
||||
else:
|
||||
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
|
||||
webpage, u'video URL')
|
||||
webpage, 'video URL')
|
||||
|
||||
info = {'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'format': 'flv',
|
||||
'player_url': embed_page_url,
|
||||
'age_limit': age_limit}
|
||||
|
||||
return [info]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'format': 'flv',
|
||||
'player_url': embed_page_url,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -1662,7 +1662,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
'_type': 'url',
|
||||
'url': video_id,
|
||||
'ie_key': 'Youtube',
|
||||
'id': 'video_id',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.01.27'
|
||||
__version__ = '2014.01.30'
|
||||
|
Reference in New Issue
Block a user