Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-07-22 21:31:40 -05:00)

Compare commits: 40 commits, 2014.03.06...2014.03.10
Commits (SHA1):
963d7ec412, e712d94adf, 6a72423955, 4126826b10, b773ead7fd, 855e2750bc, 805ef3c60b, fbc2dcb40b,
5375d7ad84, 90f3476180, ee95c09333, 75d06db9fc, 439a1fffcb, 9d9d70c462, b4a186b7be, bdebf51c8f,
264b86f9b4, 9e55e37a2e, 1471956573, 27865b2169, 6d07ce0162, edb7fc5435, 31f77343f2, 63ad031583,
957688cee6, 806d6c2e8c, 0ef68e04d9, a496524db2, 935c7360cc, 340b046876, cc1db7f9b7, a4ff6c4762,
1060425cbb, e9c092f125, 22ff5d2105, 136db7881b, dae313e725, b74fa8cd2c, 94eae04c94, 16ff7ebc77
test/test_InfoExtractor.py (new file, 44 lines)
@@ -0,0 +1,44 @@
#!/usr/bin/env python

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor


class TestIE(InfoExtractor):
    pass


class TestInfoExtractor(unittest.TestCase):
    def setUp(self):
        self.ie = TestIE(FakeYDL())

    def test_ie_key(self):
        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)

    def test_html_search_regex(self):
        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
        search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')

    def test_opengraph(self):
        ie = self.ie
        html = '''
            <meta name="og:title" content='Foo'/>
            <meta content="Some video's description " name="og:description"/>
            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
            '''
        self.assertEqual(ie._og_search_title(html), 'Foo')
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')

if __name__ == '__main__':
    unittest.main()
test/test_playlists.py
@@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '5124905')
self.assertTrue(len(result['entries']) >= 11)
self.assertTrue(len(result['entries']) >= 6)

def test_soundcloud_set(self):
dl = FakeYDL()
@@ -254,9 +254,9 @@ class TestPlaylists(unittest.TestCase):
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
test/test_utils.py
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
unified_strdate,
unsmuggle_url,
url_basename,
urlencode_postdata,
xpath_with_ns,
)

@@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
bam''')
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])

def test_urlencode_postdata(self):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))

if __name__ == '__main__':
unittest.main()
youtube_dl/YoutubeDL.py
@@ -370,12 +370,15 @@ class YoutubeDL(object):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)

def report_error(self, message, tb=None):
'''
@@ -413,9 +416,9 @@ class YoutubeDL(object):
if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
elif template_dict.get('height'):
res = '%sp' % template_dict['height']
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
res = '?x%d' % template_dict['width']
template_dict['resolution'] = '?x%d' % template_dict['width']

sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@@ -918,7 +921,7 @@ class YoutubeDL(object):
self.to_screen('[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
uf = self.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
youtube_dl/__init__.py
@@ -50,6 +50,7 @@ __authors__ = (
'Anthony Weems',
'David Wagner',
'Juan C. Olivares',
'Mattias Harrysson',
)

__license__ = 'Public Domain'
youtube_dl/downloader/http.py
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
while count <= retries:
# Establish connection
try:
data = compat_urllib_request.urlopen(request)
data = self.ydl.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
@@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
youtube_dl/extractor/__init__.py
@@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
youtube_dl/extractor/aftonbladet.py (new file, 69 lines)
@@ -0,0 +1,69 @@
# encoding: utf-8
from __future__ import unicode_literals

import datetime
import re

from .common import InfoExtractor


class AftonbladetIE(InfoExtractor):
    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        mobj = re.search(self._VALID_URL, url)

        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })
        self._sort_formats(formats)

        timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
        upload_date = timestamp.strftime('%Y%m%d')

        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            'thumbnail': internal_meta_json['imageUrl'],
            'description': internal_meta_json['shortPreamble'],
            'upload_date': upload_date,
            'duration': internal_meta_json['duration'],
            'view_count': internal_meta_json['views'],
        }
youtube_dl/extractor/arte.py
@@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
return self._extract_liveweb(url, name, lang)

if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return

raise ExtractorError('No video found')

def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = self._download_xml(
ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
config_xml = self._download_webpage(
config_xml_url, video_id, note='Downloading configuration')

video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m):
youtube_dl/extractor/collegehumor.py
@@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):
},
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950457',
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'W5gMp3ZjYg4',
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'FunnyPlox TV',
'uploader_id': 'funnyploxtv',
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
'upload_date': '20140128',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,
youtube_dl/extractor/facebook.py
@@ -11,16 +11,15 @@ from ..utils import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
urlencode_postdata,

ExtractorError,
)


class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""

_VALID_URL = r'''(?x)
(?:https?://)?(?:\w+\.)?facebook\.com/
https?://(?:\w+\.)?facebook\.com/
(?:[^#?]*\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+)
@@ -36,14 +35,10 @@ class FacebookIE(InfoExtractor):
'id': '120708114770723',
'ext': 'mp4',
'duration': 279,
'title': 'PEOPLE ARE AWESOME 2013'
'title': 'PEOPLE ARE AWESOME 2013',
}
}

def report_login(self):
"""Report attempt to log in."""
self.to_screen('Logging in')

def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:
@@ -51,8 +46,8 @@ class FacebookIE(InfoExtractor):

login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
self.report_login()
login_page = self._download_webpage(login_page_req, None, note=False,
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
errnote='Unable to download login page')
lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"',
@@ -70,23 +65,25 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = compat_urllib_request.urlopen(request).read()
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return

check_form = {
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = compat_urllib_request.urlopen(check_req).read()
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -98,8 +95,6 @@ class FacebookIE(InfoExtractor):

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
video_id = mobj.group('id')

url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@@ -125,18 +120,14 @@ class FacebookIE(InfoExtractor):
video_url = video_data['sd_src']
if not video_url:
raise ExtractorError('Cannot find video URL')
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']

video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

info = {
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': thumbnail,
'duration': int(video_data['video_duration']),
'thumbnail': video_data['thumbnail_src'],
}
return [info]
youtube_dl/extractor/gamekings.py
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -6,13 +8,14 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
'id': '20130811',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
}
}
youtube_dl/extractor/jukebox.py
@@ -1,56 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
ExtractorError,
RegexNotFoundError,
unescapeHTML,
)


class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
_TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
'info_dict': {
'id': 'r303r',
'ext': 'flv',
'title': 'Kosheen-En Vivo Pride',
'uploader': 'Kosheen',
},
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')

html = self._download_webpage(url, video_id)

mobj = re.search(self._IFRAME, html)
if mobj is None:
raise ExtractorError(u'Cannot extract iframe url')
iframe_url = unescapeHTML(mobj.group('iframe'))
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))

iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
mobj = re.search(r'class="jkb_waiting"', iframe_html)
if mobj is not None:
raise ExtractorError(u'Video is not available(in your country?)!')
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
raise ExtractorError('Video is not available(in your country?)!')

self.report_extraction(video_id)

mobj = re.search(self._VIDEO_URL, iframe_html)
if mobj is None:
mobj = re.search(self._IS_YOUTUBE, iframe_html)
if mobj is None:
raise ExtractorError(u'Cannot extract video url')
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
self.to_screen(u'Youtube video detected')
return self.url_result(youtube_url,ie='Youtube')
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
video_ext = unescapeHTML(mobj.group('video_ext'))
try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError:
youtube_url = self._search_regex(
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
iframe_html, 'youtube url')
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
self.to_screen('Youtube video detected')
return self.url_result(youtube_url, ie='Youtube')

mobj = re.search(self._TITLE, html)
if mobj is None:
raise ExtractorError(u'Cannot extract title')
title = unescapeHTML(mobj.group('title'))
artist = unescapeHTML(mobj.group('artist'))
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
html, 'artist')

return [{'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'ext': video_ext
}]
return {
'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'uploader': artist,
}
youtube_dl/extractor/lynda.py
@@ -8,7 +8,9 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError
ExtractorError,
int_or_none,
compat_str,
)


@@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda'

_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'

_TEST = {
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'file': '114408.mp4',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
'id': '114408',
'ext': 'mp4',
'title': 'Using the exercise files',
'duration': 68
}
@@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)

page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
video_id, 'Downloading video JSON')
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
'Downloading video JSON')
video_json = json.loads(page)

if 'Status' in video_json:
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)

if video_json['HasAccess'] is False:
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
raise ExtractorError(
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)

video_id = video_json['ID']
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
title = video_json['Title']

formats = [{'url': fmt['Url'],
formats = []

fmts = video_json.get('Formats')
if fmts:
formats.extend([
{
'url': fmt['Url'],
'ext': fmt['Extension'],
'width': fmt['Width'],
'height': fmt['Height'],
'filesize': fmt['FileSize'],
'format_id': str(fmt['Resolution'])
} for fmt in video_json['Formats']]
} for fmt in fmts])

prioritized_streams = video_json.get('PrioritizedStreams')
if prioritized_streams:
formats.extend([
{
'url': video_url,
'width': int_or_none(format_id),
'format_id': format_id,
} for format_id, video_url in prioritized_streams['0'].items()
])

self._sort_formats(formats)

@@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false'
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)

# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
@@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')

if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
@@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):

def _get_available_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
sub = self._download_webpage(url, None, note=False)
sub = self._download_webpage(url, None, False)
sub_json = json.loads(sub)
return {'en': url} if len(sub_json) > 0 else {}

@@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
videos = []
(username, _) = self._get_login_info()

# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore

for chapter in course_json['Chapters']:
for video in chapter['Videos']:
if username is None and video['HasAccess'] is False:
youtube_dl/extractor/mtv.py
@@ -5,9 +5,12 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
unescapeHTML,
url_basename,
RegexNotFoundError,
)
@@ -18,6 +21,7 @@ def _media_xml_tag(tag):


class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
else:
return thumb_node.attrib['url']

def _extract_video_formats(self, mdoc):
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True)
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('Youtubedl-user-agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.geturl()
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
return [{'url': url,'ext': 'mp4'}]

def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
'country, trying with the mobile version')
return self._extract_mobile_video_formats(mtvn_id)
raise ExtractorError('This video is not available from your country.',
expected=True)

formats = []
for rendition in mdoc.findall('.//rendition'):
@@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()

# This a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:id')
if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text

return {
'title': title,
'formats': self._extract_video_formats(mediagen_doc),
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,
youtube_dl/extractor/myvideo.py
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import binascii
import base64
import hashlib
@@ -14,18 +16,16 @@ from ..utils import (
)


class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""

_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
u'file': u'8229274.flv',
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
u'info_dict': {
u"title": u"bowling-fail-or-win"
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}

@@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):

def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)

video_id = mobj.group(1)
video_id = mobj.group('id')

GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
@@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
video_url = mobj.group(1) + '.flv'

video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, u'title')
webpage, 'title')

video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')

return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': video_ext,
}]
return {
'id': video_id,
'url': video_url,
'title': video_title,
}

mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}

# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video')
raise ExtractorError('Unable to extract video')

params = {}
encxml = ''
@@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
@@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
video_url = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
u'Rewriting URL to use unencrypted rtmp:// ...',
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')

@@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))

video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse.unquote(video_file)

if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_file
).replace('.f4m', '.m3u8')

video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse.unquote(video_swfobj)

video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, u'title')
webpage, 'title')

return [{
'id': video_id,
'url': video_url,
'tc_url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}]
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}
youtube_dl/extractor/photobucket.py
@@ -1,76 +1,43 @@
from __future__ import unicode_literals

import datetime
import json
import re

from .common import InfoExtractor

from ..utils import (
ExtractorError,
)

class PhotobucketIE(InfoExtractor):
"""Information extractor for photobucket.com."""

# TODO: the original _VALID_URL was:
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
# Check if it's necessary to keep the old extracion process
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
IE_NAME = u'photobucket'
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
_TEST = {
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
u'file': u'zpsc0c3b9fa.mp4',
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
u'info_dict': {
u"upload_date": u"20130504",
u"uploader": u"rachaneronas",
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
'file': 'zpsc0c3b9fa.mp4',
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
'info_dict': {
'upload_date': '20130504',
'uploader': 'rachaneronas',
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
}
}

def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)

video_id = mobj.group('id')

video_extension = mobj.group('ext')

# Retrieve video webpage to extract further information
webpage = self._download_webpage(url, video_id)

# Extract URL, uploader, and title from webpage
self.report_extraction(video_id)
# We try first by looking the javascript code:
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
if mobj is not None:
info = json.loads(mobj.group('json'))
return [{
'id': video_id,
'url': info[u'downloadUrl'],
'uploader': info[u'username'],
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
'title': info[u'title'],
'ext': video_extension,
'thumbnail': info[u'thumbUrl'],
}]

# We try looking in other parts of the webpage
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
webpage, u'video URL')

mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1).decode('utf-8')
video_uploader = mobj.group(2).decode('utf-8')

return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json')
info = json.loads(info_json)
return {
'id': video_id,
'url': info['downloadUrl'],
'uploader': info['username'],
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
'title': info['title'],
'ext': video_extension,
'thumbnail': info['thumbUrl'],
}
youtube_dl/extractor/pornhub.py
@@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):

video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

formats = []
youtube_dl/extractor/soundcloud.py
@@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
'id': '47127627',
'ext': 'mp3',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
'upload_date': '20120521',
},
youtube_dl/extractor/spike.py
@@ -1,10 +1,15 @@
from __future__ import unicode_literals

import re

from .mtv import MTVServicesInfoExtractor


class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
_VALID_URL = r'''(?x)https?://
(www\.spike\.com/(video-clips|episodes)/.+|
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
'''
_TEST = {
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
'md5': '1a9265f32b0c375793d6c4ce45255256',
@@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
}

_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'

def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
mobile_id = mobj.group('mobile_id')
if mobile_id is not None:
url = 'http://www.spike.com/video-clips/%s' % mobile_id
return super(SpikeIE, self)._real_extract(url)
youtube_dl/extractor/vesti.py
@@ -13,7 +13,7 @@ from ..utils import (
class VestiIE(InfoExtractor):
IE_NAME = 'vesti'
IE_DESC = 'Вести.Ru'
_VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia\.tv)/(?P<id>.+)'
_VALID_URL = r'http://(?:(?:.+?\.)?vesti\.ru|(?:2\.)?russia\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'

_TESTS = [
{
@@ -72,6 +72,35 @@ class VestiIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'info_dict': {
'id': '766403',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia'
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Translation has finished'
},
{
'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
'info_dict': {
@@ -101,34 +130,47 @@ class VestiIE(InfoExtractor):
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
'info_dict': {
'id': '766403',
'id': '775081',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
'duration': 101,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia'
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
'id': '774471',
'ext': 'mp4',
'title': 'Монологи на все времена',
'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
'duration': 2906,
},
'params': {
# rtmp download
# m3u8 download
'skip_download': True,
},
'skip': 'Translation has finished'
}
},
{
'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
'info_dict': {
'id': '125521',
'ext': 'mp4',
'title': 'Грустная дама червей. Х/ф',
'description': '',
'duration': 4882,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
]

def _real_extract(self, url):
youtube_dl/extractor/videodetective.py
@@ -1,22 +1,23 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
from ..utils import (
compat_urlparse,
)
from ..utils import compat_urlparse


class VideoDetectiveIE(InfoExtractor):
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'

_TEST = {
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
u'file': u'194487.mp4',
u'info_dict': {
u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 135,
'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
'info_dict': {
'id': '194487',
'ext': 'mp4',
'title': 'KICK-ASS 2',
'description': 'md5:65ba37ad619165afac7d432eaded6013',
'duration': 135,
},
}

@@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage)
query = compat_urlparse.urlparse(og_video).query
return self.url_result(InternetVideoArchiveIE._build_url(query),
ie=InternetVideoArchiveIE.ie_key())
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
youtube_dl/extractor/vimeo.py
@@ -8,6 +8,7 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
clean_html,
@@ -172,7 +173,18 @@ class VimeoIE(SubtitlesInfoExtractor):

# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
webpage = self._download_webpage(request, video_id)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError(
'Cannot download embed-only video without embedding '
'URL. Please call youtube-dl with the URL of the page '
'that embeds this video.',
expected=True)
raise

# Now we begin extracting as much information as we can from what we
# retrieved. First we extract the information common to all extractors,
|
@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'f81dcf6d0448e3291f54380181695821',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
}
|
||||
|
youtube_dl/extractor/xnxx.py
@@ -1,55 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,

ExtractorError,
)


class XNXXIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
_TEST = {
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
u'file': u'1135332.flv',
u'md5': u'0831677e2b4761795f68d417e0b7b445',
u'info_dict': {
u"title": u"lida \u00bb Naked Funny Actress (5)",
u"age_limit": 18,
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
'md5': '0831677e2b4761795f68d417e0b7b445',
'info_dict': {
'id': '1135332',
'ext': 'flv',
'title': 'lida » Naked Funny Actress (5)',
'age_limit': 18,
}
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')

# Get webpage content
webpage = self._download_webpage(url, video_id)

video_url = self._search_regex(self.VIDEO_URL_RE,
webpage, u'video URL')
video_url = self._search_regex(r'flv_url=(.*?)&',
webpage, 'video URL')
video_url = compat_urllib_parse.unquote(video_url)

video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
webpage, u'title')
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title')

video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
webpage, u'thumbnail', fatal=False)
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
webpage, 'thumbnail', fatal=False)

return [{
return {
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
'description': None,
'age_limit': 18,
}]
}
youtube_dl/utils.py
@@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):

with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
youtube_dl/version.py
@@ -1,2 +1,2 @@

__version__ = '2014.03.06'
__version__ = '2014.03.10'