Compare commits

...

40 Commits

Author SHA1 Message Date
963d7ec412 release 2014.03.10 2014-03-10 13:04:20 +01:00
e712d94adf Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-10 13:03:52 +01:00
6a72423955 [generic] Use a different URL for the generic RSS test (Closes #2532) 2014-03-10 13:03:39 +01:00
4126826b10 [photobucket] More unicode literals 2014-03-10 12:59:19 +01:00
b773ead7fd [vesti] Add support for more sites (Closes #2534) 2014-03-10 18:52:00 +07:00
855e2750bc Credit @mharrys for aftonbladet 2014-03-10 10:30:17 +01:00
805ef3c60b Correct automatic resolution determination 2014-03-10 10:29:25 +01:00
fbc2dcb40b [aftonbladet] Modernize 2014-03-10 10:28:56 +01:00
5375d7ad84 Merge remote-tracking branch 'mharrys/aftonbladet' 2014-03-10 10:23:45 +01:00
90f3476180 [photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00
ee95c09333 [pornhub] Use compat_urllib_parse.unquote_plus (#2531) 2014-03-09 19:16:25 +01:00
75d06db9fc Merge branch 'pornhub_unquote_password' of github.com:MikeCol/youtube-dl 2014-03-09 19:15:33 +01:00
439a1fffcb [myvideo] Modernize 2014-03-09 18:58:34 +01:00
9d9d70c462 [facebook] Modernize 2014-03-09 18:42:44 +01:00
b4a186b7be [jukebox] Modernize and add a test 2014-03-09 18:33:17 +01:00
bdebf51c8f [xnxx] Modernize 2014-03-09 18:31:39 +01:00
264b86f9b4 Unquote password 2014-03-09 18:26:18 +01:00
9e55e37a2e Merge remote-tracking branch 'origin/master' 2014-03-09 18:08:16 +01:00
1471956573 Add a basic test suite for the InfoExtractor class 2014-03-09 17:05:29 +01:00
27865b2169 [aftonbladet] add extractor for aftonbladet.se 2014-03-09 16:59:18 +01:00
6d07ce0162 YoutubeDL: If the logger is set call its warning method in report_warning 2014-03-09 15:16:54 +01:00
edb7fc5435 [videodetective] Modernize 2014-03-09 18:39:39 +07:00
31f77343f2 [vube] Update the test's checksum 2014-03-09 12:27:38 +01:00
63ad031583 [soundcloud] Add the description field to the second test 2014-03-09 12:26:58 +01:00
957688cee6 [ustream:channel] Update test's number of entries 2014-03-09 12:03:49 +01:00
806d6c2e8c [gamekings] Modernize and update the test's description field 2014-03-09 11:57:30 +01:00
0ef68e04d9 [mtv] Transform the urls from the mobile version to get the best quality
And don't report a warning, just log a message; this allows the test to pass from Europe.
2014-03-08 22:09:42 +01:00
a496524db2 [collegehumor] Replace youtube test 2014-03-09 03:21:26 +07:00
935c7360cc [spike] Add support for mobile urls 2014-03-08 21:10:21 +01:00
340b046876 [spike] Add support for downloading the mobile version if the normal version is geoblocked 2014-03-08 20:59:11 +01:00
cc1db7f9b7 [mtv] Improve detection of geoblocked videos 2014-03-08 19:46:34 +01:00
a4ff6c4762 [arte] Raise a proper error when no video is found 2014-03-08 16:04:03 +01:00
1060425cbb [vimeo] Add a better error message for embed-only videos (#2527) 2014-03-08 12:25:09 +01:00
e9c092f125 YoutubeDL: Use its urlopen method for downloading the thumbnail. 2014-03-07 16:43:34 +01:00
22ff5d2105 [http] Use the YoutubeDL.urlopen method 2014-03-07 16:41:42 +01:00
136db7881b [lynda] Modernize 2014-03-07 22:11:01 +07:00
dae313e725 release 2014.03.07.1 2014-03-07 15:59:10 +01:00
b74fa8cd2c [facebook] Fix login process
It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.
2014-03-07 15:25:33 +01:00
94eae04c94 release 2014.03.07 2014-03-07 06:41:48 +01:00
16ff7ebc77 [lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 06:56:48 +07:00
27 changed files with 470 additions and 272 deletions

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
class TestIE(InfoExtractor):
    """Bare InfoExtractor subclass used to exercise the base-class helpers."""
class TestInfoExtractor(unittest.TestCase):
    """Basic checks for helper methods inherited from InfoExtractor."""

    def setUp(self):
        # Every test gets a fresh extractor bound to a fake downloader.
        self.ie = TestIE(FakeYDL())

    def test_ie_key(self):
        """Looking up an extractor by its key returns the extractor class."""
        key = YoutubeIE.ie_key()
        self.assertEqual(get_info_extractor(key), YoutubeIE)

    def test_html_search_regex(self):
        """_html_search_regex returns the matched group as cleaned text."""
        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'

        def search(pattern, *args):
            # Run the helper against the fixed HTML snippet above.
            return self.ie._html_search_regex(pattern, html, *args)

        found = search(r'<p id="foo">(.+?)</p>', 'foo')
        self.assertEqual(found, 'Watch this video')

    def test_opengraph(self):
        """The og:* helpers cope with varying attribute order and quoting."""
        extractor = self.ie
        html = '''
<meta name="og:title" content='Foo'/>
<meta content="Some video's description " name="og:description"/>
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
'''
        self.assertEqual(extractor._og_search_title(html), 'Foo')
        self.assertEqual(
            extractor._og_search_description(html),
            'Some video\'s description ')
        self.assertEqual(
            extractor._og_search_thumbnail(html),
            'http://domain.com/pic.jpg?key1=val1&key2=val2')
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '5124905')
self.assertTrue(len(result['entries']) >= 11)
self.assertTrue(len(result['entries']) >= 6)
def test_soundcloud_set(self):
dl = FakeYDL()
@ -254,9 +254,9 @@ class TestPlaylists(unittest.TestCase):
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)

View File

@ -33,6 +33,7 @@ from youtube_dl.utils import (
unified_strdate,
unsmuggle_url,
url_basename,
urlencode_postdata,
xpath_with_ns,
)
@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
bam''')
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
def test_urlencode_postdata(self):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))
if __name__ == '__main__':
unittest.main()

View File

@ -370,12 +370,15 @@ class YoutubeDL(object):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
def report_error(self, message, tb=None):
'''
@ -413,9 +416,9 @@ class YoutubeDL(object):
if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
elif template_dict.get('height'):
res = '%sp' % template_dict['height']
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
res = '?x%d' % template_dict['width']
template_dict['resolution'] = '?x%d' % template_dict['width']
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@ -918,7 +921,7 @@ class YoutubeDL(object):
self.to_screen('[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
uf = self.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' %

View File

@ -50,6 +50,7 @@ __authors__ = (
'Anthony Weems',
'David Wagner',
'Juan C. Olivares',
'Mattias Harrysson',
)
__license__ = 'Public Domain'

View File

@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
while count <= retries:
# Establish connection
try:
data = compat_urllib_request.urlopen(request)
data = self.ydl.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:

View File

@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE

View File

@ -0,0 +1,69 @@
# encoding: utf-8
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
class AftonbladetIE(InfoExtractor):
    """Information extractor for web TV articles on tv.aftonbladet.se."""

    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the article video at *url*.

        Three resources are fetched in turn: the article page (to find the
        internal metadata id), the metadata JSON (title and other details,
        plus the internal video id) and the formats JSON (the list of mp4
        renditions).

        A missing 'title', 'videoId' or formats structure raises KeyError;
        the remaining metadata fields are treated as optional and come out
        as None when the service does not provide them.
        """
        # _VALID_URL is anchored with '^', so match() is equivalent to search().
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            # Only the first of the listed paths is used for each rendition.
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })
        self._sort_formats(formats)

        # Convert the publication time in UTC so that the resulting date does
        # not depend on the timezone of the machine running the extractor.
        upload_date = None
        time_published = internal_meta_json.get('timePublished')
        if time_published is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                time_published).strftime('%Y%m%d')

        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            # The fields below are optional: absent keys yield None
            # instead of aborting the whole extraction.
            'thumbnail': internal_meta_json.get('imageUrl'),
            'description': internal_meta_json.get('shortPreamble'),
            'upload_date': upload_date,
            'duration': internal_meta_json.get('duration'),
            'view_count': internal_meta_json.get('views'),
        }

View File

@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
raise ExtractorError('No video found')
def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = self._download_xml(
ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
config_xml = self._download_webpage(
config_xml_url, video_id, note='Downloading configuration')
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m):

View File

@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):
},
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950457',
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'W5gMp3ZjYg4',
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'FunnyPlox TV',
'uploader_id': 'funnyploxtv',
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
'upload_date': '20140128',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,

View File

@ -11,16 +11,15 @@ from ..utils import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
urlencode_postdata,
ExtractorError,
)
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_VALID_URL = r'''(?x)
(?:https?://)?(?:\w+\.)?facebook\.com/
https?://(?:\w+\.)?facebook\.com/
(?:[^#?]*\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+)
@ -36,14 +35,10 @@ class FacebookIE(InfoExtractor):
'id': '120708114770723',
'ext': 'mp4',
'duration': 279,
'title': 'PEOPLE ARE AWESOME 2013'
'title': 'PEOPLE ARE AWESOME 2013',
}
}
def report_login(self):
"""Report attempt to log in."""
self.to_screen('Logging in')
def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:
@ -51,8 +46,8 @@ class FacebookIE(InfoExtractor):
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
self.report_login()
login_page = self._download_webpage(login_page_req, None, note=False,
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
errnote='Unable to download login page')
lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"',
@ -70,23 +65,25 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = compat_urllib_request.urlopen(request).read()
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
check_form = {
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = compat_urllib_request.urlopen(check_req).read()
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -98,8 +95,6 @@ class FacebookIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
video_id = mobj.group('id')
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@ -125,18 +120,14 @@ class FacebookIE(InfoExtractor):
video_url = video_data['sd_src']
if not video_url:
raise ExtractorError('Cannot find video URL')
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
info = {
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': thumbnail,
'duration': int(video_data['video_duration']),
'thumbnail': video_data['thumbnail_src'],
}
return [info]

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -6,13 +8,14 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
'id': '20130811',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
}
}

View File

@ -1,56 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
RegexNotFoundError,
unescapeHTML,
)
class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
_TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
'info_dict': {
'id': 'r303r',
'ext': 'flv',
'title': 'Kosheen-En Vivo Pride',
'uploader': 'Kosheen',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
html = self._download_webpage(url, video_id)
mobj = re.search(self._IFRAME, html)
if mobj is None:
raise ExtractorError(u'Cannot extract iframe url')
iframe_url = unescapeHTML(mobj.group('iframe'))
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
mobj = re.search(r'class="jkb_waiting"', iframe_html)
if mobj is not None:
raise ExtractorError(u'Video is not available(in your country?)!')
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
raise ExtractorError('Video is not available(in your country?)!')
self.report_extraction(video_id)
mobj = re.search(self._VIDEO_URL, iframe_html)
if mobj is None:
mobj = re.search(self._IS_YOUTUBE, iframe_html)
if mobj is None:
raise ExtractorError(u'Cannot extract video url')
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
self.to_screen(u'Youtube video detected')
return self.url_result(youtube_url,ie='Youtube')
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
video_ext = unescapeHTML(mobj.group('video_ext'))
try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError:
youtube_url = self._search_regex(
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
iframe_html, 'youtube url')
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
self.to_screen('Youtube video detected')
return self.url_result(youtube_url, ie='Youtube')
mobj = re.search(self._TITLE, html)
if mobj is None:
raise ExtractorError(u'Cannot extract title')
title = unescapeHTML(mobj.group('title'))
artist = unescapeHTML(mobj.group('artist'))
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
html, 'artist')
return [{'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'ext': video_ext
}]
return {
'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'uploader': artist,
}

View File

@ -8,7 +8,9 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError
ExtractorError,
int_or_none,
compat_str,
)
@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda'
_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_TEST = {
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'file': '114408.mp4',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
'id': '114408',
'ext': 'mp4',
'title': 'Using the exercise files',
'duration': 68
}
@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
video_id, 'Downloading video JSON')
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
raise ExtractorError(
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
video_id = video_json['ID']
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
title = video_json['Title']
formats = [{'url': fmt['Url'],
formats = []
fmts = video_json.get('Formats')
if fmts:
formats.extend([
{
'url': fmt['Url'],
'ext': fmt['Extension'],
'width': fmt['Width'],
'height': fmt['Height'],
'filesize': fmt['FileSize'],
'format_id': str(fmt['Resolution'])
} for fmt in video_json['Formats']]
} for fmt in fmts])
prioritized_streams = video_json.get('PrioritizedStreams')
if prioritized_streams:
formats.extend([
{
'url': video_url,
'width': int_or_none(format_id),
'format_id': format_id,
} for format_id, video_url in prioritized_streams['0'].items()
])
self._sort_formats(formats)
@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false'
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
def _get_available_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
sub = self._download_webpage(url, None, note=False)
sub = self._download_webpage(url, None, False)
sub_json = json.loads(sub)
return {'en': url} if len(sub_json) > 0 else {}
@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
videos = []
(username, _) = self._get_login_info()
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore
for chapter in course_json['Chapters']:
for video in chapter['Videos']:
if username is None and video['HasAccess'] is False:

View File

@ -5,9 +5,12 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
unescapeHTML,
url_basename,
RegexNotFoundError,
)
@ -18,6 +21,7 @@ def _media_xml_tag(tag):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, mdoc):
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True)
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('Youtubedl-user-agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.geturl()
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
return [{'url': url,'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
'country, trying with the mobile version')
return self._extract_mobile_video_formats(mtvn_id)
raise ExtractorError('This video is not available from your country.',
expected=True)
formats = []
for rendition in mdoc.findall('.//rendition'):
@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()
# This is a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:id')
if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text
return {
'title': title,
'formats': self._extract_video_formats(mediagen_doc),
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import binascii
import base64
import hashlib
@ -14,18 +16,16 @@ from ..utils import (
)
class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
u'file': u'8229274.flv',
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
u'info_dict': {
u"title": u"bowling-fail-or-win"
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}
@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, u'title')
webpage, 'title')
video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': video_ext,
}]
return {
'id': video_id,
'url': video_url,
'title': video_title,
}
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video')
raise ExtractorError('Unable to extract video')
params = {}
encxml = ''
@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
video_url = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
u'Rewriting URL to use unencrypted rtmp:// ...',
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse.unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_file
).replace('.f4m', '.m3u8')
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, u'title')
webpage, 'title')
return [{
'id': video_id,
'url': video_url,
'tc_url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}]
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@ -1,76 +1,43 @@
from __future__ import unicode_literals
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class PhotobucketIE(InfoExtractor):
"""Information extractor for photobucket.com."""
# TODO: the original _VALID_URL was:
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
# Check if it's necessary to keep the old extracion process
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
IE_NAME = u'photobucket'
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
_TEST = {
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
u'file': u'zpsc0c3b9fa.mp4',
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
u'info_dict': {
u"upload_date": u"20130504",
u"uploader": u"rachaneronas",
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
'file': 'zpsc0c3b9fa.mp4',
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
'info_dict': {
'upload_date': '20130504',
'uploader': 'rachaneronas',
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
}
}
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
video_extension = mobj.group('ext')
# Retrieve video webpage to extract further information
webpage = self._download_webpage(url, video_id)
# Extract URL, uploader, and title from webpage
self.report_extraction(video_id)
# We try first by looking the javascript code:
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
if mobj is not None:
info = json.loads(mobj.group('json'))
return [{
'id': video_id,
'url': info[u'downloadUrl'],
'uploader': info[u'username'],
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
'title': info[u'title'],
'ext': video_extension,
'thumbnail': info[u'thumbUrl'],
}]
# We try looking in other parts of the webpage
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
webpage, u'video URL')
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1).decode('utf-8')
video_uploader = mobj.group(2).decode('utf-8')
return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json')
info = json.loads(info_json)
return {
'id': video_id,
'url': info['downloadUrl'],
'uploader': info['username'],
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
'title': info['title'],
'ext': video_extension,
'thumbnail': info['thumbUrl'],
}

View File

@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []

View File

@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
'id': '47127627',
'ext': 'mp3',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
'upload_date': '20120521',
},

View File

@ -1,10 +1,15 @@
from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
_VALID_URL = r'''(?x)https?://
(www\.spike\.com/(video-clips|episodes)/.+|
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
'''
_TEST = {
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
'md5': '1a9265f32b0c375793d6c4ce45255256',
@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
}
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
def _real_extract(self, url):
    """Extract a Spike video, rewriting mobile URLs to the desktop site.

    When ``url`` matches the ``m.spike.com`` alternative of ``_VALID_URL``
    (i.e. the ``mobile_id`` group is set), the equivalent
    ``www.spike.com/video-clips/<id>`` URL is handed to the parent class
    instead; every other URL is passed through unchanged.
    """
    mobile_id = re.search(self._VALID_URL, url).group('mobile_id')
    if mobile_id is None:
        # Regular desktop URL: the parent extractor can use it as-is.
        return super(SpikeIE, self)._real_extract(url)
    desktop_url = 'http://www.spike.com/video-clips/%s' % mobile_id
    return super(SpikeIE, self)._real_extract(desktop_url)

View File

@ -13,7 +13,7 @@ from ..utils import (
class VestiIE(InfoExtractor):
IE_NAME = 'vesti'
IE_DESC = 'Вести.Ru'
_VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia\.tv)/(?P<id>.+)'
_VALID_URL = r'http://(?:(?:.+?\.)?vesti\.ru|(?:2\.)?russia\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
_TESTS = [
{
@ -72,6 +72,35 @@ class VestiIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'info_dict': {
'id': '766403',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia'
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Translation has finished'
},
{
'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
'info_dict': {
@ -101,34 +130,47 @@ class VestiIE(InfoExtractor):
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
'info_dict': {
'id': '766403',
'id': '775081',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
'duration': 101,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia'
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
'id': '774471',
'ext': 'mp4',
'title': 'Монологи на все времена',
'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
'duration': 2906,
},
'params': {
# rtmp download
# m3u8 download
'skip_download': True,
},
'skip': 'Translation has finished'
}
},
{
'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
'info_dict': {
'id': '125521',
'ext': 'mp4',
'title': 'Грустная дама червей. Х',
'description': '',
'duration': 4882,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
]
def _real_extract(self, url):

View File

@ -1,22 +1,23 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
from ..utils import (
compat_urlparse,
)
from ..utils import compat_urlparse
class VideoDetectiveIE(InfoExtractor):
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
u'file': u'194487.mp4',
u'info_dict': {
u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 135,
'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
'info_dict': {
'id': '194487',
'ext': 'mp4',
'title': 'KICK-ASS 2',
'description': 'md5:65ba37ad619165afac7d432eaded6013',
'duration': 135,
},
}
@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage)
query = compat_urlparse.urlparse(og_video).query
return self.url_result(InternetVideoArchiveIE._build_url(query),
ie=InternetVideoArchiveIE.ie_key())
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())

View File

@ -8,6 +8,7 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
clean_html,
@ -172,7 +173,18 @@ class VimeoIE(SubtitlesInfoExtractor):
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
webpage = self._download_webpage(request, video_id)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError(
'Cannot download embed-only video without embedding '
'URL. Please call youtube-dl with the URL of the page '
'that embeds this video.',
expected=True)
raise
# Now we begin extracting as much information as we can from what we
# retrieved. First we extract the information common to all extractors,

View File

@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):
_TEST = {
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
'md5': 'f81dcf6d0448e3291f54380181695821',
'md5': 'db7aba89d4603dadd627e9d1973946fe',
'info_dict': {
'id': 'YL2qNPkqon',
'ext': 'mp4',
@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
}
}

View File

@ -1,55 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
)
class XNXXIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
_TEST = {
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
u'file': u'1135332.flv',
u'md5': u'0831677e2b4761795f68d417e0b7b445',
u'info_dict': {
u"title": u"lida \u00bb Naked Funny Actress (5)",
u"age_limit": 18,
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
'md5': '0831677e2b4761795f68d417e0b7b445',
'info_dict': {
'id': '1135332',
'ext': 'flv',
'title': 'lida » Naked Funny Actress (5)',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')
# Get webpage content
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(self.VIDEO_URL_RE,
webpage, u'video URL')
video_url = self._search_regex(r'flv_url=(.*?)&amp;',
webpage, 'video URL')
video_url = compat_urllib_parse.unquote(video_url)
video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
webpage, u'title')
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title')
video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
webpage, u'thumbnail', fatal=False)
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
webpage, 'thumbnail', fatal=False)
return [{
return {
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
'description': None,
'age_limit': 18,
}]
}

View File

@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """Return the ASCII-encoded, URL-encoded form of the given data.

    All positional and keyword arguments are forwarded untouched to
    ``compat_urllib_parse.urlencode``; the resulting query string is then
    encoded with the ``ascii`` codec.
    """
    query_string = compat_urllib_parse.urlencode(*args, **kargs)
    return query_string.encode('ascii')

View File

@ -1,2 +1,2 @@
__version__ = '2014.03.06'
__version__ = '2014.03.10'