mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-07-22 05:11:42 -05:00
Compare commits
15 Commits
2014.06.07
...
2014.06.09
Author | SHA1 | Date | |
---|---|---|---|
9706f3f802 | |||
d5e944359e | |||
826ec77fb2 | |||
2656f4eb6a | |||
2b88feedf7 | |||
23566e0d78 | |||
828553b614 | |||
3048e82a94 | |||
09ffa08ba1 | |||
e0b4cc489f | |||
15e423407f | |||
702e522044 | |||
814d4257df | |||
23ae281b31 | |||
94128d6b0d |
@ -25,7 +25,7 @@ class HlsFD(FileDownloader):
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
|
@ -106,7 +106,7 @@ class RtmpFD(FileDownloader):
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
|
@ -332,6 +332,7 @@ from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
|
@ -1,39 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.mp4',
|
||||
u'md5': u'9dcfe344732808dbfcc901537973c922',
|
||||
u'info_dict': {
|
||||
u"title": u"Kaffeeland Schweiz",
|
||||
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
|
||||
u"uploader": u"3sat",
|
||||
u"upload_date": u"20130622"
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
|
||||
'md5': '9dcfe344732808dbfcc901537973c922',
|
||||
'info_dict': {
|
||||
'id': '36983',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kaffeeland Schweiz',
|
||||
'description': 'md5:cc4424b18b75ae9948b13929a0814033',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20130622'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': te.attrib['key'].partition('x')[0],
|
||||
'height': te.attrib['key'].partition('x')[2],
|
||||
'width': int(te.attrib['key'].partition('x')[0]),
|
||||
'height': int(te.attrib['key'].partition('x')[2]),
|
||||
'url': te.text,
|
||||
} for te in thumbnail_els]
|
||||
|
||||
|
@ -50,10 +50,13 @@ class FC2IE(InfoExtractor):
|
||||
raise ExtractorError('Error code: %s' % info['err_code'][0])
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
title_info = info.get('title')
|
||||
if title_info:
|
||||
title = title_info[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'][0],
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
|
@ -260,7 +260,24 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Spi0n',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}
|
||||
},
|
||||
# YouTube embed
|
||||
{
|
||||
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
|
||||
'info_dict': {
|
||||
'id': 'FXRb4ykk4S0',
|
||||
'ext': 'mp4',
|
||||
'title': 'The NBL Auction 2014',
|
||||
'uploader': 'BADMINTON England',
|
||||
'uploader_id': 'BADMINTONEvents',
|
||||
'upload_date': '20140603',
|
||||
'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@ -478,8 +495,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:<iframe[^>]+?src=|embedSWF\(\s*)
|
||||
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
@ -646,6 +668,14 @@ class GenericIE(InfoExtractor):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
# Look for embedded vulture.com player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
|
@ -1,10 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
@ -13,59 +14,55 @@ from ..utils import (
|
||||
|
||||
|
||||
class HypemIE(InfoExtractor):
|
||||
"""Information Extractor for hypem"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
u'file': u'1v6ga.mp3',
|
||||
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
u'info_dict': {
|
||||
u"title": u"Tame"
|
||||
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
'info_dict': {
|
||||
'id': '1v6ga',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tame',
|
||||
'uploader': 'BODYWORK',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
track_id = mobj.group(1)
|
||||
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
|
||||
response, urlh = self._download_webpage_handle(
|
||||
request, track_id, 'Downloading webpage with the url')
|
||||
cookie = urlh.headers.get('Set-Cookie', '')
|
||||
|
||||
self.report_extraction(track_id)
|
||||
|
||||
html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
|
||||
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
|
||||
html_tracks = self._html_search_regex(
|
||||
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
|
||||
response, 'tracks')
|
||||
try:
|
||||
track_list = json.loads(html_tracks)
|
||||
track = track_list[u'tracks'][0]
|
||||
track = track_list['tracks'][0]
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
raise ExtractorError('Hypemachine contained invalid JSON.')
|
||||
|
||||
key = track[u"key"]
|
||||
track_id = track[u"id"]
|
||||
artist = track[u"artist"]
|
||||
title = track[u"song"]
|
||||
key = track['key']
|
||||
track_id = track['id']
|
||||
artist = track['artist']
|
||||
title = track['song']
|
||||
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
|
||||
request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
|
||||
request = compat_urllib_request.Request(
|
||||
serve_url, '', {'Content-Type': 'application/json'})
|
||||
request.add_header('cookie', cookie)
|
||||
song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
|
||||
try:
|
||||
song_data = json.loads(song_data_json)
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
final_url = song_data[u"url"]
|
||||
song_data = self._download_json(request, track_id, 'Downloading metadata')
|
||||
final_url = song_data["url"]
|
||||
|
||||
return [{
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': "mp3",
|
||||
'title': title,
|
||||
'artist': artist,
|
||||
}]
|
||||
return {
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': 'mp3',
|
||||
'title': title,
|
||||
'uploader': artist,
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'md5': 'af01795a31f1cf7265c8657534d8077b',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'flv',
|
||||
@ -111,9 +111,8 @@ class NRKTVIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', page, 'description')
|
||||
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
|
||||
duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
|
||||
if duration:
|
||||
duration = float(duration)
|
||||
duration = float_or_none(
|
||||
self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
|
@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
@ -3,6 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
|
||||
@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1310741',
|
||||
'ext': 'mp4',
|
||||
"title": "Horny babes show their awesome bodeis and",
|
||||
"age_limit": 18,
|
||||
'title': 'Horny babes show their awesome bodeis and',
|
||||
'duration': 129,
|
||||
'upload_date': '20140508',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
murl = url.replace('://www.', '://m.')
|
||||
webpage = self._download_webpage(murl, video_id)
|
||||
formats = []
|
||||
|
||||
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://m.nuvid.com/play/%s' % video_id)
|
||||
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading %s page' % format_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||
title = self._html_search_regex(
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
url_end = self._html_search_regex(
|
||||
r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
|
||||
webpage, 'video_url')
|
||||
video_url = 'http://m.nuvid.com' + url_end
|
||||
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
||||
thumbnail = self._html_search_regex(
|
||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
||||
'formats': formats,
|
||||
}
|
@ -3,9 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
|
@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
|
@ -55,11 +55,13 @@ class TeacherTubeIE(InfoExtractor):
|
||||
|
||||
quality = qualities(['mp3', 'flv', 'mp4'])
|
||||
|
||||
_, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': media_url,
|
||||
'quality': quality(determine_ext(media_url))
|
||||
} for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1])
|
||||
} for media_url in set(media_urls)
|
||||
]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -94,8 +95,12 @@ class VeohIE(InfoExtractor):
|
||||
if video_id.startswith('v'):
|
||||
rsp = self._download_xml(
|
||||
r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
|
||||
if rsp.get('stat') == 'ok':
|
||||
stat = rsp.get('stat')
|
||||
if stat == 'ok':
|
||||
return self._extract_video(rsp.find('./videoList/video'))
|
||||
elif stat == 'fail':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
age_limit = 0
|
||||
|
69
youtube_dl/extractor/vulture.py
Normal file
69
youtube_dl/extractor/vulture.py
Normal file
@ -0,0 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VultureIE(InfoExtractor):
|
||||
IE_NAME = 'vulture.com'
|
||||
_VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
|
||||
'md5': '8d997845642a2b5152820f7257871bc8',
|
||||
'info_dict': {
|
||||
'id': '6GHRQL3RV7MSD1H4',
|
||||
'ext': 'mp4',
|
||||
'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
|
||||
'uploader_id': 'Sarah',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
'timestamp': 1401288564,
|
||||
'upload_date': '20140528',
|
||||
'description': 'Uplifting and witty, as predicted.',
|
||||
'duration': 1015,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query_string = self._search_regex(
|
||||
r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
|
||||
video_id = self._search_regex(
|
||||
r'content=([^&]+)', query_string, 'video ID')
|
||||
query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
|
||||
|
||||
query_webpage = self._download_webpage(
|
||||
query_url, display_id, note='Downloading query page')
|
||||
params_json = self._search_regex(
|
||||
r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
|
||||
query_webpage,
|
||||
'player params')
|
||||
params = json.loads(params_json)
|
||||
|
||||
upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
|
||||
uploader_id = params.get('user', {}).get('handle')
|
||||
|
||||
media_item = params['media_item']
|
||||
title = os.path.splitext(media_item['title'])[0]
|
||||
duration = int_or_none(media_item.get('duration_seconds'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': media_item['pipeline_xid'],
|
||||
'title': title,
|
||||
'timestamp': upload_timestamp,
|
||||
'thumbnail': params.get('thumbnail_url'),
|
||||
'uploader_id': uploader_id,
|
||||
'description': params.get('description'),
|
||||
'duration': duration,
|
||||
}
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.06.07'
|
||||
__version__ = '2014.06.09'
|
||||
|
Reference in New Issue
Block a user