Compare commits

...

24 Commits

Author SHA1 Message Date
9cad27008b release 2015.02.18 2015-02-18 00:49:34 +01:00
11e611a7fa Extend various playlist tests 2015-02-18 00:49:10 +01:00
72c1f8de06 [bandcamp:album] Fix extractor results and associated test 2015-02-18 00:48:52 +01:00
6e99868e4c [buzzfeed] Fix playlist test case 2015-02-18 00:41:45 +01:00
4d278fde64 [ign] Amend playlist test 2015-02-18 00:38:55 +01:00
f21e915fb9 [test/helper] Render info_dict with a final comma 2015-02-18 00:38:42 +01:00
6f53c63df6 [test/helper] Only output a newline for forgotten keys if keys are really missing 2015-02-18 00:37:54 +01:00
1def5f359e [livestream] Correct playlist ID and add a test for it 2015-02-18 00:34:45 +01:00
15ec669374 [vk] Amend playlist test 2015-02-18 00:33:41 +01:00
a3fa5da496 [vimeo] Amend playlist tests 2015-02-18 00:33:31 +01:00
30965ac66a [vimeo] Prevent infinite loops if video password verification fails
We're seeing this in the tests¹ right now, which do not terminate.

¹  https://travis-ci.org/jaimeMF/youtube-dl/jobs/51135858
2015-02-18 00:27:58 +01:00
09ab40b7d1 Merge branch 'progress-as-hook2' 2015-02-17 23:41:48 +01:00
fa15607773 PEP8 fixes 2015-02-17 21:46:20 +01:00
a91a2c1a83 [downloader] Remove various unneeded assignments and imports 2015-02-17 21:44:41 +01:00
16e7711e22 [downloader/http] Remove gruesome import 2015-02-17 21:42:31 +01:00
5cda4eda72 [YoutubeDL] Use a progress hook for progress reporting
Instead of every downloader calling two helper functions, let our progress report be an ordinary progress hook like everyone else's.
Closes #4875.
2015-02-17 21:40:35 +01:00
98f000409f [radio.de] Fix extraction 2015-02-17 21:40:09 +01:00
4a8d4a53b1 [videolecturesnet] Fix rtmp stream glitches (Closes #4968) 2015-02-18 01:16:49 +06:00
4cd95bcbc3 [twitch:stream] Prefer the 'source' format (fixes #4972) 2015-02-17 18:57:01 +01:00
be24c8697f release 2015.02.17.2 2015-02-17 17:38:31 +01:00
0d93378887 [videolecturesnet] Check http format URLs (Closes #4968) 2015-02-17 22:35:27 +06:00
4069766c52 [extractor/common] Test URLs with GET 2015-02-17 22:35:27 +06:00
7010577720 release 2015.02.17.1 2015-02-17 17:35:08 +01:00
8ac27a68e6 [hls] Switch to available as a property 2015-02-17 17:35:03 +01:00
22 changed files with 231 additions and 123 deletions

View File

@ -163,12 +163,14 @@ def expect_info_dict(self, got_dict, expected_dict):
info_dict_str += ''.join( info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v)) ' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys) for k, v in test_info_dict.items() if k not in missing_keys)
info_dict_str += '\n'
if info_dict_str:
info_dict_str += '\n'
info_dict_str += ''.join( info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys) for k in missing_keys)
write_string( write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
self.assertFalse( self.assertFalse(
missing_keys, missing_keys,
'Missing keys in test definition: %s' % ( 'Missing keys in test definition: %s' % (

View File

@ -199,18 +199,25 @@ class YoutubeDL(object):
postprocessor. postprocessor.
progress_hooks: A list of functions that get called on download progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries progress, with a dictionary with the entries
* status: One of "downloading" and "finished". * status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values. Check this first and ignore unknown values.
If status is one of "downloading" or "finished", the If status is one of "downloading" or "finished", the
following properties may also be present: following properties may also be present:
* filename: The final filename (always present) * filename: The final filename (always present)
* tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk * downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown * total_bytes: Size of the whole file, None if unknown
* tmpfilename: The filename we're currently writing to * total_bytes_estimate: Guess of the eventual file size,
None if unavailable.
* elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown * eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if * speed: The download speed in bytes/second, None if
unknown unknown
* fragment_index: The counter of the currently
downloaded video fragment.
* fragment_count: The number of fragments (= individual
files that will be merged)
Progress hooks are guaranteed to be called at least once Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful. (with status "finished") if the download is successful.

View File

@ -1,4 +1,4 @@
from __future__ import unicode_literals from __future__ import division, unicode_literals
import os import os
import re import re
@ -54,6 +54,7 @@ class FileDownloader(object):
self.ydl = ydl self.ydl = ydl
self._progress_hooks = [] self._progress_hooks = []
self.params = params self.params = params
self.add_progress_hook(self.report_progress)
@staticmethod @staticmethod
def format_seconds(seconds): def format_seconds(seconds):
@ -226,42 +227,64 @@ class FileDownloader(object):
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title('youtube-dl ' + msg) self.to_console_title('youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta): def report_progress(self, s):
"""Report download progress.""" if s['status'] == 'finished':
if self.params.get('noprogress', False): if self.params.get('noprogress', False):
self.to_screen('[download] Download completed')
else:
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
if s.get('elapsed') is not None:
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
else:
msg_template = '100%% of %(_total_bytes_str)s'
self._report_progress_status(
msg_template % s, is_last_line=True)
if self.params.get('noprogress'):
return return
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
msg = ('%s of %s at %s ETA %s' % if s['status'] != 'downloading':
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time): if s.get('eta') is not None:
"""Report download finished.""" s['_eta_str'] = self.format_eta(s['eta'])
if self.params.get('noprogress', False):
self.to_screen('[download] Download completed')
else: else:
self._report_progress_status( s['_eta_str'] = 'Unknown ETA'
('100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))), if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
is_last_line=True) s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
else:
if s.get('downloaded_bytes') == 0:
s['_percent_str'] = self.format_percent(0)
else:
s['_percent_str'] = 'Unknown %'
if s.get('speed') is not None:
s['_speed_str'] = self.format_speed(s['speed'])
else:
s['_speed_str'] = 'Unknown speed'
if s.get('total_bytes') is not None:
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
elif s.get('total_bytes_estimate') is not None:
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
else:
if s.get('downloaded_bytes') is not None:
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
if s.get('elapsed'):
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
else:
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
else:
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
self._report_progress_status(msg_template % s)
def report_resuming_byte(self, resume_len): def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte.""" """Report attempt to resume at given byte."""

View File

@ -1,4 +1,4 @@
from __future__ import unicode_literals from __future__ import division, unicode_literals
import base64 import base64
import io import io
@ -15,7 +15,6 @@ from ..compat import (
from ..utils import ( from ..utils import (
struct_pack, struct_pack,
struct_unpack, struct_unpack,
format_bytes,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
xpath_text, xpath_text,
@ -252,17 +251,6 @@ class F4mFD(FileDownloader):
requested_bitrate = info_dict.get('tbr') requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest') self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read() manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
http_dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'ratelimit': self.params.get('ratelimit', None),
'test': self.params.get('test', False),
}
)
doc = etree.fromstring(manifest) doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) formats = [(int(f.attrib.get('bitrate', -1)), f)
@ -298,39 +286,65 @@ class F4mFD(FileDownloader):
# For some akamai manifests we'll need to add a query to the fragment url # For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
self.report_destination(filename)
http_dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'ratelimit': self.params.get('ratelimit', None),
'test': self.params.get('test', False),
}
)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
write_flv_header(dest_stream) write_flv_header(dest_stream)
write_metadata_tag(dest_stream, metadata) write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress # This dict stores the download progress, it's updated by the progress
# hook # hook
state = { state = {
'status': 'downloading',
'downloaded_bytes': 0, 'downloaded_bytes': 0,
'frag_counter': 0, 'frag_index': 0,
'frag_count': total_frags,
'filename': filename,
'tmpfilename': tmpfilename,
} }
start = time.time() start = time.time()
def frag_progress_hook(status): def frag_progress_hook(s):
frag_total_bytes = status.get('total_bytes', 0) if s['status'] not in ('downloading', 'finished'):
estimated_size = (state['downloaded_bytes'] + return
(total_frags - state['frag_counter']) * frag_total_bytes)
if status['status'] == 'finished': frag_total_bytes = s.get('total_bytes', 0)
if s['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes state['downloaded_bytes'] += frag_total_bytes
state['frag_counter'] += 1 state['frag_index'] += 1
progress = self.calc_percent(state['frag_counter'], total_frags)
byte_counter = state['downloaded_bytes'] estimated_size = (
(state['downloaded_bytes'] + frag_total_bytes)
/ (state['frag_index'] + 1) * total_frags)
time_now = time.time()
state['total_bytes_estimate'] = estimated_size
state['elapsed'] = time_now - start
if s['status'] == 'finished':
progress = self.calc_percent(state['frag_index'], total_frags)
else: else:
frag_downloaded_bytes = status['downloaded_bytes'] frag_downloaded_bytes = s['downloaded_bytes']
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
frag_progress = self.calc_percent(frag_downloaded_bytes, frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes) frag_total_bytes)
progress = self.calc_percent(state['frag_counter'], total_frags) progress = self.calc_percent(state['frag_index'], total_frags)
progress += frag_progress / float(total_frags) progress += frag_progress / float(total_frags)
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) state['eta'] = self.calc_eta(
self.report_progress(progress, format_bytes(estimated_size), start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
status.get('speed'), eta) state['speed'] = s.get('speed')
self._hook_progress(state)
http_dl.add_progress_hook(frag_progress_hook) http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = [] frags_filenames = []
@ -354,8 +368,8 @@ class F4mFD(FileDownloader):
frags_filenames.append(frag_filename) frags_filenames.append(frag_filename)
dest_stream.close() dest_stream.close()
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
elapsed = time.time() - start
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
for frag_file in frags_filenames: for frag_file in frags_filenames:
os.remove(frag_file) os.remove(frag_file)
@ -366,6 +380,7 @@ class F4mFD(FileDownloader):
'total_bytes': fsize, 'total_bytes': fsize,
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'elapsed': elapsed,
}) })
return True return True

View File

@ -23,7 +23,7 @@ class HlsFD(FileDownloader):
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
ffpp = FFmpegPostProcessor(downloader=self) ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available(): if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False return False
ffpp.check_version() ffpp.check_version()

View File

@ -1,10 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
import time
from socket import error as SocketError
import errno import errno
import os
import socket
import time
from .common import FileDownloader from .common import FileDownloader
from ..compat import ( from ..compat import (
@ -15,7 +14,6 @@ from ..utils import (
ContentTooShortError, ContentTooShortError,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
format_bytes,
) )
@ -102,7 +100,7 @@ class HttpFD(FileDownloader):
resume_len = 0 resume_len = 0
open_mode = 'wb' open_mode = 'wb'
break break
except SocketError as e: except socket.error as e:
if e.errno != errno.ECONNRESET: if e.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry # Connection reset is no problem, just retry
raise raise
@ -137,7 +135,6 @@ class HttpFD(FileDownloader):
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024) block_size = self.params.get('buffersize', 1024)
start = time.time() start = time.time()
@ -196,20 +193,19 @@ class HttpFD(FileDownloader):
# Progress message # Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len) speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None: if data_len is None:
eta = percent = None eta = None
else: else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({ self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter, 'downloaded_bytes': byte_counter,
'total_bytes': data_len, 'total_bytes': data_len,
'tmpfilename': tmpfilename, 'tmpfilename': tmpfilename,
'filename': filename, 'filename': filename,
'status': 'downloading',
'eta': eta, 'eta': eta,
'speed': speed, 'speed': speed,
'elapsed': now - start,
}) })
if is_test and byte_counter == data_len: if is_test and byte_counter == data_len:
@ -221,7 +217,13 @@ class HttpFD(FileDownloader):
return False return False
if tmpfilename != '-': if tmpfilename != '-':
stream.close() stream.close()
self.report_finish(data_len_str, (time.time() - start))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'status': 'error',
})
if data_len is not None and byte_counter != data_len: if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len)) raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
@ -235,6 +237,7 @@ class HttpFD(FileDownloader):
'total_bytes': byte_counter, 'total_bytes': byte_counter,
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'elapsed': time.time() - start,
}) })
return True return True

View File

@ -11,7 +11,6 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
check_executable, check_executable,
encodeFilename, encodeFilename,
format_bytes,
get_exe_version, get_exe_version,
) )
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
if not resume_percent: if not resume_percent:
resume_percent = percent resume_percent = percent
resume_downloaded_data_len = downloaded_data_len resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) time_now = time.time()
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None data_len = None
if percent > 0: if percent > 0:
data_len = int(downloaded_data_len * 100 / percent) data_len = int(downloaded_data_len * 100 / percent)
data_len_str = '~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({ self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len, 'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len, 'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename, 'tmpfilename': tmpfilename,
'filename': filename, 'filename': filename,
'status': 'downloading',
'eta': eta, 'eta': eta,
'elapsed': time_now - start,
'speed': speed, 'speed': speed,
}) })
cursor_in_new_line = False
else: else:
# no percent for live streams # no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
downloaded_data_len = int(float(mobj.group(1)) * 1024) downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time() time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len) speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({ self._hook_progress({
'downloaded_bytes': downloaded_data_len, 'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename, 'tmpfilename': tmpfilename,
'filename': filename, 'filename': filename,
'status': 'downloading', 'status': 'downloading',
'elapsed': time_now - start,
'speed': speed, 'speed': speed,
}) })
cursor_in_new_line = False
elif self.params.get('verbose', False): elif self.params.get('verbose', False):
if not cursor_in_new_line: if not cursor_in_new_line:
self.to_screen('') self.to_screen('')

View File

@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor):
}, },
], ],
'info_dict': { 'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'title': 'Rick and Morty - Pilot', 'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
} }
@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor):
} }
], ],
'info_dict': { 'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business', 'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
}, },

View File

@ -14,6 +14,9 @@ class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = { _TEST = {
"url": "http://trailers.apple.com/trailers/wb/manofsteel/", "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
'info_dict': {
'id': 'manofsteel',
},
"playlist": [ "playlist": [
{ {
"md5": "d97a8e575432dbcb81b7c3acb741f8a8", "md5": "d97a8e575432dbcb81b7c3acb741f8a8",

View File

@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor):
class BandcampAlbumIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album' IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
_TESTS = [{ _TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
], ],
'info_dict': { 'info_dict': {
'title': 'Jazz Format Mixtape vol.1', 'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo',
}, },
'params': { 'params': {
'playlistend': 2 'playlistend': 2
}, },
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' 'skip': 'Bandcamp imposes download limits.'
}, { }, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': { 'info_dict': {
'title': 'Hierophany of the Open Grave', 'title': 'Hierophany of the Open Grave',
'uploader_id': 'nightbringer',
'id': 'hierophany-of-the-open-grave',
}, },
'playlist_mincount': 9, 'playlist_mincount': 9,
}, { }, {
'url': 'http://dotscale.bandcamp.com', 'url': 'http://dotscale.bandcamp.com',
'info_dict': { 'info_dict': {
'title': 'Loom', 'title': 'Loom',
'id': 'dotscale',
'uploader_id': 'dotscale',
}, },
'playlist_mincount': 7, 'playlist_mincount': 7,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('subdomain') uploader_id = mobj.group('subdomain')
title = mobj.group('title') album_id = mobj.group('album_id')
display_id = title or playlist_id playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, playlist_id)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths: if not tracks_paths:
raise ExtractorError('The page doesn\'t contain any tracks') raise ExtractorError('The page doesn\'t contain any tracks')
@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
return { return {
'_type': 'playlist', '_type': 'playlist',
'uploader_id': uploader_id,
'id': playlist_id, 'id': playlist_id,
'display_id': display_id,
'title': title, 'title': title,
'entries': entries, 'entries': entries,
} }

View File

@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
'skip_download': True, # Got enough YouTube download tests 'skip_download': True, # Got enough YouTube download tests
}, },
'info_dict': { 'info_dict': {
'id': 'look-at-this-cute-dog-omg',
'description': 're:Munchkin the Teddy Bear is back ?!', 'description': 're:Munchkin the Teddy Bear is back ?!',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
}, },
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20141124', 'upload_date': '20141124',
'uploader_id': 'CindysMunchkin', 'uploader_id': 'CindysMunchkin',
'description': 're:© 2014 Munchkin the Shih Tzu', 'description': 're:© 2014 Munchkin the',
'uploader': 'Munchkin the Shih Tzu', 'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise', 'title': 're:Munchkin the Teddy Bear gets her exercise',
}, },
}] }]

View File

@ -27,7 +27,6 @@ from ..utils import (
compiled_regex_type, compiled_regex_type,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest,
int_or_none, int_or_none,
RegexNotFoundError, RegexNotFoundError,
sanitize_filename, sanitize_filename,
@ -753,9 +752,7 @@ class InfoExtractor(object):
def _is_valid_url(self, url, video_id, item='video'): def _is_valid_url(self, url, video_id, item='video'):
try: try:
self._request_webpage( self._request_webpage(url, video_id, 'Checking %s URL' % item)
HEADRequest(url), video_id,
'Checking %s URL' % item)
return True return True
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError): if isinstance(e.cause, compat_HTTPError):
@ -841,6 +838,7 @@ class InfoExtractor(object):
note='Downloading m3u8 information', note='Downloading m3u8 information',
errnote='Failed to download m3u8 information') errnote='Failed to download m3u8 information')
last_info = None last_info = None
last_media = None
kv_rex = re.compile( kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
for line in m3u8_doc.splitlines(): for line in m3u8_doc.splitlines():
@ -851,6 +849,13 @@ class InfoExtractor(object):
if v.startswith('"'): if v.startswith('"'):
v = v[1:-1] v = v[1:-1]
last_info[m.group('key')] = v last_info[m.group('key')] = v
elif line.startswith('#EXT-X-MEDIA:'):
last_media = {}
for m in kv_rex.finditer(line):
v = m.group('val')
if v.startswith('"'):
v = v[1:-1]
last_media[m.group('key')] = v
elif line.startswith('#') or not line.strip(): elif line.startswith('#') or not line.strip():
continue continue
else: else:
@ -879,6 +884,9 @@ class InfoExtractor(object):
width_str, height_str = resolution.split('x') width_str, height_str = resolution.split('x')
f['width'] = int(width_str) f['width'] = int(width_str)
f['height'] = int(height_str) f['height'] = int(height_str)
if last_media is not None:
f['m3u8_media'] = last_media
last_media = None
formats.append(f) formats.append(f)
last_info = {} last_info = {}
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': { 'info_dict': {
'title': 'SPORT', 'title': 'SPORT',
'id': 'xv4bw_nqtv_sport',
}, },
'playlist_mincount': 20, 'playlist_mincount': 20,
}] }]

View File

@ -473,6 +473,7 @@ class GenericIE(InfoExtractor):
{ {
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'info_dict': { 'info_dict': {
'id': '1986',
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,

View File

@ -34,6 +34,9 @@ class IGNIE(InfoExtractor):
}, },
{ {
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
'info_dict': {
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
},
'playlist': [ 'playlist': [
{ {
'info_dict': { 'info_dict': {

View File

@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor):
'url': 'http://new.livestream.com/tedx/cityenglish', 'url': 'http://new.livestream.com/tedx/cityenglish',
'info_dict': { 'info_dict': {
'title': 'TEDCity2.0 (English)', 'title': 'TEDCity2.0 (English)',
'id': '2245590',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, { }, {
@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor):
if is_relevant(video_data, video_id)] if is_relevant(video_data, video_id)]
if video_id is None: if video_id is None:
# This is an event page: # This is an event page:
return self.playlist_result(videos, info['id'], info['full_name']) return self.playlist_result(
videos, '%s' % info['id'], info['full_name'])
else: else:
if not videos: if not videos:
raise ExtractorError('Cannot find video %s' % video_id) raise ExtractorError('Cannot find video %s' % video_id)

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
from .common import InfoExtractor from .common import InfoExtractor
@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor):
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
_TEST = { _TEST = {
'url': 'http://ndr2.radio.de/', 'url': 'http://ndr2.radio.de/',
'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': { 'info_dict': {
'id': 'ndr2', 'id': 'ndr2',
'ext': 'mp3', 'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273', 'description': 'md5:591c49c702db1a33751625ebfb67f273',
'thumbnail': 're:^https?://.*\.png', 'thumbnail': 're:^https?://.*\.png',
'is_live': True,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
radio_id = self._match_id(url) radio_id = self._match_id(url)
webpage = self._download_webpage(url, radio_id) webpage = self._download_webpage(url, radio_id)
jscode = self._search_regex(
r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
webpage, 'broadcast')
broadcast = json.loads(self._search_regex( broadcast = self._parse_json(jscode, radio_id)
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
webpage, 'broadcast'))
title = self._live_title(broadcast['name']) title = self._live_title(broadcast['name'])
description = broadcast.get('description') or broadcast.get('shortDescription') description = broadcast.get('description') or broadcast.get('shortDescription')
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
formats = [{ formats = [{
'url': stream['streamUrl'], 'url': stream['streamUrl'],

View File

@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE):
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
channel_id, 'mp4') channel_id, 'mp4')
# prefer the 'source' stream, the others are limited to 30 fps
def _sort_source(f):
if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
return 1
return 0
formats = sorted(formats, key=_sort_source)
view_count = stream.get('viewers') view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at')) timestamp = parse_iso8601(stream.get('created_at'))

View File

@ -49,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor):
thumbnail = ( thumbnail = (
None if thumbnail_el is None else thumbnail_el.attrib.get('src')) None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
formats = [{ formats = []
'url': v.attrib['src'], for v in switch.findall('./video'):
'width': int_or_none(v.attrib.get('width')), proto = v.attrib.get('proto')
'height': int_or_none(v.attrib.get('height')), if proto not in ['http', 'rtmp']:
'filesize': int_or_none(v.attrib.get('size')), continue
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, f = {
'ext': v.attrib.get('ext'), 'width': int_or_none(v.attrib.get('width')),
} for v in switch.findall('./video') 'height': int_or_none(v.attrib.get('height')),
if v.attrib.get('proto') == 'http'] 'filesize': int_or_none(v.attrib.get('size')),
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
'ext': v.attrib.get('ext'),
}
src = v.attrib['src']
if proto == 'http':
if self._is_valid_url(src, video_id):
f['url'] = src
formats.append(f)
elif proto == 'rtmp':
f.update({
'url': v.attrib['streamer'],
'play_path': src,
'rtmp_real_time': True,
})
formats.append(f)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View File

@ -18,6 +18,7 @@ from ..utils import (
InAdvancePagedList, InAdvancePagedList,
int_or_none, int_or_none,
RegexNotFoundError, RegexNotFoundError,
smuggle_url,
std_headers, std_headers,
unsmuggle_url, unsmuggle_url,
urlencode_postdata, urlencode_postdata,
@ -267,8 +268,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None: if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
if data and '_video_password_verified' in data:
raise ExtractorError('video password verification failed!')
self._verify_video_password(url, video_id, webpage) self._verify_video_password(url, video_id, webpage)
return self._real_extract(url) return self._real_extract(
smuggle_url(url, {'_video_password_verified': 'verified'}))
else: else:
raise ExtractorError('Unable to extract info section', raise ExtractorError('Unable to extract info section',
cause=e) cause=e)
@ -401,6 +405,7 @@ class VimeoChannelIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://vimeo.com/channels/tributes', 'url': 'http://vimeo.com/channels/tributes',
'info_dict': { 'info_dict': {
'id': 'tributes',
'title': 'Vimeo Tributes', 'title': 'Vimeo Tributes',
}, },
'playlist_mincount': 25, 'playlist_mincount': 25,
@ -479,6 +484,7 @@ class VimeoUserIE(VimeoChannelIE):
'url': 'http://vimeo.com/nkistudio/videos', 'url': 'http://vimeo.com/nkistudio/videos',
'info_dict': { 'info_dict': {
'title': 'Nki', 'title': 'Nki',
'id': 'nkistudio',
}, },
'playlist_mincount': 66, 'playlist_mincount': 66,
}] }]
@ -496,6 +502,7 @@ class VimeoAlbumIE(VimeoChannelIE):
_TESTS = [{ _TESTS = [{
'url': 'http://vimeo.com/album/2632481', 'url': 'http://vimeo.com/album/2632481',
'info_dict': { 'info_dict': {
'id': '2632481',
'title': 'Staff Favorites: November 2013', 'title': 'Staff Favorites: November 2013',
}, },
'playlist_mincount': 13, 'playlist_mincount': 13,
@ -526,6 +533,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
_TESTS = [{ _TESTS = [{
'url': 'http://vimeo.com/groups/rolexawards', 'url': 'http://vimeo.com/groups/rolexawards',
'info_dict': { 'info_dict': {
'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise', 'title': 'Rolex Awards for Enterprise',
}, },
'playlist_mincount': 73, 'playlist_mincount': 73,
@ -608,6 +616,7 @@ class VimeoLikesIE(InfoExtractor):
'url': 'https://vimeo.com/user755559/likes/', 'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293, 'playlist_mincount': 293,
"info_dict": { "info_dict": {
'id': 'user755559_likes',
"description": "See all the videos urza likes", "description": "See all the videos urza likes",
"title": 'Videos urza likes', "title": 'Videos urza likes',
}, },

View File

@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor):
_TEMPLATE_URL = 'https://vk.com/videos' _TEMPLATE_URL = 'https://vk.com/videos'
_TEST = { _TEST = {
'url': 'http://vk.com/videos205387401', 'url': 'http://vk.com/videos205387401',
'info_dict': {
'id': '205387401',
},
'playlist_mincount': 4, 'playlist_mincount': 4,
} }

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.02.17' __version__ = '2015.02.18'