Compare commits

..

14 Commits

Author SHA1 Message Date
Ricardo Garcia
152edc0d4c Set version number 2010-10-31 11:24:16 +01:00
Ricardo Garcia
b74c859d0f Use store_const instead of append_const as the latter requires Python 2.5 2010-10-31 11:24:16 +01:00
Ricardo Garcia
0e54320009 Restore INTERNAL version number 2010-10-31 11:24:16 +01:00
Ricardo Garcia
43f35682e9 Put version number in place 2010-10-31 11:24:12 +01:00
Ricardo Garcia
ad274509aa Add an "epoch" keyword to the output template 2010-10-31 11:24:12 +01:00
Ricardo Garcia
d09744d055 Add format 35 (flv) as second best in quality 2010-10-31 11:24:12 +01:00
Ricardo Garcia
1c76e23e0f Move the -t and -l options to the filesystem group 2010-10-31 11:24:12 +01:00
dannycolligan
42bcd27d3b Some consistency changes and ghost-file bugfix after discussion with rg3 2010-10-31 11:24:12 +01:00
dannycolligan
2740c509b3 Fixed ambiguity of multiple video option specifiers by disallowing it; changed some sys.exit calls to parser.error 2010-10-31 11:24:12 +01:00
dannycolligan
7b7759f5a4 Added -b option and created option groups for help prompt 2010-10-31 11:24:12 +01:00
Ricardo Garcia
8d2c83eda5 Update and correct (format,extension) table for YouTube 2010-10-31 11:24:12 +01:00
Ricardo Garcia
2f11508ada Minor documentation change 2010-10-31 11:24:12 +01:00
Ricardo Garcia
b65740e474 Skip blank lines in batch file -- fixes issue #9 2010-10-31 11:24:12 +01:00
Ricardo Garcia
a825f0ca83 Revert version number to INTERNAL 2010-10-31 11:24:12 +01:00

View File

@@ -52,6 +52,13 @@ class PostProcessingError(Exception):
"""
pass
class UnavailableFormatError(Exception):
"""Unavailable Format exception.
This exception is raised when a video is requested
in a format that is not available for that video.
In the code shown here the downloader raises it after removing the
output file when writing the video data fails, and the extractors
catch it to report the problem or to retry with another format.
"""
class FileDownloader(object):
"""File Downloader class.
@@ -253,40 +260,44 @@ class FileDownloader(object):
return
try:
filename = self.params['outtmpl'] % info_dict
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
filename = self.params['outtmpl'] % template_dict
self.report_destination(filename)
except (ValueError, KeyError), err:
self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
if self.params['nooverwrites'] and os.path.exists(filename):
self.to_stderr('WARNING: file exists: %s; skipping' % filename)
return
try:
self.pmkdir(filename)
except (OSError, IOError), err:
self.trouble('ERROR: unable to create directories: %s' % str(err))
return
try:
outstream = open(filename, 'wb')
except (OSError, IOError), err:
self.trouble('ERROR: unable to open for writing: %s' % str(err))
return
try:
self._do_download(outstream, info_dict['url'])
outstream.close()
except (OSError, IOError), err:
self.trouble('ERROR: unable to write video data: %s' % str(err))
return
os.remove(filename)
raise UnavailableFormatError
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self.trouble('ERROR: unable to download video data: %s' % str(err))
return
try:
self.post_process(filename, info_dict)
except (PostProcessingError), err:
self.trouble('ERROR: postprocessing: %s' % str(err))
return
return
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
@@ -424,6 +435,13 @@ class YoutubeIE(InfoExtractor):
_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
_available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag
_video_extensions = {
'13': '3gp',
'17': 'mp4',
'18': 'mp4',
'22': 'mp4',
}
@staticmethod
def suitable(url):
@@ -476,6 +494,10 @@ class YoutubeIE(InfoExtractor):
"""Report extracted video URL."""
self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
def report_unavailable_format(self, video_id, format):
"""Report that the given format is not available for the video."""
self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
def _real_initialize(self):
if self._downloader is None:
return
@@ -554,70 +576,91 @@ class YoutubeIE(InfoExtractor):
video_id = mobj.group(2)
# Downloader parameters
best_quality = False
format_param = None
quality_index = 0
if self._downloader is not None:
params = self._downloader.params
format_param = params.get('format', None)
if format_param == '0':
format_param = self._available_formats[quality_index]
best_quality = True
# Extension
video_extension = {
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
}.get(format_param, 'flv')
while True:
try:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'title': video_title,
'stitle': simple_title,
'ext': video_extension.decode('utf-8'),
})
return
except UnavailableFormatError, err:
if best_quality:
if quality_index == len(self._available_formats) - 1:
# I don't ever expect this to happen
self._downloader.trouble(u'ERROR: no known formats available for video')
return
else:
self.report_unavailable_format(video_id, format_param)
quality_index += 1
format_param = self._available_formats[quality_index]
continue
else:
self._downloader.trouble('ERROR: format not available for video')
return
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'title': video_title,
'stitle': simple_title,
'ext': video_extension.decode('utf-8'),
})
class MetacafeIE(InfoExtractor):
"""Information Extractor for metacafe.com."""
@@ -729,15 +772,18 @@ class MetacafeIE(InfoExtractor):
return
video_uploader = mobj.group(1)
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'title': video_title,
'stitle': simple_title,
'ext': video_extension.decode('utf-8'),
try:
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'title': video_title,
'stitle': simple_title,
'ext': video_extension.decode('utf-8'),
})
except UnavailableFormatError:
self._downloader.trouble(u'ERROR: format not available for video')
class YoutubeSearchIE(InfoExtractor):
@@ -919,7 +965,7 @@ class PostProcessor(object):
"""Run the PostProcessor.
The "information" argument is a dictionary like the ones
returned by InfoExtractors. The only difference is that this
composed by InfoExtractors. The only difference is that this
one has an extra field called "filepath" that points to the
downloaded file.
@@ -949,77 +995,102 @@ if __name__ == '__main__':
# Parse command line
parser = optparse.OptionParser(
usage='Usage: %prog [options] url...',
version='2009.04.25',
conflict_handler='resolve',
)
usage='Usage: %prog [options] url...',
version='2009.05.13',
conflict_handler='resolve',
)
parser.add_option('-h', '--help',
action='help', help='print this help text and exit')
parser.add_option('-v', '--version',
action='version', help='print program version and exit')
parser.add_option('-u', '--username',
dest='username', metavar='UN', help='account username')
parser.add_option('-p', '--password',
dest='password', metavar='PW', help='account password')
parser.add_option('-o', '--output',
dest='outtmpl', metavar='TPL', help='output filename template')
parser.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
parser.add_option('-s', '--simulate',
action='store_true', dest='simulate', help='do not download video', default=False)
parser.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
parser.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
parser.add_option('-n', '--netrc',
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
parser.add_option('-g', '--get-url',
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
parser.add_option('-e', '--get-title',
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
parser.add_option('-f', '--format',
dest='format', metavar='FMT', help='video format code')
parser.add_option('-m', '--mobile-version',
action='store_const', dest='format', help='alias for -f 17', const='17')
parser.add_option('-d', '--high-def',
action='store_const', dest='format', help='alias for -f 22', const='22')
parser.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
parser.add_option('-r', '--rate-limit',
dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
parser.add_option('-a', '--batch-file',
authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option('-u', '--username',
dest='username', metavar='UN', help='account username')
authentication.add_option('-p', '--password',
dest='password', metavar='PW', help='account password')
authentication.add_option('-n', '--netrc',
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
parser.add_option_group(authentication)
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option('-f', '--format',
action='append', dest='format', metavar='FMT', help='video format code')
video_format.add_option('-b', '--best-quality',
action='store_const', dest='format', help='download the best quality video possible', const='0')
video_format.add_option('-m', '--mobile-version',
action='store_const', dest='format', help='alias for -f 17', const='17')
video_format.add_option('-d', '--high-def',
action='store_const', dest='format', help='alias for -f 22', const='22')
parser.add_option_group(video_format)
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate',
action='store_true', dest='simulate', help='do not download video', default=False)
verbosity.add_option('-g', '--get-url',
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
verbosity.add_option('-e', '--get-title',
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
parser.add_option_group(verbosity)
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TPL', help='output filename template')
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='F', help='file containing URLs to download')
parser.add_option('-w', '--no-overwrites',
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
parser.add_option_group(filesystem)
(opts, args) = parser.parse_args()
# Batch file verification
batchurls = []
if opts.batchfile is not None:
try:
batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
batchurls = open(opts.batchfile, 'r').readlines()
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0]
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
# Conflicting, missing and erroneous options
if len(all_urls) < 1:
sys.exit(u'ERROR: you must provide at least one URL')
parser.error(u'you must provide at least one URL')
if opts.usenetrc and (opts.username is not None or opts.password is not None):
sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
sys.exit(u'ERROR: account username missing')
parser.error(u'account username missing')
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
sys.exit(u'ERROR: using output template conflicts with using title or literal title')
parser.error(u'using output template conflicts with using title or literal title')
if opts.usetitle and opts.useliteral:
sys.exit(u'ERROR: using title conflicts with using literal title')
parser.error(u'using title conflicts with using literal title')
if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
if numeric_limit is None:
sys.exit(u'ERROR: invalid rate limit specified')
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
# Resolve the single format code to hand to the downloader.
#
# opts.format has two possible shapes: a list when it was filled by -f
# (action='append'), or a plain string when it was set by -b/-m/-d
# (action='store_const'). Taking len() or [0] of the string form would
# reject '17' and '22' as "more than one flag" and would cut any
# multi-character code down to its first character, so the two shapes
# are handled separately.
if isinstance(opts.format, list):
    if len(opts.format) > 1:
        parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
    real_format = opts.format[0]
else:
    # Either None (no format flag given) or the constant stored by -b/-m/-d.
    real_format = opts.format
# Information extractors
youtube_ie = YoutubeIE()
@@ -1036,7 +1107,7 @@ if __name__ == '__main__':
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'simulate': (opts.simulate or opts.geturl or opts.gettitle),
'format': opts.format,
'format': real_format,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')