mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-03 10:59:47 -05:00
Compare commits
14 Commits
2009.04.25
...
2009.05.13
| Author | SHA1 | Date | |
|---|---|---|---|
| | 152edc0d4c | | |
| | b74c859d0f | | |
| | 0e54320009 | | |
| | 43f35682e9 | | |
| | ad274509aa | | |
| | d09744d055 | | |
| | 1c76e23e0f | | |
| | 42bcd27d3b | | |
| | 2740c509b3 | | |
| | 7b7759f5a4 | | |
| | 8d2c83eda5 | | |
| | 2f11508ada | | |
| | b65740e474 | | |
| | a825f0ca83 | | |
287
youtube-dl
287
youtube-dl
@@ -52,6 +52,13 @@ class PostProcessingError(Exception):
|
||||
"""
|
||||
pass
|
||||
|
||||
class UnavailableFormatError(Exception):
|
||||
"""Unavailable Format exception.
|
||||
|
||||
This exception will be thrown when a video is requested
|
||||
in a format that is not available for that video.
|
||||
"""
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
@@ -253,40 +260,44 @@ class FileDownloader(object):
|
||||
return
|
||||
|
||||
try:
|
||||
filename = self.params['outtmpl'] % info_dict
|
||||
template_dict = dict(info_dict)
|
||||
template_dict['epoch'] = unicode(long(time.time()))
|
||||
filename = self.params['outtmpl'] % template_dict
|
||||
self.report_destination(filename)
|
||||
except (ValueError, KeyError), err:
|
||||
self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
|
||||
if self.params['nooverwrites'] and os.path.exists(filename):
|
||||
self.to_stderr('WARNING: file exists: %s; skipping' % filename)
|
||||
return
|
||||
|
||||
try:
|
||||
self.pmkdir(filename)
|
||||
except (OSError, IOError), err:
|
||||
self.trouble('ERROR: unable to create directories: %s' % str(err))
|
||||
return
|
||||
|
||||
try:
|
||||
outstream = open(filename, 'wb')
|
||||
except (OSError, IOError), err:
|
||||
self.trouble('ERROR: unable to open for writing: %s' % str(err))
|
||||
return
|
||||
|
||||
try:
|
||||
self._do_download(outstream, info_dict['url'])
|
||||
outstream.close()
|
||||
except (OSError, IOError), err:
|
||||
self.trouble('ERROR: unable to write video data: %s' % str(err))
|
||||
return
|
||||
os.remove(filename)
|
||||
raise UnavailableFormatError
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self.trouble('ERROR: unable to download video data: %s' % str(err))
|
||||
return
|
||||
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError), err:
|
||||
self.trouble('ERROR: postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
return
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
if len(url_list) > 1 and self.fixed_template():
|
||||
@@ -424,6 +435,13 @@ class YoutubeIE(InfoExtractor):
|
||||
_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
|
||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
_available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
'17': 'mp4',
|
||||
'18': 'mp4',
|
||||
'22': 'mp4',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def suitable(url):
|
||||
@@ -476,6 +494,10 @@ class YoutubeIE(InfoExtractor):
|
||||
"""Report extracted video URL."""
|
||||
self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
|
||||
|
||||
def report_unavailable_format(self, video_id, format):
|
||||
"""Report that the requested video format is not available."""
|
||||
self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
@@ -554,70 +576,91 @@ class YoutubeIE(InfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
|
||||
# Downloader parameters
|
||||
best_quality = False
|
||||
format_param = None
|
||||
quality_index = 0
|
||||
if self._downloader is not None:
|
||||
params = self._downloader.params
|
||||
format_param = params.get('format', None)
|
||||
if format_param == '0':
|
||||
format_param = self._available_formats[quality_index]
|
||||
best_quality = True
|
||||
|
||||
# Extension
|
||||
video_extension = {
|
||||
'17': '3gp',
|
||||
'18': 'mp4',
|
||||
'22': 'mp4',
|
||||
}.get(format_param, 'flv')
|
||||
while True:
|
||||
try:
|
||||
# Extension
|
||||
video_extension = self._video_extensions.get(format_param, 'flv')
|
||||
|
||||
# Normalize URL, including format
|
||||
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
|
||||
if format_param is not None:
|
||||
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
|
||||
request = urllib2.Request(normalized_url, None, std_headers)
|
||||
try:
|
||||
self.report_webpage_download(video_id)
|
||||
video_webpage = urllib2.urlopen(request).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
||||
return
|
||||
self.report_information_extraction(video_id)
|
||||
|
||||
# "t" param
|
||||
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
|
||||
return
|
||||
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
|
||||
if format_param is not None:
|
||||
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
|
||||
self.report_video_url(video_id, video_real_url)
|
||||
# Normalize URL, including format
|
||||
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
|
||||
if format_param is not None:
|
||||
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
|
||||
request = urllib2.Request(normalized_url, None, std_headers)
|
||||
try:
|
||||
self.report_webpage_download(video_id)
|
||||
video_webpage = urllib2.urlopen(request).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
||||
return
|
||||
self.report_information_extraction(video_id)
|
||||
|
||||
# "t" param
|
||||
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
|
||||
return
|
||||
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
|
||||
if format_param is not None:
|
||||
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
|
||||
self.report_video_url(video_id, video_real_url)
|
||||
|
||||
# uploader
|
||||
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
|
||||
return
|
||||
video_uploader = mobj.group(1)
|
||||
# uploader
|
||||
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
|
||||
return
|
||||
video_uploader = mobj.group(1)
|
||||
|
||||
# title
|
||||
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video title')
|
||||
return
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
|
||||
video_title = video_title.replace(os.sep, u'%')
|
||||
# title
|
||||
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video title')
|
||||
return
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
|
||||
video_title = video_title.replace(os.sep, u'%')
|
||||
|
||||
# simplified title
|
||||
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
||||
simple_title = simple_title.strip(ur'_')
|
||||
# simplified title
|
||||
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
||||
simple_title = simple_title.strip(ur'_')
|
||||
|
||||
# Process video information
|
||||
self._downloader.process_info({
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_real_url.decode('utf-8'),
|
||||
'uploader': video_uploader.decode('utf-8'),
|
||||
'title': video_title,
|
||||
'stitle': simple_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
})
|
||||
|
||||
return
|
||||
|
||||
except UnavailableFormatError, err:
|
||||
if best_quality:
|
||||
if quality_index == len(self._available_formats) - 1:
|
||||
# I don't ever expect this to happen
|
||||
self._downloader.trouble(u'ERROR: no known formats available for video')
|
||||
return
|
||||
else:
|
||||
self.report_unavailable_format(video_id, format_param)
|
||||
quality_index += 1
|
||||
format_param = self._available_formats[quality_index]
|
||||
continue
|
||||
else:
|
||||
self._downloader.trouble('ERROR: format not available for video')
|
||||
return
|
||||
|
||||
# Process video information
|
||||
self._downloader.process_info({
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_real_url.decode('utf-8'),
|
||||
'uploader': video_uploader.decode('utf-8'),
|
||||
'title': video_title,
|
||||
'stitle': simple_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
})
|
||||
|
||||
class MetacafeIE(InfoExtractor):
|
||||
"""Information Extractor for metacafe.com."""
|
||||
@@ -729,15 +772,18 @@ class MetacafeIE(InfoExtractor):
|
||||
return
|
||||
video_uploader = mobj.group(1)
|
||||
|
||||
# Process video information
|
||||
self._downloader.process_info({
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_url.decode('utf-8'),
|
||||
'uploader': video_uploader.decode('utf-8'),
|
||||
'title': video_title,
|
||||
'stitle': simple_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
try:
|
||||
# Process video information
|
||||
self._downloader.process_info({
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_url.decode('utf-8'),
|
||||
'uploader': video_uploader.decode('utf-8'),
|
||||
'title': video_title,
|
||||
'stitle': simple_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
})
|
||||
except UnavailableFormatError:
|
||||
self._downloader.trouble(u'ERROR: format not available for video')
|
||||
|
||||
|
||||
class YoutubeSearchIE(InfoExtractor):
|
||||
@@ -919,7 +965,7 @@ class PostProcessor(object):
|
||||
"""Run the PostProcessor.
|
||||
|
||||
The "information" argument is a dictionary like the ones
|
||||
returned by InfoExtractors. The only difference is that this
|
||||
composed by InfoExtractors. The only difference is that this
|
||||
one has an extra field called "filepath" that points to the
|
||||
downloaded file.
|
||||
|
||||
@@ -949,77 +995,102 @@ if __name__ == '__main__':
|
||||
|
||||
# Parse command line
|
||||
parser = optparse.OptionParser(
|
||||
usage='Usage: %prog [options] url...',
|
||||
version='2009.04.25',
|
||||
conflict_handler='resolve',
|
||||
)
|
||||
usage='Usage: %prog [options] url...',
|
||||
version='2009.05.13',
|
||||
conflict_handler='resolve',
|
||||
)
|
||||
|
||||
parser.add_option('-h', '--help',
|
||||
action='help', help='print this help text and exit')
|
||||
parser.add_option('-v', '--version',
|
||||
action='version', help='print program version and exit')
|
||||
parser.add_option('-u', '--username',
|
||||
dest='username', metavar='UN', help='account username')
|
||||
parser.add_option('-p', '--password',
|
||||
dest='password', metavar='PW', help='account password')
|
||||
parser.add_option('-o', '--output',
|
||||
dest='outtmpl', metavar='TPL', help='output filename template')
|
||||
parser.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
parser.add_option('-s', '--simulate',
|
||||
action='store_true', dest='simulate', help='do not download video', default=False)
|
||||
parser.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name', default=False)
|
||||
parser.add_option('-l', '--literal',
|
||||
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
|
||||
parser.add_option('-n', '--netrc',
|
||||
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
||||
parser.add_option('-g', '--get-url',
|
||||
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
|
||||
parser.add_option('-e', '--get-title',
|
||||
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
|
||||
parser.add_option('-f', '--format',
|
||||
dest='format', metavar='FMT', help='video format code')
|
||||
parser.add_option('-m', '--mobile-version',
|
||||
action='store_const', dest='format', help='alias for -f 17', const='17')
|
||||
parser.add_option('-d', '--high-def',
|
||||
action='store_const', dest='format', help='alias for -f 22', const='22')
|
||||
parser.add_option('-i', '--ignore-errors',
|
||||
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
|
||||
parser.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
|
||||
parser.add_option('-a', '--batch-file',
|
||||
|
||||
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
||||
authentication.add_option('-u', '--username',
|
||||
dest='username', metavar='UN', help='account username')
|
||||
authentication.add_option('-p', '--password',
|
||||
dest='password', metavar='PW', help='account password')
|
||||
authentication.add_option('-n', '--netrc',
|
||||
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
||||
parser.add_option_group(authentication)
|
||||
|
||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||
video_format.add_option('-f', '--format',
|
||||
action='append', dest='format', metavar='FMT', help='video format code')
|
||||
video_format.add_option('-b', '--best-quality',
|
||||
action='store_const', dest='format', help='download the best quality video possible', const='0')
|
||||
video_format.add_option('-m', '--mobile-version',
|
||||
action='store_const', dest='format', help='alias for -f 17', const='17')
|
||||
video_format.add_option('-d', '--high-def',
|
||||
action='store_const', dest='format', help='alias for -f 22', const='22')
|
||||
parser.add_option_group(video_format)
|
||||
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option('-s', '--simulate',
|
||||
action='store_true', dest='simulate', help='do not download video', default=False)
|
||||
verbosity.add_option('-g', '--get-url',
|
||||
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
|
||||
verbosity.add_option('-e', '--get-title',
|
||||
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
|
||||
parser.add_option_group(verbosity)
|
||||
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
|
||||
filesystem.add_option('-o', '--output',
|
||||
dest='outtmpl', metavar='TPL', help='output filename template')
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='F', help='file containing URLs to download')
|
||||
parser.add_option('-w', '--no-overwrites',
|
||||
filesystem.add_option('-w', '--no-overwrites',
|
||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||
parser.add_option_group(filesystem)
|
||||
|
||||
(opts, args) = parser.parse_args()
|
||||
|
||||
# Batch file verification
|
||||
batchurls = []
|
||||
if opts.batchfile is not None:
|
||||
try:
|
||||
batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
|
||||
batchurls = open(opts.batchfile, 'r').readlines()
|
||||
batchurls = [x.strip() for x in batchurls]
|
||||
batchurls = [x for x in batchurls if len(x) > 0]
|
||||
except IOError:
|
||||
sys.exit(u'ERROR: batch file could not be read')
|
||||
all_urls = batchurls + args
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if len(all_urls) < 1:
|
||||
sys.exit(u'ERROR: you must provide at least one URL')
|
||||
parser.error(u'you must provide at least one URL')
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
|
||||
parser.error(u'using .netrc conflicts with giving username/password')
|
||||
if opts.password is not None and opts.username is None:
|
||||
sys.exit(u'ERROR: account username missing')
|
||||
parser.error(u'account username missing')
|
||||
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
|
||||
sys.exit(u'ERROR: using output template conflicts with using title or literal title')
|
||||
parser.error(u'using output template conflicts with using title or literal title')
|
||||
if opts.usetitle and opts.useliteral:
|
||||
sys.exit(u'ERROR: using title conflicts with using literal title')
|
||||
parser.error(u'using title conflicts with using literal title')
|
||||
if opts.username is not None and opts.password is None:
|
||||
opts.password = getpass.getpass(u'Type account password and press return:')
|
||||
if opts.ratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||
if numeric_limit is None:
|
||||
sys.exit(u'ERROR: invalid rate limit specified')
|
||||
parser.error(u'invalid rate limit specified')
|
||||
opts.ratelimit = numeric_limit
|
||||
if opts.format is not None and len(opts.format) > 1:
|
||||
parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
|
||||
if opts.format is None:
|
||||
real_format = None
|
||||
else:
|
||||
real_format = opts.format[0]
|
||||
|
||||
|
||||
# Information extractors
|
||||
youtube_ie = YoutubeIE()
|
||||
@@ -1036,7 +1107,7 @@ if __name__ == '__main__':
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'simulate': (opts.simulate or opts.geturl or opts.gettitle),
|
||||
'format': opts.format,
|
||||
'format': real_format,
|
||||
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
|
||||
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
|
||||
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
|
||||
|
Reference in New Issue
Block a user