Bump version number

Document new "continuedl" FileDownloader option
Remove integer casts and replace them with long integer casts
2025-08-03 10:59:47 -05:00 · 2010-10-31 11:24:36 +01:00 · 2010-10-31 11:24:36 +01:00 · 2010-10-31 11:24:36 +01:00 · 2010-10-31 11:24:36 +01:00 · 2010-10-31 11:24:36 +01:00
2 changed files with 114 additions and 68 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-2009.05.23
+2009.05.30
--- a/180
+++ b/180
@@ -114,6 +114,7 @@ class FileDownloader(object):
 	ignoreerrors:	Do not stop on download errors.
 	ratelimit:	Download speed limit, in bytes/sec.
 	nooverwrites:	Prevent overwriting files.
+	continuedl:	Try to continue downloads if possible.
 	"""

 	params = None
@@ -182,13 +183,13 @@ class FileDownloader(object):
 		new_min = max(bytes / 2.0, 1.0)
 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 		if elapsed_time < 0.001:
-			return int(new_max)
+			return long(new_max)
 		rate = bytes / elapsed_time
 		if rate > new_max:
-			return int(new_max)
+			return long(new_max)
 		if rate < new_min:
-			return int(new_min)
-		return int(rate)
+			return long(new_min)
+		return long(rate)

 	@staticmethod
 	def parse_bytes(bytestr):
@@ -200,6 +201,14 @@ class FileDownloader(object):
 		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 		return long(round(number * multiplier))

+	@staticmethod
+	def verify_url(url):
+		"""Verify a URL is valid and data could be downloaded."""
+		request = urllib2.Request(url, None, std_headers)
+		data = urllib2.urlopen(request)
+		data.read(1)
+		data.close()
+
 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
 		self._ies.append(ie)
@@ -258,6 +267,18 @@ class FileDownloader(object):
 		"""Report download progress."""
 		self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+
+	def report_resuming_byte(self, resume_len):
+		"""Report attempt to resume at given byte."""
+		self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
+	
+	def report_file_already_downloaded(self, file_name):
+		"""Report file has already been fully downloaded."""
+		self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+	
+	def report_unable_to_resume(self):
+		"""Report it was impossible to resume download."""
+		self.to_stdout(u'[download] Unable to resume')
 	
 	def report_finish(self):
 		"""Report download finished."""
@@ -265,16 +286,21 @@ class FileDownloader(object):

 	def process_info(self, info_dict):
 		"""Process a single dictionary returned by an InfoExtractor."""
-		# Forced printings
-		if self.params.get('forcetitle', False):
-			print info_dict['title'].encode(locale.getpreferredencoding())
-		if self.params.get('forceurl', False):
-			print info_dict['url'].encode(locale.getpreferredencoding())
-			
 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
-			return
+			try:
+				self.verify_url(info_dict['url'])
+			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
+				raise UnavailableFormatError

+			# Forced printings
+			if self.params.get('forcetitle', False):
+				print info_dict['title'].encode(locale.getpreferredencoding())
+			if self.params.get('forceurl', False):
+				print info_dict['url'].encode(locale.getpreferredencoding())
+
+			return
+			
 		try:
 			template_dict = dict(info_dict)
 			template_dict['epoch'] = unicode(long(time.time()))
@@ -293,7 +319,7 @@ class FileDownloader(object):
 			return

 		try:
-			outstream = open(filename, 'wb')
+			outstream = open(filename, 'ab')
 		except (OSError, IOError), err:
 			self.trouble('ERROR: unable to open for writing: %s' % str(err))
 			return
@@ -354,8 +380,32 @@ class FileDownloader(object):
 				break
 	
 	def _do_download(self, stream, url):
+		basic_request = urllib2.Request(url, None, std_headers)
 		request = urllib2.Request(url, None, std_headers)
-		data = urllib2.urlopen(request)
+
+		# Resume transfer if filesize is non-zero
+		resume_len = stream.tell()
+		if self.params['continuedl'] and resume_len != 0:
+			self.report_resuming_byte(resume_len)
+			request.add_header('Range','bytes=%d-' % resume_len)
+		else:
+			stream.close()
+			stream = open(stream.name,'wb')
+		try:
+			data = urllib2.urlopen(request)
+		except urllib2.HTTPError, e:
+			if not e.code == 416: #  416 is 'Requested range not satisfiable'
+				raise
+			data = urllib2.urlopen(basic_request)
+			content_length = data.info()['Content-Length']
+			if content_length is not None and long(content_length) == resume_len:
+				self.report_file_already_downloaded(stream.name)
+				return
+			else:
+				self.report_unable_to_resume()
+				stream.close()
+				stream = open(stream.name,'wb')
+
 		data_len = data.info().get('Content-length', None)
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0
@@ -607,53 +657,53 @@ class YoutubeIE(InfoExtractor):
 				best_quality = True

 		while True:
+			# Extension
+			video_extension = self._video_extensions.get(format_param, 'flv')
+
+			# Normalize URL, including format
+			normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
+			if format_param is not None:
+				normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
+			request = urllib2.Request(normalized_url, None, std_headers)
 			try:
-				# Extension
-				video_extension = self._video_extensions.get(format_param, 'flv')
+				self.report_webpage_download(video_id)
+				video_webpage = urllib2.urlopen(request).read()
+			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+				self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+				return
+			self.report_information_extraction(video_id)
+			
+			# "t" param
+			mobj = re.search(r', "t": "([^"]+)"', video_webpage)
+			if mobj is None:
+				self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
+				return
+			video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
+			if format_param is not None:
+				video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
+			self.report_video_url(video_id, video_real_url)

-				# Normalize URL, including format
-				normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
-				if format_param is not None:
-					normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
-				request = urllib2.Request(normalized_url, None, std_headers)
-				try:
-					self.report_webpage_download(video_id)
-					video_webpage = urllib2.urlopen(request).read()
-				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-					self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
-					return
-				self.report_information_extraction(video_id)
-				
-				# "t" param
-				mobj = re.search(r', "t": "([^"]+)"', video_webpage)
-				if mobj is None:
-					self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
-					return
-				video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
-				if format_param is not None:
-					video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-				self.report_video_url(video_id, video_real_url)
+			# uploader
+			mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
+			if mobj is None:
+				self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+				return
+			video_uploader = mobj.group(1)

-				# uploader
-				mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
-				if mobj is None:
-					self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-					return
-				video_uploader = mobj.group(1)
+			# title
+			mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
+			if mobj is None:
+				self._downloader.trouble(u'ERROR: unable to extract video title')
+				return
+			video_title = mobj.group(1).decode('utf-8')
+			video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
+			video_title = video_title.replace(os.sep, u'%')

-				# title
-				mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
-				if mobj is None:
-					self._downloader.trouble(u'ERROR: unable to extract video title')
-					return
-				video_title = mobj.group(1).decode('utf-8')
-				video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-				video_title = video_title.replace(os.sep, u'%')
-
-				# simplified title
-				simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
-				simple_title = simple_title.strip(ur'_')
+			# simplified title
+			simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+			simple_title = simple_title.strip(ur'_')

+			try:
 				# Process video information
 				self._downloader.process_info({
 					'id':		video_id.decode('utf-8'),
@@ -846,7 +896,7 @@ class YoutubeSearchIE(InfoExtractor):
 			return
 		else:
 			try:
-				n = int(prefix)
+				n = long(prefix)
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
@@ -855,7 +905,7 @@ class YoutubeSearchIE(InfoExtractor):
 					n = self._max_youtube_results
 				self._download_n_results(query, n)
 				return
-			except ValueError: # parsing prefix as int fails
+			except ValueError: # parsing prefix as integer fails
 				self._download_n_results(query, 1)
 				return

@@ -1016,7 +1066,7 @@ if __name__ == '__main__':
 		# Parse command line
 		parser = optparse.OptionParser(
 			usage='Usage: %prog [options] url...',
-			version='2009.05.23',
+			version='2009.05.30',
 			conflict_handler='resolve',
 		)

@@ -1040,7 +1090,7 @@ if __name__ == '__main__':

 		video_format = optparse.OptionGroup(parser, 'Video Format Options')
 		video_format.add_option('-f', '--format',
-				action='append', dest='format', metavar='FMT', help='video format code')
+				action='store', dest='format', metavar='FMT', help='video format code')
 		video_format.add_option('-b', '--best-quality',
 				action='store_const', dest='format', help='download the best quality video possible', const='0')
 		video_format.add_option('-m', '--mobile-version',
@@ -1071,6 +1121,8 @@ if __name__ == '__main__':
 				dest='batchfile', metavar='F', help='file containing URLs to download')
 		filesystem.add_option('-w', '--no-overwrites',
 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+		filesystem.add_option('-c', '--continue',
+				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
 		parser.add_option_group(filesystem)

 		(opts, args) = parser.parse_args()
@@ -1104,13 +1156,6 @@ if __name__ == '__main__':
 			if numeric_limit is None:
 				parser.error(u'invalid rate limit specified')
 			opts.ratelimit = numeric_limit
-		if opts.format is not None and len(opts.format) > 1:
-			parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
-		if opts.format is None:
-			real_format = None
-		else:
-			real_format = opts.format[0]
-

 		# Information extractors
 		youtube_ie = YoutubeIE()
@@ -1127,7 +1172,7 @@ if __name__ == '__main__':
 			'forceurl': opts.geturl,
 			'forcetitle': opts.gettitle,
 			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
-			'format': real_format,
+			'format': opts.format,
 			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
 				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
 				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
@@ -1135,6 +1180,7 @@ if __name__ == '__main__':
 			'ignoreerrors': opts.ignoreerrors,
 			'ratelimit': opts.ratelimit,
 			'nooverwrites': opts.nooverwrites,
+			'continuedl': opts.continue_dl,
 			})
 		fd.add_info_extractor(youtube_search_ie)
 		fd.add_info_extractor(youtube_pl_ie)
Author	SHA1	Message	Date
Ricardo Garcia	7374795552	Bump version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	0cd61126fc	Document new "continuedl" FileDownloader option	2010-10-31 11:24:36 +01:00
Ricardo Garcia	e1f18b8a84	Remove integer casts and replace them with long integer casts	2010-10-31 11:24:36 +01:00
Ricardo Garcia	6a0015a7e0	Fix missing cast preventing detection of already downloaded file	2010-10-31 11:24:36 +01:00
Ricardo Garcia	7db85b2c70	Tweaks to ivanov's code	2010-10-31 11:24:36 +01:00
Paul Ivanov	f76c2df64e	Added -c option (--continue): interrupted downloads will properly resume and append to the previously downloaded data, instead of overwriting the file. There's some error checking - if the length of the file to be downloaded matches the length of the previously downloaded data, we report that this file has already been downloaded and do nothing. If there is some other HTTP 416 'Requested range not satisfiable' error, we simply re-download the whole file (reverting to the original functionality). All other HTTP errors are simply raised. Resuming does not override -w (--no-overwrites), since it is not clear what should happen if the file on disk is larger than the file to be downloaded. Thus, -c does nothing if -w is present.	2010-10-31 11:24:36 +01:00
Ricardo Garcia	daa88ccc2e	Fix TypeError when using the -f option (fixes issue #24 )	2010-10-31 11:24:36 +01:00
Ricardo Garcia	eb5d184157	Restore INTERNAL version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	5745bfdcdc	Bump version number	2010-10-31 11:24:32 +01:00
Ricardo Garcia	320becd692	Remove trails from the "append_const" change (fixes issue #23 )	2010-10-31 11:24:32 +01:00
Ricardo Garcia	968aa88438	Only catch UnavailableFormatError in call to process_info	2010-10-31 11:24:32 +01:00
Ricardo Garcia	cbfff4db63	Verify URLs in simulate mode (fixes issue #22 )	2010-10-31 11:24:32 +01:00
Ricardo Garcia	781daeabdb	Restore "INTERNAL" version number	2010-10-31 11:24:32 +01:00
@@ -1 +1 @@
 .05.23
 .05.30