Bump version number

Modify "more pages" check in YouTube playlist (fixes issue #29 )
Delay opening file until there is data to write
2025-08-03 19:09:58 -05:00 · 2010-10-31 11:24:40 +01:00 · 2010-10-31 11:24:40 +01:00 · 2010-10-31 11:24:40 +01:00 · 2010-10-31 11:24:36 +01:00 · 2010-10-31 11:24:36 +01:00
2 changed files with 80 additions and 35 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-2009.05.25
+2009.06.29
--- a/113
+++ b/113
@@ -114,6 +114,7 @@ class FileDownloader(object):
 	ignoreerrors:	Do not stop on download errors.
 	ratelimit:	Download speed limit, in bytes/sec.
 	nooverwrites:	Prevent overwriting files.
+	continuedl:	Try to continue downloads if possible.
 	"""

 	params = None
@@ -182,13 +183,13 @@ class FileDownloader(object):
 		new_min = max(bytes / 2.0, 1.0)
 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 		if elapsed_time < 0.001:
-			return int(new_max)
+			return long(new_max)
 		rate = bytes / elapsed_time
 		if rate > new_max:
-			return int(new_max)
+			return long(new_max)
 		if rate < new_min:
-			return int(new_min)
-		return int(rate)
+			return long(new_min)
+		return long(rate)

 	@staticmethod
 	def parse_bytes(bytestr):
@@ -266,6 +267,18 @@ class FileDownloader(object):
 		"""Report download progress."""
 		self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+
+	def report_resuming_byte(self, resume_len):
+		"""Report attempt to resume at given byte."""
+		self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
+	
+	def report_file_already_downloaded(self, file_name):
+		"""Report file has already been fully downloaded."""
+		self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+	
+	def report_unable_to_resume(self):
+		"""Report it was impossible to resume download."""
+		self.to_stdout(u'[download] Unable to resume')
 	
 	def report_finish(self):
 		"""Report download finished."""
@@ -292,7 +305,6 @@ class FileDownloader(object):
 			template_dict = dict(info_dict)
 			template_dict['epoch'] = unicode(long(time.time()))
 			filename = self.params['outtmpl'] % template_dict
-			self.report_destination(filename)
 		except (ValueError, KeyError), err:
 			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 		if self.params['nooverwrites'] and os.path.exists(filename):
@@ -306,17 +318,8 @@ class FileDownloader(object):
 			return

 		try:
-			outstream = open(filename, 'wb')
+			success = self._do_download(filename, info_dict['url'])
 		except (OSError, IOError), err:
-			self.trouble('ERROR: unable to open for writing: %s' % str(err))
-			return
-
-		try:
-			self._do_download(outstream, info_dict['url'])
-			outstream.close()
-		except (OSError, IOError), err:
-			outstream.close()
-			os.remove(filename)
 			raise UnavailableFormatError
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			self.trouble('ERROR: unable to download video data: %s' % str(err))
@@ -325,11 +328,12 @@ class FileDownloader(object):
 			self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 			return

-		try:
-			self.post_process(filename, info_dict)
-		except (PostProcessingError), err:
-			self.trouble('ERROR: postprocessing: %s' % str(err))
-			return
+		if success:
+			try:
+				self.post_process(filename, info_dict)
+			except (PostProcessingError), err:
+				self.trouble('ERROR: postprocessing: %s' % str(err))
+				return

 	def download(self, url_list):
 		"""Download a given list of URLs."""
@@ -366,21 +370,43 @@ class FileDownloader(object):
 			if info is None:
 				break
 	
-	def _do_download(self, stream, url):
+	def _do_download(self, filename, url):
+		stream = None
+		open_mode = 'ab'
+
+		basic_request = urllib2.Request(url, None, std_headers)
 		request = urllib2.Request(url, None, std_headers)
-		data = urllib2.urlopen(request)
+
+		# Attempt to resume download with "continuedl" option
+		if os.path.isfile(filename):
+			resume_len = os.path.getsize(filename)
+		else:
+			resume_len = 0
+		if self.params['continuedl'] and resume_len != 0:
+			self.report_resuming_byte(resume_len)
+			request.add_header('Range','bytes=%d-' % resume_len)
+
+		# Establish connection
+		try:
+			data = urllib2.urlopen(request)
+		except (urllib2.HTTPError, ), err:
+			if err.code != 416: #  416 is 'Requested range not satisfiable'
+				raise
+			data = urllib2.urlopen(basic_request)
+			content_length = data.info()['Content-Length']
+			if content_length is not None and long(content_length) == resume_len:
+				self.report_file_already_downloaded(filename)
+				return True
+			else:
+				self.report_unable_to_resume()
+				open_mode = 'wb'
+
 		data_len = data.info().get('Content-length', None)
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0
 		block_size = 1024
 		start = time.time()
 		while True:
-			# Progress message
-			percent_str = self.calc_percent(byte_counter, data_len)
-			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
-			speed_str = self.calc_speed(start, time.time(), byte_counter)
-			self.report_progress(percent_str, data_len_str, speed_str, eta_str)
-
 			# Download and write
 			before = time.time()
 			data_block = data.read(block_size)
@@ -389,15 +415,31 @@ class FileDownloader(object):
 			if data_block_len == 0:
 				break
 			byte_counter += data_block_len
+
+			# Open file just in time
+			if stream is None:
+				try:
+					stream = open(filename, open_mode)
+					self.report_destination(filename)
+				except (OSError, IOError), err:
+					self.trouble('ERROR: unable to open for writing: %s' % str(err))
+					return False
 			stream.write(data_block)
 			block_size = self.best_block_size(after - before, data_block_len)

+			# Progress message
+			percent_str = self.calc_percent(byte_counter, data_len)
+			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
+			speed_str = self.calc_speed(start, time.time(), byte_counter)
+			self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+
 			# Apply rate limit
 			self.slow_down(start, byte_counter)

 		self.report_finish()
 		if data_len is not None and str(byte_counter) != data_len:
 			raise ContentTooShortError(byte_counter, long(data_len))
+		return True

 class InfoExtractor(object):
 	"""Information Extractor class.
@@ -859,7 +901,7 @@ class YoutubeSearchIE(InfoExtractor):
 			return
 		else:
 			try:
-				n = int(prefix)
+				n = long(prefix)
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
@@ -868,7 +910,7 @@ class YoutubeSearchIE(InfoExtractor):
 					n = self._max_youtube_results
 				self._download_n_results(query, n)
 				return
-			except ValueError: # parsing prefix as int fails
+			except ValueError: # parsing prefix as integer fails
 				self._download_n_results(query, 1)
 				return

@@ -914,7 +956,7 @@ class YoutubePlaylistIE(InfoExtractor):
 	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
 	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
-	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&amp;page=%s'
+	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
 	_youtube_ie = None

 	def __init__(self, youtube_ie, downloader=None):
@@ -960,7 +1002,7 @@ class YoutubePlaylistIE(InfoExtractor):
 					ids_in_page.append(mobj.group(1))
 			video_ids.extend(ids_in_page)

-			if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page:
+			if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
 				break
 			pagenum = pagenum + 1

@@ -1029,7 +1071,7 @@ if __name__ == '__main__':
 		# Parse command line
 		parser = optparse.OptionParser(
 			usage='Usage: %prog [options] url...',
-			version='2009.05.25',
+			version='2009.06.29',
 			conflict_handler='resolve',
 		)

@@ -1053,7 +1095,7 @@ if __name__ == '__main__':

 		video_format = optparse.OptionGroup(parser, 'Video Format Options')
 		video_format.add_option('-f', '--format',
-				action='append', dest='format', metavar='FMT', help='video format code')
+				action='store', dest='format', metavar='FMT', help='video format code')
 		video_format.add_option('-b', '--best-quality',
 				action='store_const', dest='format', help='download the best quality video possible', const='0')
 		video_format.add_option('-m', '--mobile-version',
@@ -1084,6 +1126,8 @@ if __name__ == '__main__':
 				dest='batchfile', metavar='F', help='file containing URLs to download')
 		filesystem.add_option('-w', '--no-overwrites',
 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+		filesystem.add_option('-c', '--continue',
+				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
 		parser.add_option_group(filesystem)

 		(opts, args) = parser.parse_args()
@@ -1141,6 +1185,7 @@ if __name__ == '__main__':
 			'ignoreerrors': opts.ignoreerrors,
 			'ratelimit': opts.ratelimit,
 			'nooverwrites': opts.nooverwrites,
+			'continuedl': opts.continue_dl,
 			})
 		fd.add_info_extractor(youtube_search_ie)
 		fd.add_info_extractor(youtube_pl_ie)
Author	SHA1	Message	Date
Ricardo Garcia	8b07dec5f6	Bump version number	2010-10-31 11:24:40 +01:00
Ricardo Garcia	113e5266cc	Modify "more pages" check in YouTube playlist (fixes issue #29 )	2010-10-31 11:24:40 +01:00
Ricardo Garcia	55e7c75e12	Delay opening file until there is data to write. Fixes issue #19.	2010-10-31 11:24:40 +01:00
Ricardo Garcia	ff21a710ae	Restore INTERNAL version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	7374795552	Bump version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	0cd61126fc	Document new "continuedl" FileDownloader option	2010-10-31 11:24:36 +01:00
Ricardo Garcia	e1f18b8a84	Remove integer casts and replace them with long integer casts	2010-10-31 11:24:36 +01:00
Ricardo Garcia	6a0015a7e0	Fix missing cast preventing detection of already downloaded file	2010-10-31 11:24:36 +01:00
Ricardo Garcia	7db85b2c70	Tweaks to ivanov's code	2010-10-31 11:24:36 +01:00
Paul Ivanov	f76c2df64e	Added -c option (--continue): interrupted downloads will properly resume and append to the previously downloaded data, instead of overwriting the file. There's some error checking: if the length of the file to be downloaded matches the length of the previously downloaded data, we report that this file has already been downloaded and do nothing. If there is some other HTTP 416 'Requested range not satisfiable' error, we simply re-download the whole file (reverting to the original functionality). All other HTTP errors are simply raised. Resuming does not override -w (--no-overwrites), since it is not clear what should happen if the file on disk is larger than the file to be downloaded. Thus, -c does nothing if -w is present.	2010-10-31 11:24:36 +01:00
Ricardo Garcia	daa88ccc2e	Fix TypeError when using the -f option (fixes issue #24 )	2010-10-31 11:24:36 +01:00
Ricardo Garcia	eb5d184157	Restore INTERNAL version number	2010-10-31 11:24:36 +01:00
@@ -1 +1 @@
 .05.25
 .06.29