Bump version number

Report final URL with -g and do not print URLs normally (fixes issue #49 )
Handle weird OSX locale settings gracefully (fixes issue #43 )
2025-08-03 10:59:47 -05:00 · 2010-10-31 11:24:52 +01:00 · 2010-10-31 11:24:52 +01:00 · 2010-10-31 11:24:52 +01:00 · 2010-10-31 11:24:52 +01:00 · 2010-10-31 11:24:48 +01:00
3 changed files with 333 additions and 166 deletions
--- a/.hgignore
+++ b/.hgignore
@@ -1,2 +0,0 @@
-syntax: glob
-.*.swp
--- a/1
+++ b/1
@@ -0,0 +1 @@
+2009.09.13
--- a/496
+++ b/496
@@ -19,7 +19,7 @@ import urllib
 import urllib2

 std_headers = {
-	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
+	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
 	'Accept-Language': 'en-us,en;q=0.5',
@@ -27,6 +27,22 @@ std_headers = {

 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

+def preferredencoding():
+	"""Get preferred encoding.
+
+	Returns the best encoding scheme for the system, based on
+	locale.getpreferredencoding() and some further tweaks.
+	"""
+	try:
+		pref = locale.getpreferredencoding()
+		# Mac OSX systems have this problem sometimes
+		if pref == '':
+			return 'UTF-8'
+		return pref
+	except:
+		sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n')
+		return 'UTF-8'
+
 class DownloadError(Exception):
 	"""Download Error exception.
 	
@@ -52,6 +68,29 @@ class PostProcessingError(Exception):
 	"""
 	pass

+class UnavailableFormatError(Exception):
+	"""Unavailable Format exception.
+
+	This exception will be thrown when a video is requested
+	in a format that is not available for that video.
+	"""
+	pass
+
+class ContentTooShortError(Exception):
+	"""Content Too Short exception.
+
+	This exception may be raised by FileDownloader objects when a file they
+	download is too small for what the server announced first, indicating
+	the connection was probably interrupted.
+	"""
+	# Both in bytes
+	downloaded = None
+	expected = None
+
+	def __init__(self, downloaded, expected):
+		self.downloaded = downloaded
+		self.expected = expected
+
 class FileDownloader(object):
 	"""File Downloader class.

@@ -91,6 +130,7 @@ class FileDownloader(object):
 	ignoreerrors:	Do not stop on download errors.
 	ratelimit:	Download speed limit, in bytes/sec.
 	nooverwrites:	Prevent overwriting files.
+	continuedl:	Try to continue downloads if possible.
 	"""

 	params = None
@@ -119,10 +159,12 @@ class FileDownloader(object):
 	def format_bytes(bytes):
 		if bytes is None:
 			return 'N/A'
-		if bytes == 0:
+		if type(bytes) is str:
+			bytes = float(bytes)
+		if bytes == 0.0:
 			exponent = 0
 		else:
-			exponent = long(math.log(float(bytes), 1024.0))
+			exponent = long(math.log(bytes, 1024.0))
 		suffix = 'bkMGTPEZY'[exponent]
 		converted = float(bytes) / float(1024**exponent)
 		return '%.2f%s' % (converted, suffix)
@@ -159,13 +201,13 @@ class FileDownloader(object):
 		new_min = max(bytes / 2.0, 1.0)
 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 		if elapsed_time < 0.001:
-			return int(new_max)
+			return long(new_max)
 		rate = bytes / elapsed_time
 		if rate > new_max:
-			return int(new_max)
+			return long(new_max)
 		if rate < new_min:
-			return int(new_min)
-		return int(rate)
+			return long(new_min)
+		return long(rate)

 	@staticmethod
 	def parse_bytes(bytestr):
@@ -177,6 +219,16 @@ class FileDownloader(object):
 		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 		return long(round(number * multiplier))

+	@staticmethod
+	def verify_url(url):
+		"""Verify a URL is valid and data could be downloaded. Return real data URL."""
+		request = urllib2.Request(url, None, std_headers)
+		data = urllib2.urlopen(request)
+		data.read(1)
+		url = data.geturl()
+		data.close()
+		return url
+
 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
 		self._ies.append(ie)
@@ -190,12 +242,12 @@ class FileDownloader(object):
 	def to_stdout(self, message, skip_eol=False):
 		"""Print message to stdout if not in quiet mode."""
 		if not self.params.get('quiet', False):
-			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
+			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 			sys.stdout.flush()
 	
 	def to_stderr(self, message):
 		"""Print message to stderr."""
-		print >>sys.stderr, message
+		print >>sys.stderr, message.encode(preferredencoding())
 	
 	def fixed_template(self):
 		"""Checks if the output template is fixed."""
@@ -235,6 +287,18 @@ class FileDownloader(object):
 		"""Report download progress."""
 		self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+
+	def report_resuming_byte(self, resume_len):
+		"""Report attempt to resume at given byte."""
+		self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
+	
+	def report_file_already_downloaded(self, file_name):
+		"""Report file has already been fully downloaded."""
+		self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+	
+	def report_unable_to_resume(self):
+		"""Report it was impossible to resume download."""
+		self.to_stdout(u'[download] Unable to resume')
 	
 	def report_finish(self):
 		"""Report download finished."""
@@ -242,50 +306,54 @@ class FileDownloader(object):

 	def process_info(self, info_dict):
 		"""Process a single dictionary returned by an InfoExtractor."""
-		# Forced printings
-		if self.params.get('forcetitle', False):
-			print info_dict['title'].encode(locale.getpreferredencoding())
-		if self.params.get('forceurl', False):
-			print info_dict['url'].encode(locale.getpreferredencoding())
-			
 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
-			return
+			try:
+				info_dict['url'] = self.verify_url(info_dict['url'])
+			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
+				raise UnavailableFormatError

+			# Forced printings
+			if self.params.get('forcetitle', False):
+				print info_dict['title'].encode(preferredencoding())
+			if self.params.get('forceurl', False):
+				print info_dict['url'].encode(preferredencoding())
+
+			return
+			
 		try:
-			filename = self.params['outtmpl'] % info_dict
-			self.report_destination(filename)
+			template_dict = dict(info_dict)
+			template_dict['epoch'] = unicode(long(time.time()))
+			filename = self.params['outtmpl'] % template_dict
 		except (ValueError, KeyError), err:
 			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 		if self.params['nooverwrites'] and os.path.exists(filename):
-			self.to_stderr('WARNING: file exists: %s; skipping' % filename)
+			self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 			return
+
 		try:
 			self.pmkdir(filename)
 		except (OSError, IOError), err:
 			self.trouble('ERROR: unable to create directories: %s' % str(err))
 			return
+
 		try:
-			outstream = open(filename, 'wb')
+			success = self._do_download(filename, info_dict['url'])
 		except (OSError, IOError), err:
-			self.trouble('ERROR: unable to open for writing: %s' % str(err))
-			return
-		try:
-			self._do_download(outstream, info_dict['url'])
-			outstream.close()
-		except (OSError, IOError), err:
-			self.trouble('ERROR: unable to write video data: %s' % str(err))
-			return
+			raise UnavailableFormatError
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			self.trouble('ERROR: unable to download video data: %s' % str(err))
 			return
-		try:
-			self.post_process(filename, info_dict)
-		except (PostProcessingError), err:
-			self.trouble('ERROR: postprocessing: %s' % str(err))
+		except (ContentTooShortError, ), err:
+			self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 			return

-		return
+		if success:
+			try:
+				self.post_process(filename, info_dict)
+			except (PostProcessingError), err:
+				self.trouble('ERROR: postprocessing: %s' % str(err))
+				return

 	def download(self, url_list):
 		"""Download a given list of URLs."""
@@ -322,21 +390,43 @@ class FileDownloader(object):
 			if info is None:
 				break
 	
-	def _do_download(self, stream, url):
+	def _do_download(self, filename, url):
+		stream = None
+		open_mode = 'ab'
+
+		basic_request = urllib2.Request(url, None, std_headers)
 		request = urllib2.Request(url, None, std_headers)
-		data = urllib2.urlopen(request)
+
+		# Attempt to resume download with "continuedl" option
+		if os.path.isfile(filename):
+			resume_len = os.path.getsize(filename)
+		else:
+			resume_len = 0
+		if self.params['continuedl'] and resume_len != 0:
+			self.report_resuming_byte(resume_len)
+			request.add_header('Range','bytes=%d-' % resume_len)
+
+		# Establish connection
+		try:
+			data = urllib2.urlopen(request)
+		except (urllib2.HTTPError, ), err:
+			if err.code != 416: #  416 is 'Requested range not satisfiable'
+				raise
+			data = urllib2.urlopen(basic_request)
+			content_length = data.info()['Content-Length']
+			if content_length is not None and long(content_length) == resume_len:
+				self.report_file_already_downloaded(filename)
+				return True
+			else:
+				self.report_unable_to_resume()
+				open_mode = 'wb'
+
 		data_len = data.info().get('Content-length', None)
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0
 		block_size = 1024
 		start = time.time()
 		while True:
-			# Progress message
-			percent_str = self.calc_percent(byte_counter, data_len)
-			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
-			speed_str = self.calc_speed(start, time.time(), byte_counter)
-			self.report_progress(percent_str, data_len_str, speed_str, eta_str)
-
 			# Download and write
 			before = time.time()
 			data_block = data.read(block_size)
@@ -345,15 +435,31 @@ class FileDownloader(object):
 			if data_block_len == 0:
 				break
 			byte_counter += data_block_len
+
+			# Open file just in time
+			if stream is None:
+				try:
+					stream = open(filename, open_mode)
+					self.report_destination(filename)
+				except (OSError, IOError), err:
+					self.trouble('ERROR: unable to open for writing: %s' % str(err))
+					return False
 			stream.write(data_block)
 			block_size = self.best_block_size(after - before, data_block_len)

+			# Progress message
+			percent_str = self.calc_percent(byte_counter, data_len)
+			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
+			speed_str = self.calc_speed(start, time.time(), byte_counter)
+			self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+
 			# Apply rate limit
 			self.slow_down(start, byte_counter)

 		self.report_finish()
 		if data_len is not None and str(byte_counter) != data_len:
-			raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
+			raise ContentTooShortError(byte_counter, long(data_len))
+		return True

 class InfoExtractor(object):
 	"""Information Extractor class.
@@ -424,6 +530,13 @@ class YoutubeIE(InfoExtractor):
 	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 	_NETRC_MACHINE = 'youtube'
+	_available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
+	_video_extensions = {
+		'13': '3gp',
+		'17': 'mp4',
+		'18': 'mp4',
+		'22': 'mp4',
+	}

 	@staticmethod
 	def suitable(url):
@@ -464,17 +577,17 @@ class YoutubeIE(InfoExtractor):
 		"""Report attempt to confirm age."""
 		self._downloader.to_stdout(u'[youtube] Confirming age')
 	
-	def report_webpage_download(self, video_id):
-		"""Report attempt to download webpage."""
-		self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
+	def report_video_info_webpage_download(self, video_id):
+		"""Report attempt to download video info webpage."""
+		self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
 	
 	def report_information_extraction(self, video_id):
 		"""Report attempt to extract video information."""
 		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 	
-	def report_video_url(self, video_id, video_real_url):
+	def report_unavailable_format(self, video_id, format):
 		"""Report extracted video URL."""
-		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 	
 	def _real_initialize(self):
 		if self._downloader is None:
@@ -554,70 +667,99 @@ class YoutubeIE(InfoExtractor):
 		video_id = mobj.group(2)

 		# Downloader parameters
+		best_quality = False
 		format_param = None
+		quality_index = 0
 		if self._downloader is not None:
 			params = self._downloader.params
 			format_param = params.get('format', None)
+			if format_param == '0':
+				format_param = self._available_formats[quality_index]
+				best_quality = True

-		# Extension
-		video_extension = {
-			'17': '3gp',
-			'18': 'mp4',
-			'22': 'mp4',
-		}.get(format_param, 'flv')
+		while True:
+			# Extension
+			video_extension = self._video_extensions.get(format_param, 'flv')

-		# Normalize URL, including format
-		normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
-		if format_param is not None:
-			normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
-		request = urllib2.Request(normalized_url, None, std_headers)
-		try:
-			self.report_webpage_download(video_id)
-			video_webpage = urllib2.urlopen(request).read()
-		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
-			return
-		self.report_information_extraction(video_id)
-		
-		# "t" param
-		mobj = re.search(r', "t": "([^"]+)"', video_webpage)
-		if mobj is None:
-			self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
-			return
-		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
-		if format_param is not None:
-			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-		self.report_video_url(video_id, video_real_url)
+			# Get video info
+			video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
+			request = urllib2.Request(video_info_url, None, std_headers)
+			try:
+				self.report_video_info_webpage_download(video_id)
+				video_info_webpage = urllib2.urlopen(request).read()
+			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+				return
+			self.report_information_extraction(video_id)

-		# uploader
-		mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
-		if mobj is None:
-			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-			return
-		video_uploader = mobj.group(1)
+			# "t" param
+			mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
+			if mobj is None:
+				# Attempt to see if YouTube has issued an error message
+				mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
+				if mobj is None:
+					self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
+					stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
+					stream.write(video_info_webpage)
+					stream.close()
+				else:
+					reason = urllib.unquote_plus(mobj.group(1))
+					self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
+				return
+			token = urllib.unquote(mobj.group(1))
+			video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
+			if format_param is not None:
+				video_real_url = '%s&fmt=%s' % (video_real_url, format_param)

-		# title
-		mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
-		if mobj is None:
-			self._downloader.trouble(u'ERROR: unable to extract video title')
-			return
-		video_title = mobj.group(1).decode('utf-8')
-		video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-		video_title = video_title.replace(os.sep, u'%')
+			# uploader
+			mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
+			if mobj is None:
+				self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+				return
+			video_uploader = urllib.unquote(mobj.group(1))

-		# simplified title
-		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
-		simple_title = simple_title.strip(ur'_')
+			# title
+			mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
+			if mobj is None:
+				self._downloader.trouble(u'ERROR: unable to extract video title')
+				return
+			video_title = urllib.unquote(mobj.group(1))
+			video_title = video_title.decode('utf-8')
+			video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
+			video_title = video_title.replace(os.sep, u'%')
+
+			# simplified title
+			simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+			simple_title = simple_title.strip(ur'_')
+
+			try:
+				# Process video information
+				self._downloader.process_info({
+					'id':		video_id.decode('utf-8'),
+					'url':		video_real_url.decode('utf-8'),
+					'uploader':	video_uploader.decode('utf-8'),
+					'title':	video_title,
+					'stitle':	simple_title,
+					'ext':		video_extension.decode('utf-8'),
+				})
+
+				return
+
+			except UnavailableFormatError, err:
+				if best_quality:
+					if quality_index == len(self._available_formats) - 1:
+						# I don't ever expect this to happen
+						self._downloader.trouble(u'ERROR: no known formats available for video')
+						return
+					else:
+						self.report_unavailable_format(video_id, format_param)
+						quality_index += 1
+						format_param = self._available_formats[quality_index]
+						continue
+				else: 
+					self._downloader.trouble('ERROR: format not available for video')
+					return

-		# Process video information
-		self._downloader.process_info({
-			'id':		video_id.decode('utf-8'),
-			'url':		video_real_url.decode('utf-8'),
-			'uploader':	video_uploader.decode('utf-8'),
-			'title':	video_title,
-			'stitle':	simple_title,
-			'ext':		video_extension.decode('utf-8'),
-			})

 class MetacafeIE(InfoExtractor):
 	"""Information Extractor for metacafe.com."""
@@ -703,19 +845,21 @@ class MetacafeIE(InfoExtractor):

 		# Extract URL, uploader and title from webpage
 		self.report_extraction(video_id)
-		mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage)
+		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract media URL')
 			return
 		mediaURL = urllib.unquote(mobj.group(1))

-		mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
-		if mobj is None:
-			self._downloader.trouble(u'ERROR: unable to extract gdaKey')
-			return
-		gdaKey = mobj.group(1)
+		#mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
+		#if mobj is None:
+		#	self._downloader.trouble(u'ERROR: unable to extract gdaKey')
+		#	return
+		#gdaKey = mobj.group(1)
+		#
+		#video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

-		video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
+		video_url = mediaURL

 		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 		if mobj is None:
@@ -729,15 +873,18 @@ class MetacafeIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1)

-		# Process video information
-		self._downloader.process_info({
-			'id':		video_id.decode('utf-8'),
-			'url':		video_url.decode('utf-8'),
-			'uploader':	video_uploader.decode('utf-8'),
-			'title':	video_title,
-			'stitle':	simple_title,
-			'ext':		video_extension.decode('utf-8'),
+		try:
+			# Process video information
+			self._downloader.process_info({
+				'id':		video_id.decode('utf-8'),
+				'url':		video_url.decode('utf-8'),
+				'uploader':	video_uploader.decode('utf-8'),
+				'title':	video_title,
+				'stitle':	simple_title,
+				'ext':		video_extension.decode('utf-8'),
 			})
+		except UnavailableFormatError:
+			self._downloader.trouble(u'ERROR: format not available for video')


 class YoutubeSearchIE(InfoExtractor):
@@ -745,7 +892,7 @@ class YoutubeSearchIE(InfoExtractor):
 	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
-	_MORE_PAGES_INDICATOR = r'>Next</a>'
+	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 	_youtube_ie = None
 	_max_youtube_results = 1000

@@ -780,7 +927,7 @@ class YoutubeSearchIE(InfoExtractor):
 			return
 		else:
 			try:
-				n = int(prefix)
+				n = long(prefix)
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
@@ -789,7 +936,7 @@ class YoutubeSearchIE(InfoExtractor):
 					n = self._max_youtube_results
 				self._download_n_results(query, n)
 				return
-			except ValueError: # parsing prefix as int fails
+			except ValueError: # parsing prefix as integer fails
 				self._download_n_results(query, 1)
 				return

@@ -822,7 +969,7 @@ class YoutubeSearchIE(InfoExtractor):
 							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 						return

-			if self._MORE_PAGES_INDICATOR not in page:
+			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 				for id in video_ids:
 					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 				return
@@ -832,10 +979,10 @@ class YoutubeSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
 	"""Information Extractor for YouTube playlists."""

-	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
+	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
 	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
-	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&amp;page=%s'
+	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
 	_youtube_ie = None

 	def __init__(self, youtube_ie, downloader=None):
@@ -881,7 +1028,7 @@ class YoutubePlaylistIE(InfoExtractor):
 					ids_in_page.append(mobj.group(1))
 			video_ids.extend(ids_in_page)

-			if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page:
+			if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
 				break
 			pagenum = pagenum + 1

@@ -919,7 +1066,7 @@ class PostProcessor(object):
 		"""Run the PostProcessor.

 		The "information" argument is a dictionary like the ones
-		returned by InfoExtractors. The only difference is that this
+		composed by InfoExtractors. The only difference is that this
 		one has an extra field called "filepath" that points to the
 		downloaded file.

@@ -949,76 +1096,96 @@ if __name__ == '__main__':

 		# Parse command line
 		parser = optparse.OptionParser(
-				usage='Usage: %prog [options] url...',
-				version='2009.04.25',
-				conflict_handler='resolve',
-				)
+			usage='Usage: %prog [options] url...',
+			version='2009.09.13',
+			conflict_handler='resolve',
+		)
+
 		parser.add_option('-h', '--help',
 				action='help', help='print this help text and exit')
 		parser.add_option('-v', '--version',
 				action='version', help='print program version and exit')
-		parser.add_option('-u', '--username',
-				dest='username', metavar='UN', help='account username')
-		parser.add_option('-p', '--password',
-				dest='password', metavar='PW', help='account password')
-		parser.add_option('-o', '--output',
-				dest='outtmpl', metavar='TPL', help='output filename template')
-		parser.add_option('-q', '--quiet',
-				action='store_true', dest='quiet', help='activates quiet mode', default=False)
-		parser.add_option('-s', '--simulate',
-				action='store_true', dest='simulate', help='do not download video', default=False)
-		parser.add_option('-t', '--title',
-				action='store_true', dest='usetitle', help='use title in file name', default=False)
-		parser.add_option('-l', '--literal',
-				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
-		parser.add_option('-n', '--netrc',
-				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-		parser.add_option('-g', '--get-url',
-				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-		parser.add_option('-e', '--get-title',
-				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-		parser.add_option('-f', '--format',
-				dest='format', metavar='FMT', help='video format code')
-		parser.add_option('-m', '--mobile-version',
-				action='store_const', dest='format', help='alias for -f 17', const='17')
-		parser.add_option('-d', '--high-def',
-				action='store_const', dest='format', help='alias for -f 22', const='22')
 		parser.add_option('-i', '--ignore-errors',
 				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
 		parser.add_option('-r', '--rate-limit',
 				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
-		parser.add_option('-a', '--batch-file',
+
+		authentication = optparse.OptionGroup(parser, 'Authentication Options')
+		authentication.add_option('-u', '--username',
+				dest='username', metavar='UN', help='account username')
+		authentication.add_option('-p', '--password',
+				dest='password', metavar='PW', help='account password')
+		authentication.add_option('-n', '--netrc',
+				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+		parser.add_option_group(authentication)
+
+		video_format = optparse.OptionGroup(parser, 'Video Format Options')
+		video_format.add_option('-f', '--format',
+				action='store', dest='format', metavar='FMT', help='video format code')
+		video_format.add_option('-b', '--best-quality',
+				action='store_const', dest='format', help='download the best quality video possible', const='0')
+		video_format.add_option('-m', '--mobile-version',
+				action='store_const', dest='format', help='alias for -f 17', const='17')
+		video_format.add_option('-d', '--high-def',
+				action='store_const', dest='format', help='alias for -f 22', const='22')
+		parser.add_option_group(video_format)
+
+		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+		verbosity.add_option('-q', '--quiet',
+				action='store_true', dest='quiet', help='activates quiet mode', default=False)
+		verbosity.add_option('-s', '--simulate',
+				action='store_true', dest='simulate', help='do not download video', default=False)
+		verbosity.add_option('-g', '--get-url',
+				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
+		verbosity.add_option('-e', '--get-title',
+				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
+		parser.add_option_group(verbosity)
+
+		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+		filesystem.add_option('-t', '--title',
+				action='store_true', dest='usetitle', help='use title in file name', default=False)
+		filesystem.add_option('-l', '--literal',
+				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+		filesystem.add_option('-o', '--output',
+				dest='outtmpl', metavar='TPL', help='output filename template')
+		filesystem.add_option('-a', '--batch-file',
 				dest='batchfile', metavar='F', help='file containing URLs to download')
-		parser.add_option('-w', '--no-overwrites',
+		filesystem.add_option('-w', '--no-overwrites',
 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+		filesystem.add_option('-c', '--continue',
+				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
+		parser.add_option_group(filesystem)
+
 		(opts, args) = parser.parse_args()

 		# Batch file verification
 		batchurls = []
 		if opts.batchfile is not None:
 			try:
-				batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
+				batchurls = open(opts.batchfile, 'r').readlines()
+				batchurls = [x.strip() for x in batchurls]
+				batchurls = [x for x in batchurls if len(x) > 0]
 			except IOError:
 				sys.exit(u'ERROR: batch file could not be read')
 		all_urls = batchurls + args

 		# Conflicting, missing and erroneous options
 		if len(all_urls) < 1:
-			sys.exit(u'ERROR: you must provide at least one URL')
+			parser.error(u'you must provide at least one URL')
 		if opts.usenetrc and (opts.username is not None or opts.password is not None):
-			sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
+			parser.error(u'using .netrc conflicts with giving username/password')
 		if opts.password is not None and opts.username is None:
-			sys.exit(u'ERROR: account username missing')
+			parser.error(u'account username missing')
 		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
-			sys.exit(u'ERROR: using output template conflicts with using title or literal title')
+			parser.error(u'using output template conflicts with using title or literal title')
 		if opts.usetitle and opts.useliteral:
-			sys.exit(u'ERROR: using title conflicts with using literal title')
+			parser.error(u'using title conflicts with using literal title')
 		if opts.username is not None and opts.password is None:
 			opts.password = getpass.getpass(u'Type account password and press return:')
 		if opts.ratelimit is not None:
 			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
 			if numeric_limit is None:
-				sys.exit(u'ERROR: invalid rate limit specified')
+				parser.error(u'invalid rate limit specified')
 			opts.ratelimit = numeric_limit

 		# Information extractors
@@ -1037,13 +1204,14 @@ if __name__ == '__main__':
 			'forcetitle': opts.gettitle,
 			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
 			'format': opts.format,
-			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
+			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
 				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
 				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
 				or u'%(id)s.%(ext)s'),
 			'ignoreerrors': opts.ignoreerrors,
 			'ratelimit': opts.ratelimit,
 			'nooverwrites': opts.nooverwrites,
+			'continuedl': opts.continue_dl,
 			})
 		fd.add_info_extractor(youtube_search_ie)
 		fd.add_info_extractor(youtube_pl_ie)
Author	SHA1	Message	Date
Ricardo Garcia	ad0525b3e6	Bump version number	2010-10-31 11:24:52 +01:00
Ricardo Garcia	30edbf89e4	Report final URL with -g and do not print URLs normally (fixes issue #49 )	2010-10-31 11:24:52 +01:00
Ricardo Garcia	eae2666cb4	Handle weird OSX locale settings gracefully (fixes issue #43 )	2010-10-31 11:24:52 +01:00
Ricardo Garcia	2a04438c7c	Restore INTERNAL version number	2010-10-31 11:24:52 +01:00
Ricardo Garcia	dd24ff44ab	Bump version number	2010-10-31 11:24:48 +01:00
Ricardo Garcia	304a4d85ea	Modify _MORE_PAGES_INDICATOR for searches (fixes issue #41 )	2010-10-31 11:24:48 +01:00
Ricardo Garcia	d899774377	Accept my_playlists syntax for playlists	2010-10-31 11:24:48 +01:00
Ricardo Garcia	fade05990c	Do not specify any format as a last option for the -b flag	2010-10-31 11:24:48 +01:00
Ricardo Garcia	e5b1604882	Set INTERNAL version number	2010-10-31 11:24:48 +01:00
Ricardo Garcia	0c8beb43f2	Bump version number	2010-10-31 11:24:44 +01:00
Ricardo Garcia	71b7300e63	Use get_video_info to work around captcha problems (fixes issue #31 )	2010-10-31 11:24:44 +01:00
Ricardo Garcia	8497c36d5a	Fix minor problem with size formatting method	2010-10-31 11:24:44 +01:00
Ricardo Garcia	110cd3462e	Update User-agent string	2010-10-31 11:24:43 +01:00
Ricardo Garcia	18963a36b0	Fix metacafe.com code due to recent changes in the site	2010-10-31 11:24:43 +01:00
Ricardo Garcia	df1ceb1fd9	Include format 5 in best quality list	2010-10-31 11:24:43 +01:00
Ricardo Garcia	7eb0e89742	Properly encode messages sent to stderr (fixes issue #34 )	2010-10-31 11:24:43 +01:00
Ricardo Garcia	8b07dec5f6	Bump version number	2010-10-31 11:24:40 +01:00
Ricardo Garcia	113e5266cc	Modify "more pages" check in YouTube playlist (fixes issue #29 )	2010-10-31 11:24:40 +01:00
Ricardo Garcia	55e7c75e12	Delay opening file until there is data to write Fixes issue #19.	2010-10-31 11:24:40 +01:00
Ricardo Garcia	ff21a710ae	Restore INTERNAL version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	7374795552	Bump version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	0cd61126fc	Document new "continuedl" FileDownloader option	2010-10-31 11:24:36 +01:00
Ricardo Garcia	e1f18b8a84	Remove integer casts and replace them with long integer casts	2010-10-31 11:24:36 +01:00
Ricardo Garcia	6a0015a7e0	Fix missing cast preventing detection of already downloaded file	2010-10-31 11:24:36 +01:00
Ricardo Garcia	7db85b2c70	Tweaks to ivanov's code	2010-10-31 11:24:36 +01:00
Paul Ivanov	f76c2df64e	Added -c option (--continue): interrupted downloads will properly resume and append to the previously downloaded data, instead of overwriting the file. There's some error checking - if the length of the file to be downloaded matches the length of the previously downloaded data, we report that this file has already been downloaded and do nothing. If there is some other HTTP 416 'Requested range not satisfiable' error, we simply re-download the whole file (reverting to the original functionality). All other HTTP errors are simply raised. Resuming does not override -w (--nooverwrite), since it is not clear what should happen if file on disk is larger than file to be downloaded. Thus, -c does nothing if -w is present.	2010-10-31 11:24:36 +01:00
Ricardo Garcia	daa88ccc2e	Fix TypeError when using the -f option (fixes issue #24 )	2010-10-31 11:24:36 +01:00
Ricardo Garcia	eb5d184157	Restore INTERNAL version number	2010-10-31 11:24:36 +01:00
Ricardo Garcia	5745bfdcdc	Bump version number	2010-10-31 11:24:32 +01:00
Ricardo Garcia	320becd692	Remove trails from the "append_const" change (fixes issue #23 )	2010-10-31 11:24:32 +01:00
Ricardo Garcia	968aa88438	Only catch UnavailableFormatError in call to process_info	2010-10-31 11:24:32 +01:00
Ricardo Garcia	cbfff4db63	Verify URLs in simulate mode (fixes issue #22 )	2010-10-31 11:24:32 +01:00
Ricardo Garcia	781daeabdb	Restore "INTERNAL" version number	2010-10-31 11:24:32 +01:00
Ricardo Garcia	705804f5d1	Update version number in LATEST_VERSION	2010-10-31 11:24:24 +01:00
Ricardo Garcia	1d50e3d153	Bump version number	2010-10-31 11:24:19 +01:00
Ricardo Garcia	d69a1c9189	Handle "content too short" errors properly	2010-10-31 11:24:19 +01:00
Ricardo Garcia	488f619471	Close video file before removing it.	2010-10-31 11:24:19 +01:00
Ricardo Garcia	097ba9472b	Remove .hgignore from version tracking	2010-10-31 11:24:19 +01:00
Ricardo Garcia	554f3e284c	Add LATEST_VERSION to further ease checking which is the latest stable version	2010-10-31 11:24:19 +01:00
Ricardo Garcia	cab60d710a	Put back INTERNAL version	2010-10-31 11:24:19 +01:00
Ricardo Garcia	152edc0d4c	Set version number	2010-10-31 11:24:16 +01:00
Ricardo Garcia	b74c859d0f	Use store_const instead of append_const as the latter requires Python 2.5	2010-10-31 11:24:16 +01:00
Ricardo Garcia	0e54320009	Restore INTERNAL version number	2010-10-31 11:24:16 +01:00
Ricardo Garcia	43f35682e9	Put version number in place	2010-10-31 11:24:12 +01:00
Ricardo Garcia	ad274509aa	Add an "epoch" keyword to the output template	2010-10-31 11:24:12 +01:00
Ricardo Garcia	d09744d055	Add format 35 (flv) as second best in quality	2010-10-31 11:24:12 +01:00
Ricardo Garcia	1c76e23e0f	Move the -t and -l options to the filesystem group	2010-10-31 11:24:12 +01:00
dannycolligan	42bcd27d3b	Some consistency changes and ghost-file bugfix after discussion with rg3	2010-10-31 11:24:12 +01:00
dannycolligan	2740c509b3	Fixed ambiguity of multiple video option specifiers by disallowing it; changed some sys.exit calls to parser.error	2010-10-31 11:24:12 +01:00
dannycolligan	7b7759f5a4	Added -b option and created option groups for help prompt	2010-10-31 11:24:12 +01:00
Ricardo Garcia	8d2c83eda5	Update and correct (format,extension) table for YouTube	2010-10-31 11:24:12 +01:00
Ricardo Garcia	2f11508ada	Minor documentation change	2010-10-31 11:24:12 +01:00
Ricardo Garcia	b65740e474	Skip blank lines in batch file -- fixes issue #9	2010-10-31 11:24:12 +01:00
Ricardo Garcia	a825f0ca83	Revert version number to INTERNAL	2010-10-31 11:24:12 +01:00