Update version number in LATEST_VERSION

Bump version number
Handle "content too short" errors properly
2025-08-02 10:30:04 -05:00 · 2010-10-31 11:24:24 +01:00 · 2010-10-31 11:24:19 +01:00 · 2010-10-31 11:24:19 +01:00 · 2010-10-31 11:24:19 +01:00 · 2010-10-31 11:24:19 +01:00
3 changed files with 338 additions and 259 deletions
--- a/.hgignore
+++ b/.hgignore
@@ -1,2 +0,0 @@
-syntax: glob
-.*.swp
--- a/1
+++ b/1
@@ -0,0 +1 @@
+2009.05.23
--- a/594
+++ b/594
@@ -19,7 +19,7 @@ import urllib
 import urllib2

 std_headers = {
-	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
+	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
 	'Accept-Language': 'en-us,en;q=0.5',
@@ -52,6 +52,29 @@ class PostProcessingError(Exception):
 	"""
 	pass

+class UnavailableFormatError(Exception):
+	"""Unavailable Format exception.
+
+	This exception will be thrown when a video is requested
+	in a format that is not available for that video.
+	"""
+	pass
+
+class ContentTooShortError(Exception):
+	"""Content Too Short exception.
+
+	This exception may be raised by FileDownloader objects when a file they
+	download is too small for what the server announced first, indicating
+	the connection was probably interrupted.
+	"""
+	# Both in bytes
+	downloaded = None
+	expected = None
+
+	def __init__(self, downloaded, expected):
+		self.downloaded = downloaded
+		self.expected = expected
+
 class FileDownloader(object):
 	"""File Downloader class.

@@ -65,9 +88,10 @@ class FileDownloader(object):
 	For this, file downloader objects have a method that allows
 	InfoExtractors to be registered in a given order. When it is passed
 	a URL, the file downloader handles it to the first InfoExtractor it
-	finds that reports being able to handle it. The InfoExtractor returns
-	all the information to the FileDownloader and the latter downloads the
-	file or does whatever it's instructed to do.
+	finds that reports being able to handle it. The InfoExtractor extracts
+	all the information about the video or videos the URL refers to, and
+	asks the FileDownloader to process the video information, possibly
+	downloading the video.

 	File downloaders accept a lot of parameters. In order not to saturate
 	the object constructor with arguments, it receives a dictionary of
@@ -95,11 +119,13 @@ class FileDownloader(object):
 	params = None
 	_ies = []
 	_pps = []
+	_download_retcode = None

 	def __init__(self, params):
 		"""Create a FileDownloader object with the given options."""
 		self._ies = []
 		self._pps = []
+		self._download_retcode = 0
 		self.params = params
 	
 	@staticmethod
@@ -187,7 +213,7 @@ class FileDownloader(object):
 	def to_stdout(self, message, skip_eol=False):
 		"""Print message to stdout if not in quiet mode."""
 		if not self.params.get('quiet', False):
-			print u'%s%s' % (message, [u'\n', u''][skip_eol]),
+			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
 			sys.stdout.flush()
 	
 	def to_stderr(self, message):
@@ -203,15 +229,13 @@ class FileDownloader(object):

 		Depending on if the downloader has been configured to ignore
 		download errors or not, this method may throw an exception or
-		not when errors are found, after printing the message. If it
-		doesn't raise, it returns an error code suitable to be returned
-		later as a program exit code to indicate error.
+		not when errors are found, after printing the message.
 		"""
 		if message is not None:
 			self.to_stderr(message)
 		if not self.params.get('ignoreerrors', False):
 			raise DownloadError(message)
-		return 1
+		self._download_retcode = 1

 	def slow_down(self, start_time, byte_counter):
 		"""Sleep if the download speed is over the rate limit."""
@@ -239,77 +263,86 @@ class FileDownloader(object):
 		"""Report download finished."""
 		self.to_stdout(u'')

+	def process_info(self, info_dict):
+		"""Process a single dictionary returned by an InfoExtractor."""
+		# Forced printings
+		if self.params.get('forcetitle', False):
+			print info_dict['title'].encode(locale.getpreferredencoding())
+		if self.params.get('forceurl', False):
+			print info_dict['url'].encode(locale.getpreferredencoding())
+			
+		# Do nothing else if in simulate mode
+		if self.params.get('simulate', False):
+			return
+
+		try:
+			template_dict = dict(info_dict)
+			template_dict['epoch'] = unicode(long(time.time()))
+			filename = self.params['outtmpl'] % template_dict
+			self.report_destination(filename)
+		except (ValueError, KeyError), err:
+			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
+		if self.params['nooverwrites'] and os.path.exists(filename):
+			self.to_stderr('WARNING: file exists: %s; skipping' % filename)
+			return
+
+		try:
+			self.pmkdir(filename)
+		except (OSError, IOError), err:
+			self.trouble('ERROR: unable to create directories: %s' % str(err))
+			return
+
+		try:
+			outstream = open(filename, 'wb')
+		except (OSError, IOError), err:
+			self.trouble('ERROR: unable to open for writing: %s' % str(err))
+			return
+
+		try:
+			self._do_download(outstream, info_dict['url'])
+			outstream.close()
+		except (OSError, IOError), err:
+			outstream.close()
+			os.remove(filename)
+			raise UnavailableFormatError
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self.trouble('ERROR: unable to download video data: %s' % str(err))
+			return
+		except (ContentTooShortError, ), err:
+			self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+			return
+
+		try:
+			self.post_process(filename, info_dict)
+		except (PostProcessingError), err:
+			self.trouble('ERROR: postprocessing: %s' % str(err))
+			return
+
 	def download(self, url_list):
 		"""Download a given list of URLs."""
-		retcode = 0
 		if len(url_list) > 1 and self.fixed_template():
 			raise SameFileError(self.params['outtmpl'])

 		for url in url_list:
 			suitable_found = False
 			for ie in self._ies:
+				# Go to next InfoExtractor if not suitable
 				if not ie.suitable(url):
 					continue
+
 				# Suitable InfoExtractor found
 				suitable_found = True
-				all_results = ie.extract(url)
-				results = [x for x in all_results if x is not None]
-				if len(results) != len(all_results):
-					retcode = self.trouble()

-				if len(results) > 1 and self.fixed_template():
-					raise SameFileError(self.params['outtmpl'])
-
-				for result in results:
-					# Forced printings
-					if self.params.get('forcetitle', False):
-						print result['title']
-					if self.params.get('forceurl', False):
-						print result['url']
-						
-					# Do nothing else if in simulate mode
-					if self.params.get('simulate', False):
-						continue
-
-					try:
-						filename = self.params['outtmpl'] % result
-						self.report_destination(filename)
-					except (ValueError, KeyError), err:
-						retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-						continue
-					if self.params['nooverwrites'] and os.path.exists(filename):
-						self.to_stderr('WARNING: file exists: %s; skipping' % filename)
-						continue
-					try:
-						self.pmkdir(filename)
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to create directories: %s' % str(err))
-						continue
-					try:
-						outstream = open(filename, 'wb')
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err))
-						continue
-					try:
-						self._do_download(outstream, result['url'])
-						outstream.close()
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to write video data: %s' % str(err))
-						continue
-					except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-						retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
-						continue
-					try:
-						self.post_process(filename, result)
-					except (PostProcessingError), err:
-						retcode = self.trouble('ERROR: postprocessing: %s' % str(err))
-						continue
+				# Extract information from URL and process it
+				ie.extract(url)

+				# Suitable InfoExtractor has been found; go to next URL
 				break
-			if not suitable_found:
-				retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

-		return retcode
+			if not suitable_found:
+				self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
+
+		return self._download_retcode

 	def post_process(self, filename, ie_info):
 		"""Run the postprocessing chain on the given file."""
@@ -351,7 +384,7 @@ class FileDownloader(object):

 		self.report_finish()
 		if data_len is not None and str(byte_counter) != data_len:
-			raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
+			raise ContentTooShortError(byte_counter, long(data_len))

 class InfoExtractor(object):
 	"""Information Extractor class.
@@ -359,9 +392,10 @@ class InfoExtractor(object):
 	Information extractors are the classes that, given a URL, extract
 	information from the video (or videos) the URL refers to. This
 	information includes the real video URL, the video title and simplified
-	title, author and others. It is returned in a list of dictionaries when
-	calling its extract() method. It is a list because a URL can refer to
-	more than one video (think of playlists). The dictionaries must include
+	title, author and others. The information is stored in a dictionary
+	which is then passed to the FileDownloader. The FileDownloader
+	processes this information possibly downloading the video to the file
+	system, among other possible outcomes. The dictionaries must include
 	the following fields:

 	id:		Video identifier.
@@ -405,15 +439,6 @@ class InfoExtractor(object):
 		"""Sets the downloader for this IE."""
 		self._downloader = downloader
 	
-	def to_stdout(self, message):
-		"""Print message to stdout if downloader is not in quiet mode."""
-		if self._downloader is None or not self._downloader.params.get('quiet', False):
-			print message
-	
-	def to_stderr(self, message):
-		"""Print message to stderr."""
-		print >>sys.stderr, message
-
 	def _real_initialize(self):
 		"""Real initialization process. Redefine in subclasses."""
 		pass
@@ -430,6 +455,13 @@ class YoutubeIE(InfoExtractor):
 	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 	_NETRC_MACHINE = 'youtube'
+	_available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag
+	_video_extensions = {
+		'13': '3gp',
+		'17': 'mp4',
+		'18': 'mp4',
+		'22': 'mp4',
+	}

 	@staticmethod
 	def suitable(url):
@@ -460,27 +492,31 @@ class YoutubeIE(InfoExtractor):

 	def report_lang(self):
 		"""Report attempt to set language."""
-		self.to_stdout(u'[youtube] Setting language')
+		self._downloader.to_stdout(u'[youtube] Setting language')

 	def report_login(self):
 		"""Report attempt to log in."""
-		self.to_stdout(u'[youtube] Logging in')
+		self._downloader.to_stdout(u'[youtube] Logging in')
 	
 	def report_age_confirmation(self):
 		"""Report attempt to confirm age."""
-		self.to_stdout(u'[youtube] Confirming age')
+		self._downloader.to_stdout(u'[youtube] Confirming age')
 	
 	def report_webpage_download(self, video_id):
 		"""Report attempt to download webpage."""
-		self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
+		self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
 	
 	def report_information_extraction(self, video_id):
 		"""Report attempt to extract video information."""
-		self.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
+		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 	
 	def report_video_url(self, video_id, video_real_url):
 		"""Report extracted video URL."""
-		self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+	
+	def report_unavailable_format(self, video_id, format):
+		"""Report that the requested format is not available for the video."""
+		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 	
 	def _real_initialize(self):
 		if self._downloader is None:
@@ -503,7 +539,7 @@ class YoutubeIE(InfoExtractor):
 				else:
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 			except (IOError, netrc.NetrcParseError), err:
-				self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 				return

 		# Set language
@@ -512,7 +548,7 @@ class YoutubeIE(InfoExtractor):
 			self.report_lang()
 			urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 			return

 		# No authentication to be performed
@@ -532,10 +568,10 @@ class YoutubeIE(InfoExtractor):
 			self.report_login()
 			login_results = urllib2.urlopen(request).read()
 			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
-				self.to_stderr(u'WARNING: unable to log in: bad username or password')
+				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 				return
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 			return
 	
 		# Confirm age
@@ -548,88 +584,110 @@ class YoutubeIE(InfoExtractor):
 			self.report_age_confirmation()
 			age_results = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 			return

 	def _real_extract(self, url):
 		# Extract video id from URL
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid URL: %s' % url)
-			return [None]
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return
 		video_id = mobj.group(2)

 		# Downloader parameters
+		best_quality = False
 		format_param = None
+		quality_index = 0
 		if self._downloader is not None:
 			params = self._downloader.params
 			format_param = params.get('format', None)
+			if format_param == '0':
+				format_param = self._available_formats[quality_index]
+				best_quality = True

-		# Extension
-		video_extension = {
-			'17': '3gp',
-			'18': 'mp4',
-			'22': 'mp4',
-		}.get(format_param, 'flv')
+		while True:
+			try:
+				# Extension
+				video_extension = self._video_extensions.get(format_param, 'flv')

-		# Normalize URL, including format
-		normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
-		if format_param is not None:
-			normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
-		request = urllib2.Request(normalized_url, None, std_headers)
-		try:
-			self.report_webpage_download(video_id)
-			video_webpage = urllib2.urlopen(request).read()
-		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
-			return [None]
-		self.report_information_extraction(video_id)
-		
-		# "t" param
-		mobj = re.search(r', "t": "([^"]+)"', video_webpage)
-		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract "t" parameter')
-			return [None]
-		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
-		if format_param is not None:
-			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-		self.report_video_url(video_id, video_real_url)
+				# Normalize URL, including format
+				normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
+				if format_param is not None:
+					normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
+				request = urllib2.Request(normalized_url, None, std_headers)
+				try:
+					self.report_webpage_download(video_id)
+					video_webpage = urllib2.urlopen(request).read()
+				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+					self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+					return
+				self.report_information_extraction(video_id)
+				
+				# "t" param
+				mobj = re.search(r', "t": "([^"]+)"', video_webpage)
+				if mobj is None:
+					self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
+					return
+				video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
+				if format_param is not None:
+					video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
+				self.report_video_url(video_id, video_real_url)

-		# uploader
-		mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
-		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract uploader nickname')
-			return [None]
-		video_uploader = mobj.group(1)
+				# uploader
+				mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
+				if mobj is None:
+					self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+					return
+				video_uploader = mobj.group(1)

-		# title
-		mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
-		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract video title')
-			return [None]
-		video_title = mobj.group(1).decode('utf-8')
-		video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-		video_title = video_title.replace(os.sep, u'%')
+				# title
+				mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
+				if mobj is None:
+					self._downloader.trouble(u'ERROR: unable to extract video title')
+					return
+				video_title = mobj.group(1).decode('utf-8')
+				video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
+				video_title = video_title.replace(os.sep, u'%')

-		# simplified title
-		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
-		simple_title = simple_title.strip(ur'_')
+				# simplified title
+				simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+				simple_title = simple_title.strip(ur'_')
+
+				# Process video information
+				self._downloader.process_info({
+					'id':		video_id.decode('utf-8'),
+					'url':		video_real_url.decode('utf-8'),
+					'uploader':	video_uploader.decode('utf-8'),
+					'title':	video_title,
+					'stitle':	simple_title,
+					'ext':		video_extension.decode('utf-8'),
+				})
+
+				return
+
+			except UnavailableFormatError, err:
+				if best_quality:
+					if quality_index == len(self._available_formats) - 1:
+						# I don't ever expect this to happen
+						self._downloader.trouble(u'ERROR: no known formats available for video')
+						return
+					else:
+						self.report_unavailable_format(video_id, format_param)
+						quality_index += 1
+						format_param = self._available_formats[quality_index]
+						continue
+				else: 
+					self._downloader.trouble('ERROR: format not available for video')
+					return

-		# Return information
-		return [{
-			'id':		video_id.decode('utf-8'),
-			'url':		video_real_url.decode('utf-8'),
-			'uploader':	video_uploader.decode('utf-8'),
-			'title':	video_title,
-			'stitle':	simple_title,
-			'ext':		video_extension.decode('utf-8'),
-			}]

 class MetacafeIE(InfoExtractor):
 	"""Information Extractor for metacafe.com."""

 	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
+	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 	_youtube_ie = None

 	def __init__(self, youtube_ie, downloader=None):
@@ -642,19 +700,19 @@ class MetacafeIE(InfoExtractor):

 	def report_disclaimer(self):
 		"""Report disclaimer retrieval."""
-		self.to_stdout(u'[metacafe] Retrieving disclaimer')
+		self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')

 	def report_age_confirmation(self):
 		"""Report attempt to confirm age."""
-		self.to_stdout(u'[metacafe] Confirming age')
+		self._downloader.to_stdout(u'[metacafe] Confirming age')
 	
 	def report_download_webpage(self, video_id):
 		"""Report webpage download."""
-		self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
+		self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 	
 	def report_extraction(self, video_id):
 		"""Report information extraction."""
-		self.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
+		self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)

 	def _real_initialize(self):
 		# Retrieve disclaimer
@@ -663,7 +721,7 @@ class MetacafeIE(InfoExtractor):
 			self.report_disclaimer()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 			return

 		# Confirm age
@@ -671,27 +729,28 @@ class MetacafeIE(InfoExtractor):
 			'filters': '0',
 			'submit': "Continue - I'm over 18",
 			}
-		request = urllib2.Request('http://www.metacafe.com/', urllib.urlencode(disclaimer_form), std_headers)
+		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
 		try:
 			self.report_age_confirmation()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 			return
 	
 	def _real_extract(self, url):
 		# Extract id and simplified title from URL
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid URL: %s' % url)
-			return [None]
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return

 		video_id = mobj.group(1)

 		# Check if video comes from YouTube
 		mobj2 = re.match(r'^yt-(.*)$', video_id)
 		if mobj2 is not None:
-			return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
+			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
+			return

 		simple_title = mobj.group(2).decode('utf-8')
 		video_extension = 'flv'
@@ -702,46 +761,49 @@ class MetacafeIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err))
-			return [None]
+			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+			return

 		# Extract URL, uploader and title from webpage
 		self.report_extraction(video_id)
-		mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
+		mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract media URL')
-			return [None]
-		mediaURL = mobj.group(1).replace('\\', '')
+			self._downloader.trouble(u'ERROR: unable to extract media URL')
+			return
+		mediaURL = urllib.unquote(mobj.group(1))

-		mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
+		mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract gdaKey')
-			return [None]
+			self._downloader.trouble(u'ERROR: unable to extract gdaKey')
+			return
 		gdaKey = mobj.group(1)

 		video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

 		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract title')
-			return [None]
+			self._downloader.trouble(u'ERROR: unable to extract title')
+			return
 		video_title = mobj.group(1).decode('utf-8')

-		mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
+		mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract uploader nickname')
-			return [None]
-		video_uploader = re.sub(r'<.*?>', '', mobj.group(1))
+			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+			return
+		video_uploader = mobj.group(1)

-		# Return information
-		return [{
-			'id':		video_id.decode('utf-8'),
-			'url':		video_url.decode('utf-8'),
-			'uploader':	video_uploader.decode('utf-8'),
-			'title':	video_title,
-			'stitle':	simple_title,
-			'ext':		video_extension.decode('utf-8'),
-			}]
+		try:
+			# Process video information
+			self._downloader.process_info({
+				'id':		video_id.decode('utf-8'),
+				'url':		video_url.decode('utf-8'),
+				'uploader':	video_uploader.decode('utf-8'),
+				'title':	video_title,
+				'stitle':	simple_title,
+				'ext':		video_extension.decode('utf-8'),
+			})
+		except UnavailableFormatError:
+			self._downloader.trouble(u'ERROR: format not available for video')


 class YoutubeSearchIE(InfoExtractor):
@@ -751,6 +813,7 @@ class YoutubeSearchIE(InfoExtractor):
 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
 	_MORE_PAGES_INDICATOR = r'>Next</a>'
 	_youtube_ie = None
+	_max_youtube_results = 1000

 	def __init__(self, youtube_ie, downloader=None):
 		InfoExtractor.__init__(self, downloader)
@@ -762,7 +825,7 @@ class YoutubeSearchIE(InfoExtractor):

 	def report_download_page(self, query, pagenum):
 		"""Report attempt to download playlist page with given number."""
-		self.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
+		self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

 	def _real_initialize(self):
 		self._youtube_ie.initialize()
@@ -770,24 +833,31 @@ class YoutubeSearchIE(InfoExtractor):
 	def _real_extract(self, query):
 		mobj = re.match(self._VALID_QUERY, query)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid search query "%s"' % query)
-			return [None]
+			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
+			return

 		prefix, query = query.split(':')
 		prefix = prefix[8:]
 		if prefix == '':
-			return self._download_n_results(query, 1)
+			self._download_n_results(query, 1)
+			return
 		elif prefix == 'all':
-			return self._download_n_results(query, -1)
+			self._download_n_results(query, self._max_youtube_results)
+			return
 		else:
 			try:
 				n = int(prefix)
 				if n <= 0:
-					self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query))
-					return [None]
-				return self._download_n_results(query, n)
+					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
+					return
+				elif n > self._max_youtube_results:
+					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
+					n = self._max_youtube_results
+				self._download_n_results(query, n)
+				return
 			except ValueError: # parsing prefix as int fails
-				return self._download_n_results(query, 1)
+				self._download_n_results(query, 1)
+				return

 	def _download_n_results(self, query, n):
 		"""Downloads a specified number of results for a query"""
@@ -803,8 +873,8 @@ class YoutubeSearchIE(InfoExtractor):
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
-				return [None]
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+				return

 			# Extract video identifiers
 			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
@@ -814,16 +884,14 @@ class YoutubeSearchIE(InfoExtractor):
 					already_seen.add(video_id)
 					if len(video_ids) == n:
 						# Specified n videos reached
-						information = []
 						for id in video_ids:
-							information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))
-						return information
+							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+						return

 			if self._MORE_PAGES_INDICATOR not in page:
-				information = []
 				for id in video_ids:
-					information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))
-				return information
+					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+				return

 			pagenum = pagenum + 1

@@ -846,7 +914,7 @@ class YoutubePlaylistIE(InfoExtractor):

 	def report_download_page(self, playlist_id, pagenum):
 		"""Report attempt to download playlist page with given number."""
-		self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
+		self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

 	def _real_initialize(self):
 		self._youtube_ie.initialize()
@@ -855,8 +923,8 @@ class YoutubePlaylistIE(InfoExtractor):
 		# Extract playlist id
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid url: %s' % url)
-			return [None]
+			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+			return

 		# Download playlist pages
 		playlist_id = mobj.group(1)
@@ -869,8 +937,8 @@ class YoutubePlaylistIE(InfoExtractor):
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
-				return [None]
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+				return

 			# Extract video identifiers
 			ids_in_page = []
@@ -883,10 +951,9 @@ class YoutubePlaylistIE(InfoExtractor):
 				break
 			pagenum = pagenum + 1

-		information = []
 		for id in video_ids:
-			information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))
-		return information
+			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+		return

 class PostProcessor(object):
 	"""Post Processor class.
@@ -910,15 +977,6 @@ class PostProcessor(object):
 	def __init__(self, downloader=None):
 		self._downloader = downloader

-	def to_stdout(self, message):
-		"""Print message to stdout if downloader is not in quiet mode."""
-		if self._downloader is None or not self._downloader.params.get('quiet', False):
-			print message
-	
-	def to_stderr(self, message):
-		"""Print message to stderr."""
-		print >>sys.stderr, message
-
 	def set_downloader(self, downloader):
 		"""Sets the downloader for this PP."""
 		self._downloader = downloader
@@ -927,7 +985,7 @@ class PostProcessor(object):
 		"""Run the PostProcessor.

 		The "information" argument is a dictionary like the ones
-		returned by InfoExtractors. The only difference is that this
+		composed by InfoExtractors. The only difference is that this
 		one has an extra field called "filepath" that points to the
 		downloaded file.

@@ -957,77 +1015,102 @@ if __name__ == '__main__':

 		# Parse command line
 		parser = optparse.OptionParser(
-				usage='Usage: %prog [options] url...',
-				version='2009.04.06',
-				conflict_handler='resolve',
-				)
+			usage='Usage: %prog [options] url...',
+			version='2009.05.23',
+			conflict_handler='resolve',
+		)
+
 		parser.add_option('-h', '--help',
 				action='help', help='print this help text and exit')
 		parser.add_option('-v', '--version',
 				action='version', help='print program version and exit')
-		parser.add_option('-u', '--username',
-				dest='username', metavar='UN', help='account username')
-		parser.add_option('-p', '--password',
-				dest='password', metavar='PW', help='account password')
-		parser.add_option('-o', '--output',
-				dest='outtmpl', metavar='TPL', help='output filename template')
-		parser.add_option('-q', '--quiet',
-				action='store_true', dest='quiet', help='activates quiet mode', default=False)
-		parser.add_option('-s', '--simulate',
-				action='store_true', dest='simulate', help='do not download video', default=False)
-		parser.add_option('-t', '--title',
-				action='store_true', dest='usetitle', help='use title in file name', default=False)
-		parser.add_option('-l', '--literal',
-				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
-		parser.add_option('-n', '--netrc',
-				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-		parser.add_option('-g', '--get-url',
-				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-		parser.add_option('-e', '--get-title',
-				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-		parser.add_option('-f', '--format',
-				dest='format', metavar='FMT', help='video format code')
-		parser.add_option('-m', '--mobile-version',
-				action='store_const', dest='format', help='alias for -f 17', const='17')
-		parser.add_option('-d', '--high-def',
-				action='store_const', dest='format', help='alias for -f 22', const='22')
 		parser.add_option('-i', '--ignore-errors',
 				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
 		parser.add_option('-r', '--rate-limit',
 				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
-		parser.add_option('-a', '--batch-file',
+
+		authentication = optparse.OptionGroup(parser, 'Authentication Options')
+		authentication.add_option('-u', '--username',
+				dest='username', metavar='UN', help='account username')
+		authentication.add_option('-p', '--password',
+				dest='password', metavar='PW', help='account password')
+		authentication.add_option('-n', '--netrc',
+				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+		parser.add_option_group(authentication)
+
+		video_format = optparse.OptionGroup(parser, 'Video Format Options')
+		video_format.add_option('-f', '--format',
+				action='append', dest='format', metavar='FMT', help='video format code')
+		video_format.add_option('-b', '--best-quality',
+				action='store_const', dest='format', help='download the best quality video possible', const='0')
+		video_format.add_option('-m', '--mobile-version',
+				action='store_const', dest='format', help='alias for -f 17', const='17')
+		video_format.add_option('-d', '--high-def',
+				action='store_const', dest='format', help='alias for -f 22', const='22')
+		parser.add_option_group(video_format)
+
+		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+		verbosity.add_option('-q', '--quiet',
+				action='store_true', dest='quiet', help='activates quiet mode', default=False)
+		verbosity.add_option('-s', '--simulate',
+				action='store_true', dest='simulate', help='do not download video', default=False)
+		verbosity.add_option('-g', '--get-url',
+				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
+		verbosity.add_option('-e', '--get-title',
+				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
+		parser.add_option_group(verbosity)
+
+		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+		filesystem.add_option('-t', '--title',
+				action='store_true', dest='usetitle', help='use title in file name', default=False)
+		filesystem.add_option('-l', '--literal',
+				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+		filesystem.add_option('-o', '--output',
+				dest='outtmpl', metavar='TPL', help='output filename template')
+		filesystem.add_option('-a', '--batch-file',
 				dest='batchfile', metavar='F', help='file containing URLs to download')
-		parser.add_option('-w', '--no-overwrites',
+		filesystem.add_option('-w', '--no-overwrites',
 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+		parser.add_option_group(filesystem)
+
 		(opts, args) = parser.parse_args()

 		# Batch file verification
 		batchurls = []
 		if opts.batchfile is not None:
 			try:
-				batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
+				batchurls = open(opts.batchfile, 'r').readlines()
+				batchurls = [x.strip() for x in batchurls]
+				batchurls = [x for x in batchurls if len(x) > 0]
 			except IOError:
 				sys.exit(u'ERROR: batch file could not be read')
 		all_urls = batchurls + args

 		# Conflicting, missing and erroneous options
 		if len(all_urls) < 1:
-			sys.exit(u'ERROR: you must provide at least one URL')
+			parser.error(u'you must provide at least one URL')
 		if opts.usenetrc and (opts.username is not None or opts.password is not None):
-			sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
+			parser.error(u'using .netrc conflicts with giving username/password')
 		if opts.password is not None and opts.username is None:
-			sys.exit(u'ERROR: account username missing')
+			parser.error(u'account username missing')
 		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
-			sys.exit(u'ERROR: using output template conflicts with using title or literal title')
+			parser.error(u'using output template conflicts with using title or literal title')
 		if opts.usetitle and opts.useliteral:
-			sys.exit(u'ERROR: using title conflicts with using literal title')
+			parser.error(u'using title conflicts with using literal title')
 		if opts.username is not None and opts.password is None:
 			opts.password = getpass.getpass(u'Type account password and press return:')
 		if opts.ratelimit is not None:
 			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
 			if numeric_limit is None:
-				sys.exit(u'ERROR: invalid rate limit specified')
+				parser.error(u'invalid rate limit specified')
 			opts.ratelimit = numeric_limit
+		if opts.format is not None and len(opts.format) > 1:
+			parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
+		if opts.format is None:
+			real_format = None
+		else:
+			real_format = opts.format[0]
+

 		# Information extractors
 		youtube_ie = YoutubeIE()
@@ -1036,9 +1119,6 @@ if __name__ == '__main__':
 		youtube_search_ie = YoutubeSearchIE(youtube_ie)

 		# File downloader
-		charset = locale.getdefaultlocale()[1]
-		if charset is None:
-			charset = 'ascii'
 		fd = FileDownloader({
 			'usenetrc': opts.usenetrc,
 			'username': opts.username,
@@ -1047,8 +1127,8 @@ if __name__ == '__main__':
 			'forceurl': opts.geturl,
 			'forcetitle': opts.gettitle,
 			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
-			'format': opts.format,
-			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset))
+			'format': real_format,
+			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
 				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
 				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
 				or u'%(id)s.%(ext)s'),
Author	SHA1	Message	Date
Ricardo Garcia	705804f5d1	Update version number in LATEST_VERSION	2010-10-31 11:24:24 +01:00
Ricardo Garcia	1d50e3d153	Bump version number	2010-10-31 11:24:19 +01:00
Ricardo Garcia	d69a1c9189	Handle "content too short" errors properly	2010-10-31 11:24:19 +01:00
Ricardo Garcia	488f619471	Close video file before removing it.	2010-10-31 11:24:19 +01:00
Ricardo Garcia	097ba9472b	Remove .hgignore from version tracking	2010-10-31 11:24:19 +01:00
Ricardo Garcia	554f3e284c	Add LATEST_VERSION to further ease checking which is the latest stable version	2010-10-31 11:24:19 +01:00
Ricardo Garcia	cab60d710a	Put back INTERNAL version	2010-10-31 11:24:19 +01:00
Ricardo Garcia	152edc0d4c	Set version number	2010-10-31 11:24:16 +01:00
Ricardo Garcia	b74c859d0f	Use store_const instead of append_const as the latter requires Python 2.5	2010-10-31 11:24:16 +01:00
Ricardo Garcia	0e54320009	Restore INTERNAL version number	2010-10-31 11:24:16 +01:00
Ricardo Garcia	43f35682e9	Put version number in place	2010-10-31 11:24:12 +01:00
Ricardo Garcia	ad274509aa	Add an "epoch" keyword to the output template	2010-10-31 11:24:12 +01:00
Ricardo Garcia	d09744d055	Add format 35 (flv) as second best in quality	2010-10-31 11:24:12 +01:00
Ricardo Garcia	1c76e23e0f	Move the -t and -l options to the filesystem group	2010-10-31 11:24:12 +01:00
dannycolligan	42bcd27d3b	Some consistency changes and ghost-file bugfix after discussion with rg3	2010-10-31 11:24:12 +01:00
dannycolligan	2740c509b3	Fixed ambiguity of multiple video option specifiers by disallowing it; changed some sys.exit calls to parser.error	2010-10-31 11:24:12 +01:00
dannycolligan	7b7759f5a4	Added -b option and created option groups for help prompt	2010-10-31 11:24:12 +01:00
Ricardo Garcia	8d2c83eda5	Update and correct (format,extension) table for YouTube	2010-10-31 11:24:12 +01:00
Ricardo Garcia	2f11508ada	Minor documentation change	2010-10-31 11:24:12 +01:00
Ricardo Garcia	b65740e474	Skip blank lines in batch file -- fixes issue #9	2010-10-31 11:24:12 +01:00
Ricardo Garcia	a825f0ca83	Revert version number to INTERNAL	2010-10-31 11:24:12 +01:00
Ricardo Garcia	27c3383e2d	Set version number	2010-10-31 11:24:08 +01:00
Ricardo Garcia	dbccb6cd84	Fix code for metacafe.com (this fixes issue #8 )	2010-10-31 11:24:08 +01:00
Ricardo Garcia	98164eb3b9	Fix some minor unicode-related problems	2010-10-31 11:24:08 +01:00
Ricardo Garcia	2851b2ca18	Update internal documentation to reflect the new behaviour	2010-10-31 11:24:08 +01:00
Ricardo Garcia	6f21f68629	Download videos after extracting information. This is achieved by letting the InfoExtractors instruct its downloader to process the information dictionary just after extracting the information. As a consequence, some code is simplified too.	2010-10-31 11:24:08 +01:00
Ricardo Garcia	147753eb33	Replace self._downloader.to_stderr() with self._downloader.trouble()	2010-10-31 11:24:08 +01:00
Ricardo Garcia	3aaf887e98	Put the downloader in full control of output messages	2010-10-31 11:24:08 +01:00
Ricardo Garcia	9bf386d74b	Move the downloader return code to a class member. This makes it possible to initialize it with value zero and later let the trouble() overwrite the value. It simplifies error treatment and paves the way for the InfoExtractor objects to call process_info() themselves, which should solve the issues with tor and some other problems.	2010-10-31 11:24:08 +01:00
Ricardo Garcia	2f4d18a9f7	Use getpreferredencoding() instead of getdefaultlocale(). This fixes issue #7 and is recommended after a bug report I made to the Python team: http://bugs.python.org/issue5815	2010-10-31 11:24:08 +01:00
Ricardo Garcia	b0eddb2eb4	Update User-Agent string	2010-10-31 11:24:08 +01:00
Ricardo Garcia	9cee6d9035	Minor adjustments to closely match what a web browser does	2010-10-31 11:24:08 +01:00
Ricardo Garcia	c8619e0163	Move the code to process an InfoExtractor result to its own method	2010-10-31 11:24:08 +01:00
dannycolligan	257453b92b	Added cap if user requests ytsearch number over 1000 (with warning)	2010-10-31 11:24:08 +01:00
dannyc@omega	fd9288c315	Changed ytsearchall to retrieve max 1000 results	2010-10-31 11:24:07 +01:00
Ricardo Garcia	1db4ff6054	Restore internal version number indicator	2010-10-31 11:24:07 +01:00