Commits

Anonymous committed 89f3977

Added Vimeo information extractor.

  • Participants
  • Parent commits 6bb8006

Comments (0)

Files changed (1)

 # Author: Ricardo Garcia Gonzalez
 # Author: Danny Colligan
 # Author: Benjamin Johnson
+# Contributor: Marian Sigler
 # License: Public domain code
 import htmlentitydefs
 import httplib
 import time
 import urllib
 import urllib2
+import xml.sax
+
 
 # parse_qs was moved from the cgi module to the urlparse module recently.
 try:
 	'Accept-Language': 'en-us,en;q=0.5',
 }
 
+mime_extension_mapping = {
+	'video/3gpp': '3gp',
+	'video/mpeg': 'mpeg',
+	'video/mp4': 'mp4',
+	'video/ogg': 'ogv',
+	'video/x-flv': 'flv',
+	'video/x-ms-wmv': 'wmv',
+	'video/x-msvideo': 'avi',
+}
+
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
 def preferredencoding():
 			raise ContentTooShortError(byte_counter, long(data_len))
 		return True
 
+class HeadRequest(urllib2.Request):
+	def get_method(self):
+		return 'HEAD'
+
 class InfoExtractor(object):
 	"""Information Extractor class.
 
 				self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
 
 
+class VimeoXMLParser(xml.sax.handler.ContentHandler):
+	def __init__(self, info):
+		"""
+		Parses video information from the given xml file.
+		Data is stored in `info`.
+		"""
+		self.info = info
+		self.stack = []
+		self.cache = []
+
+	def extract_information(self):
+		data = ''.join(self.cache)
+		self.cache = []
+
+		map = (
+			([u'xml', u'video', u'thumbnail'], 'thumbnail'),
+			([u'xml', u'video', u'caption'], 'caption'),
+			([u'xml', u'video', u'uploader_display_name'], 'uploader'),
+			([u'xml', u'request_signature'], 'request_signature'),
+			([u'xml', u'request_signature_expires'], 'request_signature_expires'),
+			([u'xml', u'error', u'error_id'], 'error_id'),
+			([u'xml', u'error', u'message'], 'error_message'),
+		)
+		for st, key in map:
+			if self.stack == st:
+				self.info[key] = data
+				break
+
+	def startElement(self, name, attrs):
+		self.extract_information()
+		self.stack.append(name)
+
+	def endElement(self, name):
+		self.extract_information()
+		if self.stack.pop() != name:
+			raise ValueError(u'invalid xml')
+
+	def characters(self, data):
+		self.cache.append(data)
+
+
+class VimeoIE(InfoExtractor):
+	"""Information Extractor for vimeo.com. Thanks to http://ossguy.com/?p=172"""
+	_VALID_URL = r'^http://vimeo.com/(\d+)$'
+	_video_info_url = 'http://vimeo.com/moogaloop/load/clip:%d'
+	_video_download_url = 'http://www.vimeo.com/moogaloop/play/clip:%d/%s/%s/?q=sd'
+
+	@classmethod
+	def suitable(cls, url):
+		return (re.match(cls._VALID_URL, url) is not None)
+
+	def _real_extract(self, url):
+		m = re.match(self._VALID_URL, url)
+		if m is None:
+			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+			return
+		clip_id = int(m.group(1))
+
+		info_url = self._video_info_url % clip_id
+		self._downloader.to_stdout(u'[vimeo] %s: Downloading video information' % clip_id)
+		req = urllib2.Request(info_url, headers=std_headers)
+
+		parser = xml.sax.make_parser()
+		info = {}
+		parser.setContentHandler(VimeoXMLParser(info))
+
+		try:
+			parser.parse(urllib2.urlopen(req))
+		except urllib2.URLError, e:
+			self._downloader.to_stderr(u'[vimeo] ERROR: Unable to retrieve '
+                                       u'video information: %s' % e)
+			return
+
+		if 'error_id' in info:
+			self._downloader.to_stderr(u'[vimeo] ERROR: unable to retrieve '
+				u'video information: %s' % info['error_message'])
+			return
+
+		try:
+			title = info['caption']
+			stitle = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_',
+                            info['caption']).strip(u'_')
+			uploader = info['uploader']
+			url = self._video_download_url % (
+				clip_id,
+				info['request_signature'],
+				info['request_signature_expires'],
+			)
+		except KeyError:
+			self._downloader.trouble(u'ERROR: incomplete video information returned')
+			return
+
+		print 'url %s' % url
+		try:
+			req = HeadRequest(url, headers=std_headers)
+			video_mime = urllib2.urlopen(req).info()['content-type']
+		except (urllib2.URLError, KeyError):
+			video_mime = None
+		ext = mime_extension_mapping.get(video_mime, 'vid')
+
+		try:
+			self._downloader.process_info({
+				'id':			clip_id,
+				'url':      	url,
+				'title':		title,
+				'stitle':		stitle,
+				'uploader':		uploader,
+				'ext':			ext,
+				'format':		'NA',
+				'player_url':	None,
+			})
+		except UnavailableVideoError, err:
+			self._downloader.trouble(u'ERROR: unable to download video')
+
+
 class MetacafeIE(InfoExtractor):
 	"""Information Extractor for metacafe.com."""
 
 
 		# Information extractors
 		youtube_ie = YoutubeIE()
+		vimeo_ie = VimeoIE()
 		metacafe_ie = MetacafeIE(youtube_ie)
 		dailymotion_ie = DailymotionIE()
 		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
 			'playliststart': opts.playliststart,
 			})
 		fd.add_info_extractor(youtube_search_ie)
+		fd.add_info_extractor(vimeo_ie)
 		fd.add_info_extractor(youtube_pl_ie)
 		fd.add_info_extractor(youtube_user_ie)
 		fd.add_info_extractor(metacafe_ie)
 		sys.exit(u'ERROR: fixed output name but more than one file to download')
 	except KeyboardInterrupt:
 		sys.exit(u'\nERROR: Interrupted by user')
+
+# vim: noexpandtab tabstop=4