Janto Dreijer committed 6869a68

"contribute" page
play with xmlrpc calls to pypi.org
change some template lookup behaviour
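
A note on the XML-RPC experiments below: they talk to PyPI's XML-RPC interface through xmlrpclib. A minimal sketch of the three calls this commit exercises (the method names and result keys are the ones the diff itself uses; a plain ServerProxy works outside App Engine, where no custom transport is needed):

	import xmlrpclib

	server = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')

	# search for packages whose name matches "scikits"
	for hit in server.search(dict(name='scikits')):
		print hit['name']

	# released versions of one package, then the files of the first version
	versions = server.package_releases('scikits.ann')
	if versions:
		for f in server.release_urls('scikits.ann', versions[0]):
			print f['filename'], f['url'], f['size']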

  • Parent commit: 7222ebd

Files changed (7)

 
 """
 
-def get_url(url, force_fetch=False):
-	result = memcache.get(url)
-	if result is None or force_fetch:
-		logger.debug("fetching %s" % url)
-		result = urlfetch.fetch(url)
-		assert memcache.set(key=url, value=result, time=60*60*5), url
-	else:
-		logger.debug("cache hit for %s" % url)
-	return result
-
 class Page(webapp.RequestHandler):
 
 	name = ""
 		webapp.RequestHandler.__init__(self)
 		self.logger = logging.getLogger(self.name)
 
-		# load from templates.py
-		for t in [
-			"header_template",
-			"footer_template",
-			"main_page_template",
-			"about_template",
-			"package_info_template",
-		]:
-			template_name = t.rsplit("_", 1)[0]
-			text = getattr(templates, t)
-
-			template = PageTemplate.all().filter("name =", template_name).get()
-			if template:
-				pass
-			else:
-				self.logger.info("loading %s from templates.py into datastore" % template_name)
-				template = PageTemplate(name=template_name, text=text, modified=datetime.datetime.now())
-				template.put()
-
 	def write(self, text):
 		self.response.out.write(text)
 
 		self.write(get_template("main_page")% locals())
 		self.print_footer()
 
+class ContributePage(Page):
+
+	name = "contribute"
+
+	def get(self):
+		self.print_header()
+		self.print_menu()
+		self.write(get_template("contribute_page") % locals())
+		self.print_footer()
+
+
 class PackagesPage(Page):
 
 	name = "scikits"
 		self.print_header()
 		self.print_menu()
 
-		packages = sorted(System.packages().values())
+		packages = sorted(Package.packages().values())
 
 		# force fetch of some package
 		if packages:
 			package_name = args[0]
 
 		# done before printing header to build title
-		package = System.packages().get(package_name)
+		package = Package.packages().get(package_name)
 		if package is None:
 			package_name = "scikits.%s" % package_name
-			package = System.packages().get(package_name)
+			package = Package.packages().get(package_name)
 		if package is None:
 			self.error(404)
 			self.write("404 not found")
 	return "\n".join(result)
 
 class Package(object):
-	def __init__(self, repo_url):
+	def __init__(self, name, repo_url):
 		"""
 		__init__ should cache only minimal information; actual information extraction is done on page requests, with memcaching where needed.
 		"""
+		self.name = name
 		self.repo_url = repo_url
 
-		self.name = "scikits.%s" % os.path.split(self.repo_url)[1]
+	@classmethod
+	def packages(cls):
+		packages = memcache.get("packages")
+		if packages is None:
+			packages = {}
 
-		self.readme_filename = os.path.join(self.repo_url, "README")
+			from_repo = 1
+			if from_repo:
+				logger.info("loading packages from repo")
+				for repo_url in fetch_dir_links(REPO_PATH):
+					package_name = "scikits.%s" % os.path.split(repo_url)[1]
 
-		url = os.path.join(self.repo_url, "setup.py")
-		result = get_url(url)
-		if result.status_code != 200:
-			self.valid = False
-			return
+					# check if really a package
+					url = os.path.join(repo_url, "setup.py")
+					result = get_url(url)
+					if result.status_code != 200: # setup.py was not found
+						continue
 
-		self.valid = True
+					package = Package(name=package_name, repo_url=repo_url)
+					packages[package.name] = package
+
+			from_pypi_search = 0
+			if from_pypi_search:
+				logger.info("loading packages from PyPI")
+				server = xmlrpclib.ServerProxy('http://pypi.python.org/pypi', transport=GoogleXMLRPCTransport())
+				results = server.search(dict(name="scikits"))
+				for package_name in set(result["name"] for result in results): # unique names, pypi contains duplicate names
+
+					#XXX remove this once no longer scanning repo for package name
+					if package_name in packages:
+						continue
+
+					repo_url = ""
+					package = Package(name=package_name, repo_url=repo_url)
+					packages[package.name] = package
+
+			assert memcache.set(key="packages", value=packages, time=FETCH_CACHE_AGE), "packages"
+
+		return packages
 
 	def __cmp__(self, other):
 		return cmp(self.name, other.name)
 
+	def download_links_html(self):
+		text = []
+		server = xmlrpclib.ServerProxy('http://pypi.python.org/pypi', transport=GoogleXMLRPCTransport())
+		versions = server.package_releases(self.name)
+		for version in versions:
+			text.append("<table>")
+			text.append("""<tr>
+				<th>Python version</th>
+				<th>URL</th>
+				<th>Size</th>
+				</tr>""")
+			for d in server.release_urls(self.name, version):
+				text.append("<tr>")
+				text.append("""
+				<td>%(python_version)s</td>
+				<td><a href="%(url)s">%(filename)s</a></td>
+				<td>%(size)s</td>
+				"""% d)
+				text.append("</tr>")
+			text.append("</table>")
+
+			break # only the first listed version?
+		return "\n".join(text)
+
 	def info(self, force_fetch=False):
 		d = dict(
 			name=self.name,
 		if doap_result.status_code == 200:
 
 			doap_text = doap_result.content
-			#~ http://wiki.python.org/moin/PyPIXmlRpc?highlight=(CategoryDocumentation)
 			try:
 				tuples = rdfToPython(doap_text)
 			except:
 	items = re.findall('<a href="(.+?)/">.+?</a>', result.content)
 	return [os.path.join(url, item) for item in items if not item.startswith("http://") and not item.startswith("..")]
 
-class System:
-
-	@classmethod
-	def init(self):
-		pass
-
-	@classmethod
-	def packages(self):
-		packages = memcache.get("packages")
-		if packages is None:
-			packages = {}
-			for url in fetch_dir_links(REPO_PATH):
-				logger.debug(url)
-				package = Package(repo_url=url)
-				if package.valid: # setup.py was not found
-					packages[package.name] = package
-		return packages
-
-#~ class RecentChangesPage(Page):
-	#~ def get(self):
-		#~ rss = RSS2.RSS2(
-			#~ title = "",
-			#~ link = "",
-			#~ description = "",
-
-			#~ lastBuildDate = datetime.datetime.now(),
-
-			#~ items = [
-
-				#~ RSS2.RSSItem(
-					#~ title = "PyRSS2Gen-0.0 released",
-					#~ link = "http://www.dalkescientific.com/news/030906-PyRSS2Gen.html",
-					#~ description = "Dalke Scientific today announced PyRSS2Gen-0.0, a library for generating RSS feeds for Python.  ",
-					#~ guid = RSS2.Guid("http://www.dalkescientific.com/news/"
-					#~ "030906-PyRSS2Gen.html"),
-					#~ pubDate = datetime.datetime(2003, 9, 6, 21, 31)
-					#~ ),
-
-			#~ ])
-
-		#~ rss.write_xml(self)
-
 class SearchPage(Page):
 	name="search"
 
 		self.write("</p>")
 
 		packages = []
-		for package in System.packages().values():
+		for package in Package.packages().values():
 			d = package.info()
 			if any(text in field for field in package.info().values()):
 				packages.append(package)
 		self.print_footer()
 
 def get_template(name):
-	#~ return getattr(templates, name+"_template")
 	template = PageTemplate.all().filter("name =", name).get()
-	return template.text
+	if template is not None:
+		return template.text
+	return getattr(templates, name+"_template")
 
 class PageTemplate(db.Model):
 	name = db.StringProperty(required=True)
 		self.print_header()
 		self.print_menu()
 
+		# authorize
+
 		user = users.get_current_user()
 		self.write("<p>")
 		if not user:
 		self.write('<a href="%s">sign out</a>.' % users.create_logout_url("/admin"))
 		self.write("</p>")
 
+		# backup and stats
+
 		self.write("<p>")
 		if self.request.get("email_backup") == "yes":
 			t = datetime.datetime.now()
 		</p>
 		""")
 
+		# save modifications
+
 		template_name = self.request.get("template_name")
-		template = PageTemplate.all().filter("name =", template_name).get()
-		saved = False
-		if template:
+		template_text = self.request.get("template_text", "").strip()
+		if template_name and template_text: # user provided new content
+			template = PageTemplate.all().filter("name =", template_name).get()
+			if not template:
+				template = PageTemplate(name=template_name)
+			template.text = template_text
+			template.modified = datetime.datetime.now()
+			template.username = user.nickname()
+			template.put()
+
 			self.write("<p>")
-			template_text = self.request.get("template_text")
-			if template_text.strip():
-				template.text = template_text
-				template.modified = datetime.datetime.now()
-				template.username = user.nickname()
-				template.put()
-				saved = True
-			if saved:
-				self.write("<strong>saved</strong><br />")
+			self.write("<strong>saved</strong><br />")
 			self.write("last_modified(<em>%s</em>) = %s" % (template.name, template.modified))
 			self.write("</p>")
 
-		query = PageTemplate.all()
-		for template in query.order("name"):
-			modified = template.modified
-			template_name = template.name
-			template_text = htmlquote(template.text)
+		# list templates
+
+		for template_name in [
+			"header",
+			"footer",
+			"main_page",
+			"about",
+			"contribute_page",
+			"package_info",
+		]:
+			# check if in db
+			template = PageTemplate.all().filter("name =", template_name).get()
+			if template:
+				template_text = htmlquote(template.text)
+				modified_time = template.modified
+				modified_username = template.username
+			else:
+				template_text = htmlquote(get_template(template_name))
+				modified_time = None
+				modified_username = None
+
 			self.write("""
 <h2>%(template_name)s</h2>
 <p>
 <form action="/admin" method="post">
 <textarea name="template_text" cols="80" rows="20">%(template_text)s</textarea>
 <input type="hidden" name="template_name" value="%(template_name)s">
+<br />
 <input type="submit" value="Save" />
-modified %(modified)s
+modified %(modified_time)s by %(modified_username)s
 </form>
 </p>
 			""" % locals())
 		#~ http://wiki.python.org/moin/PyPiXmlRpc
 		server = xmlrpclib.ServerProxy('http://pypi.python.org/pypi', transport=GoogleXMLRPCTransport())
 
-		result = server.package_releases('roundup')
-		self.write(result)
+		self.write("scikits:<br />\n")
+		results = server.search(dict(name="scikits"))
+		package_names = sorted(set(result["name"] for result in results)) # unique names, pypi contains duplicate names
+		self.write(package_names)
+		self.write("<br />\n")
 
-		result = server.package_urls('roundup', '1.1.2')
-		self.write(result)
+		#~ for result in results:
+			#~ self.write(result)
+			#~ self.write("<br />\n")
+
+		package_name = 'scikits.ann'
+		versions = server.package_releases(package_name)
+		self.write(versions)
+		self.write("<br />\n")
+
+		for version in versions:
+			d = server.release_data(package_name, version)
+			self.write(d)
+			self.write("<br />\n")
+
+			for d in server.release_urls(package_name, version):
+				self.write(d)
+				self.write("<br />\n")
+
+			break # only latest
+
+		self.write("download:")
+		for p in Package.packages().values():
+			self.write(p.download_links_html())
 
 		self.print_footer()
 
 	('/(scikits[.].+)', PackageInfoPage),
 
 	('/about', AboutPage),
+	('/contribute', ContributePage),
 
 	#~ ('/recent_changes', RecentChangesPage),
 	('/search', SearchPage),
 	], debug=True)
 
 def main():
-	System.init()
 	wsgiref.handlers.CGIHandler().run(application)
 
 if __name__ == '__main__':

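The file above leans on a single cache-aside pattern against memcache: read a key, rebuild on a miss, store with a TTL. Condensed, the pattern behind both get_url and Package.packages looks like this (build_packages is a hypothetical stand-in for the repo/PyPI scan):

	def cached(key, build, ttl=FETCH_CACHE_AGE):
		value = memcache.get(key)
		if value is None:
			value = build()  # the expensive part: urlfetch or XML-RPC round trips
			assert memcache.set(key=key, value=value, time=ttl), key
		return value

	# packages = cached("packages", build_packages)  # build_packages is hypothetical
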
code/static/images/scipyshiny_small.png

Added
New image

code/templates.py

 
 <div style="background-color: white; text-align: left; padding: 10px 10px 15px 15px">
 <table><tr>
-<td><a href="/"><img src="/static/images/download_large.png" width="64" border="0" /></a>
+<td><a href="/"><img src="/static/images/scipyshiny_small.png" width="64" border="0" /></a>
 <td><a href="/"><span style="font-size: 36px;">SciKits</span></a></td>
 </tr></table>
 </div>
 	<h3>Navigation</h3>
 	<ul>
 	<li><a href="/">Home</a> |&nbsp;</li>
-	<li><a href="/about">What are SciKits?</a> |&nbsp;</li>
-	<li><a href="/scikits">Get SciKits</a></li>
+	<li><a href="/about">About SciKits</a> |&nbsp;</li>
+	<li><a href="/scikits">Get SciKits</a> |&nbsp;</li>
+	<li><a href="/contribute">Contribute</a></li>
 	</ul>
 </div>
 
 	<span class="linkdescr">what scikits are all about</span></p>
 	<p class="biglink"><a class="biglink" href="/scikits">Get SciKits</a><br/>
 	<span class="linkdescr">index of all scikits</span></p>
+	<p class="biglink"><a class="biglink" href="/contribute">Contribute</a><br/>
+	<span class="linkdescr">add your own scikit or join a project</span></p>
 	</td>
 </tr>
 </table>
 <br />
 """
 
+contribute_page_template = """
+<h1>Contribute</h1>
+
+<h3>Add your own package</h3>
+<p>
+<!-- Register at PyPI or add to SVN repository -->
+</p>
+
+<h3>Join a project</h3>
+<p>
+Join a mailing list.
+</p>
+
+"""
+
 package_info_template = """
 <a href="" style="text-decoration:none"><h1>%(name)s</h1></a>
 <i>%(revision)s</i>
 <img src="/static/images/download_large.png" width="16" border="0" /> Download:  %(download_link)s <br />
 Homepage: <a href="%(homepage)s">%(homepage)s</a> <br />
 PyPI: <a href="http://pypi.python.org/pypi/%(name)s">http://pypi.python.org/pypi/%(name)s</a> <br />
-Source: <a href="%(repo_url)s">%(repo_url)s</a> <br />
+Source Repository: <a href="%(repo_url)s">%(repo_url)s</a> <br />
 People: %(people)s <br />
 </p>
 
 
 import xmlrpclib
 
-#~ import xml.etree.ElementTree as ET
-
 # set up locations
 ROOT = os.path.dirname(__file__)
+ON_DEV_SERVER = os.environ.get("SERVER_SOFTWARE", "dev").lower().startswith("dev")
+REPO_PATH = "http://svn.scipy.org/svn/scikits/trunk"
 
-ON_DEV_SERVER = os.environ.get("SERVER_SOFTWARE", "dev").lower().startswith("dev")
-
-REPO_PATH = "http://svn.scipy.org/svn/scikits/trunk"
+# how often new data needs to be loaded
+FETCH_CACHE_AGE = 60 * 60 * 2
 
 import time
 
 	sink = Sink()
 	return rdfxml.parseRDF(s, base=None, sink=sink).result
 
+def get_url(url, force_fetch=False):
+	result = memcache.get(url)
+	if result is None or force_fetch:
+		logger.debug("fetching %s" % url)
+		result = urlfetch.fetch(url)
+		assert memcache.set(key=url, value=result, time=FETCH_CACHE_AGE), url
+	else:
+		logger.debug("cache hit for %s" % url)
+	return result
+
 class GoogleXMLRPCTransport(object):
 	"""Handles an HTTP transaction to an XML-RPC server."""
 
 		result = None
 		url = 'http://%s%s' % (host, handler)
 		try:
-			response = urlfetch.fetch(url,
+			response = urlfetch.fetch(
+				url,
 				payload=request_body,
 				method=urlfetch.POST,
-				headers={'Content-Type': 'text/xml'})
+				headers={'Content-Type': 'text/xml'},
+				)
 		except:
 			msg = 'Failed to fetch %s' % url
-			raise xmlrpclib.ProtocolError(host + handler, 500, msg, {})
+			logger.error(msg)
+			raise
 
 		if response.status_code != 200:
-			logging.error('%s returned status code %s' % (url, response.status_code))
+			logger.error('%s returned status code %s' % (url, response.status_code))
 			raise xmlrpclib.ProtocolError(host + handler,
 				  response.status_code,
 				  "",

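The hunk above shows only the fetch half of GoogleXMLRPCTransport. For context, the half a urlfetch-backed xmlrpclib transport still needs, parsing the XML-RPC response body, could look like this (a sketch, assuming the class follows xmlrpclib's Transport interface; _parse_response is a hypothetical method name):

	def _parse_response(self, body):
		# xmlrpclib supplies a matched (parser, unmarshaller) pair
		parser, unmarshaller = xmlrpclib.getparser()
		parser.feed(body)
		parser.close()
		return unmarshaller.close()  # the decoded return values
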
misc/package_icon_license.jpg

Added
New image

misc/urlfetch_stub.py

+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Stub version of the urlfetch API, based on httplib."""
+
+
+
+import httplib
+import logging
+import socket
+import urllib
+import urlparse
+
+from google.appengine.api import apiproxy_stub
+from google.appengine.api import urlfetch
+from google.appengine.api import urlfetch_errors
+from google.appengine.api import urlfetch_service_pb
+from google.appengine.runtime import apiproxy_errors
+
+
+MAX_RESPONSE_SIZE = 2 ** 24
+
+MAX_REDIRECTS = urlfetch.MAX_REDIRECTS
+
+REDIRECT_STATUSES = frozenset([
+  httplib.MOVED_PERMANENTLY,
+  httplib.FOUND,
+  httplib.SEE_OTHER,
+  httplib.TEMPORARY_REDIRECT,
+])
+
+PORTS_ALLOWED_IN_PRODUCTION = (
+    None, '80', '443', '4443', '8080', '8081', '8082', '8083', '8084', '8085',
+    '8086', '8087', '8088', '8089', '8188', '8444', '8990')
+
+_API_CALL_DEADLINE = 5.0
+
+
+_UNTRUSTED_REQUEST_HEADERS = frozenset([
+  'content-length',
+  'host',
+  'referer',
+  'user-agent',
+  'vary',
+  'via',
+  'x-forwarded-for',
+])
+
+class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
+  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""
+
+  def __init__(self, service_name='urlfetch'):
+    """Initializer.
+
+    Args:
+      service_name: Service name expected for all calls.
+    """
+    super(URLFetchServiceStub, self).__init__(service_name)
+
+  def _Dynamic_Fetch(self, request, response):
+    """Trivial implementation of URLFetchService::Fetch().
+
+    Args:
+      request: the fetch to perform, a URLFetchRequest
+      response: the fetch response, a URLFetchResponse
+    """
+    (protocol, host, path, parameters, query, fragment) = urlparse.urlparse(request.url())
+
+    payload = ''
+    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
+      method = 'GET'
+    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
+      method = 'POST'
+      payload = request.payload()
+    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
+      method = 'HEAD'
+    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
+      method = 'PUT'
+      payload = request.payload()
+    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
+      method = 'DELETE'
+    else:
+      logging.error('Invalid method: %s', request.method())
+      raise apiproxy_errors.ApplicationError(
+        urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR)
+
+    if not (protocol == 'http' or protocol == 'https'):
+      logging.error('Invalid protocol: %s', protocol)
+      raise apiproxy_errors.ApplicationError(
+        urlfetch_service_pb.URLFetchServiceError.INVALID_URL)
+
+    sanitized_headers = self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
+                                                  request.header_list())
+    request.clear_header()
+    request.header_list().extend(sanitized_headers)
+
+    self._RetrieveURL(request.url(), payload, method,
+                      request.header_list(), response,
+                      follow_redirects=request.followredirects())
+
+  def _RetrieveURL(self, url, payload, method, headers, response,
+                   follow_redirects=True):
+    """Retrieves a URL.
+
+    Args:
+      url: String containing the URL to access.
+      payload: Request payload to send, if any.
+      method: HTTP method to use (e.g., 'GET')
+      headers: List of additional header objects to use for the request.
+      response: Response object
+      follow_redirects: optional setting (defaulting to True) for whether or not
+        we should transparently follow redirects (up to MAX_REDIRECTS)
+
+    Raises:
+      Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR
+      in cases where:
+        - MAX_REDIRECTS is exceeded
+        - The protocol of the redirected URL is bad or missing.
+    """
+    last_protocol = ''
+    last_host = ''
+
+    for redirect_number in xrange(MAX_REDIRECTS + 1):
+      parsed = urlparse.urlparse(url)
+      protocol, host, path, parameters, query, fragment = parsed
+
+      port = urllib.splitport(urllib.splituser(host)[1])[1]
+
+      if port not in PORTS_ALLOWED_IN_PRODUCTION:
+        logging.warning(
+          'urlfetch received %s ; port %s is not allowed in production!' %
+          (url, port))
+
+      if host == '' and protocol == '':
+        host = last_host
+        protocol = last_protocol
+
+      adjusted_headers = {
+        'Content-Length': len(payload),
+        'Host': host,
+        'Accept': '*/*',
+      }
+      if method == 'POST' and payload:
+        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'
+
+      for header in headers:
+        adjusted_headers[header.key().title()] = header.value()
+
+      logging.debug('Making HTTP request: host = %s, '
+                    'url = %s, payload = %s, headers = %s',
+                    host, url, payload, adjusted_headers)
+      try:
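+        # local modification to the stock SDK stub: route plain HTTP through a
+        # hardcoded proxy instead of connecting to the host directly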
+        if protocol == 'http':
+          connection = httplib.HTTPConnection("proxy.sun.ac.za", 3128)
+          #~ connection = httplib.HTTPConnection(host)
+        elif protocol == 'https':
+          connection = httplib.HTTPSConnection(host)
+          #~ connection = httplib.HTTPSConnection("proxy.sun.ac.za", 3128)
+        else:
+          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
+          logging.error(error_msg)
+          raise apiproxy_errors.ApplicationError(
+              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
+
+        last_protocol = protocol
+        last_host = host
+
+        if query != '':
+          full_path = path + '?' + query
+        else:
+          full_path = path
+
+        orig_timeout = socket.getdefaulttimeout()
+        try:
+          socket.setdefaulttimeout(_API_CALL_DEADLINE)
+          # a proxy wants the absolute URI as the request path; path already starts with "/"
+          full_path = protocol + "://" + host + full_path
+          logging.debug(full_path)
+          connection.request(method, full_path, payload, adjusted_headers)
+          http_response = connection.getresponse()
+          http_response_data = http_response.read()
+        finally:
+          socket.setdefaulttimeout(orig_timeout)
+          connection.close()
+      except (httplib.error, socket.error, IOError), e:
+        raise apiproxy_errors.ApplicationError(
+          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
+
+      if http_response.status in REDIRECT_STATUSES and follow_redirects:
+        url = http_response.getheader('Location', None)
+        if url is None:
+          error_msg = 'Redirecting response was missing "Location" header'
+          logging.error(error_msg)
+          raise apiproxy_errors.ApplicationError(
+              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
+      else:
+        response.set_statuscode(http_response.status)
+        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])
+        for header_key, header_value in http_response.getheaders():
+          header_proto = response.add_header()
+          header_proto.set_key(header_key)
+          header_proto.set_value(header_value)
+
+        if len(http_response_data) > MAX_RESPONSE_SIZE:
+          response.set_contentwastruncated(True)
+
+        break
+    else:
+      error_msg = 'Too many repeated redirects'
+      logging.error(error_msg)
+      raise apiproxy_errors.ApplicationError(
+          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)
+
+  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
+    """Cleans "unsafe" headers from the HTTP request/response.
+
+    Args:
+      untrusted_headers: set of untrusted headers names
+      headers: list of string pairs, first is header name and the second is header's value
+    """
+    return (h for h in headers if h.key().lower() not in untrusted_headers)

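This file is a locally modified copy of the SDK's urlfetch stub (the proxy change above). To have a dev-server process use it, the stub would be registered with the API proxy along these lines (a sketch; how it is actually wired into dev_appserver is not shown in this commit):

	from google.appengine.api import apiproxy_stub_map
	import urlfetch_stub

	# replace the urlfetch service implementation for this process
	apiproxy_stub_map.apiproxy.RegisterStub(
	    'urlfetch', urlfetch_stub.URLFetchServiceStub())
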
package_icon_license.jpg

Removed
Old image