Commits

ken cochrane committed 2828dae

Trying to fix the issue with __*__ style Python package names, which conflict with GAE model key_names, which don't allow those.

Comments (0)

Files changed (3)

 from google.appengine.api.labs import taskqueue
 from google.appengine.api.urlfetch import DownloadError
 import model
+from model import get_package_key_name
 
 UA = 'appengine-mirror'
 # where should we mirror files from?
 ################### Transfer ######################################
 
 def simple_page(m, uproject):
+    logging.info("%s %s" % (m, uproject))
     project = uproject.encode('utf-8')
     h = httplib.HTTPConnection('pypi.python.org')
     if project:
         h.putrequest('GET', '/simple/'+urllib2.quote(project)+'/')
-        obj = model.Project.get_by_key_name(uproject)
+        obj = model.Project.get_by_key_name(get_package_key_name(uproject))
     else:
         h.putrequest('GET', '/simple/')
         obj = m
     if obj:
         obj.simple = html
     else:
-        obj = model.Project(key_name=uproject, simple=html)
+        obj = model.Project(key_name=get_package_key_name(uproject),
+                            simple=html)
     if project:
         # the root index is not signed
         h.putrequest('GET', '/serversig/'+urllib2.quote(project)+'/')
         delete_extra_files(name, files)
 
 def copy_file(m, todo, (package, path)):
-    project = model.Project.get_by_key_name(package)
+    project = model.Project.get_by_key_name(get_package_key_name(package))
     f = model.File.all().filter("path = ", path).fetch(1)
     if f:
         f = f[0]
     obj.delete()
 
 def delete_extra_files(name, files):
-    p = model.Project.get_by_key_name(name)
+    p = model.Project.get_by_key_name(get_package_key_name(name))
     for f in model.File.all().filter("project = ", p):
         if f.path not in files:
             delete_file(f)
 from google.appengine.ext import webapp, blobstore
 from google.appengine.ext.webapp import template, blobstore_handlers
 import model, fetch, stats
+from model import get_package_key_name
 
 def tpl_path(template_file_name):
     return os.path.join(os.path.dirname(__file__), 'templates', template_file_name)
         elif '/' in path:
             return self.error(404)
         else:
-            p = model.Project.get_by_key_name(path)
+            p = model.Project.get_by_key_name(get_package_key_name(path))
             if p is None:
                 self.response.set_status(404)
                 self.response.out.write("<html><head><title>404 Not Found</title></head><body>Package %s does not exist</body></html>" % path)
             self.response.headers['content-type'] = 'text/plain'
             # no content
         else:
-            p = model.Project.get_by_key_name(path)
+            p = model.Project.get_by_key_name(get_package_key_name(path))
             if p and p.sig:
                 self.response.headers['content-type'] = 'application/octet-stream'
                 self.response.out.write(p.sig)
     day = db.StringProperty()
     data = db.BlobProperty()
     partial = db.BlobProperty()
+
+# This was added here because there is no utils.py and model.py is the only
+# module imported into all the others; putting it here limits the chance
+# of a cyclical import somewhere. This can be moved somewhere else in the
+# future if someone wants to clean this up.
+def get_package_key_name(package_name):
+    """Return a key_name-safe version of a PyPI package name.
+
+    GAE models do not allow ``__*__`` key_names, but PyPI allows package
+    names in this format (see ``__past__``). When this occurs it breaks the
+    PyPI mirror sync because we can't create a new Project model.
+
+    To solve that, the package name is cleaned up before it is used as a
+    key_name, both when creating a model and when looking one up by
+    key_name. Names of the form ``__*__`` are wrapped in an escape string
+    that is highly unlikely to appear in a package name, to reduce
+    duplicates.
+
+    Args:
+        package_name: the package name; a falsy value (e.g. an empty
+            string or None) is returned as-is.
+
+    Returns:
+        ``package_name`` unchanged if it is falsy; otherwise the name with
+        leading/trailing whitespace stripped, wrapped as ``!^!<name>!^!``
+        when it both starts and ends with a double underscore.
+    """
+    escape_str = "!^!"
+    # If we don't have a package_name, return what we got; no reason to go on.
+    if not package_name:
+        return package_name
+
+    # Remove any leading or trailing whitespace, so padded and unpadded
+    # spellings of a name map to the same key_name.
+    package_name = package_name.strip()
+
+    # Check whether it is in the format of __*__.
+    if package_name.startswith("__") and package_name.endswith("__"):
+        # We found one: prepend and append the escape string to the
+        # package_name to avoid the GAE key_name restriction.
+        return "%s%s%s" % (escape_str, package_name , escape_str)
+
+    # It is a normal package name; keep it the way it is (after stripping).
+    return package_name