Commits

zbyte64 committed 412513a

handle unicode urls

Comments (0)

Files changed (1)

 METADATA_PREFIX = 'x-amz-meta-'
 AMAZON_HEADER_PREFIX = 'x-amz-'
 
+def url_fix(s, charset='utf-8'):
+    """Percent-encode unsafe characters in the path and query of a URL.
+
+    Sometimes you get a URL from a user that just isn't a real URL
+    because it contains unsafe characters like ' ' and so on.  This
+    function can fix some of the problems in a similar way browsers
+    handle data entered by the user:
+
+    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
+    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'
+
+    Only the path and the query string are quoted; the scheme, network
+    location and fragment are passed through to the result unchanged.
+
+    :param s: The URL to fix, as a byte string or a unicode string.
+    :param charset: The target charset for the URL if the url was
+                    given as unicode string.  Characters that cannot be
+                    encoded in this charset are dropped.
+    :return: The URL reassembled with its path and query string quoted.
+    """
+    # NOTE: this handles unicode properly, while urllib.quote_plus does not.
+    # Special thanks for this function:
+    # http://stackoverflow.com/questions/120951/how-can-i-normalize-a-url-in-python
+    if isinstance(s, unicode):
+        # Encode before quoting; the 'ignore' error handler silently drops
+        # any character the target charset cannot represent.
+        s = s.encode(charset, 'ignore')
+    scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
+    # '/' and '%' are treated as safe in the path, so path separators and
+    # already percent-escaped sequences are not encoded a second time.
+    path = urllib.quote(path, '/%')
+    # ':', '&' and '=' are treated as safe in the query string.
+    # NOTE(review): '%' is not in this safe set, so escapes already present
+    # in the query string get re-encoded -- confirm this is intended.
+    qs = urllib.quote_plus(qs, ':&=')
+    return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
+
+
 # generates the aws canonical string for the given parameters
 def canonical_string(method, bucket="", key="", query_args={}, headers={}, expires=None):
     interesting_headers = {}
         buf += "/%s" % bucket
 
     # add the key.  even if it doesn't exist, add the slash
-    buf += "/%s" % urllib.quote_plus(key)
+    buf += "/%s" % url_fix(key)
 
     # handle special query string arguments
 
     for k, v in query_args.items():
         piece = k
         if v != None:
-            piece += "=%s" % urllib.quote_plus(str(v))
+            piece += "=%s" % url_fix(v)
         pairs.append(piece)
 
     return '&'.join(pairs)
 
         # add the slash after the bucket regardless
         # the key will be appended if it is non-empty
-        path += "/%s" % urllib.quote_plus(key)
+        path += "/%s" % url_fix(key)
 
 
         # build the path_argument string
 
         url = CallingFormat.build_url_base(self.protocol, self.server, self.port, bucket, self.calling_format)
 
-        url += "/%s" % urllib.quote_plus(key)
+        url += "/%s" % url_fix(key)
 
         query_args['Signature'] = encoded_canonical
         query_args['Expires'] = expires
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.