Commits

Ian Bicking committed b8f5e7f

Added a python3 version

  • Participants
  • Parent commits 6e12bb6

Comments (0)

Files changed (2)

File request_uri.py

 Only for Python 2 so far; need to implement urllib.request.(un)quote
 properly to get a meaningful Python 3 version
 """
+
 from urllib import quote, unquote
 
 def request_uri_to_path(request_uri, script_name, path_info, safe=True):
     This isn't perfect, but it's the best we can do in the
     circumstances.  If you do ``safe=False`` then it will simply
     return None.
+
+    In Python 3 ``script_name`` and ``path_info`` *must* be bytes.
+    ``request_uri`` may be either.  The return value will be unicode
+    (Python 3's ``str``).  In Python 2, all values will be bytes
+    (Python 2's ``str``).  If you get SCRIPT_NAME/PATH_INFO in unicode
+    you must convert it to bytes yourself.
     """
     if ('%2f' not in request_uri and '%2F' not in request_uri):
         # We can be a bit faster in figuring out the unparsed versions

File request_uri3.py

+"""Translate REQUEST_URI to wsgi.script_name/wsgi.path_info
+
+Python 3 version of ``request_uri.py``: uses ``urllib.parse``'s
+``quote_from_bytes``/``unquote_to_bytes`` in place of Python 2's
+``urllib`` ``quote``/``unquote``.
+"""
+
+from urllib.parse import quote_from_bytes, unquote_to_bytes
+
+def request_uri_to_path(request_uri, script_name, path_info, safe=True):
+    """Split the raw ``REQUEST_URI`` into quoted script-name/path-info.
+
+    Given the CGI-style ``SCRIPT_NAME`` and ``PATH_INFO`` variables
+    (already URL-unquoted) and the raw, still URL-quoted
+    ``REQUEST_URI``, this returns ``(wsgi.script_name,
+    wsgi.path_info)``: the two pieces of ``request_uri`` that unquote
+    to ``script_name`` and ``path_info`` respectively.
+
+    If the split cannot be determined from ``request_uri`` (the pieces
+    do not unquote back to the given values) the result depends on
+    ``safe``: with ``safe=True`` (the default) a freshly quoted version
+    of ``(script_name, path_info)`` is returned; with ``safe=False``
+    the function returns None.  This isn't perfect, but it's the best
+    we can do in the circumstances.
+
+    ``script_name`` and ``path_info`` *must* be bytes (this is checked
+    with ``assert``).  ``request_uri`` may be bytes or ``str``; bytes
+    are decoded as ASCII with undecodable bytes dropped.  The returned
+    values are ``str``.  If you get SCRIPT_NAME/PATH_INFO in unicode
+    you must convert it to bytes yourself.
+    """
+    if isinstance(request_uri, bytes):
+        ## FIXME: is 'ignore' a good decoding default?
+        request_uri = request_uri.decode('ascii', 'ignore')
+    assert isinstance(script_name, bytes), (
+        "script_name must be bytes")
+    assert isinstance(path_info, bytes), (
+        "path_info must be bytes")
+    # First guess: the script name covers as many '/'-separated parts
+    # of the request URI as the unquoted script name has segments.
+    script_name_segments = script_name.count(b'/') + 1
+    request_uri_parts = request_uri.split('/')
+    qscript_name_parts = request_uri_parts[:script_name_segments]
+    qpath_info_parts = request_uri_parts[script_name_segments:]
+    if '%2f' in request_uri or '%2F' in request_uri:
+        # Every %2f (a url-quoted /) inside the script-name parts
+        # unquotes to an extra '/', so the guess above took one part
+        # too many per %2f.  Hand trailing parts back to the path info
+        # until the surplus is gone.
+        remove_segments = sum(
+            part.lower().count('%2f') for part in qscript_name_parts)
+        # Stop once the surplus is used up *or overshot*: moving a part
+        # that itself contains %2f can drive the counter below zero,
+        # and a plain truthiness test would then keep popping until
+        # the list is empty and raise IndexError.
+        while remove_segments > 0 and qscript_name_parts:
+            moved_part = qscript_name_parts.pop()
+            remove_segments -= 1 + moved_part.lower().count('%2f')
+            qpath_info_parts.insert(0, moved_part)
+    qscript_name = '/'.join(qscript_name_parts)
+    qpath_info = '/' + '/'.join(qpath_info_parts)
+    # Only trust the split if it unquotes back to what the server gave us.
+    if (unquote_to_bytes(qscript_name) != script_name
+        or unquote_to_bytes(qpath_info) != path_info):
+        if safe:
+            return quote_from_bytes(script_name), quote_from_bytes(path_info)
+        return None
+    return qscript_name, qpath_info
+
+# ``doctest.testmod()`` also collects doctests from the module-level
+# ``__test__`` mapping, so these examples run when the file is executed.
+# The ``t`` helper calls ``request_uri_to_path`` with ``safe=False`` and
+# prints 'No match' when it returns None, otherwise the resulting split.
+__test__ = {
+    'general':
+    """\
+>>> def t(request_uri, script_name, path_info):
+...     result = request_uri_to_path(request_uri, script_name, path_info, safe=False)
+...     if result is None:
+...         print('No match')
+...     else:
+...         print('{} -> {} + {}'.format(request_uri, result[0], result[1]))
+>>> t('/foo/bar', b'/foo', b'/bar')
+/foo/bar -> /foo + /bar
+>>> t('/foo%20bar/baz', b'', b'/foo bar/baz')
+/foo%20bar/baz ->  + /foo%20bar/baz
+>>> t('/blahblah%2f/baz/foo', b'/blahblah//baz', b'/foo')
+/blahblah%2f/baz/foo -> /blahblah%2f/baz + /foo
+"""}
+
+# Run the doctests above when this file is executed as a script.
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()