# wsgi-peps / request_uri3.py

"""Translate REQUEST_URI to wsgi.script_name/wsgi.path_info
"""

from urllib.parse import quote_from_bytes, unquote_to_bytes

def request_uri_to_path(request_uri, script_name, path_info, safe=True):
    """Split a raw ``REQUEST_URI`` into quoted script-name/path-info parts.

    Given the CGI-style (already URL-unquoted) ``SCRIPT_NAME`` and
    ``PATH_INFO`` variables, as well as the raw (still URL-quoted)
    ``REQUEST_URI``, this returns ``(wsgi.script_name, wsgi.path_info)``:
    the two pieces of ``request_uri`` that unquote to ``script_name``
    and ``path_info`` respectively.

    The split point is always a literal ``/`` in ``request_uri`` (or
    its very end, when ``path_info`` is empty).  A ``%2f``/``%2F``
    inside a segment counts as a slash of the unquoted value, so
    ``/a%2fb/c`` can be matched against a ``script_name`` of
    ``b'/a/b'``.

    If the quoted values cannot be determined from ``request_uri``
    (no split point exists, or the pieces do not unquote to the given
    values) it will return a freshly quoted version of
    ``(script_name, path_info)``.  This isn't perfect, but it's the
    best we can do in the circumstances.  If you do ``safe=False``
    then it will simply return None.

    :param request_uri: the raw request path, ``str`` or ``bytes``.
        Bytes are decoded as ASCII and undecodable bytes are dropped.
        No query-string handling is done here.
    :param script_name: unquoted ``SCRIPT_NAME``; *must* be bytes.
    :param path_info: unquoted ``PATH_INFO``; *must* be bytes.
    :param safe: when true (the default) fall back to re-quoting
        ``script_name``/``path_info``; when false return None instead.
    :returns: a ``(quoted_script_name, quoted_path_info)`` tuple of
        ``str``, or None (only when ``safe`` is false).
    :raises AssertionError: if ``script_name`` or ``path_info`` is not
        bytes.  If you get SCRIPT_NAME/PATH_INFO in unicode you must
        convert it to bytes yourself.
    """
    if isinstance(request_uri, bytes):
        ## FIXME: is 'ignore' a good decoding default?
        request_uri = request_uri.decode('ascii', 'ignore')
    assert isinstance(script_name, bytes), (
        "script_name must be bytes")
    assert isinstance(path_info, bytes), (
        "path_info must be bytes")

    # The quoted script name must unquote to exactly this many slashes.
    wanted_slashes = script_name.count(b'/')
    uri_parts = request_uri.split('/')

    # Walk the segments left to right, tracking how many slashes the
    # prefix '/'.join(uri_parts[:index]) would contain once unquoted:
    # one literal '/' between consecutive segments plus every encoded
    # slash inside them.  The count strictly increases, so the first
    # prefix that reaches the target is the only candidate; overshooting
    # means the boundary would fall inside a segment and cannot be
    # expressed as a split of request_uri.
    split_at = None
    seen_slashes = -1  # the first segment has no '/' in front of it
    for index, part in enumerate(uri_parts, 1):
        seen_slashes += 1 + part.lower().count('%2f')
        if seen_slashes >= wanted_slashes:
            if seen_slashes == wanted_slashes:
                split_at = index
            break

    if split_at is not None:
        qscript_name = '/'.join(uri_parts[:split_at])
        # Prefix each remaining segment with '/', so that no remaining
        # segments yields '' (an empty PATH_INFO) rather than '/'.
        qpath_info = ''.join('/' + part for part in uri_parts[split_at:])
        if (unquote_to_bytes(qscript_name) == script_name
                and unquote_to_bytes(qpath_info) == path_info):
            return qscript_name, qpath_info

    # Something isn't matching up
    if safe:
        return quote_from_bytes(script_name), quote_from_bytes(path_info)
    return None

# Extra doctests for ``doctest.testmod()``, which also runs the string
# values of a module-level ``__test__`` dict.  The helper ``t`` calls
# ``request_uri_to_path`` with ``safe=False`` and prints either the
# resulting split or ``No match`` when it returns None.
__test__ = {
    'general':
    """\
>>> def t(request_uri, script_name, path_info):
...     result = request_uri_to_path(request_uri, script_name, path_info, safe=False)
...     if result is None:
...         print('No match')
...     else:
...         print('{} -> {} + {}'.format(request_uri, result[0], result[1]))
>>> t('/foo/bar', b'/foo', b'/bar')
/foo/bar -> /foo + /bar
>>> t('/foo%20bar/baz', b'', b'/foo bar/baz')
/foo%20bar/baz ->  + /foo%20bar/baz
>>> t('/blahblah%2f/baz/foo', b'/blahblah//baz', b'/foo')
/blahblah%2f/baz/foo -> /blahblah%2f/baz + /foo
>>> t('/a%2fb/c/d', b'/wsgi20.wsgi', b'/a/b/c/d')
No match
"""}

if __name__ == '__main__':
    # Executed as a script: run this module's doctests.
    from doctest import testmod
    testmod()
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.