# Source

# wsgi-peps / request_uri.py

"""Translate REQUEST_URI to wsgi.script_name/wsgi.path_info

Only for Python 2 so far; need to implement urllib.request.(un)quote
properly to get a meaningful Python 3 version
"""
from urllib import quote, unquote

def request_uri_to_path(request_uri, script_name, path_info, safe=True):
    """Split a raw ``REQUEST_URI`` into quoted script-name and path-info.

    ``script_name`` and ``path_info`` are the CGI-style (already
    unquoted) ``SCRIPT_NAME`` and ``PATH_INFO`` variables;
    ``request_uri`` is the raw, still URL-quoted ``REQUEST_URI``.  Any
    ``?query`` suffix on ``request_uri`` is ignored.

    Returns ``(wsgi.script_name, wsgi.path_info)``: the two pieces of
    ``request_uri`` which, once unquoted, equal ``script_name`` and
    ``path_info`` respectively.

    If no such split of ``request_uri`` can be found, the result
    depends on ``safe``:

    * ``safe=True`` (default): return
      ``(quote(script_name), quote(path_info))``.  This isn't perfect
      (the original quoting is lost), but it's the best we can do in
      the circumstances.
    * ``safe=False``: return ``None``.
    """
    # REQUEST_URI normally carries the query string as well; only the
    # path part can correspond to SCRIPT_NAME + PATH_INFO.
    request_path = request_uri.split('?', 1)[0]

    # First guess: SCRIPT_NAME spans as many '/'-separated segments of
    # the quoted path as it does of the unquoted one.
    script_name_segments = script_name.count('/') + 1
    request_uri_parts = request_path.split('/')
    qscript_name_parts = request_uri_parts[:script_name_segments]
    qpath_info_parts = request_uri_parts[script_name_segments:]

    # Every %2f (a url-quoted /) inside the guessed script-name part is
    # a slash that SCRIPT_NAME counted but split('/') did not see, so
    # the guess is that many unquoted segments too long.  Hand trailing
    # segments over to the path-info until the surplus is used up.
    surplus = sum(part.lower().count('%2f') for part in qscript_name_parts)
    # ``> 0`` (not plain truthiness): the surplus can overshoot below
    # zero when the moved segment itself contains %2f; in that case no
    # clean split exists and the verification below rejects the result
    # instead of popping from an empty list.
    while surplus > 0 and qscript_name_parts:
        moved = qscript_name_parts.pop()
        surplus -= 1 + moved.lower().count('%2f')
        qpath_info_parts.insert(0, moved)

    qscript_name = '/'.join(qscript_name_parts)
    if qpath_info_parts:
        qpath_info = '/' + '/'.join(qpath_info_parts)
    else:
        # Nothing follows the script name: PATH_INFO is empty, not '/'.
        qpath_info = ''

    # Only trust the split if it round-trips to the CGI variables.
    if (unquote(qscript_name) == script_name
        and unquote(qpath_info) == path_info):
        return qscript_name, qpath_info
    if safe:
        return quote(script_name), quote(path_info)
    return None

# Doctests for ``request_uri_to_path``, exposed through the module-level
# ``__test__`` mapping so that ``doctest.testmod()`` picks them up.
# The helper ``t`` calls the function with ``safe=False`` and prints
# either 'No match' or the computed ``script_name + path_info`` split.
# The last example exercises a url-quoted slash (%2f) inside the
# script name.  The examples use Python 2 ``print`` statements.
__test__ = {
    'general':
    """\
>>> def t(request_uri, script_name, path_info):
...     result = request_uri_to_path(request_uri, script_name, path_info, safe=False)
...     if result is None:
...         print 'No match'
...     else:
...         print '%s -> %s + %s' % (request_uri, result[0], result[1])
>>> t('/foo/bar', '/foo', '/bar')
/foo/bar -> /foo + /bar
>>> t('/foo%20bar/baz', '', '/foo bar/baz')
/foo%20bar/baz ->  + /foo%20bar/baz
>>> t('/blahblah%2f/baz/foo', '/blahblah//baz', '/foo')
/blahblah%2f/baz/foo -> /blahblah%2f/baz + /foo
"""}

if __name__ == '__main__':
    # Executed as a script: run this module's doctests.
    from doctest import testmod
    testmod()