Chris Adams avatar Chris Adams committed 32afa6f

Avoid UnicodeDecodeErrors when a querystring includes unescaped non-ASCII text

I've seen a small number of requests whose query strings contain non-ASCII
bytes that Python's ascii codec cannot decode. This patch avoids forcing
str->unicode->str conversions in two places, so that no decode exception is
raised.

Comments (0)

Files changed (3)

localeurl/middleware.py

                 locale = accept_langs[0]
         locale_path = utils.locale_path(path, locale)
         if locale_path != request.path_info:
-            if request.META.get("QUERY_STRING", ""):
-                locale_path = "%s?%s" % (locale_path,
-                        request.META['QUERY_STRING'])
             locale_url = utils.add_script_prefix(locale_path)
+
+            qs = request.META.get("QUERY_STRING", "")
+            if qs:
+                # Force this to remain a byte-string by encoding locale_path
+                # first to avoid Unicode tainting - downstream code is
+                # responsible for dealing with in-the-wild character encodings:
+                locale_url = "%s?%s" % (locale_path.encode("utf-8"), qs)
+
             redirect_class = HttpResponsePermanentRedirect
             if not localeurl_settings.LOCALE_REDIRECT_PERMANENT:
                 redirect_class = HttpResponseRedirect

localeurl/tests/tests.py

         self.assertEqual(301, r2.status_code)
         self.assertEqual('/en/test/?somevar=someval', r2['Location'])
 
+    def test_with_unescaped_query_string(self):
+        # This contains an ISO-8859-1 Latin small letter c with cedilla,
+        # received in a request declared as windows-1251:
+        r1 = self.request_factory.get('/test/?somevar=Mudan\xe7as_recentes')
+        r2 = self.middleware.process_request(r1)
+        self.assertEqual(301, r2.status_code)
+        self.assertEqual('/en/test/?somevar=Mudan%E7as_recentes', r2['Location'])
+
 
 
 class NoDefaultPrefixMiddlewareTestCase(MiddlewareTestCase):

localeurl/utils.py

 def add_script_prefix(path):
     """
     Prepends the SCRIPT_PREFIX to a path.
+    """
 
-    """
     return ''.join([urlresolvers.get_script_prefix(), path[1:]])
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.