Commits

Chris Adams committed 5a8c38a

Handle in-the-wild URLs with embedded newlines

I've seen requests from Googlebot following malformed links which produced an
endless redirect chain like '/foo -> /en/foo -> /en/en/foo'. This did not cause
problems, but changing the PATH_RE flags (adding re.DOTALL, so that '.' also
matches newline characters) allows returning a simple 404 quickly.

Comments (0)

Files changed (2)

localeurl/settings.py

 
 SUPPORTED_LOCALES = dict(
     (code.lower(), name) for code, name in settings.LANGUAGES)
+
 # Issue #15. Sort locale codes to avoid matching e.g. 'pt' before 'pt-br'
 LOCALES_RE = '|'.join(
     sorted(SUPPORTED_LOCALES.keys(), key=lambda i: len(i), reverse=True))
-PATH_RE = re.compile(r'^/(?P<locale>%s)(?P<path>.*)$' % LOCALES_RE, re.I)
+
+PATH_RE = re.compile(r'^/(?P<locale>%s)(?P<path>.*)$' % LOCALES_RE,
+                     flags=re.IGNORECASE | re.DOTALL)
 
 LOCALE_INDEPENDENT_PATHS = [re.compile(p) for p in
                             getattr(settings, 'LOCALE_INDEPENDENT_PATHS', [])]

localeurl/tests/tests.py

         self.assertEqual(('', '/de/about/localeurl/'),
                 utils.strip_path('/de/about/localeurl/'))
 
+    def test_strip_path_with_embedded_newlines(self):
+        self.assertEqual(('en', '/index.html%0D%0Anoise'),
+                         utils.strip_path("/en/index.html%0D%0Anoise"))
+
 
     def test_strip_path_takes_longer_code_first(self):
         # Refs issue #15.