Luke Plant committed f86fdce

[1.2.X] Fixed #14235 - UnicodeDecodeError in CSRF middleware

Thanks to jbg for the report.

This changeset essentially backs out [13698] in favour of a method that
sanitizes the token rather than escaping it.

Backport of [13732] from trunk.

Comments (0)

Files changed (3)


 from django.core.urlresolvers import get_callable
 from django.utils.cache import patch_vary_headers
 from django.utils.hashcompat import md5_constructor
-from django.utils.html import escape
 from django.utils.safestring import mark_safe
 def get_token(request):
-    Returns the the CSRF token required for a POST form. No assumptions should
-    be made about what characters might be in the CSRF token.
+    Returns the CSRF token required for a POST form. The token is an
+    alphanumeric value.
     A side effect of calling this function is to make the csrf_protect
     decorator and the CsrfViewMiddleware add a CSRF cookie and a 'Vary: Cookie'
     return request.META.get("CSRF_COOKIE", None)
+def _sanitize_token(token):
+    # Allow only alphanum, and ensure we return a 'str' for the sake of the post
+    # processing middleware.
+    token = re.sub('[^a-zA-Z0-9]', '', str(token.decode('ascii', 'ignore')))
+    if token == "":
+        # In case the cookie has been truncated to nothing at some point.
+        return _get_new_csrf_key()
+    else:
+        return token
 class CsrfViewMiddleware(object):
     Middleware that requires a present and correct csrfmiddlewaretoken
         # request, so it's available to the view.  We'll store it in a cookie when
         # we reach the response.
-            request.META["CSRF_COOKIE"] = request.COOKIES[settings.CSRF_COOKIE_NAME]
+            # In case of cookies from untrusted sources, we strip anything
+            # dangerous at this point, so that the cookie + token will have the
+            # same, sanitized value.
+            request.META["CSRF_COOKIE"] = _sanitize_token(request.COOKIES[settings.CSRF_COOKIE_NAME])
             cookie_is_new = False
         except KeyError:
             # No cookie, so create one.  This will be sent with the next
                 """Returns the matched <form> tag plus the added <input> element"""
                 return mark_safe(match.group() + "<div style='display:none;'>" + \
                 "<input type='hidden' " + idattributes.next() + \
-                " name='csrfmiddlewaretoken' value='" + escape(csrf_token) + \
+                " name='csrfmiddlewaretoken' value='" + csrf_token + \
                 "' /></div>")
             # Modify any POST forms


 from django.template import get_library, Library, InvalidTemplateLibrary
 from django.template.smartif import IfParser, Literal
 from django.conf import settings
-from django.utils.html import escape
 from django.utils.encoding import smart_str, smart_unicode
 from django.utils.safestring import mark_safe
             if csrf_token == 'NOTPROVIDED':
                 return mark_safe(u"")
-                return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % escape(csrf_token))
+                return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % csrf_token)
             # It's very probable that the token is missing because of
             # misconfiguration, so we raise a warning


 from django.views.decorators.csrf import csrf_exempt, csrf_view_exempt
 from django.core.context_processors import csrf
 from django.contrib.sessions.middleware import SessionMiddleware
-from django.utils.html import escape
 from django.utils.importlib import import_module
 from django.conf import settings
 from django.template import RequestContext, Template
 # Response/views used for CsrfResponseMiddleware and CsrfViewMiddleware tests
 def post_form_response():
-    resp = HttpResponse(content="""
-<html><body><form method="post"><input type="text" /></form></body></html>
+    resp = HttpResponse(content=u"""
+<html><body><h1>\u00a1Unicode!<form method="post"><input type="text" /></form></body></html>
 """, mimetype="text/html")
     return resp
 class CsrfMiddlewareTest(TestCase):
     # The csrf token is potentially from an untrusted source, so could have
-    # characters that need escaping
-    _csrf_id = "<1>"
+    # characters that need dealing with.
+    _csrf_id_cookie = "<1>\xc2\xa1"
+    _csrf_id = "1"
     # This is a valid session token for this ID and secret key.  This was generated using
     # the old code that we're to be backwards-compatible with.  Don't use the CSRF code
     def _get_GET_csrf_cookie_request(self):
         req = TestingHttpRequest()
-        req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id
+        req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie
         return req
     def _get_POST_csrf_cookie_request(self):
         return req
     def _check_token_present(self, response, csrf_id=None):
-        self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % escape(csrf_id or self._csrf_id))
+        self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id))
     # Check the post processing and outgoing cookie
     def test_process_response_no_csrf_cookie(self):
         resp = token_view(req)
         self.assertEquals(u"", resp.content)
+    def test_token_node_empty_csrf_cookie(self):
+        """
+        Check that we get a new token if the csrf_cookie is the empty string
+        """
+        req = self._get_GET_no_csrf_cookie_request()
+        req.COOKIES[settings.CSRF_COOKIE_NAME] = ""
+        CsrfViewMiddleware().process_view(req, token_view, (), {})
+        resp = token_view(req)
+        self.assertNotEqual(u"", resp.content)
     def test_token_node_with_csrf_cookie(self):
         Check that CsrfTokenNode works when a CSRF cookie is set
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.