Commits

Robert Brewer committed 1a73a72

2.x backport of RFC-2047 header encoding/decoding (see [1166] et al).

Comments (0)

Files changed (4)

cherrypy/_cphttptools.py

             # Warning: if there is more than one header entry for cookies (AFAIK,
             # only Konqueror does that), only the last one will remain in headers
             # (but they will be correctly stored in request.simple_cookie).
-            self.headers[name] = value
+            self.headers[name] = httptools.decode_TEXT(value)
             
             # Handle cookies differently because on Konqueror, multiple
             # cookies come on different lines with the same key
                 self.headers['Content-Length'] = len(content)
         
         # Transform our header dict into a sorted list of tuples.
-        self.header_list = self.headers.sorted_list()
+        self.header_list = self.headers.sorted_list(protocol=self.version)
         
         cookie = self.simple_cookie.output()
         if cookie:

cherrypy/lib/httptools.py

 
 
 import cgi
+from email.Header import Header, decode_header
 import re
 import time
 import urllib
     result.sort()
     return result
 
+def decode_TEXT(value):
+    """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr")."""
+    atoms = decode_header(value)
+    decodedvalue = ""
+    for atom, charset in atoms:
+        if charset is not None:
+            atom = atom.decode(charset)
+        decodedvalue += atom
+    return decodedvalue
 
 def validStatus(status):
     """Return legal HTTP status Code, Reason-phrase and Message.
     for _ in entity_fields:
         order_map[_] = 2
     
-    def sorted_list(self):
+    def sorted_list(self, protocol=(1, 0)):
         """Transform self into a sorted list of (name, value) tuples.
         
         From http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
             order = self.order_map.get(key, 3)
             if not isinstance(valueList, list):
                 valueList = [valueList]
-            for value in valueList:
-                header_list.append((order, (key, str(value))))
+            for v in valueList:
+                
+                if isinstance(v, unicode):
+                    # HTTP/1.0 says, "Words of *TEXT may contain octets
+                    # from character sets other than US-ASCII." and
+                    # "Recipients of header field TEXT containing octets
+                    # outside the US-ASCII character set may assume that
+                    # they represent ISO-8859-1 characters."
+                    try:
+                        v = v.encode("iso-8859-1")
+                    except UnicodeEncodeError:
+                        if protocol >= (1, 1):
+                            # Encode RFC-2047 TEXT
+                            # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
+                            v = Header(v, 'utf-8').encode()
+                        else:
+                            raise
+                else:
+                    # This coercion should not take any time at all
+                    # if v is already of type "str".
+                    v = str(v)
+                
+                header_list.append((order, (key, v)))
         header_list.sort()
         return [item[1] for item in header_list]
 

cherrypy/test/test.py

         self.server = None
         self.protocol = "HTTP/1.1"
         
-        longopts = ['cover', 'profile', 'dumb', '1.1', 'help',
+        longopts = ['cover', 'profile', 'dumb', '1.0', 'help',
                     'basedir=', 'port=', 'server=']
         longopts.extend(self.available_tests)
         try:

cherrypy/test/test_core.py

             hMap['Expires'] = 'Thu, 01 Dec 2194 16:00:00 GMT'
             
             return "double header test"
-
-
+        
+        def ifmatch(self):
+            val = cherrypy.request.headers['If-Match']
+            cherrypy.response.headers['ETag'] = val
+            return repr(val)
+    
+    
     class HeaderElements(Test):
         
         def get_elements(self, headername):
         for key in ['Content-Length', 'Content-Type', 'Date',
                     'Expires', 'Location', 'Server']:
             self.assertEqual(hnames.count(key), 1)
+        
+        if cherrypy.config.get('server.protocol_version') == "HTTP/1.1":
+            # Test RFC-2047-encoded request and response header values
+            c = "=E2=84=ABngstr=C3=B6m"
+            self.getPage("/headers/ifmatch", [('If-Match', '=?utf-8?q?%s?=' % c)])
+            self.assertBody("u'\\u212bngstr\\xf6m'")
+            self.assertHeader("ETag", '=?utf-8?b?4oSrbmdzdHLDtm0=?=')
+            
+            # Test a *LONG* RFC-2047-encoded request and response header value
+            self.getPage("/headers/ifmatch",
+                         [('If-Match', '=?utf-8?q?%s?=' % (c * 10))])
+            self.assertBody("u'%s'" % ('\\u212bngstr\\xf6m' * 10))
+            self.assertHeader("ETag",
+                              '=?utf-8?b?4oSrbmdzdHLDtm3ihKtuZ3N0csO2beKEq25nc3Ryw7Zt4oSrbmdzdHLDtm0=?='
+                              '=?utf-8?b?4oSrbmdzdHLDtm3ihKtuZ3N0csO2beKEq25nc3Ryw7Zt4oSrbmdzdHLDtm0=?='
+                              '=?utf-8?b?4oSrbmdzdHLDtm3ihKtuZ3N0csO2bQ==?=')
     
     def testHTTPMethods(self):
         # Test that all defined HTTP methods work.