1. cherrypy
  2. CherryPy

Commits

Robert Brewer  committed 0e433ec

Added support for RFC 2047 encoding and decoding of header values.

  • Participants
  • Parent commits 8fececd
  • Branches cherrypy

Comments (0)

Files changed (3)

File _cprequest.py

View file
         # Process the headers into self.headers
         for name, value in self.header_list:
             value = value.strip()
+            
             # Warning: if there is more than one header entry for cookies (AFAIK,
             # only Konqueror does that), only the last one will remain in headers
             # (but they will be correctly stored in request.simple_cookie).
-            self.headers[name] = value
+            self.headers[name] = http.decode_TEXT(value)
             
             # Handle cookies differently because on Konqueror, multiple
             # cookies come on different lines with the same key
             for line in cookie.split("\n"):
                 name, value = line.split(": ", 1)
                 self.header_list.append((name, value))
+        
+        self.header_list = [(k, http.encode_TEXT(v)) for k, v in self.header_list]

File lib/http.py

View file
 
 
 import cgi
+from email.Header import Header, decode_header
 import re
 import rfc822
 HTTPDate = rfc822.formatdate
             params = {}
         self.params = params
     
+    def __unicode__(self):
+        """Return the element as unicode: self.value followed by its params."""
+        # One ";key=value" fragment per parameter, in whatever order
+        # self.params.iteritems() yields them.
+        p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()]
+        return u"%s%s" % (self.value, "".join(p))
+    
     def __str__(self):
-        p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()]
-        return "%s%s" % (self.value, "".join(p))
+        # Reuse __unicode__ so both string forms share one formatting path.
+        # NOTE(review): str() of a unicode result containing non-ASCII
+        # characters presumably raises under Python 2 -- confirm callers
+        # only rely on this for ASCII-only values.
+        return str(self.__unicode__())
     
     def parse(elementstr):
         """Transform 'token;key=val' to ('token', {'key': 'val'})."""
     result.sort()
     return result
 
+def encode_TEXT(value):
+    """Encode RFC-2047 TEXT (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
+
+    The value is returned as plain ISO-8859-1 bytes when it can be encoded
+    that way; only values that fail that encoding are wrapped as an
+    RFC 2047 encoded-word using UTF-8.
+    """
+    try:
+        # Preferred path: no encoded-word wrapper is added.
+        value = value.encode("iso-8859-1")
+    except UnicodeEncodeError:
+        # Fallback: encode to UTF-8 bytes, then let email.Header.Header
+        # produce the "=?utf-8?...?=" form.
+        value = value.encode("utf-8")
+        value = Header(value, 'utf-8').encode()
+    return value
+
+def decode_TEXT(value):
+    """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").
+
+    The header value is split into (atom, charset) pairs by
+    email.Header.decode_header; each atom that carries a charset is decoded
+    with it, and all atoms are concatenated into the returned value.
+    """
+    atoms = decode_header(value)
+    decodedvalue = ""
+    for atom, charset in atoms:
+        # Atoms with no charset are appended unchanged.
+        if charset is not None:
+            atom = atom.decode(charset)
+        # NOTE(review): atoms are joined with no separator -- confirm that
+        # whitespace between encoded and unencoded parts survives
+        # decode_header, otherwise adjacent parts will run together.
+        decodedvalue += atom
+    return decodedvalue
 
 def validStatus(status):
     """Return legal HTTP status Code, Reason-phrase and Message.
             if not isinstance(valueList, list):
                 valueList = [valueList]
             for value in valueList:
-                header_list.append((order, (key, str(value))))
+                header_list.append((order, (key, unicode(value))))
         header_list.sort()
         return [item[1] for item in header_list]
 

File test/test_core.py

View file
             hMap['Expires'] = 'Thu, 01 Dec 2194 16:00:00 GMT'
             
             return "double header test"
-
-
+        
+        def ifmatch(self):
+            """Echo the request's If-Match header as the response ETag.
+
+            The body is repr() of the header value as read from
+            cherrypy.request.headers.
+            """
+            val = cherrypy.request.headers['If-Match']
+            # Copy the value read from the request onto the response.
+            cherrypy.response.headers['ETag'] = val
+            return repr(val)
+    
+    
     class HeaderElements(Test):
         
         def get_elements(self, headername):
             e = cherrypy.request.headers.elements(headername)
-            return "\n".join([str(x) for x in e])
+            # One element per line; each is rendered with unicode() rather
+            # than str().
+            return "\n".join([unicode(x) for x in e])
     
     
     class Method(Test):
         for key in ['Content-Length', 'Content-Type', 'Date',
                     'Expires', 'Location', 'Server']:
             self.assertEqual(hnames.count(key), 1)
+        
+        # Test RFC-2047-encoded request and response header values
+        self.getPage("/headers/ifmatch",
+                     [('If-Match', '=?utf-8?q?=E2=84=ABngstr=C3=B6m?=')])
+        self.assertHeader("ETag", '=?utf-8?b?4oSrbmdzdHLDtm0=?=')
+        self.assertBody("u'\\u212bngstr\\xf6m'")
     
     def testHTTPMethods(self):
         # Test that all defined HTTP methods work.