Commits

Robert Brewer committed b3b3a28

trunk: syncing with python3, except preferring WSGI 1.0 and sticking with bytes for request attrs.

  • Participants
  • Parent commits ee8b18f

Comments (0)

Files changed (10)

File cherrypy/_cperror.py

     pass
 
 
-class MaxSizeExceeded(CherryPyException):
-    """Exception raised when the request body is longer than allowed."""
-    pass
-
-
 class TimeoutError(CherryPyException):
     """Exception raised when Response.timed_out is detected."""
     pass
             request = cherrypy.serving.request
             path = request.script_name + request.path_info
         self.args = (path,)
-        HTTPError.__init__(self, 404, "The path %r was not found." % path)
+        HTTPError.__init__(self, 404, "The path '%s' was not found." % path)
 
 
 _HTTPErrorTemplate = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"

File cherrypy/_cpreqbody.py

 from urllib import unquote_plus
 
 import cherrypy
-from cherrypy._cperror import MaxSizeExceeded
 from cherrypy.lib import httputil
 
 
         self.bytes_read. The number may be smaller than 'size' when 1) the
         client sends fewer bytes, 2) the 'Content-Length' request header
         specifies fewer bytes than requested, or 3) the number of bytes read
-        exceeds self.maxbytes (in which case, MaxSizeExceeded is raised).
+        exceeds self.maxbytes (in which case, 413 is raised).
         
         If the 'fp_out' argument is None (the default), all bytes read are
         returned in a single byte string.
             # Check lengths.
             self.bytes_read += datalen
             if self.maxbytes and self.bytes_read > self.maxbytes:
-                raise MaxSizeExceeded()
+                raise cherrypy.HTTPError(413)
             
             # Store the data.
             if fp_out is None:
             # Check lengths.
             self.bytes_read += datalen
             if self.maxbytes and self.bytes_read > self.maxbytes:
-                raise MaxSizeExceeded()
+                raise cherrypy.HTTPError(413)
             
             # Store the data.
             if fp_out is None:
             else:
                 self.attempt_charsets.append(u'ISO-8859-1')
         
-        self.fp = SizedReader(self.fp, self.length,
-                              self.maxbytes, bufsize=self.bufsize)
         # Temporary fix while deprecating passing .parts as .params.
         self.processors[u'multipart'] = _old_process_multipart
         
         if u'Content-Length' not in h and u'Transfer-Encoding' not in h:
             raise cherrypy.HTTPError(411)
         
+        self.fp = SizedReader(self.fp, self.length,
+                              self.maxbytes, bufsize=self.bufsize)
         super(RequestBody, self).process()
         
         # Body params should also be a part of the request_params

File cherrypy/_cpserver.py

     ssl_private_key = None
     ssl_module = 'pyopenssl'
     nodelay = True
+    wsgi_version = (1, 0)
     
     def __init__(self):
         self.bus = cherrypy.engine

File cherrypy/_cptree.py

         if isinstance(root, Application):
             app = root
             if script_name != "" and script_name != app.script_name:
-                raise ValueError("Cannot specify a different script name and pass an Application instance to cherrypy.mount")
+                raise ValueError("Cannot specify a different script name and "
+                                 "pass an Application instance to cherrypy.mount")
             script_name = app.script_name
         else:
             app = Application(root, script_name)
         # If you're calling this, then you're probably setting SCRIPT_NAME
         # to '' (some WSGI servers always set SCRIPT_NAME to '').
         # Try to look up the app using the full path.
-        path = httputil.urljoin(environ.get('SCRIPT_NAME', ''),
-                                environ.get('PATH_INFO', ''))
+        env11 = environ
+        if environ.get(u'wsgi.version') == (1, 1):
+            env11 = _cpwsgi.downgrade_wsgi_11_to_10(environ)
+        path = httputil.urljoin(env11.get('SCRIPT_NAME', ''),
+                                env11.get('PATH_INFO', ''))
         sn = self.script_name(path or "/")
         if sn is None:
             start_response('404 Not Found', [])
         
         # Correct the SCRIPT_NAME and PATH_INFO environ entries.
         environ = environ.copy()
-        environ['SCRIPT_NAME'] = sn
-        environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
+        if environ.get(u'wsgi.version') == (1, 1):
+            # Python 2/WSGI 1.1: all strings MUST be of type unicode
+            enc = environ[u'wsgi.url_encoding']
+            environ[u'SCRIPT_NAME'] = sn.decode(enc)
+            environ[u'PATH_INFO'] = path[len(sn.rstrip("/")):].decode(enc)
+        else:
+            # Python 2/WSGI 1.0: all strings MUST be of type str
+            environ['SCRIPT_NAME'] = sn
+            environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
         return app(environ, start_response)
 

File cherrypy/_cpwsgi.py

 from cherrypy.lib import httputil
 
 
+def downgrade_wsgi_11_to_10(environ):
+    """Return a new environ dict for WSGI 1.0 from the given WSGI 1.1 environ."""
+    env10 = {}
+    
+    enc = environ[u'wsgi.url_encoding']
+    for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]:
+        env10[str(key)] = environ[key].encode(enc)
+    
+    for k, v in environ.items():
+        if k in [u'PATH_INFO', u'SCRIPT_NAME', u'QUERY_STRING']:
+            continue
+        if isinstance(v, unicode) and k not in [u'REQUEST_URI', u'wsgi.input']:
+            v = v.encode('ISO-8859-1')
+        env10[k.encode('ISO-8859-1')] = v
+    
+    return env10
+
+
 class VirtualHost(object):
     """Select a different WSGI application based on the Host header.
     
     def __init__(self, environ, start_response, cpapp, recursive=False):
         self.redirections = []
         self.recursive = recursive
+        if environ.get(u'wsgi.version') == (1, 1):
+            environ = downgrade_wsgi_11_to_10(environ)
         self.environ = environ
         self.start_response = start_response
         self.cpapp = cpapp
     def next(self):
         try:
             chunk = self.iter_response.next()
-            # WSGI requires all data to be of type "str". This coercion should
-            # not take any time at all if chunk is already of type "str".
+            # WSGI 1.x requires all response data to be of type "str".
+            # This coercion should not take any time at all if chunk is
+            # already of type "str".
             # If it's unicode, it could be a big performance hit (x ~500).
             if not isinstance(chunk, str):
                 chunk = unicode(chunk).encode("ISO-8859-1")
         local = httputil.Host('', int(env('SERVER_PORT', 80)),
                            env('SERVER_NAME', ''))
         remote = httputil.Host(env('REMOTE_ADDR', ''),
-                            int(env('REMOTE_PORT', -1)),
-                            env('REMOTE_HOST', ''))
+                               int(env('REMOTE_PORT', -1)),
+                               env('REMOTE_HOST', ''))
         scheme = env('wsgi.url_scheme')
         sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1")
         request, resp = self.cpapp.get_serving(local, remote, scheme, sproto)

File cherrypy/_cpwsgi_server.py

 
 class CPHTTPRequest(wsgiserver.HTTPRequest):
     
-    def __init__(self, sendall, environ, wsgi_app):
+    def __init__(self, rfile, wfile, environ, wsgi_app):
         s = cherrypy.server
         self.max_request_header_size = s.max_request_header_size or 0
         self.max_request_body_size = s.max_request_body_size or 0
-        wsgiserver.HTTPRequest.__init__(self, sendall, environ, wsgi_app)
+        wsgiserver.HTTPRequest.__init__(self, rfile, wfile, environ, wsgi_app)
 
 
 class CPHTTPConnection(wsgiserver.HTTPConnection):
         # We have to make custom subclasses of wsgiserver internals here
         # so that our server.* attributes get applied to every request.
         class _CPHTTPRequest(wsgiserver.HTTPRequest):
-            def __init__(self, sendall, environ, wsgi_app):
+            def __init__(self, rfile, wfile, environ, wsgi_app):
                 s = server_adapter
                 self.max_request_header_size = s.max_request_header_size or 0
                 self.max_request_body_size = s.max_request_body_size or 0
-                wsgiserver.HTTPRequest.__init__(self, sendall, environ, wsgi_app)
+                wsgiserver.HTTPRequest.__init__(self, rfile, wfile, environ, wsgi_app)
         class _CPHTTPConnection(wsgiserver.HTTPConnection):
             RequestHandlerClass = _CPHTTPRequest
         self.ConnectionClass = _CPHTTPConnection
         self.protocol = self.server_adapter.protocol_version
         self.nodelay = self.server_adapter.nodelay
         
+        self.environ["wsgi.version"] = self.server_adapter.wsgi_version
+        
         if self.server_adapter.ssl_context:
             adapter_class = self.get_ssl_adapter_class()
             s.ssl_adapter = adapter_class(self.server_adapter.ssl_certificate,

File cherrypy/test/test_config.py

     def test_request_body_namespace(self):
         self.getPage("/plain", method='POST', headers=[
             ('Content-Type', 'application/x-www-form-urlencoded'),
-            ('Content-Length', 13)],
+            ('Content-Length', '13')],
             body='\xff\xfex\x00=\xff\xfea\x00b\x00c\x00')
         self.assertBody("abc")
 

File cherrypy/test/test_config_server.py

             return "Size: %s" % len(file.file.read())
         upload.exposed = True
         
-        def tinyupload(self, maxlen):
-            cherrypy.request.rfile.maxlen = int(maxlen)
-            cl = int(cherrypy.request.headers['Content-Length'])
-            try:
-                body = cherrypy.request.rfile.read(cl)
-            except Exception, e:
-                if e.__class__.__name__ == 'MaxSizeExceeded':
-                    # Post data is too big
-                    raise cherrypy.HTTPError(413)
-                else:
-                    raise
-            return body
+        def tinyupload(self):
+            return cherrypy.request.body.read()
         tinyupload.exposed = True
-        tinyupload._cp_config = {'request.process_request_body': False}
+        tinyupload._cp_config = {'request.body.maxbytes': 100}
     
     cherrypy.tree.mount(Root())
     
         self.assertBody(str(self.PORT))
     
     def testAdditionalServers(self):
+        if self.scheme == 'https':
+            return self.skip("not available under ssl")
         self.PORT = 9877
         self.getPage("/")
         self.assertBody(str(self.PORT))
         if getattr(cherrypy.server, "using_apache", False):
             return self.skip("skipped due to known Apache differences... ")
         
-        self.getPage('/tinyupload?maxlen=100', method="POST", body="x" * 100)
+        self.getPage('/tinyupload', method="POST",
+                     headers=[('Content-Type', 'text/plain'),
+                              ('Content-Length', '100')],
+                     body="x" * 100)
         self.assertStatus(200)
         self.assertBody("x" * 100)
-        self.getPage('/tinyupload?maxlen=100', method="POST", body="x" * 101)
+        
+        self.getPage('/tinyupload', method="POST",
+                     headers=[('Content-Type', 'text/plain'),
+                              ('Content-Length', '101')],
+                     body="x" * 101)
         self.assertStatus(413)
     
     def testMaxRequestSize(self):

File cherrypy/test/test_request_obj.py

         def index(self):
             yield "<h1>Choose your document</h1>\n"
             yield "<ul>\n"
-            for id, contents in self.documents.iteritems():
+            for id, contents in self.documents.items():
                 yield ("    <li><a href='/divorce/get?ID=%s'>%s</a>: %s</li>\n"
                        % (id, id, contents))
             yield "</ul>"
         self.persistent = True
         try:
             conn = self.HTTP_CONN
-##            conn.set_debuglevel(10)
             conn.putrequest("PUT", "/method/request_body", skip_host=True)
             conn.putheader("Host", self.HOST)
             conn.putheader('Content-Length', str(len(b)))

File cherrypy/wsgiserver/__init__.py

 Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance.
 
 This won't call the CherryPy engine (application side) at all, only the
-WSGI server, which is independant from the rest of CherryPy. Don't
+WSGI server, which is independent from the rest of CherryPy. Don't
 let the name "CherryPyWSGIServer" throw you; the name merely reflects
 its origin, not its coupling.
 
         return data
 
 
+class KnownLengthRFile(object):
+    """Wraps a file-like object, returning an empty string when exhausted."""
+    
+    def __init__(self, rfile, content_length):
+        self.rfile = rfile
+        self.remaining = content_length
+    
+    def read(self, size=None):
+        if self.remaining == 0:
+            return ''
+        if size is None:
+            size = self.remaining
+        else:
+            size = min(size, self.remaining)
+        
+        data = self.rfile.read(size)
+        self.remaining -= len(data)
+        return data
+    
+    def readline(self, size=None):
+        if self.remaining == 0:
+            return ''
+        if size is None:
+            size = self.remaining
+        else:
+            size = min(size, self.remaining)
+        
+        data = self.rfile.readline(size)
+        self.remaining -= len(data)
+        return data
+    
+    def readlines(self, sizehint=0):
+        # Shamelessly stolen from StringIO
+        total = 0
+        lines = []
+        line = self.readline(sizehint)
+        while line:
+            lines.append(line)
+            total += len(line)
+            if 0 < sizehint <= total:
+                break
+            line = self.readline(sizehint)
+        return lines
+    
+    def close(self):
+        self.rfile.close()
+    
+    def __iter__(self):
+        return self
+    
+    def __next__(self):
+        data = next(self.rfile)
+        self.remaining -= len(data)
+        return data
+
+
 class HTTPRequest(object):
     """An HTTP Request (and response).
     
     send: the 'send' method from the connection's socket object.
     wsgi_app: the WSGI application to call.
     environ: a partial WSGI environ (server and connection entries).
-        The caller MUST set the following entries:
-        * All wsgi.* entries, including .input
+        Because this server supports both WSGI 1.0 and 1.1, this attribute is
+        neither; instead, it has unicode keys and byte string values. It is
+        converted to the appropriate WSGI version when the WSGI app is called.
+        
+        The caller MUST set the following entries (because this class doesn't):
+        * All wsgi.* entries except .input and .url_encoding
         * SERVER_NAME and SERVER_PORT
         * Any SSL_* entries
         * Any custom entries like REMOTE_ADDR and REMOTE_PORT
     max_request_header_size = 0
     max_request_body_size = 0
     
-    def __init__(self, wfile, environ, wsgi_app):
-        self.rfile = environ['wsgi.input']
+    def __init__(self, rfile, wfile, environ, wsgi_app):
+        self._rfile = rfile
+        self.rfile = rfile
         self.wfile = wfile
         self.environ = environ.copy()
         self.wsgi_app = wsgi_app
     
     def parse_request(self):
         """Parse the next HTTP request start-line and message-headers."""
-        self.rfile.maxlen = self.max_request_header_size
-        self.rfile.bytes_read = 0
-        
+        self.rfile = SizeCheckWrapper(self._rfile, self.max_request_header_size)
         try:
             self._parse_request()
         except MaxSizeExceeded:
         # (although your TCP stack might suffer for it: cf Apache's history
         # with FIN_WAIT_2).
         request_line = self.rfile.readline()
+        
         # Set started_request to True so communicate() knows to send 408
         # from here on out.
         self.started_request = True
         if '?' in path:
             path, qs = path.split('?', 1)
         
-        uri_enc = environ.get('REQUEST_URI_ENCODING', 'utf-8')
-        
         # Unquote the path+params (e.g. "/this%20path" -> "this path").
         # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
         #
         rp = int(req_protocol[5]), int(req_protocol[7])
         server_protocol = environ["ACTUAL_SERVER_PROTOCOL"]
         sp = int(server_protocol[5]), int(server_protocol[7])
-
+        
         if sp[0] != rp[0]:
             self.simple_response("505 HTTP Version Not Supported")
             return
         if environ.get("HTTP_EXPECT", "") == "100-continue":
             # Don't use simple_response here, because it emits headers
             # we don't want. See http://www.cherrypy.org/ticket/951
-            msg = "%s 100 Continue\r\n\r\n" % self.environ['ACTUAL_SERVER_PROTOCOL']
+            msg = self.environ['ACTUAL_SERVER_PROTOCOL'] + " 100 Continue\r\n\r\n"
             try:
                 self.wfile.sendall(msg)
             except socket.error, x:
                     raise
         
         self.ready = True
-
+    
     def parse_request_uri(self, uri):
         """Parse a Request-URI into (scheme, authority, path).
         
                     k, v = line.split(":", 1)
                 except ValueError:
                     raise ValueError("Illegal header line.")
-                k, v = k.strip().upper(), v.strip()
+                k = k.strip().decode('ISO-8859-1').upper()
+                v = v.strip()
                 envname = "HTTP_" + k.replace("-", "_")
             
             if k in comma_separated_headers:
     
     def decode_chunked(self):
         """Decode the 'chunked' transfer coding."""
+        self.rfile = SizeCheckWrapper(self._rfile, self.max_request_body_size)
         cl = 0
         data = StringIO.StringIO()
         while True:
             crlf = self.rfile.read(2)
             if crlf != CRLF:
                 self.simple_response("400 Bad Request",
-                                     "Bad chunked transfer coding "
-                                     "(expected '\\r\\n', got " + repr(crlf) + ")")
+                     "Bad chunked transfer coding (expected '\\r\\n', "
+                     "got " + repr(crlf) + ")")
                 return
         
         # Grab any trailer headers
         self.read_headers()
         
         data.seek(0)
-        self.environ["wsgi.input"] = data
+        self.rfile = data
         self.environ["CONTENT_LENGTH"] = str(cl) or ""
         return True
     
     def respond(self):
         """Call the appropriate WSGI app and write its iterable output."""
-        # Set rfile.maxlen to ensure we don't read past Content-Length.
-        # This will also be used to read the entire request body if errors
-        # are raised before the app can read the body.
         if self.chunked_read:
             # If chunked, Content-Length will be 0.
-            self.rfile.maxlen = self.max_request_body_size
+            try:
+                if not self.decode_chunked():
+                    self.close_connection = True
+                    return
+            except MaxSizeExceeded:
+                self.simple_response("413 Request Entity Too Large")
+                return
         else:
             cl = int(self.environ.get("CONTENT_LENGTH", 0))
-            if self.max_request_body_size:
-                self.rfile.maxlen = min(cl, self.max_request_body_size)
-            else:
-                self.rfile.maxlen = cl
-        self.rfile.bytes_read = 0
+            if self.max_request_body_size and self.max_request_body_size < cl:
+                if not self.sent_headers:
+                    self.simple_response("413 Request Entity Too Large")
+                return
+            self.rfile = KnownLengthRFile(self._rfile, cl)
         
-        try:
-            self._respond()
-        except MaxSizeExceeded:
-            if not self.sent_headers:
-                self.simple_response("413 Request Entity Too Large")
-            return
+        self.environ["wsgi.input"] = self.rfile
+        self._respond()
     
     def _respond(self):
-        if self.chunked_read:
-            if not self.decode_chunked():
-                self.close_connection = True
-                return
-        
-        response = self.wsgi_app(self.environ, self.start_response)
+        env = self.get_version_specific_environ()
+        #for k, v in sorted(env.items()):
+        #    print(k, '=', v)
+        response = self.wsgi_app(env, self.start_response)
         try:
             for chunk in response:
                 # "The start_response callable must not actually transmit
         if self.chunked_write:
             self.wfile.sendall("0\r\n\r\n")
     
+    def get_version_specific_environ(self):
+        """Return a new environ dict targeting the given wsgi.version"""
+        # Note that our internal environ type has keys decoded with ISO-8859-1
+        # but byte string values.
+        if self.environ["wsgi.version"] == (1, 0):
+            # Encode all keys.
+            env10 = {}
+            for k, v in self.environ.items():
+                if isinstance(k, unicode):
+                    k = k.encode('ISO-8859-1')
+                env10[k] = v
+            return env10
+        
+        env11 = self.environ.copy()
+        
+        # Request-URI
+        env11.setdefault('wsgi.url_encoding', 'utf-8')
+        try:
+            for key in ["PATH_INFO", "SCRIPT_NAME", "QUERY_STRING"]:
+                env11[key] = self.environ[key].decode(env11['wsgi.url_encoding'])
+        except UnicodeDecodeError:
+            # Fall back to latin 1 so apps can transcode if needed.
+            env11['wsgi.url_encoding'] = 'ISO-8859-1'
+            for key in ["PATH_INFO", "SCRIPT_NAME", "QUERY_STRING"]:
+                env11[key] = self.environ[key].decode(env11['wsgi.url_encoding'])
+        
+        for k, v in sorted(env11.items()):
+            if isinstance(v, str) and k not in (
+                'REQUEST_URI', 'PATH_INFO', 'SCRIPT_NAME', 'QUERY_STRING',
+                'wsgi.input'):
+                env11[k] = v.decode('ISO-8859-1')
+        
+        return env11
+    
     def simple_response(self, status, msg=""):
         """Write a simple response back to the client."""
         status = str(status)
-        buf = ["%s %s\r\n" % (self.environ['ACTUAL_SERVER_PROTOCOL'], status),
+        buf = [self.environ['ACTUAL_SERVER_PROTOCOL'] + " " +
+               status + CRLF,
                "Content-Length: %s\r\n" % len(msg),
                "Content-Type: text/plain\r\n"]
         
             # requirement is not be construed as preventing a server from
             # defending itself against denial-of-service attacks, or from
             # badly broken client implementations."
-            size = self.rfile.maxlen - self.rfile.bytes_read
-            if size > 0:
-                self.rfile.read(size)
+            remaining = getattr(self.rfile, 'remaining', 0)
+            if remaining > 0:
+                self.rfile.read(remaining)
         
         if "date" not in hkeys:
             self.outheaders.append(("Date", rfc822.formatdate()))
     
     rbufsize = -1
     RequestHandlerClass = HTTPRequest
-    environ = {"wsgi.version": (1, 0),
-               "wsgi.url_scheme": "http",
+    environ = {"wsgi.url_scheme": "http",
                "wsgi.multithread": True,
                "wsgi.multiprocess": False,
                "wsgi.run_once": False,
         
         self.rfile = makefile(sock, "rb", self.rbufsize)
         self.wfile = makefile(sock, "wb", -1)
-        
-        # Wrap wsgi.input but not HTTPConnection.rfile itself.
-        # We're also not setting maxlen yet; we'll do that separately
-        # for headers and body for each iteration of self.communicate
-        # (if maxlen is 0 the wrapper doesn't check length).
-        self.environ["wsgi.input"] = SizeCheckWrapper(self.rfile, 0)
     
     def communicate(self):
         """Read each request and respond appropriately."""
                 # the RequestHandlerClass constructor, the error doesn't
                 # get written to the previous request.
                 req = None
-                req = self.RequestHandlerClass(self.wfile, self.environ,
-                                               self.wsgi_app)
+                req = self.RequestHandlerClass(
+                    self.rfile, self.wfile, self.environ, self.wsgi_app)
                 
                 # This order of operations should guarantee correct pipelining.
                 req.parse_request()