Commits

Robert Brewer committed eceb4cb

WSGI-related changes:

1. Changed Request.run from request_line arg to deconstructed args: method, path, query_string, and protocol.
2. Moved HTTP protocol checking from _cprequest to _cpwsgiserver. What was cherrypy.response.version is now cherrypy.request.protocol (tuple form of SERVER_PROTOCOL). The request.version and response.version attributes were removed. _cpwsgiserver now writes out server.protocol, not SERVER_PROTOCOL (which is a misnomer; it really should have been REQUEST_PROTOCOL).
3. Path unquoting was also moved from _cprequest to _cpwsgiserver (as most other WSGI servers do).
4. New test for absoluteURI in the Request-Line.

Comments (0)

Files changed (12)

     def close(self):
         pass
     
-    def run(self, request_line, headers, rfile):
+    def run(self, method, path, query_string, protocol, headers, rfile):
         self.method = "GET"
         cherrypy.HTTPError(503, self.msg).set_response()
         cherrypy.response.finalize()
         # browser support for 301 is quite messy. Do 302/303 instead. See
         # http://ppewww.ph.gla.ac.uk/~flavell/www/post-redirect.html
         if status is None:
-            if cherrypy.response.version >= (1, 1):
+            if cherrypy.request.protocol >= (1, 1):
                 status = 303
             else:
                 status = 302
         request.multiprocess = bool(forked)
         
         # Run the CherryPy Request object and obtain the response
-        requestLine = req.the_request
         headers = req.headers_in.items()
         rfile = _ReadOnlyRequest(req)
-        response = request.run(requestLine, headers, rfile)
+        response = request.run(req.method, req.uri, req.args or "",
+                               req.protocol, headers, rfile)
         
         sendResponse(req, response.status, response.header_list, response.body)
         request.close()
     method = "GET"
     path = ""
     query_string = ""
-    protocol = ""
+    protocol = (1, 1)
     params = {}
-    version = http.version_from_http("HTTP/1.1")
     
     # Message attributes
     header_list = []
             self.hooks.run('on_end_request')
             cherrypy.serving.__dict__.clear()
     
-    def run(self, request_line, headers, rfile):
+    def run(self, method, path, query_string, protocol, headers, rfile):
         """Process the Request.
         
-        request_line should be of the form "GET /path HTTP/1.0".
+        method, path, query_string, and protocol should be pulled directly
+            from the Request-Line (e.g. "GET /path?key=val HTTP/1.0").
+        path should be %XX-unquoted, but query_string should not be.
         headers should be a list of (name, value) tuples.
         rfile should be a file-like object containing the HTTP request entity.
         
         attributes to build the outbound stream.
         
         """
-        self.error_response = cherrypy.HTTPError(500).set_response
-        
-        self.request_line = request_line.strip()
-        self.header_list = list(headers)
-        self.rfile = rfile
-        self.headers = http.HeaderMap()
-        self.simple_cookie = Cookie.SimpleCookie()
-        self.handler = None
-        
         try:
-            self.process_request_line()
+            self.error_response = cherrypy.HTTPError(500).set_response
+            
+            self.method = method
+            self.path = path or "/"
+            self.query_string = query_string
+            self.protocol = int(protocol[5]), int(protocol[7])
+            
+            # Rebuild first line of the request (e.g. "GET /path HTTP/1.0").
+            url = path
+            if query_string:
+                url += '?' + query_string
+            self.request_line = '%s %s %s' % (method, url, protocol)
+            
+            self.header_list = list(headers)
+            self.rfile = rfile
+            self.headers = http.HeaderMap()
+            self.simple_cookie = Cookie.SimpleCookie()
+            self.handler = None
             
             # Get the 'Host' header, so we can do HTTPRedirects properly.
             self.process_headers()
                 if r is None:
                     raise cherrypy.NotFound()
                 self.app = cherrypy.tree.apps[r]
+            else:
+                self.script_name = self.app.script_name
             
             # path_info should be the path from the
             # app root (script_name) to the handler.
         finally:
             self.hooks.run('on_end_resource')
     
-    def process_request_line(self):
-        """Parse the first line (e.g. "GET /path HTTP/1.1") of the request."""
-        rl = self.request_line
-        method, path, qs, proto = http.parse_request_line(rl)
-        if path == "*":
-            path = "global"
-        
-        self.method = method
-        self.path = path
-        self.query_string = qs
-        self.protocol = proto
-        
-        # Compare request and server HTTP versions, in case our server does
-        # not support the requested version. We can't tell the server what
-        # version number to write in the response, so we limit our output
-        # to min(req, server). We want the following output:
-        #     request    server     actual written   supported response
-        #     version    version   response version  feature set (resp.v)
-        # a     1.0        1.0           1.0                1.0
-        # b     1.0        1.1           1.1                1.0
-        # c     1.1        1.0           1.0                1.0
-        # d     1.1        1.1           1.1                1.1
-        # Notice that, in (b), the response will be "HTTP/1.1" even though
-        # the client only understands 1.0. RFC 2616 10.5.6 says we should
-        # only return 505 if the _major_ version is different.
-        
-        # cherrypy.request.version == request.protocol in a Version instance.
-        self.version = http.version_from_http(self.protocol)
-        
-        # cherrypy.response.version should be used to determine whether or
-        # not to include a given HTTP/1.1 feature in the response content.
-        server_v = cherrypy.config.get('server.protocol_version', 'HTTP/1.0')
-        server_v = http.version_from_http(server_v)
-        cherrypy.response.version = min(self.version, server_v)
-    
     def process_headers(self):
         self.params = http.parseQueryString(self.query_string)
         
             # All Internet-based HTTP/1.1 servers MUST respond with a 400
             # (Bad Request) status code to any HTTP/1.1 request message
             # which lacks a Host header field.
-            if self.version >= (1, 1):
+            if self.protocol >= (1, 1):
                 msg = "HTTP/1.1 requires a 'Host' request header."
                 raise cherrypy.HTTPError(400, msg)
         host = dict.__getitem__(headers, 'Host')
     headers = http.HeaderMap()
     simple_cookie = Cookie.SimpleCookie()
     body = Body()
-    version = (1, 0)
     
     def __init__(self):
         self.status = None
                 dict.__setitem__(headers, 'Content-Length', len(content))
         
         # Transform our header dict into a sorted list of tuples.
-        self.header_list = h = headers.output(self.version)
+        self.header_list = h = headers.output(cherrypy.request.protocol)
         
         cookie = self.simple_cookie.output()
         if cookie:
 from cherrypy.lib import http
 
 
-def request_line(environ):
-    """Rebuild first line of the request (e.g. "GET /path HTTP/1.0")."""
-    
-    resource = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')
-    if not (resource == "*" or resource.startswith("/")):
-        resource = "/" + resource
-    
-    qString = environ.get('QUERY_STRING')
-    if qString:
-        resource += '?' + qString
-    
-    resource = resource.replace(" ", "%20")
-    
-    return ('%s %s %s' % (environ['REQUEST_METHOD'],
-                          resource or '/',
-                          environ['SERVER_PROTOCOL']
-                          )
-            )
-
 headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization',
                'CONTENT_LENGTH': 'Content-Length',
                'CONTENT_TYPE': 'Content-Type',
         # LOGON_USER is served by IIS, and is the name of the
         # user after having been mapped to a local account.
         # Both IIS and Apache set REMOTE_USER, when possible.
-        request.login = (env('LOGON_USER') or env('REMOTE_USER') or None)
+        request.login = env('LOGON_USER') or env('REMOTE_USER') or None
         
         request.multithread = environ['wsgi.multithread']
         request.multiprocess = environ['wsgi.multiprocess']
         
         if app:
             request.app = app
-            request.script_name = app.script_name
         
-        response = request.run(request_line(environ),
+        path = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')
+        response = request.run(environ['REQUEST_METHOD'], path,
+                               environ.get('QUERY_STRING'),
+                               environ.get('SERVER_PROTOCOL'),
                                translate_headers(environ),
                                environ['wsgi.input'])
         s, h, b = response.status, response.header_list, response.body
             _cpwsgiserver.HTTPRequest.parse_request(self)
         except http.MaxSizeExceeded:
             msg = "Request Entity Too Large"
-            proto = self.environ.get("SERVER_PROTOCOL", "HTTP/1.0")
-            self.wfile.write("%s 413 %s\r\n" % (proto, msg))
+            self.wfile.write("%s 413 %s\r\n" % (self.server.protocol, msg))
             self.wfile.write("Content-Length: %s\r\n\r\n" % len(msg))
             self.wfile.write(msg)
             self.wfile.flush()
                    conf('server.socket_host'),
                    request_queue_size = conf('server.socket_queue_size'),
                    )
+        s.protocol = conf('server.protocol_version', 'HTTP/1.0')
 
 """A high-speed, production ready, thread pooled, generic WSGI server."""
 
+import mimetools # todo: use email
+import Queue
+import re
+quoted_slash = re.compile("(?i)%2F")
+import rfc822
 import socket
+import sys
 import threading
-import Queue
-import mimetools # todo: use email
-import rfc822
-import sys
 import time
 import traceback
+from urllib import unquote
+from urlparse import urlparse
 
 import errno
 socket_errors_to_ignore = []
         if not request_line:
             self.ready = False
             return
-        method,path,version = request_line.strip().split(" ", 2)
-        if "?" in path:
-            path, qs = path.split("?", 1)
-        else:
-            qs = ""
+        
+        method, path, req_protocol = request_line.strip().split(" ", 2)
         self.environ["REQUEST_METHOD"] = method
         
+        # path may be an absoluteURI (including "http://host.domain.tld").
+        scheme, location, path, params, qs, frag = urlparse(path)
+        if params:
+            path = path + ";" + params
+        
+        # Unquote the path+params (e.g. "/this%20path" -> "/this path").
+        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
+        #
+        # But note that "...a URI must be separated into its components
+        # before the escaped characters within those components can be
+        # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
+        atoms = [unquote(x) for x in quoted_slash.split(path)]
+        path = "%2F".join(atoms)
+        
         for mount_point, wsgi_app in self.server.mount_points:
             if path == "*":
                 # This means, of course, that the first wsgi_app will
             self.abort("404 Not Found")
             return
         
+        # Note that, like wsgiref and most other WSGI servers,
+        # we unquote the path but not the query string.
         self.environ["QUERY_STRING"] = qs
-        self.environ["SERVER_PROTOCOL"] = version
-        self.environ["SERVER_NAME"] = self.server.server_name
+        
+        # Compare request and server HTTP protocol versions, in case our
+        # server does not support the requested protocol. Limit our output
+        # to min(req, server). We want the following output:
+        #     request    server     actual written   supported response
+        #     protocol   protocol  response protocol feature set (SERVER_PROTOCOL)
+        # a     1.0        1.0           1.0                1.0
+        # b     1.0        1.1           1.1                1.0
+        # c     1.1        1.0           1.0                1.0
+        # d     1.1        1.1           1.1                1.1
+        # Notice that, in (b), the response will be "HTTP/1.1" even though
+        # the client only understands 1.0. RFC 2616 10.5.6 says we should
+        # only return 505 if the _major_ version is different.
+        rp = int(req_protocol[5]), int(req_protocol[7])
+        sp = int(self.server.protocol[5]), int(self.server.protocol[7])
+        if sp[0] != rp[0]:
+            self.abort("505 HTTP Version Not Supported")
+            return
+        self.environ["SERVER_PROTOCOL"] = "HTTP/%s.%s" % min(rp, sp)
+        
+        # If the Request-URI was an absoluteURI, use its location atom.
+        self.environ["SERVER_NAME"] = location or self.server.server_name
+        
         if isinstance(self.server.bind_addr, basestring):
             # AF_UNIX. This isn't really allowed by WSGI, which doesn't
             # address unix domain sockets. But it's better than nothing.
             self.environ["REMOTE_HOST"] = self.addr[0]
             self.environ["REMOTE_ADDR"] = self.addr[0]
             self.environ["REMOTE_PORT"] = str(self.addr[1])
+        
         # then all the http headers
         headers = mimetools.Message(self.rfile)
         self.environ["CONTENT_TYPE"] = headers.getheader("Content-type", "")
     
     def abort(self, status, msg=""):
         """Write a simple error message back to the client."""
-        proto = self.environ.get("SERVER_PROTOCOL", "HTTP/1.0")
-        self.wfile.write("%s %s\r\n" % (proto, status))
+        self.wfile.write("%s %s\r\n" % (self.server.protocol, status))
         self.wfile.write("Content-Length: %s\r\n\r\n" % len(msg))
         if msg:
             self.wfile.write(msg)
             self.outheaders.append(("Date", rfc822.formatdate()))
         if "server" not in self.outheaderkeys:
             self.outheaders.append(("Server", self.server.version))
-        if (self.environ["SERVER_PROTOCOL"] == "HTTP/1.1"
+        if (self.server.protocol == "HTTP/1.1"
             and "connection" not in self.outheaderkeys):
             self.outheaders.append(("Connection", "close"))
-        self.wfile.write(self.environ["SERVER_PROTOCOL"] + " " + self.status + "\r\n")
+        self.wfile.write(self.server.protocol + " " + self.status + "\r\n")
         for (k,v) in self.outheaders:
             self.wfile.write(k + ": " + v + "\r\n")
         self.wfile.write("\r\n")
     timeout: the timeout in seconds for accepted connections (default 10).
     """
     
+    protocol = "HTTP/1.0"
     version = "CherryPy/3.0.0alpha"
     ready = False
     _interrupt = None
         if secs == 0:
             if force or "Pragma" not in cherrypy.response.headers:
                 cherrypy.response.headers["Pragma"] = "no-cache"
-            if cherrypy.request.version >= (1, 1):
+            if cherrypy.request.protocol >= (1, 1):
                 if force or "Cache-Control" not in cherrypy.response.headers:
                     cherrypy.response.headers["Cache-Control"] = "no-cache"
         
 import rfc822
 HTTPDate = rfc822.formatdate
 import time
-from urllib import unquote
-from urlparse import urlparse
 
 
 def urljoin(*atoms):
         url = url.replace("//", "/")
     return url
 
-def version_from_http(version_str):
-    """Return a Version tuple from the given 'HTTP/x.y' string."""
-    return int(version_str[5]), int(version_str[7])
+def protocol_from_http(protocol_str):
+    """Return a protocol tuple from the given 'HTTP/x.y' string."""
+    return int(protocol_str[5]), int(protocol_str[7])
 
 def getRanges(headervalue, content_length):
     """Return a list of (start, stop) indices from a Range header, or None.
     return code, reason, message
 
 
-quoted_slash = re.compile("(?i)%2F")
-
-def parse_request_line(request_line):
-    """Return (method, path, querystring, protocol) from a request_line."""
-    method, path, protocol = request_line.split()
-    
-    # path may be an abs_path (including "http://host.domain.tld");
-    # Ignore scheme, location, and fragments (so config lookups work).
-    # [Therefore, this assumes all hosts are valid for this server.
-    # Note that we are also violating the RFC which says: if the host
-    # given is an abs_path, it must override any Host header.]
-    scheme, location, path, params, qs, frag = urlparse(path)
-    
-    if params:
-        path = path + ";" + params
-    
-    # Unquote the path (e.g. "/this%20path" -> "this path").
-    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
-    #
-    # But note that "...a URI must be separated into its components
-    # before the escaped characters within those components can be
-    # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
-    #
-    # Note also that cgi.parse_qs will decode the querystring for us.
-    atoms = [unquote(x) for x in quoted_slash.split(path)]
-    path = "%2F".join(atoms)
-    
-    return method, path, qs, protocol
-
-
 image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
 
 def parseQueryString(query_string, keep_blank_values=True):
             return []
         return header_elements(key, h)
     
-    def output(self, version=(1, 1)):
+    def output(self, protocol=(1, 1)):
         """Transform self into a list of (name, value) tuples."""
         header_list = []
         for key, value in self.iteritems():
                         # "Recipients of header field TEXT containing octets
                         # outside the US-ASCII character set may assume that
                         # they represent ISO-8859-1 characters."
-                        if version >= (1, 1):
+                        if protocol >= (1, 1):
                             v = v.encode("utf-8")
                             v = Header(v, 'utf-8').encode()
                         else:
     bodyfile = open(path, 'rb')
     
     # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
-    if cherrypy.response.version >= (1, 1):
+    if cherrypy.request.protocol >= (1, 1):
         response.headers["Accept-Ranges"] = "bytes"
         r = http.getRanges(cherrypy.request.headers.get('Range'), c_len)
         if r == []:
 
 import sys
 
-import cherrypy
-
-
+import cherrypy
+
+
 # is this sufficient for start_response?
 def start_response(status, response_headers, exc_info=None):
     cherrypy.response.status = status
     environ["PATH_INFO"] = cherrypy.request.path_info
     environ["QUERY_STRING"] = cherrypy.request.query_string
     environ["SERVER_PROTOCOL"] = cherrypy.request.protocol
-    environ["SERVER_NAME"] = cherrypy.request.wsgi_environ['SERVER_NAME']
-    environ["SERVER_PORT"] = cherrypy.request.wsgi_environ['SERVER_PORT']
+    server_name = getattr(cherrypy.server.httpserver, 'server_name', "None")
+    environ["SERVER_NAME"] = server_name 
+    environ["SERVER_PORT"] = cherrypy.config.get('server.socket_port')
     environ["REMOTE_HOST"] = cherrypy.request.remote_host
     environ["REMOTE_ADDR"] = cherrypy.request.remote_addr
     environ["REMOTE_PORT"] = cherrypy.request.remote_port
         environ[envname] = v
     return environ
 
-
-def run(app, env=None):
-    """Run the (WSGI) app and set response.body to its output"""
-    try:
-        environ = cherrypy.request.wsgi_environ
-    except AttributeError:
-        environ = make_environ()
-    environ['SCRIPT_NAME'] = cherrypy.request.script_name
-    environ['PATH_INFO'] = cherrypy.request.path_info
-    
-    if env:
-        environ.update(env)
-    
-    # run the wsgi app and have it set response.body
-    cherrypy.response.body = app(environ, start_response)
-    
-    return True
-
+
+def run(app, env=None):
+    """Run the (WSGI) app and set response.body to its output"""
+    try:
+        environ = cherrypy.request.wsgi_environ
+        environ['SCRIPT_NAME'] = cherrypy.request.script_name
+        environ['PATH_INFO'] = cherrypy.request.path_info
+    except AttributeError:
+        environ = make_environ()
+    
+    if env:
+        environ.update(env)
+    
+    # run the wsgi app and have it set response.body
+    cherrypy.response.body = app(environ, start_response)
+    
+    return True
+

test/benchmark.py

     def close(self):
         pass
     
-    def run(self, request_line, headers, rfile):
+    def run(self, method, path, query_string, protocol, headers, rfile):
         cherrypy.response.status = "204 No Content"
         cherrypy.response.header_list = [("Content-Type", 'text/html'),
                                          ("Server", "Null CherryPy"),

test/test_objectmapping.py

         
         self.script_name = ""
         
+        # Test absoluteURI's in the Request-Line
+        self.getPage('http://localhost/')
+        self.assertBody('world')
+        
         # Test that the "isolated" app doesn't leak url's into the root app.
         # If it did leak, Root.default() would answer with
         #   "default:('isolated', 'doesnt', 'exist')".
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.