Commits

Robert Brewer committed cda129e

Great progress on #718 (High count of uncollectable objects). Folded the separate InternalRedirect WSGI middleware back into AppResponse.

Comments (0)

Files changed (4)

cherrypy/_cpwsgi.py

 from cherrypy.lib import http as _http
 
 
-#                            Internal Redirect                            #
+#                           WSGI-to-CP Adapter                           #
 
 
-class InternalRedirector(object):
-    """WSGI middleware which handles cherrypy.InternalRedirect.
+class AppResponse(object):
+    """A WSGI application for CherryPy Application responses.
     
-    When cherrypy.InternalRedirect is raised, this middleware traps it,
-    rewrites the WSGI environ using the new path and query_string,
-    and calls the next application again. Because the wsgi.input stream
-    may have already been consumed by the next application, the redirected
-    call will always be of HTTP method "GET", and therefore any params must
-    be passed in the InternalRedirect object's query_string attribute.
-    If you need something more complicated, make and raise your own
-    exception and your own WSGI middleware to trap it. ;)
-    
-    It would be a bad idea to raise InternalRedirect after you've already
-    yielded response content, although an enterprising soul could choose
-    to abuse this.
-    
-    nextapp: the next application callable in the WSGI chain.
-    
-    recursive: if False (the default), each URL (path + qs) will be
-    stored, and, if the same URL is requested again, RuntimeError will
-    be raised. If 'recursive' is True, no such error will be raised.
+    recursive: if False (the default), each URL (path + qs) will be stored,
+    and, if the same URL is requested again via an InternalRedirect,
+    RuntimeError will be raised. If 'recursive' is True, no such error
+    will be raised.
     """
     
-    def __init__(self, nextapp, recursive=False):
-        self.nextapp = nextapp
-        self.recursive = recursive
+    throws = (KeyboardInterrupt, SystemExit)
+    request = None
     
-    def __call__(self, environ, start_response):
-        return IRResponse(self.nextapp, environ, start_response, self.recursive)
-
-
-class IRResponse(object):
-    
-    def __init__(self, nextapp, environ, start_response, recursive):
+    def __init__(self, environ, start_response, cpapp, recursive=False):
         self.redirections = []
         self.recursive = recursive
-        self.environ = environ.copy()
-        self.nextapp = nextapp
+        self.environ = environ
         self.start_response = start_response
-        self.response = None
-        self.iter_response = None
+        self.cpapp = cpapp
         self.setapp()
     
     def setapp(self):
-        while True:
-            try:
-                self.response = self.nextapp(self.environ, self.start_response)
-                self.iter_response = iter(self.response)
-                return
-            except _cherrypy.InternalRedirect, ir:
-                self.close()
-                self.setenv(ir)
-    
-    def setenv(self, ir):
-        env = self.environ
-        if not self.recursive:
-            if ir.path in self.redirections:
-                raise RuntimeError("InternalRedirector visited the "
-                                   "same URL twice: %r" % ir.path)
-            else:
-                # Add the *previous* path_info + qs to redirections.
-                sn = env.get('SCRIPT_NAME', '')
-                path = env.get('PATH_INFO', '')
-                qs = env.get('QUERY_STRING', '')
-                if qs:
-                    qs = "?" + qs
-                self.redirections.append(sn + path + qs)
-        
-        # Munge environment and try again.
-        env['REQUEST_METHOD'] = "GET"
-        env['PATH_INFO'] = ir.path
-        env['QUERY_STRING'] = ir.query_string
-        env['wsgi.input'] = _StringIO.StringIO()
-        env['CONTENT_LENGTH'] = "0"
-    
-    def close(self):
-        if hasattr(self.response, "close"):
-            self.response.close()
-    
-    def __iter__(self):
-        return self
-    
-    def next(self):
-        while True:
-            try:
-                return self.iter_response.next()
-            except _cherrypy.InternalRedirect, ir:
-                self.close()
-                self.setenv(ir)
-                self.setapp()
-
-
-
-#                           WSGI-to-CP Adapter                           #
-
-
-class AppResponse(object):
-    
-    throws = (KeyboardInterrupt, SystemExit, _cherrypy.InternalRedirect)
-    request = None
-    
-    def __init__(self, environ, start_response, cpapp):
+        # Stage 1: by whatever means necessary, obtain a status, header
+        #          set and body, with an optional exception object.
         try:
-            self.request = self.get_engine_request(environ, cpapp)
-            
-            meth = environ['REQUEST_METHOD']
-            path = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')
-            qs = environ.get('QUERY_STRING', '')
-            rproto = environ.get('SERVER_PROTOCOL')
-            headers = self.translate_headers(environ)
-            rfile = environ['wsgi.input']
-            
-            response = self.request.run(meth, path, qs, rproto, headers, rfile)
-            s, h, b = response.status, response.header_list, response.body
+            self.request = self.get_engine_request()
+            s, h, b = self.get_response()
             exc = None
         except self.throws:
             self.close()
             raise
+        except _cherrypy.InternalRedirect, ir:
+            self.environ['cherrypy.previous_request'] = _cherrypy._serving.request
+            self.close()
+            self.iredirect(ir.path, ir.query_string)
+            return
         except:
             if getattr(self.request, "throw_errors", False):
                 self.close()
         
         self.iter_response = iter(b)
         
+        # Stage 2: now that we have a status, header set, and body,
+        #          finish the WSGI conversation by calling start_response.
         try:
-            start_response(s, h, exc)
+            self.start_response(s, h, exc)
         except self.throws:
             self.close()
             raise
             s, h, b = _cperror.bare_error()
             self.iter_response = iter(b)
     
+    def iredirect(self, path, query_string):
+        """Doctor self.environ and perform an internal redirect.
+        
+        When cherrypy.InternalRedirect is raised, this method is called.
+        It rewrites the WSGI environ using the new path and query_string,
+        and calls a new CherryPy Request object. Because the wsgi.input
+        stream may have already been consumed by the next application,
+        the redirected call will always be of HTTP method "GET"; therefore,
+        any params must be passed in the query_string argument, which is
+        formed from InternalRedirect.query_string when using that exception.
+        If you need something more complicated, make and raise your own
+        exception and write your own AppResponse subclass to trap it. ;)
+        
+        It would be a bad idea to redirect after you've already yielded
+        response content, although an enterprising soul could choose
+        to abuse this.
+        """
+        env = self.environ
+        if not self.recursive:
+            sn = env.get('SCRIPT_NAME', '')
+            qs = query_string
+            if qs:
+                qs = "?" + qs
+            if sn + path + qs in self.redirections:
+                raise RuntimeError("InternalRedirector visited the "
+                                   "same URL twice: %r + %r + %r" %
+                                   (sn, path, qs))
+            else:
+                # Add the *previous* path_info + qs to redirections.
+                p = env.get('PATH_INFO', '')
+                qs = env.get('QUERY_STRING', '')
+                if qs:
+                    qs = "?" + qs
+                self.redirections.append(sn + p + qs)
+        
+        # Munge environment and try again.
+        env['REQUEST_METHOD'] = "GET"
+        env['PATH_INFO'] = path
+        env['QUERY_STRING'] = query_string
+        env['wsgi.input'] = _StringIO.StringIO()
+        env['CONTENT_LENGTH'] = "0"
+        
+        self.setapp()
+    
     def __iter__(self):
         return self
     
                 chunk = chunk.encode("ISO-8859-1")
             return chunk
         except self.throws:
+            self.close()
             raise
+        except _cherrypy.InternalRedirect, ir:
+            self.environ['cherrypy.previous_request'] = _cherrypy._serving.request
+            self.close()
+            self.iredirect(ir.path, ir.query_string)
         except StopIteration:
             raise
         except:
             if getattr(self.request, "throw_errors", False):
+                self.close()
                 raise
             
             _cherrypy.log(traceback=True)
     def close(self):
         _cherrypy.engine.release()
     
-    def get_engine_request(self, environ, cpapp):
+    def get_response(self):
+        """Grab a request object from the engine and return its response."""
+        meth = self.environ['REQUEST_METHOD']
+        path = (self.environ.get('SCRIPT_NAME', '') +
+                self.environ.get('PATH_INFO', ''))
+        qs = self.environ.get('QUERY_STRING', '')
+        rproto = self.environ.get('SERVER_PROTOCOL')
+        headers = self.translate_headers(self.environ)
+        rfile = self.environ['wsgi.input']
+        response = self.request.run(meth, path, qs, rproto, headers, rfile)
+        return response.status, response.header_list, response.body
+    
+    def get_engine_request(self):
         """Return a Request object from the CherryPy Engine using environ."""
-        env = environ.get
+        env = self.environ.get
         
         local = _http.Host('', int(env('SERVER_PORT', 80)),
                            env('SERVER_NAME', ''))
         # user after having been mapped to a local account.
         # Both IIS and Apache set REMOTE_USER, when possible.
         request.login = env('LOGON_USER') or env('REMOTE_USER') or None
-        request.multithread = environ['wsgi.multithread']
-        request.multiprocess = environ['wsgi.multiprocess']
-        request.wsgi_environ = environ
-        request.app = cpapp
-        request.prev = env('cherrypy.request')
-        environ['cherrypy.request'] = request
+        request.multithread = self.environ['wsgi.multithread']
+        request.multiprocess = self.environ['wsgi.multiprocess']
+        request.wsgi_environ = self.environ
+        request.app = self.cpapp
+        request.prev = env('cherrypy.previous_request', None)
         return request
     
     headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization',
         named WSGI callable (from the pipeline) as keyword arguments.
     """
     
-    pipeline = [('iredir', InternalRedirector)]
+    pipeline = []
     head = None
     config = {}
     

cherrypy/test/benchmark.py

         },
     }
 app = cherrypy.tree.mount(Root(), SCRIPT_NAME, appconf)
-# Remove internalredirect (nastily on by default)
-app.wsgiapp.pipeline = []
 
 
 class NullRequest:

cherrypy/test/test.py

     prefer_parent_path()
     
     testList = [
-        # Run refleak test ASAP to make debugging easier.
-        'test_refleaks',
-        
         'test_proxy',
         'test_caching',
         'test_config',
         'test_xmlrpc',
         'test_wsgiapps',
         'test_wsgi_ns',
+        
+        # Run refleak test as late as possible to
+        # catch refleaks from all exercised tests.
+        'test_refleaks',
     ]
     
     try:

cherrypy/test/test_refleaks.py

         index.exposed = True
         
         def gc_stats(self):
-            return "%s %s %s %s" % (gc.collect(),
-                                    len(get_instances(_cprequest.Request)),
-                                    len(get_instances(_cprequest.Response)),
-                                    len(gc.get_referrers(data)))
+            output = []
+            
+            # Uncollectable garbage
+            
+            # gc_collect isn't perfectly synchronous, because it may
+            # break reference cycles that then take time to fully
+            # finalize. Call it twice and hope for the best.
+            gc.collect()
+            unreachable = gc.collect()
+            if unreachable:
+                output.append("\n%s unreachable objects:" % unreachable)
+                trash = {}
+                for x in gc.garbage:
+                    trash[type(x)] = trash.get(type(x), 0) + 1
+                trash = [(v, k) for k, v in trash.iteritems()]
+                trash.sort()
+                for pair in trash:
+                    output.append("    " + repr(pair))
+            
+            # Request references
+            reqs = get_instances(_cprequest.Request)
+            lenreqs = len(reqs)
+            if lenreqs < 2:
+                output.append("\nMissing Request reference. Should be 1 in "
+                              "this request thread and 1 in the main thread.")
+            elif lenreqs > 2:
+                output.append("\nToo many Request references (%r)." % lenreqs)
+                for req in reqs:
+                    output.append("Referrers for %s:" % repr(req))
+                    for ref in gc.get_referrers(req):
+                        if ref is not reqs:
+                            output.append("    %s" % repr(ref))
+            
+            # Response references
+            resps = get_instances(_cprequest.Response)
+            lenresps = len(resps)
+            if lenresps < 2:
+                output.append("\nMissing Response reference. Should be 1 in "
+                              "this request thread and 1 in the main thread.")
+            elif lenresps > 2:
+                output.append("\nToo many Response references (%r)." % lenresps)
+                for resp in resps:
+                    output.append("Referrers for %s:" % repr(resp))
+                    for ref in gc.get_referrers(resp):
+                        if ref is not resps:
+                            output.append("    %s" % repr(ref))
+            
+            return "\n".join(output)
         gc_stats.exposed = True
-        
-        def gc_objtypes(self):
-            data = {}
-            for x in gc.get_objects():
-                data[type(x)] = data.get(type(x), 0) + 1
-            
-            data = [(v, k) for k, v in data.iteritems()]
-            data.sort()
-            return "\n".join([repr(pair) for pair in data])
-        gc_objtypes.exposed = True
     
     cherrypy.tree.mount(Root())
     cherrypy.config.update({'environment': 'test_suite'})
             t.join()
         
         self.getPage("/gc_stats")
-        self.assertBody("0 1 1 1")
-        
-        # If gc_stats fails, choose "ignore" to see the type counts for
-        # all the unreachable objects in this body.
-        self.getPage("/gc_objtypes")
         self.assertBody("")
 
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.