Commits

Robert Brewer committed 39c4530

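Fix for #930 (the tidy and nsgmls tools fail for concurrent requests). Removed the tidy/nsgmls tools from the standard distribution; `tools.tidy` and `tools.nsgmls` remain only as DeprecatedTool stubs that emit a warning pointing to http://tools.cherrypy.org/browser.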

Comments (0)

Files changed (4)

cherrypy/_cptools.py

 """
 
 import cherrypy
+import warnings
 
 
 def _getargs(func):
 
 #                              Builtin tools                              #
 
-from cherrypy.lib import cptools, encoding, auth, static, tidy, jsontools
+from cherrypy.lib import cptools, encoding, auth, static, jsontools
 from cherrypy.lib import sessions as _sessions, xmlrpc as _xmlrpc
 from cherrypy.lib import caching as _caching
 from cherrypy.lib import auth_basic, auth_digest
                     tool._setup()
 
 
+class DeprecatedTool(Tool):
+    
+    _name = None
+    warnmsg = "This Tool is deprecated."
+    
+    def __init__(self, point, warnmsg=None):
+        self.point = point
+        if warnmsg is not None:
+            self.warnmsg = warnmsg
+    
+    def __call__(self, *args, **kwargs):
+        warnings.warn(self.warnmsg)
+        def tool_decorator(f):
+            return f
+        return tool_decorator
+    
+    def _setup(self):
+        warnings.warn(self.warnmsg)
+
+
 default_toolbox = _d = Toolbox("tools")
 _d.session_auth = SessionAuthTool(cptools.session_auth)
 _d.proxy = Tool('before_request_body', cptools.proxy, priority=30)
 _d.xmlrpc = ErrorTool(_xmlrpc.on_error)
 _d.caching = CachingTool('before_handler', _caching.get, 'caching')
 _d.expires = Tool('before_finalize', _caching.expires)
-_d.tidy = Tool('before_finalize', tidy.tidy)
-_d.nsgmls = Tool('before_finalize', tidy.nsgmls)
+_d.tidy = DeprecatedTool('before_finalize',
+    "The tidy tool has been removed from the standard distribution of CherryPy. "
+    "The most recent version can be found at http://tools.cherrypy.org/browser.")
+_d.nsgmls = DeprecatedTool('before_finalize',
+    "The nsgmls tool has been removed from the standard distribution of CherryPy. "
+    "The most recent version can be found at http://tools.cherrypy.org/browser.")
 _d.ignore_headers = Tool('before_request_body', cptools.ignore_headers)
 _d.referer = Tool('before_request_body', cptools.referer)
 _d.basic_auth = Tool('on_start_resource', auth.basic_auth)
 _d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1)
 _d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1)
 
-del _d, cptools, encoding, auth, static, tidy
+del _d, cptools, encoding, auth, static

cherrypy/lib/tidy.py

-"""Functions to run cherrypy.response through Tidy or NSGML."""
-
-import cgi
-import os
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-import traceback
-
-import cherrypy
-    
-def tidy(temp_dir, tidy_path, strict_xml=False, errors_to_ignore=None,
-         indent=False, wrap=False, warnings=True):
-    """Run cherrypy.response through Tidy.
-    
-    If either 'indent' or 'wrap' are specified, then response.body will be
-    set to the output of tidy. Otherwise, only errors (including warnings,
-    if warnings is True) will change the body.
-    
-    Note that we use the standalone Tidy tool rather than the python
-    mxTidy module. This is because this module does not seem to be
-    stable and it crashes on some HTML pages (which means that the
-    server would also crash)
-    """
-    response = cherrypy.response
-    
-    # the tidy tool, by its very nature it's not generator friendly, 
-    # so we just collapse the body and work with it.
-    orig_body = response.collapse_body()
-    
-    fct = response.headers.get('Content-Type', '')
-    ct = fct.split(';')[0]
-    encoding = ''
-    i = fct.find('charset=')
-    if i != -1:
-        encoding = fct[i + 8:]
-    
-    if ct == 'text/html':
-        page_file = os.path.join(temp_dir, 'page.html')
-        open(page_file, 'wb').write(orig_body)
-        
-        out_file = os.path.join(temp_dir, 'tidy.out')
-        err_file = os.path.join(temp_dir, 'tidy.err')
-        tidy_enc = encoding.replace('-', '')
-        if tidy_enc:
-            tidy_enc = '-' + tidy_enc
-        
-        strict_xml = ("", " -xml")[bool(strict_xml)]
-        
-        if indent:
-            indent = ' -indent'
-        else:
-            indent = ''
-        
-        if wrap is False:
-            wrap = ''
-        else:
-            try:
-                wrap = ' -wrap %d' % int(tidyWrap)
-            except:
-                wrap = ''
-        
-        result = os.system('"%s" %s%s%s%s -f %s -o %s %s' %
-                           (tidy_path, tidy_enc, strict_xml, indent, wrap,
-                            err_file, out_file, page_file))
-        use_output = bool(indent or wrap) and not result
-        if use_output:
-            output = open(out_file, 'rb').read()
-        
-        new_errs = []
-        for err in open(err_file, 'rb').read().splitlines():
-            if (err.find('Error') != -1 or
-                (warnings and err.find('Warning') != -1)):
-                ignore = 0
-                for err_ign in errors_to_ignore or []:
-                    if err.find(err_ign) != -1:
-                        ignore = 1
-                        break
-                if not ignore:
-                    new_errs.append(err)
-        
-        if new_errs:
-            response.body = wrong_content('<br />'.join(new_errs), orig_body)
-            if "Content-Length" in response.headers:
-                # Delete Content-Length header so finalize() recalcs it.
-                del response.headers["Content-Length"]
-            return
-        elif strict_xml:
-            # The HTML is OK, but is it valid XML?
-            # Use elementtree to parse XML
-            from elementtree.ElementTree import parse
-            tag_list = ['nbsp', 'quot']
-            for tag in tag_list:
-                orig_body = orig_body.replace('&' + tag + ';', tag.upper())
-            
-            if encoding:
-                enctag = '<?xml version="1.0" encoding="%s"?>' % encoding
-                orig_body = enctag + orig_body
-            
-            f = StringIO(orig_body)
-            try:
-                tree = parse(f)
-            except:
-                # Wrong XML
-                body_file = StringIO()
-                traceback.print_exc(file = body_file)
-                body_file = '<br />'.join(body_file.getvalue())
-                response.body = wrong_content(body_file, orig_body, "XML")
-                if "Content-Length" in response.headers:
-                    # Delete Content-Length header so finalize() recalcs it.
-                    del response.headers["Content-Length"]
-                return
-        
-        if use_output:
-            response.body = [output]
-            if "Content-Length" in response.headers:
-                # Delete Content-Length header so finalize() recalcs it.
-                del response.headers["Content-Length"]
-
-def html_space(text):
-    """Escape text, replacing space with nbsp and tab with 4 nbsp's."""
-    return cgi.escape(text).replace('\t', '    ').replace(' ', '&nbsp;')
-
-def html_break(text):
-    """Escape text, replacing newline with HTML br element."""
-    return cgi.escape(text).replace('\n', '<br />')
-
-def wrong_content(header, body, content_type="HTML"):
-    output = ["Wrong %s:<br />%s<br />" % (content_type, html_break(header))]
-    for i, line in enumerate(body.splitlines()):
-        output.append("%03d - %s" % (i + 1, html_space(line)))
-    return "<br />".join(output)
-
-
-def nsgmls(temp_dir, nsgmls_path, catalog_path, errors_to_ignore=None):
-    response = cherrypy.response
-    
-    # the tidy tool, by its very nature it's not generator friendly, 
-    # so we just collect the body and work with it.
-    orig_body = response.collapse_body()
-    
-    fct = response.headers.get('Content-Type', '')
-    ct = fct.split(';')[0]
-    encoding = ''
-    i = fct.find('charset=')
-    if i != -1:
-        encoding = fct[i + 8:]
-    if ct == 'text/html':
-        # Remove bits of Javascript (nsgmls doesn't seem to handle
-        #   them correctly (for instance, if <a appears in your
-        #   Javascript code nsgmls complains about it)
-        while True:
-            i = orig_body.find('<script')
-            if i == -1:
-                break
-            j = orig_body.find('</script>', i)
-            if j == -1:
-                break
-            orig_body = orig_body[:i] + orig_body[j+9:]
-
-        page_file = os.path.join(temp_dir, 'page.html')
-        open(page_file, 'wb').write(orig_body)
-        
-        err_file = os.path.join(temp_dir, 'nsgmls.err')
-        command = ('%s -c%s -f%s -s -E10 %s' %
-                   (nsgmls_path, catalog_path, err_file, page_file))
-        command = command.replace('\\', '/')
-        os.system(command)
-        errs = open(err_file, 'rb').read()
-        
-        new_errs = []
-        for err in errs.splitlines():
-            ignore = False
-            for err_ign in errors_to_ignore or []:
-                if err.find(err_ign) != -1:
-                    ignore = True
-                    break
-            if not ignore:
-                new_errs.append(err)
-        
-        if new_errs:
-            response.body = wrong_content('<br />'.join(new_errs), orig_body)
-            if "Content-Length" in response.headers:
-                # Delete Content-Length header so finalize() recalcs it.
-                del response.headers["Content-Length"]
-

cherrypy/test/test.py

         'test_sessionauthenticate',
         'test_states',
         'test_config_server',
-        'test_tidy',
         'test_xmlrpc',
         'test_wsgiapps',
         'test_wsgi_ns',

cherrypy/test/test_tidy.py

-from cherrypy.test import test
-test.prefer_parent_path()
-
-import os
-localDir = os.path.join(os.getcwd(), os.path.dirname(__file__))
-tidy_path = os.path.join(localDir, "tidy")
-
-import cherrypy
-from cherrypy import tools
-
-doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
-           '"http://www.w3.org/TR/xhtml1/DTD/strict.dtd">')
-
-def setup_server():
-    class Root:
-        _cp_config = {
-            'tools.tidy.on': True,
-            'tools.tidy.tidy_path': tidy_path,
-            'tools.tidy.temp_dir': localDir,
-            }
-        
-        def plaintext(self):
-            yield "Hello, world"
-        plaintext.exposed = True
-        plaintext._cp_config = {'tools.tidy.warnings': False}
-        
-        def validhtml(self):
-            return "<html><body><h1>This should be valid</h1></body></html>"
-        validhtml.exposed = True
-        validhtml._cp_config = {'tools.tidy.warnings': False}
-        
-        def warning(self, skip_doctype=False):
-            if skip_doctype:
-                # This should raise a warning
-                pass
-            else:
-                yield doctype
-            
-            yield "<html><head><title>Meh</title></head>"
-            yield "<body>Normal body</body></html>"
-        warning.exposed = True
-    
-    cherrypy.tree.mount(Root())
-
-
-from cherrypy.test import helper
-
-class TidyTest(helper.CPWebCase):
-
-    def test_Tidy_Tool(self):
-        if not os.path.exists(tidy_path) and not os.path.exists(tidy_path + ".exe"):
-            return self.skip("skipped (tidy not found) ")
-        
-        self.getPage('/validhtml')
-        self.assertStatus(200)
-        self.assertBody("<html><body><h1>This should be valid</h1></body></html>")
-        
-        self.getPage('/plaintext')
-        self.assertStatus(200)
-        self.assertBody('Hello, world')
-        
-        self.getPage('/warning')
-        self.assertStatus(200)
-        self.assertBody(doctype + "<html><head><title>Meh</title></head>"
-                        "<body>Normal body</body></html>")
-        
-        self.getPage('/warning?skip_doctype=YES')
-        self.assertStatus(200)
-        self.assertInBody("Wrong HTML")
-
-
-
-if __name__ == "__main__":
-    helper.testmain()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.