Commits

Gustavo Picon committed c905ed0

Extracted decode, encode and gzip tools.

  • Participants
  • Parent commits 8199a2e
  • Branches cp4

Comments (0)

Files changed (7)

cherrypy/lib/compression.py

+import struct
+import time
+
+from cherrypy.lib.compat import BytesIO
+
+
+def compress(body, compress_level):
+    """Compress 'body' at the given compress_level."""
+    import zlib
+
+    # See http://www.gzip.org/zlib/rfc-gzip.html
+    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
+    yield b'\x08'           # CM: compression method
+    yield b'\x00'           # FLG: none set
+    # MTIME: 4 bytes
+    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
+    yield b'\x02'           # XFL: max compression, slowest algo
+    yield b'\xff'           # OS: unknown
+
+    crc = zlib.crc32(b"")
+    size = 0
+    zobj = zlib.compressobj(compress_level,
+                            zlib.DEFLATED, -zlib.MAX_WBITS,
+                            zlib.DEF_MEM_LEVEL, 0)
+    for line in body:
+        size += len(line)
+        crc = zlib.crc32(line, crc)
+        yield zobj.compress(line)
+    yield zobj.flush()
+
+    # CRC32: 4 bytes
+    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
+    # ISIZE: 4 bytes
+    yield struct.pack("<L", size & int('FFFFFFFF', 16))
+
+
+def decompress(body):
+    import gzip
+
+    zbuf = BytesIO()
+    zbuf.write(body)
+    zbuf.seek(0)
+    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
+    data = zfile.read()
+    zfile.close()
+    return data

cherrypy/lib/tools/__init__.py

 
 
 def _prepare_default_toolbox():
-    from cherrypy.lib.tools import auth_basic, auth_digest
-    from cherrypy.lib.tools import encoding, session_auth
-    from cherrypy.lib.tools import sessions, xmlrpcutil, caching
+    from cherrypy.lib.tools import auth_basic, auth_digest, caching
+    from cherrypy.lib.tools import session_auth, sessions, xmlrpcutil
     from cherrypy.lib.tools.autovary import autovary
     from cherrypy.lib.tools.etags import validate_etags
     from cherrypy.lib.tools.accept import accept
     from cherrypy.lib.tools.staticdir import staticdir
     from cherrypy.lib.tools.json_in import json_in
     from cherrypy.lib.tools.json_out import json_out
+    from cherrypy.lib.tools.decode import decode
+    from cherrypy.lib.tools.encode import ResponseEncoder
+    from cherrypy.lib.tools.gzip import gzip
 
     _d = Toolbox("tools")
     _d.session_auth = session_auth.SessionAuthTool(session_auth.session_auth)
     _d.log_hooks = Tool('on_end_request', log_hooks, priority=100)
     _d.err_redirect = ErrorTool(redirect)
     _d.etags = Tool('before_finalize', validate_etags, priority=75)
-    _d.decode = Tool('before_request_body', encoding.decode)
+    _d.decode = Tool('before_request_body', decode)
     # the order of encoding, gzip, caching is important
-    _d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70)
-    _d.gzip = Tool('before_finalize', encoding.gzip, priority=80)
+    _d.encode = Tool('before_handler', ResponseEncoder, priority=70)
+    _d.gzip = Tool('before_finalize', gzip, priority=80)
     _d.staticdir = HandlerTool(staticdir)
     _d.staticfile = HandlerTool(staticfile)
     _d.sessions = sessions.SessionTool()

cherrypy/lib/tools/decode.py

+import cherrypy
+
+
+def decode(encoding=None, default_encoding='utf-8'):
+    """Replace or extend the list of charsets used to decode a request entity.
+
+    Either argument may be a single string or a list of strings.
+
+    encoding
+        If not None, restricts the set of charsets attempted while decoding
+        a request entity to the given set (even if a different charset is
+        given in the Content-Type request header).
+
+    default_encoding
+        Only in effect if the 'encoding' argument is not given.
+        If given, the set of charsets attempted while decoding a request
+        entity is *extended* with the given value(s).
+
+    """
+    body = cherrypy.request.body
+    if encoding is not None:
+        if not isinstance(encoding, list):
+            encoding = [encoding]
+        body.attempt_charsets = encoding
+    elif default_encoding:
+        if not isinstance(default_encoding, list):
+            default_encoding = [default_encoding]
+        body.attempt_charsets = body.attempt_charsets + default_encoding

cherrypy/lib/tools/encode.py

+import cherrypy
+from cherrypy.lib.compat import basestring, BytesIO, unicodestr
+from cherrypy.lib import file_generator
+
+
+class ResponseEncoder:
+
+    default_encoding = 'utf-8'
+    failmsg = "Response body could not be encoded with %r."
+    encoding = None
+    errors = 'strict'
+    text_only = True
+    add_charset = True
+    debug = False
+
+    def __init__(self, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+        self.attempted_charsets = set()
+        request = cherrypy.serving.request
+        if request.handler is not None:
+            # Replace request.handler with self
+            if self.debug:
+                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
+            self.oldhandler = request.handler
+            request.handler = self
+
+    def encode_stream(self, encoding):
+        """Encode a streaming response body.
+
+        Use a generator wrapper, and just pray it works as the stream is
+        being written out.
+        """
+        if encoding in self.attempted_charsets:
+            return False
+        self.attempted_charsets.add(encoding)
+
+        def encoder(body):
+            for chunk in body:
+                if isinstance(chunk, unicodestr):
+                    chunk = chunk.encode(encoding, self.errors)
+                yield chunk
+        self.body = encoder(self.body)
+        return True
+
+    def encode_string(self, encoding):
+        """Encode a buffered response body."""
+        if encoding in self.attempted_charsets:
+            return False
+        self.attempted_charsets.add(encoding)
+        body = []
+        for chunk in self.body:
+            if isinstance(chunk, unicodestr):
+                try:
+                    chunk = chunk.encode(encoding, self.errors)
+                except (LookupError, UnicodeError):
+                    return False
+            body.append(chunk)
+        self.body = body
+        return True
+
+    def find_acceptable_charset(self):
+        request = cherrypy.serving.request
+        response = cherrypy.serving.response
+
+        if self.debug:
+            cherrypy.log('response.stream %r' %
+                         response.stream, 'TOOLS.ENCODE')
+        if response.stream:
+            encoder = self.encode_stream
+        else:
+            encoder = self.encode_string
+            if "Content-Length" in response.headers:
+                # Delete Content-Length header so finalize() recalcs it.
+                # Encoded strings may be of different lengths from their
+                # unicode equivalents, and even from each other. For example:
+                # >>> t = u"\u7007\u3040"
+                # >>> len(t)
+                # 2
+                # >>> len(t.encode("UTF-8"))
+                # 6
+                # >>> len(t.encode("utf7"))
+                # 8
+                del response.headers["Content-Length"]
+
+        # Parse the Accept-Charset request header, and try to provide one
+        # of the requested charsets (in order of user preference).
+        encs = request.headers.elements('Accept-Charset')
+        charsets = [enc.value.lower() for enc in encs]
+        if self.debug:
+            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')
+
+        if self.encoding is not None:
+            # If specified, force this encoding to be used, or fail.
+            encoding = self.encoding.lower()
+            if self.debug:
+                cherrypy.log('Specified encoding %r' %
+                             encoding, 'TOOLS.ENCODE')
+            if (not charsets) or "*" in charsets or encoding in charsets:
+                if self.debug:
+                    cherrypy.log('Attempting encoding %r' %
+                                 encoding, 'TOOLS.ENCODE')
+                if encoder(encoding):
+                    return encoding
+        else:
+            if not encs:
+                if self.debug:
+                    cherrypy.log('Attempting default encoding %r' %
+                                 self.default_encoding, 'TOOLS.ENCODE')
+                # Any character-set is acceptable.
+                if encoder(self.default_encoding):
+                    return self.default_encoding
+                else:
+                    raise cherrypy.HTTPError(500, self.failmsg %
+                                             self.default_encoding)
+            else:
+                for element in encs:
+                    if element.qvalue > 0:
+                        if element.value == "*":
+                            # Matches any charset. Try our default.
+                            if self.debug:
+                                cherrypy.log('Attempting default encoding due '
+                                             'to %r' % element, 'TOOLS.ENCODE')
+                            if encoder(self.default_encoding):
+                                return self.default_encoding
+                        else:
+                            encoding = element.value
+                            if self.debug:
+                                cherrypy.log('Attempting encoding %s (qvalue >'
+                                             '0)' % element, 'TOOLS.ENCODE')
+                            if encoder(encoding):
+                                return encoding
+
+                if "*" not in charsets:
+                    # If no "*" is present in an Accept-Charset field, then all
+                    # character sets not explicitly mentioned get a quality
+                    # value of 0, except for ISO-8859-1, which gets a quality
+                    # value of 1 if not explicitly mentioned.
+                    iso = 'iso-8859-1'
+                    if iso not in charsets:
+                        if self.debug:
+                            cherrypy.log('Attempting ISO-8859-1 encoding',
+                                         'TOOLS.ENCODE')
+                        if encoder(iso):
+                            return iso
+
+        # No suitable encoding found.
+        ac = request.headers.get('Accept-Charset')
+        if ac is None:
+            msg = "Your client did not send an Accept-Charset header."
+        else:
+            msg = "Your client sent this Accept-Charset header: %s." % ac
+        _charsets = ", ".join(sorted(self.attempted_charsets))
+        msg += " We tried these charsets: %s." % (_charsets,)
+        raise cherrypy.HTTPError(406, msg)
+
+    def __call__(self, *args, **kwargs):
+        response = cherrypy.serving.response
+        self.body = self.oldhandler(*args, **kwargs)
+
+        if isinstance(self.body, basestring):
+            # strings get wrapped in a list because iterating over a single
+            # item list is much faster than iterating over every character
+            # in a long string.
+            if self.body:
+                self.body = [self.body]
+            else:
+                # [''] doesn't evaluate to False, so replace it with [].
+                self.body = []
+        elif hasattr(self.body, 'read'):
+            self.body = file_generator(self.body)
+        elif self.body is None:
+            self.body = []
+
+        ct = response.headers.elements("Content-Type")
+        if self.debug:
+            cherrypy.log('Content-Type: %r' % [str(h)
+                         for h in ct], 'TOOLS.ENCODE')
+        if ct and self.add_charset:
+            ct = ct[0]
+            if self.text_only:
+                if ct.value.lower().startswith("text/"):
+                    if self.debug:
+                        cherrypy.log(
+                            'Content-Type %s starts with "text/"' % ct,
+                            'TOOLS.ENCODE')
+                    do_find = True
+                else:
+                    if self.debug:
+                        cherrypy.log('Not finding because Content-Type %s '
+                                     'does not start with "text/"' % ct,
+                                     'TOOLS.ENCODE')
+                    do_find = False
+            else:
+                if self.debug:
+                    cherrypy.log('Finding because not text_only',
+                                 'TOOLS.ENCODE')
+                do_find = True
+
+            if do_find:
+                # Set "charset=..." param on response Content-Type header
+                ct.params['charset'] = self.find_acceptable_charset()
+                if self.debug:
+                    cherrypy.log('Setting Content-Type %s' % ct,
+                                 'TOOLS.ENCODE')
+                response.headers["Content-Type"] = str(ct)
+
+        return self.body

cherrypy/lib/tools/encoding.py

-import struct
-import time
-
-import cherrypy
-from cherrypy.lib.compat import basestring, BytesIO, unicodestr
-from cherrypy.lib import file_generator
-from cherrypy.lib import set_vary_header
-
-
-def decode(encoding=None, default_encoding='utf-8'):
-    """Replace or extend the list of charsets used to decode a request entity.
-
-    Either argument may be a single string or a list of strings.
-
-    encoding
-        If not None, restricts the set of charsets attempted while decoding
-        a request entity to the given set (even if a different charset is
-        given in the Content-Type request header).
-
-    default_encoding
-        Only in effect if the 'encoding' argument is not given.
-        If given, the set of charsets attempted while decoding a request
-        entity is *extended* with the given value(s).
-
-    """
-    body = cherrypy.request.body
-    if encoding is not None:
-        if not isinstance(encoding, list):
-            encoding = [encoding]
-        body.attempt_charsets = encoding
-    elif default_encoding:
-        if not isinstance(default_encoding, list):
-            default_encoding = [default_encoding]
-        body.attempt_charsets = body.attempt_charsets + default_encoding
-
-
-class ResponseEncoder:
-
-    default_encoding = 'utf-8'
-    failmsg = "Response body could not be encoded with %r."
-    encoding = None
-    errors = 'strict'
-    text_only = True
-    add_charset = True
-    debug = False
-
-    def __init__(self, **kwargs):
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-
-        self.attempted_charsets = set()
-        request = cherrypy.serving.request
-        if request.handler is not None:
-            # Replace request.handler with self
-            if self.debug:
-                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
-            self.oldhandler = request.handler
-            request.handler = self
-
-    def encode_stream(self, encoding):
-        """Encode a streaming response body.
-
-        Use a generator wrapper, and just pray it works as the stream is
-        being written out.
-        """
-        if encoding in self.attempted_charsets:
-            return False
-        self.attempted_charsets.add(encoding)
-
-        def encoder(body):
-            for chunk in body:
-                if isinstance(chunk, unicodestr):
-                    chunk = chunk.encode(encoding, self.errors)
-                yield chunk
-        self.body = encoder(self.body)
-        return True
-
-    def encode_string(self, encoding):
-        """Encode a buffered response body."""
-        if encoding in self.attempted_charsets:
-            return False
-        self.attempted_charsets.add(encoding)
-        body = []
-        for chunk in self.body:
-            if isinstance(chunk, unicodestr):
-                try:
-                    chunk = chunk.encode(encoding, self.errors)
-                except (LookupError, UnicodeError):
-                    return False
-            body.append(chunk)
-        self.body = body
-        return True
-
-    def find_acceptable_charset(self):
-        request = cherrypy.serving.request
-        response = cherrypy.serving.response
-
-        if self.debug:
-            cherrypy.log('response.stream %r' %
-                         response.stream, 'TOOLS.ENCODE')
-        if response.stream:
-            encoder = self.encode_stream
-        else:
-            encoder = self.encode_string
-            if "Content-Length" in response.headers:
-                # Delete Content-Length header so finalize() recalcs it.
-                # Encoded strings may be of different lengths from their
-                # unicode equivalents, and even from each other. For example:
-                # >>> t = u"\u7007\u3040"
-                # >>> len(t)
-                # 2
-                # >>> len(t.encode("UTF-8"))
-                # 6
-                # >>> len(t.encode("utf7"))
-                # 8
-                del response.headers["Content-Length"]
-
-        # Parse the Accept-Charset request header, and try to provide one
-        # of the requested charsets (in order of user preference).
-        encs = request.headers.elements('Accept-Charset')
-        charsets = [enc.value.lower() for enc in encs]
-        if self.debug:
-            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')
-
-        if self.encoding is not None:
-            # If specified, force this encoding to be used, or fail.
-            encoding = self.encoding.lower()
-            if self.debug:
-                cherrypy.log('Specified encoding %r' %
-                             encoding, 'TOOLS.ENCODE')
-            if (not charsets) or "*" in charsets or encoding in charsets:
-                if self.debug:
-                    cherrypy.log('Attempting encoding %r' %
-                                 encoding, 'TOOLS.ENCODE')
-                if encoder(encoding):
-                    return encoding
-        else:
-            if not encs:
-                if self.debug:
-                    cherrypy.log('Attempting default encoding %r' %
-                                 self.default_encoding, 'TOOLS.ENCODE')
-                # Any character-set is acceptable.
-                if encoder(self.default_encoding):
-                    return self.default_encoding
-                else:
-                    raise cherrypy.HTTPError(500, self.failmsg %
-                                             self.default_encoding)
-            else:
-                for element in encs:
-                    if element.qvalue > 0:
-                        if element.value == "*":
-                            # Matches any charset. Try our default.
-                            if self.debug:
-                                cherrypy.log('Attempting default encoding due '
-                                             'to %r' % element, 'TOOLS.ENCODE')
-                            if encoder(self.default_encoding):
-                                return self.default_encoding
-                        else:
-                            encoding = element.value
-                            if self.debug:
-                                cherrypy.log('Attempting encoding %s (qvalue >'
-                                             '0)' % element, 'TOOLS.ENCODE')
-                            if encoder(encoding):
-                                return encoding
-
-                if "*" not in charsets:
-                    # If no "*" is present in an Accept-Charset field, then all
-                    # character sets not explicitly mentioned get a quality
-                    # value of 0, except for ISO-8859-1, which gets a quality
-                    # value of 1 if not explicitly mentioned.
-                    iso = 'iso-8859-1'
-                    if iso not in charsets:
-                        if self.debug:
-                            cherrypy.log('Attempting ISO-8859-1 encoding',
-                                         'TOOLS.ENCODE')
-                        if encoder(iso):
-                            return iso
-
-        # No suitable encoding found.
-        ac = request.headers.get('Accept-Charset')
-        if ac is None:
-            msg = "Your client did not send an Accept-Charset header."
-        else:
-            msg = "Your client sent this Accept-Charset header: %s." % ac
-        _charsets = ", ".join(sorted(self.attempted_charsets))
-        msg += " We tried these charsets: %s." % (_charsets,)
-        raise cherrypy.HTTPError(406, msg)
-
-    def __call__(self, *args, **kwargs):
-        response = cherrypy.serving.response
-        self.body = self.oldhandler(*args, **kwargs)
-
-        if isinstance(self.body, basestring):
-            # strings get wrapped in a list because iterating over a single
-            # item list is much faster than iterating over every character
-            # in a long string.
-            if self.body:
-                self.body = [self.body]
-            else:
-                # [''] doesn't evaluate to False, so replace it with [].
-                self.body = []
-        elif hasattr(self.body, 'read'):
-            self.body = file_generator(self.body)
-        elif self.body is None:
-            self.body = []
-
-        ct = response.headers.elements("Content-Type")
-        if self.debug:
-            cherrypy.log('Content-Type: %r' % [str(h)
-                         for h in ct], 'TOOLS.ENCODE')
-        if ct and self.add_charset:
-            ct = ct[0]
-            if self.text_only:
-                if ct.value.lower().startswith("text/"):
-                    if self.debug:
-                        cherrypy.log(
-                            'Content-Type %s starts with "text/"' % ct,
-                            'TOOLS.ENCODE')
-                    do_find = True
-                else:
-                    if self.debug:
-                        cherrypy.log('Not finding because Content-Type %s '
-                                     'does not start with "text/"' % ct,
-                                     'TOOLS.ENCODE')
-                    do_find = False
-            else:
-                if self.debug:
-                    cherrypy.log('Finding because not text_only',
-                                 'TOOLS.ENCODE')
-                do_find = True
-
-            if do_find:
-                # Set "charset=..." param on response Content-Type header
-                ct.params['charset'] = self.find_acceptable_charset()
-                if self.debug:
-                    cherrypy.log('Setting Content-Type %s' % ct,
-                                 'TOOLS.ENCODE')
-                response.headers["Content-Type"] = str(ct)
-
-        return self.body
-
-# GZIP
-
-
-def compress(body, compress_level):
-    """Compress 'body' at the given compress_level."""
-    import zlib
-
-    # See http://www.gzip.org/zlib/rfc-gzip.html
-    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
-    yield b'\x08'           # CM: compression method
-    yield b'\x00'           # FLG: none set
-    # MTIME: 4 bytes
-    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
-    yield b'\x02'           # XFL: max compression, slowest algo
-    yield b'\xff'           # OS: unknown
-
-    crc = zlib.crc32(b"")
-    size = 0
-    zobj = zlib.compressobj(compress_level,
-                            zlib.DEFLATED, -zlib.MAX_WBITS,
-                            zlib.DEF_MEM_LEVEL, 0)
-    for line in body:
-        size += len(line)
-        crc = zlib.crc32(line, crc)
-        yield zobj.compress(line)
-    yield zobj.flush()
-
-    # CRC32: 4 bytes
-    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
-    # ISIZE: 4 bytes
-    yield struct.pack("<L", size & int('FFFFFFFF', 16))
-
-
-def decompress(body):
-    import gzip
-
-    zbuf = BytesIO()
-    zbuf.write(body)
-    zbuf.seek(0)
-    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
-    data = zfile.read()
-    zfile.close()
-    return data
-
-
-def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
-         debug=False):
-    """Try to gzip the response body if Content-Type in mime_types.
-
-    cherrypy.response.headers['Content-Type'] must be set to one of the
-    values in the mime_types arg before calling this function.
-
-    The provided list of mime-types must be of one of the following form:
-        * type/subtype
-        * type/*
-        * type/*+subtype
-
-    No compression is performed if any of the following hold:
-        * The client sends no Accept-Encoding request header
-        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
-        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
-        * The 'identity' value is given with a qvalue > 0.
-
-    """
-    request = cherrypy.serving.request
-    response = cherrypy.serving.response
-
-    set_vary_header(response, "Accept-Encoding")
-
-    if not response.body:
-        # Response body is empty (might be a 304 for instance)
-        if debug:
-            cherrypy.log('No response body', context='TOOLS.GZIP')
-        return
-
-    # If returning cached content (which should already have been gzipped),
-    # don't re-zip.
-    if getattr(request, "cached", False):
-        if debug:
-            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
-        return
-
-    acceptable = request.headers.elements('Accept-Encoding')
-    if not acceptable:
-        # If no Accept-Encoding field is present in a request,
-        # the server MAY assume that the client will accept any
-        # content coding. In this case, if "identity" is one of
-        # the available content-codings, then the server SHOULD use
-        # the "identity" content-coding, unless it has additional
-        # information that a different content-coding is meaningful
-        # to the client.
-        if debug:
-            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
-        return
-
-    ct = response.headers.get('Content-Type', '').split(';')[0]
-    for coding in acceptable:
-        if coding.value == 'identity' and coding.qvalue != 0:
-            if debug:
-                cherrypy.log('Non-zero identity qvalue: %s' % coding,
-                             context='TOOLS.GZIP')
-            return
-        if coding.value in ('gzip', 'x-gzip'):
-            if coding.qvalue == 0:
-                if debug:
-                    cherrypy.log('Zero gzip qvalue: %s' % coding,
-                                 context='TOOLS.GZIP')
-                return
-
-            if ct not in mime_types:
-                # If the list of provided mime-types contains tokens
-                # such as 'text/*' or 'application/*+xml',
-                # we go through them and find the most appropriate one
-                # based on the given content-type.
-                # The pattern matching is only caring about the most
-                # common cases, as stated above, and doesn't support
-                # for extra parameters.
-                found = False
-                if '/' in ct:
-                    ct_media_type, ct_sub_type = ct.split('/')
-                    for mime_type in mime_types:
-                        if '/' in mime_type:
-                            media_type, sub_type = mime_type.split('/')
-                            if ct_media_type == media_type:
-                                if sub_type == '*':
-                                    found = True
-                                    break
-                                elif '+' in sub_type and '+' in ct_sub_type:
-                                    ct_left, ct_right = ct_sub_type.split('+')
-                                    left, right = sub_type.split('+')
-                                    if left == '*' and ct_right == right:
-                                        found = True
-                                        break
-
-                if not found:
-                    if debug:
-                        cherrypy.log('Content-Type %s not in mime_types %r' %
-                                     (ct, mime_types), context='TOOLS.GZIP')
-                    return
-
-            if debug:
-                cherrypy.log('Gzipping', context='TOOLS.GZIP')
-            # Return a generator that compresses the page
-            response.headers['Content-Encoding'] = 'gzip'
-            response.body = compress(response.body, compress_level)
-            if "Content-Length" in response.headers:
-                # Delete Content-Length header so finalize() recalcs it.
-                del response.headers["Content-Length"]
-
-            return
-
-    if debug:
-        cherrypy.log('No acceptable encoding found.', context='GZIP')
-    cherrypy.HTTPError(406, "identity, gzip").set_response()

cherrypy/lib/tools/gzip.py

+import cherrypy
+from cherrypy.lib import set_vary_header
+from cherrypy.lib.compression import compress
+
+
+def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
+         debug=False):
+    """Try to gzip the response body if Content-Type in mime_types.
+
+    cherrypy.response.headers['Content-Type'] must be set to one of the
+    values in the mime_types arg before calling this function.
+
+    The provided list of mime-types must be of one of the following form:
+        * type/subtype
+        * type/*
+        * type/*+subtype
+
+    No compression is performed if any of the following hold:
+        * The client sends no Accept-Encoding request header
+        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
+        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
+        * The 'identity' value is given with a qvalue > 0.
+
+    """
+    request = cherrypy.serving.request
+    response = cherrypy.serving.response
+
+    set_vary_header(response, "Accept-Encoding")
+
+    if not response.body:
+        # Response body is empty (might be a 304 for instance)
+        if debug:
+            cherrypy.log('No response body', context='TOOLS.GZIP')
+        return
+
+    # If returning cached content (which should already have been gzipped),
+    # don't re-zip.
+    if getattr(request, "cached", False):
+        if debug:
+            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
+        return
+
+    acceptable = request.headers.elements('Accept-Encoding')
+    if not acceptable:
+        # If no Accept-Encoding field is present in a request,
+        # the server MAY assume that the client will accept any
+        # content coding. In this case, if "identity" is one of
+        # the available content-codings, then the server SHOULD use
+        # the "identity" content-coding, unless it has additional
+        # information that a different content-coding is meaningful
+        # to the client.
+        if debug:
+            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
+        return
+
+    ct = response.headers.get('Content-Type', '').split(';')[0]
+    for coding in acceptable:
+        if coding.value == 'identity' and coding.qvalue != 0:
+            if debug:
+                cherrypy.log('Non-zero identity qvalue: %s' % coding,
+                             context='TOOLS.GZIP')
+            return
+        if coding.value in ('gzip', 'x-gzip'):
+            if coding.qvalue == 0:
+                if debug:
+                    cherrypy.log('Zero gzip qvalue: %s' % coding,
+                                 context='TOOLS.GZIP')
+                return
+
+            if ct not in mime_types:
+                # If the list of provided mime-types contains tokens
+                # such as 'text/*' or 'application/*+xml',
+                # we go through them and find the most appropriate one
+                # based on the given content-type.
+                # The pattern matching is only caring about the most
+                # common cases, as stated above, and doesn't support
+                # for extra parameters.
+                found = False
+                if '/' in ct:
+                    ct_media_type, ct_sub_type = ct.split('/')
+                    for mime_type in mime_types:
+                        if '/' in mime_type:
+                            media_type, sub_type = mime_type.split('/')
+                            if ct_media_type == media_type:
+                                if sub_type == '*':
+                                    found = True
+                                    break
+                                elif '+' in sub_type and '+' in ct_sub_type:
+                                    ct_left, ct_right = ct_sub_type.split('+')
+                                    left, right = sub_type.split('+')
+                                    if left == '*' and ct_right == right:
+                                        found = True
+                                        break
+
+                if not found:
+                    if debug:
+                        cherrypy.log('Content-Type %s not in mime_types %r' %
+                                     (ct, mime_types), context='TOOLS.GZIP')
+                    return
+
+            if debug:
+                cherrypy.log('Gzipping', context='TOOLS.GZIP')
+            # Return a generator that compresses the page
+            response.headers['Content-Encoding'] = 'gzip'
+            response.body = compress(response.body, compress_level)
+            if "Content-Length" in response.headers:
+                # Delete Content-Length header so finalize() recalcs it.
+                del response.headers["Content-Length"]
+
+            return
+
+    if debug:
+        cherrypy.log('No acceptable encoding found.', context='GZIP')
+    cherrypy.HTTPError(406, "identity, gzip").set_response()

cherrypy/test/test_caching.py

 import cherrypy
 from cherrypy.lib.compat import next, ntob, quote, xrange
 from cherrypy.lib import httputil
-import cherrypy.lib.tools.encoding
+from cherrypy.lib.compression import decompress
 
 gif_bytes = ntob(
     'GIF89a\x01\x00\x01\x00\x82\x00\x01\x99"\x1e\x00\x00\x00\x00\x00'
         self.getPage("/", method="GET", headers=[('Accept-Encoding', 'gzip')])
         self.assertHeader('Content-Encoding', 'gzip')
         self.assertHeader('Vary')
-        self.assertEqual(
-            cherrypy.lib.tools.encoding.decompress(self.body),
-            ntob("visit #5")
-        )
+        self.assertEqual(decompress(self.body), ntob("visit #5"))
 
         # Now check that a second request gets the gzip header and gzipped body
         # This also tests a bug in 3.0 to 3.0.2 whereby the cached, gzipped
         # response body was being gzipped a second time.
         self.getPage("/", method="GET", headers=[('Accept-Encoding', 'gzip')])
         self.assertHeader('Content-Encoding', 'gzip')
-        self.assertEqual(
-            cherrypy.lib.tools.encoding.decompress(
-                self.body), ntob("visit #5")
-        )
+        self.assertEqual(decompress(self.body), ntob("visit #5"))
 
         # Now check that a third request that doesn't accept gzip
         # skips the cache (because the 'Vary' header denies it).