Commits

Ralph Bean committed c6eb286

PEP8 for the main strainer module.

  • Parent commits 785bd6d

Files changed (10)

setup.py

     pass
 
 from setuptools import setup, find_packages
-import sys, os
+import sys
+import os
 
 version = '0.1.4'
 
 setup(name='strainer',
       version=version,
-      description="Tools to allow developers to cleanup web serialization objects (HTML, JSON, XHTML)",
+      description="Tools to allow developers to clean up web " +
+      "serialization objects (HTML, JSON, XHTML)",
       long_description="""\
 Provides middleware for detecting and correcting errors in web pages that are
 served via the standard WSGI protocol used by most Python web frameworks.
     >>> from strainer.middleware import WellformednessCheckerMiddleware
     >>> app = WellformednessCheckerMiddleware(app)
 
-This uses the expat parser to detect most syntax errors and mismatched tags, 
-but it won't perform stricter checks that the document structure matches the 
-XHTML DTD, such as detecting disallowed child tags or attributes.  For that 
-you should install a recent version of lxml (e.g. "easy_install lxml") and 
+This uses the expat parser to detect most syntax errors and mismatched tags,
+but it won't perform stricter checks that the document structure matches the
+XHTML DTD, such as detecting disallowed child tags or attributes.  For that
+you should install a recent version of lxml (e.g. "easy_install lxml") and
 use XHTMLValidatorMiddleware instead, with code such as::
 
     >>> from strainer.middleware import XHTMLValidatorMiddleware
     >>> from strainer.middleware import JSONValidatorMiddleware
     >>> app = JSONValidatorMiddleware(app)
 
-If your web framework doesn't provide an alternative handler for the error 
-messages that are logged to the "strainer.middleware" channel, you can have 
+If your web framework doesn't provide an alternative handler for the error
+messages that are logged to the "strainer.middleware" channel, you can have
 them printed to sys.stderr with::
 
     >>> import logging
     >>> from strainer.middleware import XHTMLifyMiddleware
     >>> app = XHTMLifyMiddleware(app)
 
-This is somewhat experimental, but it will improve faster if people use it 
+This is somewhat experimental, but it will improve faster if people use it
 and email us bug reports...
 
 As with all (or at least most) WSGI middleware, you can also combine them::
     >>> app = JSONValidatorMiddleware(app)
 
 The middleware in this package buffer the output internally (this violates
-the PEP 333 specification, but it seems unavoidable), so it is best to use 
+the PEP 333 specification, but it seems unavoidable), so it is best to use
 them near the top of the middleware stack.
 """,
-      classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+      classifiers=[],
       keywords='html xhtml json wsgi',
       author='Tom Lynn and Chris Perkins',
       author_email='chris@percious.com',
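
For reference, the stacking the description recommends composes like any
WSGI middleware; a minimal sketch (the inner app is a stand-in, not part of
the package):

    >>> from strainer.middleware import XHTMLValidatorMiddleware
    >>> from strainer.middleware import JSONValidatorMiddleware
    >>> def app(environ, start_response):  # stand-in WSGI app
    ...     start_response('200 OK', [('Content-Type', 'text/html')])
    ...     return ['<html><head><title/></head><body></body></html>']
    >>> app = JSONValidatorMiddleware(XHTMLValidatorMiddleware(app))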

strainer/almostequal.py

+
+
 ## {{{ http://code.activestate.com/recipes/577124/ (r1)
 def _float_approx_equal(x, y, tol=1e-18, rel=1e-7):
     if tol is rel is None:
-        raise TypeError('cannot specify both absolute and relative errors are None')
+        raise TypeError(
+            'absolute and relative errors cannot both be None'
+        )
     tests = []
-    if tol is not None: tests.append(tol)
-    if rel is not None: tests.append(rel*abs(x))
+    if tol is not None:
+        tests.append(tol)
+
+    if rel is not None:
+        tests.append(rel * abs(x))
+
     assert tests
     return abs(x - y) <= max(tests)
 
     if not (type(x) is type(y) is float):
         # Skip checking for __approx_equal__ in the common case of two floats.
         methodname = '__approx_equal__'
-        # Allow the objects to specify what they consider "approximately equal",
-        # giving precedence to x. If either object has the appropriate method, we
-        # pass on any optional arguments untouched.
-        for a,b in ((x, y), (y, x)):
+        # Allow the objects to specify what they consider "approximately
+        # equal", giving precedence to x. If either object has the
+        # appropriate method, we pass on any optional arguments untouched.
+        for a, b in ((x, y), (y, x)):
             try:
                 method = getattr(a, methodname)
             except AttributeError:
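
A worked example of the rule implemented above,
abs(x - y) <= max(tol, rel * abs(x)): with the defaults tol=1e-18 and
rel=1e-7, values near 1.0 may differ by up to 1e-7 and still compare as
approximately equal:

    >>> _float_approx_equal(1.0, 1.0 + 5e-8)   # within rel * abs(x) == 1e-7
    True
    >>> _float_approx_equal(1.0, 1.0 + 2e-7)   # outside both tolerances
    False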
 
 import unittest
 
-__all__ = ['STestCase', 'call_super', 'DelayedException', 'exception_raiser_generator', 'exc_raiser_gen', 'erg']
+__all__ = [
+    'STestCase',
+    'call_super',
+    'DelayedException',
+    'exception_raiser_generator',
+    'exc_raiser_gen',
+    'erg',
+]
+
 
 class STestCase(unittest.TestCase):
     '''
         for obj, attr_name, orig in reversed(self.__mocked):
             setattr(obj, attr_name, orig)
 
+
 class DelayedException(Exception):
     """Delayed Exception"""
     def __str__(self):
-        return self.__class__.__doc__ + ": " + ', '.join([str(e) for e in self.args])
+        return self.__class__.__doc__ + ": " + \
+                ', '.join([str(e) for e in self.args])
 
 #Default delay action is to not delay
 delay = False
 
+
 def call_super(after=None, before=None, delay=delay):
     """
     Call the super class's method of the same name either before or after
 
     If before and after are both set, the value for "after" will take
     precedence.
-    
+
     This method can handle recursive calls up the tree.  It uses C{super} to
     determine the baseclass method to call, so your class structure must
     support that.
     # we trigger on after, so set after based on before if after is not set
     if after is None and before:
         after = False
+
     def dec_call_super(func):
         """
         Always call the superclass's matching method, regardless of the outcome
         """
         def wrapped_call_super(self, klass=None):
             delayed = []
+
             def append_delayed_and_print_traceback(e):
                 # With the caught exception, log it, and delay it
                 delayed.append(e)
                 log.error(str(e))
 
             def get_and_call_super(klass):
-                # Calls the super class function, catching, logging and delaying any exceptions that might occur
+                # Calls the super class function, catching, logging and
+                # delaying any exceptions that might occur
                 try:
                     if not klass:
                         klass = self.__class__
-                    #skip generations that don't define a method named func.__name__
+                    # skip generations that don't define a method
+                    # named func.__name__
                     for cls in inspect.getmro(klass):
                         if func.__name__ in cls.__dict__:
                             klass = cls
                             break
                     else:
-                        raise AttributeError('%s not found on %s' % (func.__name__, func.im_class.__name__))
+                        raise AttributeError('%s not found on %s' % (
+                            func.__name__, func.im_class.__name__))
                     superfunc = getattr(super(klass, self), func.__name__)
                     #superfunc is bound, so "self" is implied
                     #Args and KW needed?
             if delayed:
                 raise DelayedException(*delayed)
             return ret
-        wrapped_call_super.__wrapped_func__  = func
+        wrapped_call_super.__wrapped_func__ = func
         return wrapped_call_super
     dec_call_super.args = (after,)
     return dec_call_super
 
+
 def assert_raises(exc, method, *args, **kwargs):
-    '''Like the nose tool of the same name, but returns the exception raised so that args can be checked'''
+    '''Like the nose tool of the same name, but returns the exception
+    raised so that args can be checked'''
     try:
         ret = method(*args, **kwargs)
     except exc, e:
         return e
     else:
-        raise AssertionError('The expected exception (%s) was not raised' % exc.__name__)
+        raise AssertionError(
+            'The expected exception (%s) was not raised' % exc.__name__)
+
 
 def exception_raiser_generator(exc, *args, **kwargs):
     '''
     the specified exception, with the supplied arguments.
     @param exc: Exception class to raise
     @type exc: A class descended from Exception, or other raiseable error
-    @returns: A method that when called with any arguments raises the specified exception
+    @returns: A method that when called with any arguments raises the
+              specified exception
     @rtype: method
     '''
     def raiser(*a, **kw):
     return raiser
 
 exc_raiser_gen = erg = exception_raiser_generator
-
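
Taken together, the testing helpers above compose like this sketch
(assuming, per the docstring, that the generated method raises the
specified exception with the supplied arguments):

    >>> raiser = exception_raiser_generator(ValueError, 'bad value')
    >>> e = assert_raises(ValueError, raiser)
    >>> e.args
    ('bad value',)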

strainer/doctypes.py

 DOCTYPE_XHTML1_FRAMESET = (
     '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
     '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">\n')
-
 ch.setLevel(logging.DEBUG)
 
 # create formatter
-formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+formatter = logging.Formatter(fmt)
 
 # add formatter to ch
 ch.setFormatter(formatter)
 log.addHandler(ch)
 
 
-__all__ = ['log']
+__all__ = ['log']
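
The handler configured above logs to the console at DEBUG; a consumer can
still quiet an individual channel, e.g. the "strainer.middleware" channel
named in the setup.py description, with the standard logging API:

    >>> import logging
    >>> logging.getLogger('strainer.middleware').setLevel(logging.ERROR)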

strainer/middleware.py

 
 LOG = logging.getLogger('strainer.middleware')
 
+
 def get_content_type(headers, default=''):
     """Returns the value of the content-type header or default."""
     for key, value in headers:
-        if key.lower()=='content-type':
+        if key.lower() == 'content-type':
             return value
     return default
 
+
 class BufferingMiddleware(object):
     """Buffers the response and passes it through self.filter()."""
     def __init__(self, app):
     def __call__(self, environ, start_response):
         output = StringIO()
         start_response_args = []
+
         def dummy_start_response(status, headers, exc_info=None):
             start_response_args.append((status, headers, exc_info))
             return output.write
     def filter(self, status, headers, exc_info, response):
         content_type = get_content_type(headers)
         parts = content_type.split(';', 1)
-        if len(parts)==2:
+        if len(parts) == 2:
             content_type, rest = parts
         else:
             rest = ''
 
 from wellformed import is_wellformed_xhtml, is_wellformed_xml
 
+
 class WellformednessCheckerMiddleware(BufferingMiddleware):
     """Checks that served webpages are well-formed HTML/XHTML/XML,
        according to the Content-Type header.
         content_type = content_type.split(';')[0].strip()
         if content_type in ('text/html', 'application/xml+html'):
             is_wellformed_xhtml(response, record_error=self.record_error)
-        elif content_type.split('+')[0]=='application/xml':
+        elif content_type.split('+')[0] == 'application/xml':
             is_wellformed_xml(response, record_error=self.record_error)
         return response
 
 from validate import validate_json, JSONSyntaxError
 
+
 class JSONValidatorMiddleware(BufferingMiddleware):
     def __init__(self, app, doctype='', record_error=LOG.error):
         """The middleware will output JSON validation error messages
     def filter(self, status, headers, exc_info, response):
         content_type = get_content_type(headers)
         content_type = content_type.split(';')[0].strip()
-        if content_type=='text/json':
+        if content_type == 'text/json':
             try:
                 validate_json(response)
             except JSONSyntaxError, e:
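
A custom filter following the BufferingMiddleware pattern above might look
like this sketch (the subclass is illustrative, not part of the package;
filter() is assumed to return the possibly modified body, as
WellformednessCheckerMiddleware's does):

    >>> from strainer.middleware import BufferingMiddleware, get_content_type
    >>> class TrimmingMiddleware(BufferingMiddleware):
    ...     """Illustrative subclass: strips trailing whitespace from
    ...        buffered text/html responses."""
    ...     def filter(self, status, headers, exc_info, response):
    ...         if get_content_type(headers).startswith('text/html'):
    ...             return response.rstrip()
    ...         return response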

strainer/operators.py

 from xhtmlify import xhtmlify, XMLParsingError, ValidationError
 from xml.etree import ElementTree as etree
 from xml.parsers.expat import ExpatError
-import copy, re
+import copy
+import re
 from pprint import pformat, pprint
 try:
     from simplejson import loads
 
 log = log.log
 
+
 def remove_whitespace_nodes(node):
     new_node = copy.copy(node)
     new_node._children = []
         new_node.append(child)
     return new_node
 
+
 def remove_namespace(doc):
     """Remove namespace in the passed document in place."""
     for elem in doc.getiterator():
         if match:
             elem.tag = match.group(2)
 
+
 def replace_escape_chars(needle):
     needle = needle.replace('&nbsp;', ' ')
     needle = needle.replace(u'\xa0', ' ')
     return needle
 
+
 def normalize_to_xhtml(needle):
-    # We still need this, when in a webtest response, &nbsp; gets replaced with \xa0, and xhtmlify can't handle non-acii
+    # We still need this: in a webtest response, &nbsp; gets replaced
+    # with \xa0, and xhtmlify can't handle non-ascii
     needle = replace_escape_chars(needle)
     #first, we need to make sure the needle is valid html
     needle = xhtmlify(needle)
     try:
         needle_node = etree.fromstring(needle)
     except ExpatError, e:
-        raise XMLParsingError('Could not parse %s into xml. %s'%(needle, e.args[0]))
+        raise XMLParsingError(
+            'Could not parse %s into xml. %s' % (needle, e.args[0]))
     needle_node = remove_whitespace_nodes(needle_node)
     remove_namespace(needle_node)
     needle_s = etree.tostring(needle_node)
     return needle_s
 
+
 def in_xhtml(needle, haystack):
     try:
         needle_s = normalize_to_xhtml(needle)
     except ValidationError, e:
-        raise XMLParsingError('Could not parse needle: %s into xml. %s'%(needle, e.message))
+        raise XMLParsingError(
+            'Could not parse needle: %s into xml. %s' %
+            (needle, e.message))
     try:
         haystack_s = normalize_to_xhtml(haystack)
     except ValidationError, e:
-        raise XMLParsingError('Could not parse haystack: %s into xml. %s'%(haystack, e.message))
+        raise XMLParsingError(
+            'Could not parse haystack: %s into xml. %s' %
+            (haystack, e.message))
     return needle_s in haystack_s
 
+
 def eq_xhtml(needle, haystack, wrap=False):
     if wrap:
         needle = '<div id="wrapper">%s</div>' % needle
     try:
         needle_s = normalize_to_xhtml(needle)
     except ValidationError, e:
-        raise XMLParsingError('Could not parse needle: %s into xml. %s'%(needle, e.message))
+        raise XMLParsingError(
+            'Could not parse needle: %s into xml. %s' %
+            (needle, e.message))
     try:
         haystack_s = normalize_to_xhtml(haystack)
     except ValidationError, e:
-        raise XMLParsingError('Could not parse haystack: %s into xml. %s'%(haystack, e.message))
+        raise XMLParsingError(
+            'Could not parse haystack: %s into xml. %s' %
+            (haystack, e.message))
     return needle_s == haystack_s
 
+
 def assert_in_xhtml(needle, haystack):
     """
     assert that one xhtml stream can be found within another
     """
-    assert in_xhtml(needle, haystack), "%s not found in %s"%(needle, haystack)
+    assert in_xhtml(needle, haystack), \
+            "%s not found in %s" % (needle, haystack)
+
 
 def assert_eq_xhtml(needle, haystack, wrap=False):
     """
     assert that one xhtml stream equals another
     """
-    assert eq_xhtml(needle, haystack, wrap), "%s \n --- does not equal ---\n%s"%(needle, haystack)
+    assert eq_xhtml(needle, haystack, wrap), \
+            "%s \n --- does not equal ---\n%s" % (needle, haystack)
+
 
 def assert_raises(exc, method, *args, **kw):
     try:
     except exc, e:
         return e
     else:
-        raise AssertionError('%s() did not raise %s' % (method.__name__, exc.__name__))
+        raise AssertionError(
+            '%s() did not raise %s' % (method.__name__, exc.__name__))
+
 
 def num_eq(one, two):
-    assert type(one)==type(two), 'The types %s and %s do not match' % (type(one), type(two))
+    assert type(one) == type(two), \
+            'The types %s and %s do not match' % (type(one), type(two))
     eq_(one, two, 'The values %s and %s are not equal' % (one, two))
 
-def neq_(one, two, msg = None):
+
+def neq_(one, two, msg=None):
     """Shorthand for 'assert a != b, "%r == %r" % (a, b)
     """
     assert a != b, msg or "%r == %r" % (a, b)
 
+
 def eq_pprint(a, b, msg=None):
     if a != b:
         log.error(msg)
         return False
     return True
 
+
 def _eq_list(ca, cb, ignore=None):
-    r = eq_pprint(len(ca), len(cb), "The lengths of the lists are different %s != %s" % (str(ca), str(cb)))
+    r = eq_pprint(len(ca), len(cb),
+                  "The lengths of the lists are different %s != %s" %
+                  (str(ca), str(cb)))
     if not r:
         return False
     for i, v in enumerate(ca):
                 return False
     return True
 
+
 def _eq_dict(ca, cb, ignore=None):
     # assume ca and cb can be destructively modified
     if ignore:
             if key in cb:
                 del cb[key]
 
-    #this needs to be recursive so we can '&ignore'-out ids anywhere in a json stream
+    # this needs to be recursive so we can '&ignore'-out ids anywhere
+    # in a json stream
     for key in set(ca.keys() + cb.keys()):
         if key not in ca:
-            log.error('%s!= %s\n key "%s" not in first argument' %(ca, cb, key))
+            log.error('%s != %s\n key "%s" not in first argument' %
+                      (ca, cb, key))
             return False
         if key not in cb:
-            log.error('%s!= %s\n key "%s" not in second argument' %(ca, cb, key))
+            log.error('%s != %s\n key "%s" not in second argument' %
+                      (ca, cb, key))
             return False
-        
+
         v1 = ca[key]
         v2 = cb[key]
         log.info('Comparing values for key: %s', key)
             continue
         if not isinstance(v2, basestring) and isinstance(v1, basestring):
             if not eq_pprint(type(v1), type(v2)):
-                log.error('The types of values for "%s" do not match (%s vs. %s)' %(key, v1, v2))
+                log.error(
+                    'The types of values for "%s" do not match (%s vs. %s)' %
+                    (key, v1, v2))
                 return False
         if isinstance(v1, list):
             if not _eq_list(v1, v2, ignore=ignore):
                 return False
         elif isinstance(v1, float) and isinstance(v2, float):
             if not approx_equal(v1, v2):
-                log.error('The values for "%s" do not match (%.30f vs. %.30f)' %(key, v1, v2))
+                log.error(
+                    'The values for "%s" do not match (%.30f vs. %.30f)' %
+                    (key, v1, v2))
                 return False
         else:
             if not v1 == v2:
-                log.error('The values for "%s" do not match (%s vs. %s)' %(key, v1, v2))
+                log.error(
+                    'The values for "%s" do not match (%s vs. %s)' %
+                    (key, v1, v2))
                 return False
     return True
 
+
 def eq_dict(a, b, ignore=None):
-    #Make a copy as our search for ignored values is destructive
+    # Make a copy as our search for ignored values is destructive
     ca = copy.deepcopy(a)
     cb = copy.deepcopy(b)
-                
+
     return _eq_dict(ca, cb, ignore=ignore)
 
+
 def eq_json(a, b):
     if isinstance(a, basestring):
         a = loads(a)
     if isinstance(b, basestring):
         b = loads(b)
-        
+
     return eq_dict(a, b)
 
 
 __all__ = [_key for _key in locals().keys() if not _key.startswith('_')]
-
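
A quick sketch of the comparison helpers defined above (assuming ignore is
an iterable of keys dropped from both sides, as the deletion loop in
_eq_dict suggests):

    >>> in_xhtml('<b>hi</b>', '<div><p><b>hi</b></p></div>')
    True
    >>> eq_dict({'a': 1, 'id': 5}, {'a': 1, 'id': 6}, ignore=['id'])
    True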

strainer/validate.py

 DEFAULT_XHTML_TEMPLATE = ('<html><head><title/></head><body><div>\n'
                           '%s</div></body></html>')
 
+
 class XHTMLSyntaxError(ValueError):
     pass
 
+
 class JSONSyntaxError(ValueError):
     pass
 
 _parser = None
 
+
 def _get_parser():
     global _parser, lxml
     if _parser is not None:
         return _parser
     if lxml is None:
         import lxml.etree
+
     class CustomResolver(lxml.etree.Resolver):
         def __init__(self):
             super(CustomResolver, self).__init__()
                              'xhtml-lat1.ent', 'xhtml-special.ent',
                              'xhtml-symbol.ent']:
                 url = 'http://www.w3.org/TR/xhtml1/DTD/' + filename
-                self.cache[url] = resource_string(__name__, 'dtds/'+filename)
+                self.cache[url] = resource_string(__name__, 'dtds/' + filename)
 
         def resolve(self, url, id, context):
             return self.resolve_string(self.cache[url], context)
     _parser.resolvers.add(resolver)
     return _parser
 
+
 def validate_xhtml(xhtml, doctype=''):
     """Validates that doctype + xhtml matches the DTD.
        If not given or '', doctype will be extracted from the document.
         # relative to xhtml.
         tline = doctype.count('\n')
         message = re.sub(r'line (\d+)',
-                         lambda m: 'line %s' % (int(m.group(1))-tline),
+                         lambda m: 'line %s' % (int(m.group(1)) - tline),
                          e.message)
         raise XHTMLSyntaxError(message)
 
+
 def validate_xhtml_fragment(xhtml_fragment, doctype=None, template=None):
     """Validates that xhtml_fragment matches the doctype, after it
        has been inserted into a basic template document's body tag.
         # Try to fix up the error message so line numbers are
         # relative to the fragment.
         message = re.sub(r'line (\d+)',
-                         lambda m: 'line %s' % (int(m.group(1))-tline),
+                         lambda m: 'line %s' % (int(m.group(1)) - tline),
                          e.message)
         raise XHTMLSyntaxError(message)
 
+
 def validate_json(jsonstr):
     """Validates that json is a valid JSON string (by loading it)."""
     try:
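
Usage of the validators, as a sketch (assuming validate_json returns
nothing on success and raises JSONSyntaxError on bad input, as its
docstring and the middleware's except clause suggest):

    >>> validate_json('{"ok": true}')
    >>> try:
    ...     validate_json('{"broken":')
    ... except JSONSyntaxError:
    ...     print 'caught bad JSON'
    caught bad JSON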

strainer/wellformed.py

                              entitydefs=htmlentitydefs.entitydefs,
                              record_error=record_error)
 
+
 def is_wellformed_xml(docpart, doctype='', entitydefs={}, record_error=None):
     """Prefixes doctype to docpart and parses the resulting string.
        Returns True if it parses as XML without error. If entitydefs
         parser.feed(doc)
         parser.close()
         return True
-    except SAXParseException, e:  # catches our exception and other parse errors
+    except SAXParseException, e:
+        # catches our exception and other parse errors
         if record_error is not None:
             line, column = e.getLineNumber(), e.getColumnNumber()
             # Correct location to account for our adding a doctype prefix.
             if line == 1:
                 column -= len(doctype) - (doctype.rfind('\n') + 1)
             # Convert column to 1-based indexing
-            record_error('line %d, column %d: %s' % (line, column+1, e.message))
+            record_error('line %d, column %d: %s' % (
+                line, column + 1, e.message
+            ))
         return False
 
+
 def test():
     assert is_wellformed_xhtml('<foo>&nbsp;&auml;&#65;</foo>')
 
-if __name__=='__main__':
+
+if __name__ == '__main__':
     test()
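
A sketch of the error-reporting path above, collecting messages instead of
logging them (the exact message text depends on the parser):

    >>> errors = []
    >>> is_wellformed_xml('<a><b></a>', record_error=errors.append)
    False
    >>> len(errors)
    1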

strainer/xhtmlify.py

 #!/usr/bin/env python
 """An HTML to XHTML converter."""
-import re, htmlentitydefs, codecs
+import re
+import htmlentitydefs
+import codecs
 import encodings.aliases
 
 
-__all__ = ['xhtmlify', 'xmldecl', 'fix_xmldecl', 'sniff_encoding', 'ValidationError']
+__all__ = [
+    'xhtmlify',
+    'xmldecl',
+    'fix_xmldecl',
+    'sniff_encoding',
+    'ValidationError',
+]
 
-DEBUG = False  # if true, show stack of tags in error messages
+# if true, show stack of tags in error messages
+DEBUG = False
 NAME_RE = r'(?:[A-Za-z_][A-Za-z0-9_.-]*(?::[A-Za-z_][A-Za-z0-9_.-]*)?)'
-    # low ascii chars of <http://www.w3.org/TR/xml-names>'s "QName" token
+# low ascii chars of <http://www.w3.org/TR/xml-names>'s "QName" token
 BAD_ATTR_RE = r'''[^> \t\r\n]+'''
-ATTR_RE = r'''%s[ \t\r\n]*(?:=[ \t\r\n]*(?:"[^"]*"|'[^']*'|%s))?[ \t\r\n]*''' % (NAME_RE, BAD_ATTR_RE)
+ATTR_RE = r'''%s[ \t\r\n]*(?:=[ \t\r\n]*(?:"[^"]*"|'[^']*'|%s))?[ \t\r\n]*'''\
+        % (NAME_RE, BAD_ATTR_RE)
 CDATA_RE = r'<!\[CDATA\[.*?\]\]>'
-#COMMENT_RE = r'<!--.*?-->|<![ \t\r\n]*%s.*?>' % NAME_RE # comment or doctype-alike
+# comment or doctype-alike
+#COMMENT_RE = r'<!--.*?-->|<![ \t\r\n]*%s.*?>' % NAME_RE
 COMMENT_RE = r'<!--.*?-->'
-TAG_RE = r'''%s|%s|<((?:[^<>'"]+|'[^']*'|"[^"]*"|'|")*)>|<''' % (COMMENT_RE, CDATA_RE)
-INNARDS_RE = r'(%s(?:[ \t\r\n]+%s)*[ \t\r\n]*(/?)\Z)|(/%s[ \t\r\n]*\Z)|(.*)' % (
-                 NAME_RE, ATTR_RE, NAME_RE)
+TAG_RE = r'''%s|%s|<((?:[^<>'"]+|'[^']*'|"[^"]*"|'|")*)>|<'''\
+        % (COMMENT_RE, CDATA_RE)
+INNARDS_RE = r'(%s(?:[ \t\r\n]+%s)*[ \t\r\n]*(/?)\Z)|(/%s[ \t\r\n]*\Z)|(.*)'\
+        % (NAME_RE, ATTR_RE, NAME_RE)
 
 SELF_CLOSING_TAGS = [
     # As per XHTML 1.0 sections 4.6, C.2 and C.3, these are the elements
     'section', 'article', 'aside', 'header', 'footer', 'nav'  # HTML 5
 ]
 
-class StrainerError(Exception): pass
+
+class StrainerError(Exception):
+    pass
+
 
 class ValidationError(StrainerError):
     def __init__(self, message, pos, line, offset, tags):
-        message += ' at line %d, column %d (char %d)' % (line, offset, pos+1)
+        message += ' at line %d, column %d (char %d)' % (line, offset, pos + 1)
         if DEBUG:
             message += '\n%r' % tags
         super(ValidationError, self).__init__(message)
         self.line = line
         self.offset = offset
 
-class XMLParsingError(StrainerError):pass
+
+class XMLParsingError(StrainerError):
+    pass
+
 
 def ampfix(value):
     """Replaces ampersands in value that aren't part of an HTML entity.
     "<" or ">" outside of any CDATA sections with "&lt;" or "&gt;"."""
     def fixup(m):
         text = m.group(0)
-        if text=='&':
+        if text == '&':
             pass
         elif text[:2] == "&#":
             # character reference
             else:
                 # "&#X...;" is invalid in XHTML
                 c = ord(c)
-                if c in (0x9, 0xA, 0xD) or 0x0020<=c<=0xD7FF or (
-                   0xE000<=c<=0xFFFD) or 0x10000<=c<=0x10FFFF: 
+                if c in (0x9, 0xA, 0xD) or 0x0020 <= c <= 0xD7FF or (
+                   0xE000 <= c <= 0xFFFD) or 0x10000 <= c <= 0x10FFFF:
                     return text.lower()  # well-formed
                 else:
                     pass
                 else:
                     pass
         return '&amp;' + text[1:]
+
     def fix2(m):
         g = m.group()
         if g.startswith('<!'):
             return g
-        elif g=='<':
+        elif g == '<':
             return '&lt;'
-        elif g=='>':
+        elif g == '>':
             return '&gt;'
         else:
             return re.sub("&#?\w+;|&", fixup, g)
     R = re.compile('(<!\[CDATA\[.*?\]\]>)|<!--.*?-->|<|>|[^<>]+', re.DOTALL)
     return R.sub(fix2, value)
 
+
 def fix_attrs(tagname, attrs, ERROR=None):
     """Returns an XHTML-clean version of attrs, the attributes part
        of an (X)HTML tag. Tries to make as few changes as possible,
        but does convert all attribute names to lowercase."""
-    if not attrs and tagname!='html':
+    if not attrs and tagname != 'html':
         return ''  # most tags have no attrs, quick exit in that case
     lastpos = 0
     result = []
                 ERROR('Repeated attribute "%s"' % name, m.start())
             else:
                 seen[name] = 1
-            if len(value)>1 and value[0]+value[-1] in ("''", '""'):
+
+            if len(value) > 1 and value[0] + value[-1] in ("''", '""'):
                 if value[0] not in value[1:-1]:  # preserve their quoting
                     value = ampfix(value)
-                    output('%s%s=%s%s%s' % (name, postname, preval, value, postval))
+                    output('%s%s=%s%s%s' % (
+                        name, postname, preval, value, postval))
                     continue
                 value = value[1:-1]
             value = ampfix(value.replace('"', '&quot;'))
             output('%s%s=%s"%s"%s' % (name, postname, preval, value, postval))
     after = attrs[lastpos:]
-    if re.match(r'[ \t\r\n]*/?', after).end()==len(after):
+    if re.match(r'[ \t\r\n]*/?', after).end() == len(after):
         output(after)
     else:
         ERROR("Malformed tag contents", lastpos)
-    if tagname=='html' and 'xmlns' not in seen:
+
+    if tagname == 'html' and 'xmlns' not in seen:
         output(space_before + 'xmlns="http://www.w3.org/1999/xhtml"')
     return ''.join(result)
 
+
 def cdatafix(value):
     """Alters value, the body of a <script> or <style> tag, so that
        it will be parsed equivalently by the underlying language parser
     cdata_re = re.compile('(%s)' % CDATA_RE, re.DOTALL)
     result = []
     output = result.append
-    outside_lexer  = re.compile(r'''((/\*|"|')|(<!\[CDATA\[)|(\]\]>)|\]|(<)|(>)|(&))|/|[^/"'<>&\]]+''')
-    comment_lexer  = re.compile(r'''((\*/)|(<!\[CDATA\[)|(\]\]>)|\]|(<)|(>)|(&))|\*|[^\*<>&\]]+''')
-    dqstring_lexer = re.compile(r'''\\[^<>]|((")|(<!\[CDATA\[)|(\]\]>)|\]|(\\<|<)|(\\>|>)|(\\&|&))|[^\\"<>&\]]+''', re.DOTALL)
-    sqstring_lexer = re.compile(r'''\\[^<>]|((')|(<!\[CDATA\[)|(\]\]>)|\]|(\\<|<)|(\\>|>)|(\\&|&))|[^\\'<>&\]]+''', re.DOTALL)
+    outside_lexer = re.compile(
+        r'''((/\*|"|')|(<!\[CDATA\[)|(\]\]>)|\]|(<)|(>)|(&))|/|[^/"'<>&\]]+''')
+    comment_lexer = re.compile(
+        r'''((\*/)|(<!\[CDATA\[)|(\]\]>)|\]|(<)|(>)|(&))|\*|[^\*<>&\]]+''')
+    dqstring_lexer = re.compile(
+        r'''\\[^<>]|((")|(<!\[CDATA\[)|(\]\]>)|\]|(\\<|<)|(\\>|>)|(\\&|&))|[^\\"<>&\]]+''', re.DOTALL)
+    sqstring_lexer = re.compile(
+        r'''\\[^<>]|((')|(<!\[CDATA\[)|(\]\]>)|\]|(\\<|<)|(\\>|>)|(\\&|&))|[^\\'<>&\]]+''', re.DOTALL)
     Outside, Comment, DQString, SQString = [], [], [], []
     Outside += (outside_lexer.match,
                 '/*<![CDATA[*/ < /*]]>*/',
     while pos < len(value):
         m = lexer(value, pos)
         #print '%s:' % names[lexer], 'in_cdata=%d' % in_cdata, repr(m.group())
-        assert m.start()==pos  # no gaps
+        assert m.start() == pos  # no gaps
         pos = m.end()
         (interesting, state_changer, cdata_start, cdata_end,
          lt, gt, amp) = m.groups()
                     output(m.group())
                 else:
                     output(']]')
-                    pos = m.start()+2  # so > gets escaped as normal
+                    pos = m.start() + 2  # so > gets escaped as normal
                 in_cdata = False
             elif lt:
                 output(in_cdata and m.group() or lt_rep)
                 output(in_cdata and m.group() or gt_rep)
             elif amp:
                 output(in_cdata and m.group() or amp_rep)
-            elif m.group()==']':
+            elif m.group() == ']':
                 output(']')
             else:
                 output(in_cdata and m.group() or state_changer)
-                lexer, lt_rep, gt_rep, amp_rep, next_state = next_state[state_changer]
+                lexer, lt_rep, gt_rep, amp_rep, next_state = \
+                        next_state[state_changer]
         else:
             output(m.group())
     assert not in_cdata  # enforced by calling parser (I think)
     return ''.join(result)
 
+
 def xmldecl(version='1.0', encoding=None, standalone=None):
     """Returns a valid <?xml ...?> declaration suitable for using
        at the start of a document. Note that no other characters are
                                   0, 1, 1, [])
     sddecl = ''
     if standalone is not None:
-        if standalone is True or standalone=='yes':
+        if standalone is True or standalone == 'yes':
             sddecl = ' standalone="yes"'
-        elif standalone is False or standalone=='no':
+        elif standalone is False or standalone == 'no':
             sddecl = ' standalone="no"'
         else:
             # Don't tell them expected format, guessing won't help
                                   0, 1, 1, [])
     return '<?xml version="%s"%s%s ?>' % (version, encodingdecl, sddecl)
 
+
 def fix_xmldecl(xml, encoding=None, add_encoding=False, default_version='1.0'):
     """Looks for an XML declaration near the start of xml, cleans it up,
        and returns the adjusted version of xml. Doesn't add a declaration
         if starts_utf16_re.match(encoding):
             # XML spec 4.3.3 says "Entities encoded in UTF-16 MUST [...]
             # begin with the Byte Order Mark".
-            if not unicode_input and not (xml.startswith(codecs.BOM_UTF16_LE) or
-                                          xml.startswith(codecs.BOM_UTF16_BE)):
+            if not unicode_input and not (
+                xml.startswith(codecs.BOM_UTF16_LE) or
+                xml.startswith(codecs.BOM_UTF16_BE)):
+
                 xml = u'\ufeff'.encode(encoding) + xml
             elif unicode_input and bomless_utf16_re.match(encoding):
                 xml = u'\ufeff' + xml
     chars_we_need = ('''abcdefghijklmnopqrstuvwxyz'''
                      '''ABCDEFGHIJKLMNOPQRSTUVWXYZ'''
                      '''0123456789.-_ \t\r\n<?'"[]:()+*>''')
-    assert encode(chars_we_need*3)==encode(chars_we_need)*3, enc
+    assert encode(chars_we_need * 3) == encode(chars_we_need) * 3, enc
     L = lambda s: re.escape(encode(s))  # encoded form of literal s
     group = lambda s: '(%s)' % s
     optional = lambda s: '(?:%s)?' % s
     lower = charset('abcdefghijklmnopqrstuvwxyz')
     digits = charset('0123456789')
     punc = charset('._-')
-    Name = '(?:%s%s*)' % (oneof([upper, lower]), 
+    Name = '(?:%s%s*)' % (oneof([upper, lower]),
                           oneof([upper, lower, digits, punc]))
-    Ss = charset(' \t\r\n\f')+'*'  # optional white space (inc. formfeed)
-    Sp = charset(' \t\r\n\f')+'+'  # required white space (inc. formfeed)
+    Ss = charset(' \t\r\n\f') + '*'  # optional white space (inc. formfeed)
+    Sp = charset(' \t\r\n\f') + '+'  # required white space (inc. formfeed)
     VERSION = encode('version')
     ENCODING = encode('encoding')
     STANDALONE = encode('standalone')
                                 L('Xml'), L('XmL'), L('XMl'), L('XML')])])
     Attr = ''.join([group(Sp), group(Name), group(''.join([Ss, L('='), Ss])),
         oneof([
-            group(L('"')+all_until(oneof([L('"'), L('<'), L('>')]))+L('"')),
-            group(L("'")+all_until(oneof([L("'"), L('<'), L('>')]))+L("'")),
+            group(L('"') + all_until(oneof([L('"'), L('<'), L('>')])) + L('"')),
+            group(L("'") + all_until(oneof([L("'"), L('<'), L('>')])) + L("'")),
             group(all_until(oneof([Sp, L('?'), L('<'), L('>')]))),
-        ]) ])
+        ])
+    ])
     Attr_re = re.compile(Attr, re.DOTALL)
-    EndDecl = ''.join([group(Ss), oneof([''.join([L('?'), Ss, L('>')]), L('>')])])
+    EndDecl = ''.join([
+        group(Ss), oneof([''.join([L('?'), Ss, L('>')]), L('>')])
+    ])
     m = re.match(StartDecl, xml)
     if m:
         pos = m.end()
                     value = unquoted
                 if name in attrs:
                     pass  # TODO: warn: already got a value for xxx
-                elif name==VERSION:
+                elif name == VERSION:
                     m3 = re.match(Ss + group(L("1.") + digits) + Ss + EOS,
                                   value)
                     if m3:
-                        attrs[name] = wspace + name + eq + quotes + m3.group(1) + quotes
+                        attrs[name] = wspace + name + eq + \
+                                quotes + m3.group(1) + quotes
                     else:
                         pass  # TODO: warn: expected 1.x
-                elif name==ENCODING:
+                elif name == ENCODING:
                     m3 = re.match(Ss + group(Name) + Ss + EOS, value)
                     if m3:
-                        attrs[name] = wspace + name + eq + quotes + m3.group(1) + quotes
+                        attrs[name] = wspace + name + eq + \
+                                quotes + m3.group(1) + quotes
                     else:
                         pass  # TODO: warn: expected a name
-                elif name==STANDALONE:
+                elif name == STANDALONE:
                     m3 = re.match(
                         Ss + oneof([
                             group(oneof([
                     if m3:
                         yes, no = m3.groups()
                         if yes:
-                            attrs[name] = wspace + name + eq + quotes + encode('yes') + quotes
+                            attrs[name] = wspace + name + eq + \
+                                    quotes + encode('yes') + quotes
                         else:
-                            attrs[name] = wspace + name + eq + quotes + encode('no') + quotes
+                            attrs[name] = wspace + name + eq + \
+                                    quotes + encode('no') + quotes
                     else:
                         pass  # TODO: warn: expected yes or no
                 else:
             attrs[ENCODING] = encode(" encoding='%s'" % enc)
         m4 = re.compile(EndDecl).match(xml, pos)
         if m4:
-            return (prefix + encode('<?xml') +
-                    attrs.get(VERSION, encode(" version='%s'" % default_version)) +
-                    (attrs.get(ENCODING) if ENCODING in attrs else '') +
-                    (attrs.get(STANDALONE) if STANDALONE in attrs else '') +
-                    m4.group(1).replace(encode('\f'), encode(' ')) +
-                    encode('?>') + xml[m4.end():])
+            return (
+                prefix + encode('<?xml') +
+                attrs.get(VERSION, encode(" version='%s'" % default_version)) +
+                (attrs.get(ENCODING) if ENCODING in attrs else '') +
+                (attrs.get(STANDALONE) if STANDALONE in attrs else '') +
+                m4.group(1).replace(encode('\f'), encode(' ')) +
+                encode('?>') + xml[m4.end():])
         else:
             m5 = re.compile(oneof([L('>'), L('<')])).search(xml, pos)
             if m5:
-                if m5.group()==encode('>'):
+                if m5.group() == encode('>'):
                     endpos = m5.end()
                 else:
                     endpos = m5.start()
         xml = xml.decode(enc, 'strict')  # reverse the encoding done earlier
     return xml  # no decl detected
 
+
 def fix_doctype(html):
     """\
     Searches for a doctype declaration at the start of html, after any
     NameStartChar = (u'[:A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D'
                      u'\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'
                      u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]')
-    if len(u'\U00010000')==1:
+    if len(u'\U00010000') == 1:
         NameStartChar = NameStartChar[:-1] + u'\U00010000-\U000EFFFF]'
     NameChar = NameStartChar[:-1] + u"0-9\xB7\u0300-\u036F\u203F-\u2040\-]"
     Name = NameStartChar + any(NameChar)
     def ERROR(message, charpos=None):
         if charpos is None:
             charpos = pos
-        line = html.count('\n', 0, charpos)+1
+        line = html.count('\n', 0, charpos) + 1
         offset = charpos - html.rfind('\n', 0, charpos)
         raise ValidationError(message, charpos, line, offset, [])
 
                      fix, body)
     return before + doctype + body, m.end()
 
+
 def xhtmlify(html, encoding=None,
                    self_closing_tags=SELF_CLOSING_TAGS,
                    cdata_tags=CDATA_TAGS,
     # "in HTML, the Formfeed character (U+000C) is treated as white space"
     html = html.replace(u'\u000C', u' ')
     # Replace disallowed characters with U+FFFD (unicode replacement char)
-    if len(u'\U00010000')==1:
+    if len(u'\U00010000') == 1:
         html = re.sub(  # XML 1.0 section 2.2, "Char" production
             u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD'
               u'\U00010000-\U0010FFFF]',  # <-- 32 bit characters
     def ERROR(message, charpos=None):
         if charpos is None:
             charpos = pos
-        line = html.count('\n', 0, charpos)+1
+        line = html.count('\n', 0, charpos) + 1
         offset = charpos - html.rfind('\n', 0, charpos)
         raise ValidationError(message, charpos, line, offset, tags)
 
     doctype, lastpos = fix_doctype(html)
     output(doctype)
     if html.startswith('<?xml') or html.startswith(u'\ufeff<?xml'):
-        pos = html.find('>')+1
+        pos = html.find('>') + 1
         if not doctype:
             output(html[:pos])
             lastpos = pos
                     output('<!DOCTYPE')
                     lastpos = tag_match.start() + len('<!doctype')
                 continue
-            assert whole_tag=='<'
+            assert whole_tag == '<'
             if prevtag in cdata_tags:
                 continue  # ignore until we have all the text
             else:
         text = html[lastpos:pos]
         if prevtag in cdata_tags:
             m = re.match(r'/(%s)[ \t\r\n]*\Z' % NAME_RE, innards)
-            if not m or m.group(1).lower()!=prevtag:
+            if not m or m.group(1).lower() != prevtag:
                 continue  # not the closing tag we need, keep treating as text
             output(cdatafix(text))
         else:
             output(ampfix(text))
         m = re.compile(INNARDS_RE, re.DOTALL).match(innards)
-        if m.group(1): # opening tag
+        if m.group(1):  # opening tag
             endslash = m.group(2)
             m = re.match(NAME_RE, innards)
             TagName, attrs = m.group(), innards[m.end():]
             tagname = TagName.lower()
             attrs = fix_attrs(tagname, attrs,
                 ERROR=lambda msg, relpos:
-                        ERROR(msg, tag_match.start(1)+m.end()+relpos))
+                        ERROR(msg, tag_match.start(1) + m.end() + relpos))
             if prevtag in self_closing_tags:
                 tags.pop()
                 prevtag = tags and tags[-1][0].lower() or None
             bad_parents = prohibitors_of.get(tagname, [])
             for ancestor, _ in tags:
                 if ancestor in bad_parents:
-                    if tagname==ancestor:
+                    if tagname == ancestor:
                         other_text = 'other '
                     else:
                         other_text = ''
             # I'm assuming only the tags listed below can self-nest,
             # and we automatically close <p> tags before structural tags.
             # HTML5 has many others like <section> that we don't support.
-            if (tagname==prevtag and tagname not in ('div', 'span',
+            if (tagname == prevtag and tagname not in ('div', 'span',
                     'fieldset', 'q', 'blockquote', 'ins', 'del', 'bdo',
                     'sub', 'sup', 'big', 'small')
-               ) or (prevtag=='p' and tagname in structural_tags):
+               ) or (prevtag == 'p' and tagname in structural_tags):
                 tags.pop()
                 output('</%s>' % prevtag)
                 #prevtag = tags and tags[-1][0].lower() or None  # not needed
             if endslash:
                 output('<%s%s>' % (tagname, attrs))
             elif tagname in self_closing_tags:
-                if attrs.rstrip()==attrs:
+                if attrs.rstrip() == attrs:
                     attrs += ' '
                 output('<%s%s/>' % (tagname, attrs))  # preempt any closing tag
                 tags.append((TagName, pos))
             else:
                 output('<%s%s>' % (tagname, attrs))
                 tags.append((TagName, pos))
-        elif m.group(3): # closing tag
+        elif m.group(3):  # closing tag
             TagName = re.match(r'/(\w+)', innards).group(1)
             tagname = TagName.lower()
             if prevtag in self_closing_tags:
                 # The tag has already been output in self-closed form.
-                if prevtag==tagname: # explicit close
+                if prevtag == tagname:  # explicit close
                     # Minor hack: discard any whitespace we just output
                     if result[-1].strip():
                         ERROR("Self-closing tag <%s/> is not empty" %
             # If we have found a mismatched close tag, we may insert
             # a close tag for the previous tag to fix it in some cases.
             # Specifically, closing a container can close an open child.
-            if prevtag!=tagname and (
-                 (prevtag=='p' and tagname in structural_tags) or
-                 (prevtag=='li' and tagname in ('ol', 'ul')) or
-                 (prevtag=='dd' and tagname=='dl') or
-                 (prevtag=='area' and tagname=='map') or
-                 (prevtag=='td' and tagname=='tr') or
-                 (prevtag=='th' and tagname=='tr')
+            if prevtag != tagname and (
+                 (prevtag == 'p' and tagname in structural_tags) or
+                 (prevtag == 'li' and tagname in ('ol', 'ul')) or
+                 (prevtag == 'dd' and tagname == 'dl') or
+                 (prevtag == 'area' and tagname == 'map') or
+                 (prevtag == 'td' and tagname == 'tr') or
+                 (prevtag == 'th' and tagname == 'tr')
             ):
                 output('</%s>' % prevtag)
                 tags.pop()
                 prevtag = tags and tags[-1][0].lower() or None
-            if prevtag==tagname:
+            if prevtag == tagname:
                 if tagname not in self_closing_tags:
                     output(tag_match.group().lower())
                     tags.pop()
             else:
                 ERROR("Unexpected closing tag </%s>" % TagName)
-        elif m.group(4): # mismatch
+        elif m.group(4):  # mismatch
             ERROR("Malformed tag")
         else:
             # We don't do any validation on pre-processing tags (<? ... >).
         result = result.encode(encoding)
     return result
 
+
 def test(html=None):
     if html is None:
         import sys
-        if len(sys.argv)==2:
-            if sys.argv[1]=='-':
+        if len(sys.argv) == 2:
+            if sys.argv[1] == '-':
                 html = sys.stdin.read()
             else:
                 html = open(sys.argv[1]).read()
             sys.exit('usage: %s HTMLFILE' % sys.argv[0])
     xhtml = xhtmlify(html)
     try:
-        assert xhtml==xhtmlify(xhtml)
+        assert xhtml == xhtmlify(xhtml)
     except ValidationError:
         print xhtml
         raise
     xmlparse(re.sub('(?s)<!(?!\[).*?>', '', xhtml))  # ET can't handle <!...>
-    if len(sys.argv)==2:
+    if len(sys.argv) == 2:
         sys.stdout.write(xhtml)
     return xhtml
 
+
 def xmlparse(snippet, encoding=None, wrap=None):
     """Parse snippet as XML with ElementTree/expat.  By default it wraps the
        snippet in an outer <document> element before parsing (unless the
     except xml.parsers.expat.ExpatError, e:
         lineno, offset = e.lineno, e.offset
         lineno -= 1
-        if lineno==input.count('\n'):  # last line => </document>
+        if lineno == input.count('\n'):  # last line => </document>
             lineno -= 1
             offset = len(snippet) - snippet.rfind('\n')
         message = re.sub(r'line \d+', 'line %d' % lineno,
         parse_error.code = e.code
         raise parse_error
 
+
 def sniff_encoding(xml):
     """Detects the XML encoding as per XML 1.0 section F.1."""
     if isinstance(xml, str):
     digit = charset('0123456789')
     digits = digit + '+'
     punc = charset('._-')
-    name = '(?:%s%s*)' % (oneof([upper, lower]), 
+    name = '(?:%s%s*)' % (oneof([upper, lower]),
                           oneof([upper, lower, digit, punc]))
-    Ss = charset(' \t\r\n')+'*'  # optional white space
-    Sp = charset(' \t\r\n')+'+'  # required white space
+    Ss = charset(' \t\r\n') + '*'  # optional white space
+    Sp = charset(' \t\r\n') + '+'  # required white space
     Eq = ''.join([Ss, L('='), Ss])
     VersionInfo = ''.join([
-        Sp, L('version'), Eq, oneof([L("'1.")+digits+L("'"),
-                                     L('"1.')+digits+L('"')]) ])
+        Sp,
+        L('version'),
+        Eq,
+        oneof([
+            L("'1.") + digits + L("'"),
+            L('"1.') + digits + L('"'),
+        ])
+    ])
     EncodingDecl = ''.join([
-        Sp, L('encoding'), Eq, oneof([
+        Sp,
+        L('encoding'),
+        Eq,
+        oneof([
             L("'") + '(?P<enc_dq>%s)' % name + L("'"),
-            L('"') + '(?P<enc_sq>%s)' % name + L('"') ]) ])
+            L('"') + '(?P<enc_sq>%s)' % name + L('"')
+        ])
+    ])
     # standalone="yes" is valid XML but almost certainly a lie...
     SDDecl = ''.join([
-        Sp, L('standalone'), Eq, oneof([
-            L("'")+oneof([L('yes'), L('no')])+L("'"),
-            L('"')+oneof([L('yes'), L('no')])+L('"') ]) ])
-    R = ''.join([prefix, L('<?xml'), VersionInfo, optional(EncodingDecl),
-                 optional(SDDecl), Ss, L('?>') ])
+        Sp,
+        L('standalone'),
+        Eq,
+        oneof([
+            L("'") + oneof([L('yes'), L('no')]) + L("'"),
+            L('"') + oneof([L('yes'), L('no')]) + L('"'),
+        ])
+    ])
+    R = ''.join([
+        prefix,
+        L('<?xml'),
+        VersionInfo,
+        optional(EncodingDecl),
+        optional(SDDecl),
+        Ss,
+        L('?>')
+    ])
     m = re.match(R, xml)
     if m:
         encvalue = m.group('enc_dq')
                 return enc
         decl_enc = encvalue.decode(enc).encode('ascii')
         bom_codec = None
+
         def get_codec(encoding):
             encoding = encoding.lower()
-            if encoding=='ebcdic':
+            if encoding == 'ebcdic':
                 encoding = 'cp037'  # good enough
             elif encoding in ('utf_16_le', 'utf_16_be'):
                 encoding = 'utf_16'
         except LookupError:
             pass  # unknown BOM codec, old version of Python maybe?
         try:
-            if (bom_codec and enc==enc.lower() and
-                get_codec(decl_enc)!=bom_codec):
+            if (bom_codec and enc == enc.lower() and
+                get_codec(decl_enc) != bom_codec):
                     raise ValidationError(
                         "Multiply-specified encoding "
                         "(BOM: %s, XML decl: %s)" % (enc, decl_enc),
     else:
         return 'UTF-8'
 
+
 def sniff_bom_encoding(xml):
     """Reads any byte-order marker. Returns the implied encoding.
        If the returned encoding is lowercase it means the BOM uniquely
     # Warning: The UTF-32 codecs aren't present before Python 2.6...
     # See also http://bugs.python.org/issue1399
     enc = {
-        '\x00\x00\xFE\xFF': 'utf_32', #UCS4 1234, utf_32_be with BOM
-        '\xFF\xFE\x00\x00': 'utf_32', #UCS4 4321, utf_32_le with BOM
-        '\x00\x00\xFF\xFE': 'undefined', #UCS4 2143 (rare, we give up)
-        '\xFE\xFF\x00\x00': 'undefined', #UCS4 3412 (rare, we give up)
-        '\x00\x00\x00\x3C': 'UTF_32_BE', #UCS4 1234 (no BOM)
-        '\x3C\x00\x00\x00': 'UTF_32_LE', #UCS4 4321 (no BOM)
-        '\x00\x00\x3C\x00': 'undefined', #UCS4 2143 (no BOM, we give up)
-        '\x00\x3C\x00\x00': 'undefined', #UCS4 3412 (no BOM, we give up)
-        '\x00\x3C\x00\x3F': 'UTF_16_BE', # missing BOM
-        '\x3C\x00\x3F\x00': 'UTF_16_LE', # missing BOM
+        '\x00\x00\xFE\xFF': 'utf_32',  # UCS4 1234, utf_32_be with BOM
+        '\xFF\xFE\x00\x00': 'utf_32',  # UCS4 4321, utf_32_le with BOM
+        '\x00\x00\xFF\xFE': 'undefined',  # UCS4 2143 (rare, we give up)
+        '\xFE\xFF\x00\x00': 'undefined',  # UCS4 3412 (rare, we give up)
+        '\x00\x00\x00\x3C': 'UTF_32_BE',  # UCS4 1234 (no BOM)
+        '\x3C\x00\x00\x00': 'UTF_32_LE',  # UCS4 4321 (no BOM)
+        '\x00\x00\x3C\x00': 'undefined',  # UCS4 2143 (no BOM, we give up)
+        '\x00\x3C\x00\x00': 'undefined',  # UCS4 3412 (no BOM, we give up)
+        '\x00\x3C\x00\x3F': 'UTF_16_BE',  # missing BOM
+        '\x3C\x00\x3F\x00': 'UTF_16_LE',  # missing BOM
         '\x3C\x3F\x78\x6D': 'ASCII',
         '\x4C\x6F\xA7\x94': 'CP037',  # EBCDIC (unknown code page)
     }.get(xml[:4])
-    if enc and enc==enc.lower():
+    if enc and enc == enc.lower():
         return enc
     if not enc:
-        if xml[:3]=='\xEF\xBB\xBF':
+        if xml[:3] == '\xEF\xBB\xBF':
             return 'utf_8_sig'  # UTF-8 with these three bytes prefixed
-        elif xml[:2]=='\xFF\xFE':
+        elif xml[:2] == '\xFF\xFE':
             return 'utf_16_le'
-        elif xml[:2]=='\xFE\xFF':
+        elif xml[:2] == '\xFE\xFF':
             return 'utf_16_be'
         else:
             enc = 'UTF-8'  # "Other"
     return enc
 
-if __name__=='__main__':
+if __name__ == '__main__':
     test()
-
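
Finally, a sketch exercising the public entry points: the idempotence check
mirrors the assertion in test() above, and the two encoding results follow
the fallback branches shown in sniff_encoding and sniff_bom_encoding:

    >>> out = xhtmlify('<p>one<br><p>two')
    >>> out == xhtmlify(out)   # xhtmlify output re-validates unchanged
    True
    >>> sniff_encoding('<a/>')   # no declaration, no BOM
    'UTF-8'
    >>> sniff_bom_encoding('\xef\xbb\xbf<a/>')
    'utf_8_sig'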