Commits

Geoffrey Sneddon committed 9c59aa5

This hopefully brings us more or less in line with PEP 8.

Comments (0)

Files changed (5)

 #!/usr/bin/env python
+# coding=UTF-8
+# Copyright (c) 2008 Geoffrey Sneddon
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
 """usage: anolis [options] input output
 
 Post-process a document, adding cross-references, table of contents, etc.
 
 from anolislib import generator, utils
 
+
 def main():
-	# Create the options parser
-	optParser = getOptParser()
-	opts, args = optParser.parse_args()
-	
-	# Check we have enough arguments
-	if len(args) >= 2:
-		try:
-			# Get options
-			kwargs = vars(opts)
-			
-			# Get input and generate
-			input = open(args[0], "rb")
-			tree = generator.fromFile(input, **kwargs)
-			input.close()
-			
-			# Write output
-			output = open(args[1], "wb")
-			generator.toFile(tree, output, **kwargs)
-			output.close()
-		except (utils.AnolisException, IOError, etree.XMLSyntaxError), e:
-			sys.stderr.write(unicode(e) + u"\n")
-			sys.exit(1)
-	else:
-		sys.stderr.write(u"anolis expects two arguments. Use -h for help\n")
-		sys.exit(2)
+    # Create the options parser
+    optParser = getOptParser()
+    opts, args = optParser.parse_args()
+
+    # Check we have enough arguments
+    if len(args) >= 2:
+        try:
+            # Get options
+            kwargs = vars(opts)
+
+            # Get input and generate
+            input = open(args[0], "rb")
+            tree = generator.fromFile(input, **kwargs)
+            input.close()
+
+            # Write output
+            output = open(args[1], "wb")
+            generator.toFile(tree, output, **kwargs)
+            output.close()
+        except (utils.AnolisException, IOError, etree.XMLSyntaxError), e:
+            sys.stderr.write(unicode(e) + u"\n")
+            sys.exit(1)
+    else:
+        sys.stderr.write(u"anolis expects two arguments. Use -h for help\n")
+        sys.exit(2)
+
 
 def getOptParser():
-	parser = OptionParser(usage = __doc__, version="%prog 1.0")
-	
-	parser.add_option("", "--enable", action="callback", callback=enable,
-		type="string", dest="processes", help="Enable the process given as the option value")
-	
-	parser.add_option("", "--disable", action="callback", callback=disable,
-		type="string", help="Disable the process given as the option value")
-	
-	#parser.add_option("", "", action="store_true",
-	#	dest="xml", help="Use an XML parser/serializer.")
-	
-	parser.add_option("", "--lxml.html", action="store_true",
-		dest="lxml_html", help="Use lxml's HTML parser/serializer.")
-	
-	parser.add_option("", "--newline-char", action="store", type="string",
-		dest="newline_char", help="Set the newline character/string used when creating new newlines. This should match the rest of the newlines in the document.")
-	
-	parser.add_option("", "--indent-char", action="store", type="string",
-		dest="indent_char", help="Set the character/string used when creating indenting new blocks of (X)HTML. This should match the rest of the indentation in the document.")
-	
-	parser.add_option("", "--force-html4-id", action="store_true",
-		dest="force_html4_id", help="Force the ID generation algorithm to create HTML 4 compliant IDs regardless of the DOCTYPE.")
-	
-	parser.add_option("", "--min-depth", action="store", type="int",
-		default=2, dest="min_depth", help="Highest ranking header to number/insert into TOC.")
-	
-	parser.add_option("", "--max-depth", action="store", type="int",
-		default=6, dest="max_depth", help="Lowest ranking header to number/insert into TOC.")
-	
-	parser.add_option("", "--allow-duplicate-dfns", action="store_true",
-		dest="allow_duplicate_dfns", help="Allow multiple definitions of terms when cross-referencing (the last instance of the term is used when referencing it).")
-	
-	parser.add_option("", "--w3c-compat", action="store_true",
-		dest="w3c_compat", help="Behave in a (mostly) compatible way to the W3C CSS WG's Postprocessor (this implies all of the other --w3c-compat options with the exception of --w3c-compat-crazy-substitution, as that is too crazy).")
-	
-	parser.add_option("", "--w3c-compat-xref-elements", action="store_true",
-		dest="w3c_compat_xref_elements", help="Uses the same list of elements to look for cross-references in as the W3C CSS WG's Postprocessor, even when the elements shouldn't semantically be used for cross-reference terms.")
-	
-	parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
-		dest="w3c_compat_xref_a_placement", help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")
-	
-	parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
-		dest="w3c_compat_xref_normalization", help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")
-	
-	parser.add_option("", "--w3c-compat-class-toc", action="store_true",
-		dest="w3c_compat_class_toc", help="Add @class='toc' on every ol element in the table of contents (instead of only the root ol element).")
-	
-	parser.add_option("", "--w3c-compat-substitutions", action="store_true",
-		dest="w3c_compat_substitutions", help="Do W3C specific substitutions.")
-	
-	parser.add_option("", "--w3c-compat-crazy-substitutions", action="store_true",
-		dest="w3c_compat_crazy_substitutions", help="Do crazy W3C specific substitutions, which may cause unexpected behaviour (i.e., replacing random strings within the document with no special marker).")
-	
-	try:
-		import hotshot
-		import hotshot.stats
-		parser.add_option("", "--profile", action="store_true",
-			dest="profile", help=SUPPRESS_HELP)
-	except ImportError:
-		pass
-	
-	parser.add_option("", "--inject-meta-charset", action="store_true",
-		dest="inject_meta_charset", help=SUPPRESS_HELP)
-	
-	parser.add_option("", "--strip-whitespace", action="store_true",
-		dest="strip_whitespace", help=SUPPRESS_HELP)
+    def enable(option, opt_str, value, parser, *args, **kwargs):
+        parser.values.processes.add(opt_str)
 
-	parser.add_option("", "--omit-optional-tags", action="store_true",
-		dest="omit_optional_tags", help=SUPPRESS_HELP)
+    def disable(option, opt_str, value, parser, *args, **kwargs):
+        parser.values.processes.discard(opt_str)
 
-	parser.add_option("", "--quote-attr-values", action="store_true",
-		dest="quote_attr_values", help=SUPPRESS_HELP)
+    parser = OptionParser(usage = __doc__, version="%prog 1.0")
 
-	parser.add_option("", "--use-best-quote-char", action="store_true",
-		dest="use_best_quote_char",	help=SUPPRESS_HELP)
+    parser.add_option("", "--enable", action="callback", callback=enable,
+                      type="string", dest="processes",
+                      help="Enable the process given as the option value")
 
-	parser.add_option("", "--no-minimize-boolean-attributes",
-		action="store_false", default=True,
-		dest="minimize_boolean_attributes", help=SUPPRESS_HELP)
+    parser.add_option("", "--disable", action="callback", callback=disable,
+                      type="string",
+                      help="Disable the process given as the option value")
 
-	parser.add_option("", "--use-trailing-solidus", action="store_true",
-		dest="use_trailing_solidus", help=SUPPRESS_HELP)
+    #parser.add_option("", "", action="store_true",
+    #                  dest="xml", help="Use an XML parser/serializer.")
 
-	parser.add_option("", "--space-before-trailing-solidus",
-		action="store_true", default=False,
-		dest="space_before_trailing_solidus", help=SUPPRESS_HELP)
+    parser.add_option("", "--lxml.html", action="store_true",
+                      dest="lxml_html",
+                      help="Use lxml's HTML parser/serializer.")
 
-	parser.add_option("", "--escape-lt-in-attrs", action="store_true",
-		dest="escape_lt_in_attrs", help=SUPPRESS_HELP)
+    parser.add_option("", "--newline-char", action="store", type="string",
+                      dest="newline_char",
+                      help="Set the newline character/string used when creating new newlines. This should match the rest of the newlines in the document.")
 
-	parser.add_option("", "--escape-rcdata", action="store_true",
-		dest="escape_rcdata", help=SUPPRESS_HELP)
-	
-	parser.set_defaults(
-		processes=set(["sub", "xref", "toc"]),
-		xml=False,
-		lxml_html=False,
-		newline_char=u"\n",
-		indent_char=u"\t",
-		force_html4_id=False,
-		min_depth=2,
-		max_depth=6,
-		allow_duplicate_dfns=False,
-		w3c_compat=False,
-		w3c_compat_xref_elements=False,
-		w3c_compat_xref_a_placement=False,
-		w3c_compat_xref_normalization=False,
-		w3c_compat_class_toc=False,
-		w3c_compat_substitutions=False,
-		w3c_compat_crazy_substitutions=False,
-		profile=False,
-		inject_meta_charset=False,
-		omit_optional_tags=False,
-		quote_attr_values=False,
-		use_best_quote_char=False,
-		minimize_boolean_attributes=False,
-		use_trailing_solidus=False,
-		space_before_trailing_solidus=False,
-		escape_lt_in_attrs=False,
-		escape_rcdata=False
-	)
+    parser.add_option("", "--indent-char", action="store", type="string",
+                      dest="indent_char",
+                      help="Set the character/string used when creating indenting new blocks of (X)HTML. This should match the rest of the indentation in the document.")
 
-	return parser
+    parser.add_option("", "--force-html4-id", action="store_true",
+                      dest="force_html4_id",
+                      help="Force the ID generation algorithm to create HTML 4 compliant IDs regardless of the DOCTYPE.")
 
-def enable(option, opt_str, value, parser, *args, **kwargs):
-	parser.values.processes.add(opt_str)
+    parser.add_option("", "--min-depth", action="store", type="int",
+                      default=2, dest="min_depth",
+                      help="Highest ranking header to number/insert into TOC.")
 
-def disable(option, opt_str, value, parser, *args, **kwargs):
-	parser.values.processes.discard(opt_str)
+    parser.add_option("", "--max-depth", action="store", type="int",
+                      default=6, dest="max_depth",
+                      help="Lowest ranking header to number/insert into TOC.")
+
+    parser.add_option("", "--allow-duplicate-dfns", action="store_true",
+                      dest="allow_duplicate_dfns",
+                      help="Allow multiple definitions of terms when cross-referencing (the last instance of the term is used when referencing it).")
+
+    parser.add_option("", "--w3c-compat", action="store_true",
+                      dest="w3c_compat",
+                      help="Behave in a (mostly) compatible way to the W3C CSS WG's Postprocessor (this implies all of the other --w3c-compat options with the exception of --w3c-compat-crazy-substitution, as that is too crazy).")
+
+    parser.add_option("", "--w3c-compat-xref-elements", action="store_true",
+                      dest="w3c_compat_xref_elements",
+                      help="Uses the same list of elements to look for cross-references in as the W3C CSS WG's Postprocessor, even when the elements shouldn't semantically be used for cross-reference terms.")
+
+    parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
+                      dest="w3c_compat_xref_a_placement",
+                      help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")
+
+    parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
+                      dest="w3c_compat_xref_normalization",
+                      help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")
+
+    parser.add_option("", "--w3c-compat-class-toc", action="store_true",
+                      dest="w3c_compat_class_toc",
+                      help="Add @class='toc' on every ol element in the table of contents (instead of only the root ol element).")
+
+    parser.add_option("", "--w3c-compat-substitutions", action="store_true",
+                      dest="w3c_compat_substitutions",
+                      help="Do W3C specific substitutions.")
+
+    parser.add_option("", "--w3c-compat-crazy-substitutions", action="store_true",
+                      dest="w3c_compat_crazy_substitutions",
+                      help="Do crazy W3C specific substitutions, which may cause unexpected behaviour (i.e., replacing random strings within the document with no special marker).")
+
+    try:
+        import hotshot
+        import hotshot.stats
+        parser.add_option("", "--profile", action="store_true",
+            dest="profile", help=SUPPRESS_HELP)
+    except ImportError:
+        pass
+
+    parser.add_option("", "--inject-meta-charset", action="store_true",
+                      dest="inject_meta_charset", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--strip-whitespace", action="store_true",
+                      dest="strip_whitespace", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--omit-optional-tags", action="store_true",
+                      dest="omit_optional_tags", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--quote-attr-values", action="store_true",
+                      dest="quote_attr_values", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--use-best-quote-char", action="store_true",
+                      dest="use_best_quote_char", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--no-minimize-boolean-attributes",
+                      action="store_false", default=True,
+                      dest="minimize_boolean_attributes", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--use-trailing-solidus", action="store_true",
+                      dest="use_trailing_solidus", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--space-before-trailing-solidus",
+                      action="store_true", default=False,
+                      dest="space_before_trailing_solidus", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--escape-lt-in-attrs", action="store_true",
+                      dest="escape_lt_in_attrs", help=SUPPRESS_HELP)
+
+    parser.add_option("", "--escape-rcdata", action="store_true",
+                      dest="escape_rcdata", help=SUPPRESS_HELP)
+
+    parser.set_defaults(
+        processes=set(["sub", "xref", "toc"]),
+        xml=False,
+        lxml_html=False,
+        newline_char=u"\n",
+        indent_char=u"\t",
+        force_html4_id=False,
+        min_depth=2,
+        max_depth=6,
+        allow_duplicate_dfns=False,
+        w3c_compat=False,
+        w3c_compat_xref_elements=False,
+        w3c_compat_xref_a_placement=False,
+        w3c_compat_xref_normalization=False,
+        w3c_compat_class_toc=False,
+        w3c_compat_substitutions=False,
+        w3c_compat_crazy_substitutions=False,
+        profile=False,
+        inject_meta_charset=False,
+        omit_optional_tags=False,
+        quote_attr_values=False,
+        use_best_quote_char=False,
+        minimize_boolean_attributes=False,
+        use_trailing_solidus=False,
+        space_before_trailing_solidus=False,
+        escape_lt_in_attrs=False,
+        escape_rcdata=False
+    )
+
+    return parser
 
 if __name__ == "__main__":
-	main()
+    main()

anolislib/processes/outliner.py

 rank = {u"h1": -1, u"h2": -2, u"h3": -3, u"h4": -4, u"h5": -5, u"h6": -6,
         u"header": -1}
 
+
 class section(list):
     """Represents the section of a document."""
 
         for child in children:
             child.parent = self
 
+
 class Outliner:
     """Build the outline of an HTML document."""
 
                         # this new section. Let the element being entered be
                         # the new heading for the current section. Abort these
                         # substeps.
-                        if rank[element.tag] < rank[candidate_section.header.tag]:
+                        if rank[element.tag] < \
+                           rank[candidate_section.header.tag]:
                             self.current_section = section()
                             candidate_section.append(self.current_section)
                             self.current_section.header = element
                             break
-                        # Let new candidate section be the section that contains candidate section in the outline of current outlinee.
+                        # Let new candidate section be the section that
+                        # contains candidate section in the outline of current
+                        # outlinee.
                         # Let candidate section be new candidate section.
                         candidate_section = candidate_section.parent
                         # Return to step 2.
-                # Push the element being entered onto the stack. (This causes the algorithm to skip any descendants of the element.)
+                # Push the element being entered onto the stack. (This causes
+                # the algorithm to skip any descendants of the element.)
                 self.stack.append(element)
 
-        # If the current outlinee is null, then there was no sectioning content element or sectioning root element in the DOM. There is no outline.
+        # If the current outlinee is null, then there was no sectioning content
+        # element or sectioning root element in the DOM. There is no outline.
         try:
             return self.outlines[self.current_outlinee]
         except KeyError:
-            return None
+            return None

anolislib/processes/sub.py

 # coding=UTF-8
 # Copyright (c) 2008 Geoffrey Sneddon
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 
 from anolislib import utils
 
-latest_version = re.compile(u"latest[%s]+version" % utils.spaceCharacters, re.IGNORECASE)
+latest_version = re.compile(u"latest[%s]+version" % utils.spaceCharacters,
+                            re.IGNORECASE)
 
-w3c_tr_url_status = re.compile(r"http://www\.w3\.org/TR/[^/]*/(MO|WD|CR|PR|REC|PER|NOTE)-")
+w3c_tr_url_status = r"http://www\.w3\.org/TR/[^/]*/(MO|WD|CR|PR|REC|PER|NOTE)-"
+w3c_tr_url_status = re.compile(w3c_tr_url_status)
 
 year = re.compile(r"\[YEAR[^\]]*\]")
 year_sub = time.strftime(u"%Y", time.gmtime())
 logo_sub = etree.fromstring(u'<p><a href="http://www.w3.org/"><img alt="W3C" src="http://www.w3.org/Icons/w3c_home"/></a></p>')
 
 copyright = u"copyright"
-copyright_sub = etree.fromstring(u'<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &#xA9; ' + time.strftime(u"%Y", time.gmtime()) + u' <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>&#xAE;</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.org/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>')
+copyright_sub = etree.fromstring(u'<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &#xA9; %s <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a><sup>&#xAE;</sup> (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>, <a href="http://www.ercim.org/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>, <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>' % time.strftime(u"%Y", time.gmtime()))
 
 basic_comment_subs = ()
 
+
 class sub(object):
     """Perform substitutions."""
-    
-    def __init__(self, ElementTree, w3c_compat=False, w3c_compat_substitutions=False, w3c_compat_crazy_substitutions=False, **kwargs):
-        if w3c_compat or w3c_compat_substitutions or w3c_compat_crazy_substitutions:
+
+    def __init__(self, ElementTree, w3c_compat=False,
+                 w3c_compat_substitutions=False,
+                 w3c_compat_crazy_substitutions=False, **kwargs):
+        if w3c_compat or w3c_compat_substitutions or \
+           w3c_compat_crazy_substitutions:
             self.w3c_status = self.getW3CStatus(ElementTree, **kwargs)
-        self.stringSubstitutions(ElementTree, w3c_compat, w3c_compat_substitutions, w3c_compat_crazy_substitutions, **kwargs)
-        self.commentSubstitutions(ElementTree, w3c_compat, w3c_compat_substitutions, w3c_compat_crazy_substitutions, **kwargs)
-    
-    def stringSubstitutions(self, ElementTree, w3c_compat=False, w3c_compat_substitutions=False, w3c_compat_crazy_substitutions=False, **kwargs):
+        self.stringSubstitutions(ElementTree, w3c_compat,
+                                 w3c_compat_substitutions,
+                                 w3c_compat_crazy_substitutions, **kwargs)
+        self.commentSubstitutions(ElementTree, w3c_compat,
+                                  w3c_compat_substitutions,
+                                  w3c_compat_crazy_substitutions, **kwargs)
+
+    def stringSubstitutions(self, ElementTree, w3c_compat=False,
+                            w3c_compat_substitutions=False,
+                            w3c_compat_crazy_substitutions=False, **kwargs):
         # Get doc_title from the title element
         try:
-            doc_title = utils.textContent(ElementTree.getroot().find(u"head").find(u"title"))
+            doc_title = utils.textContent(ElementTree.getroot().find(u"head")
+                                                               .find(u"title"))
         except (AttributeError, TypeError):
             doc_title = u""
-        
+
         if w3c_compat or w3c_compat_substitutions:
             # Get the right long status
             doc_longstatus = longstatus_map[self.w3c_status]
-        
+
         if w3c_compat_crazy_substitutions:
             # Get the right stylesheet
-            doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + self.w3c_status
-        
+            doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + \
+                                 self.w3c_status
+
         # Get all the subs we want
-        instance_string_subs = string_subs + ((title, doc_title, title_identifier),)
-        
+        instance_string_subs = string_subs + \
+                               ((title, doc_title, title_identifier), )
+
         # And even more in compat. mode
         if w3c_compat or w3c_compat_substitutions:
-            instance_string_subs += ((status, self.w3c_status, status_identifier),
-                                     (longstatus, doc_longstatus, longstatus_identifier))
-        
+            instance_string_subs += ((status, self.w3c_status,
+                                      status_identifier),
+                                     (longstatus, doc_longstatus,
+                                      longstatus_identifier))
+
         # And more that aren't even enabled by default in compat. mode
         if w3c_compat_crazy_substitutions:
-            instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet, w3c_stylesheet_identifier),)
-        
+            instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
+                                      w3c_stylesheet_identifier), )
+
         for node in ElementTree.iter():
             for regex, sub, identifier in instance_string_subs:
                 if node.text is not None and identifier in node.text:
                 for name, value in node.attrib.items():
                     if identifier in value:
                         node.attrib[name] = regex.sub(sub, value)
-    
-    def commentSubstitutions(self, ElementTree, w3c_compat=False, w3c_compat_substitutions=False, w3c_compat_crazy_substitutions=False, **kwargs):
+
+    def commentSubstitutions(self, ElementTree, w3c_compat=False, \
+                             w3c_compat_substitutions=False,
+                             w3c_compat_crazy_substitutions=False, **kwargs):
         # Basic substitutions
         instance_basic_comment_subs = basic_comment_subs
-        
+
         # Add more basic substitutions in compat. mode
         if w3c_compat or w3c_compat_substitutions:
             instance_basic_comment_subs += ((logo, logo_sub),
                                             (copyright, copyright_sub))
-        
+
         # Set of nodes to remove
         to_remove = set()
-        
+
         # Link
         in_link = False
         for node in ElementTree.iter():
             if in_link:
-                if node.tag is etree.Comment and node.text.strip(utils.spaceCharacters) == u"end-link":
+                if node.tag is etree.Comment and \
+                   node.text.strip(utils.spaceCharacters) == u"end-link":
                     if node.getparent() is not link_parent:
-                        raise DifferentParentException, u"begin-link and end-link have different parents"
+                        raise DifferentParentException(u"begin-link and end-link have different parents")
                     utils.removeInteractiveContentChildren(link)
                     link.set(u"href", utils.textContent(link))
                     in_link = False
                     if node.getparent() is link_parent:
                         link.append(deepcopy(node))
                     to_remove.add(node)
-            elif node.tag is etree.Comment and node.text.strip(utils.spaceCharacters) == u"begin-link":
+            elif node.tag is etree.Comment and \
+                 node.text.strip(utils.spaceCharacters) == u"begin-link":
                 link_parent = node.getparent()
                 in_link = True
                 link = etree.Element(u"a")
                 link.text = node.tail
                 node.tail = None
                 node.addnext(link)
-        
+
         # Basic substitutions
         for comment, sub in instance_basic_comment_subs:
             begin_sub = u"begin-" + comment
             in_sub = False
             for node in ElementTree.iter():
                 if in_sub:
-                    if node.tag is etree.Comment and node.text.strip(utils.spaceCharacters) == end_sub:
+                    if node.tag is etree.Comment and \
+                       node.text.strip(utils.spaceCharacters) == end_sub:
                         if node.getparent() is not sub_parent:
-                            raise DifferentParentException, u"%s and %s have different parents" % begin_sub, end_sub
+                            raise DifferentParentException(u"%s and %s have different parents" % begin_sub, end_sub)
                         in_sub = False
                     else:
                         to_remove.add(node)
                         node.addprevious(etree.Comment(end_sub))
                         node.getprevious().tail = node.tail
                         to_remove.add(node)
-        
+
         # Remove nodes
         for node in to_remove:
             node.getparent().remove(node)
-    
+
     def getW3CStatus(self, ElementTree, **kwargs):
-        # Get all text nodes that contain case-insensitively "latest version" with any amount of whitespace inside the phrase, or contain http://www.w3.org/TR/
+        # Get all text nodes that contain case-insensitively "latest version"
+        # with any amount of whitespace inside the phrase, or contain
+        # http://www.w3.org/TR/
         for text in ElementTree.xpath(u"//text()[contains(translate(., 'LATEST', 'latest'), 'latest') and contains(translate(., 'VERSION', 'version'), 'version') or contains(., 'http://www.w3.org/TR/')]"):
             if latest_version.search(text):
                 return u"ED"
         else:
             return u"ED"
 
+
 class DifferentParentException(utils.AnolisException):
     """begin-link and end-link do not have the same parent."""
     pass

anolislib/processes/toc.py

 # coding=UTF-8
 # Copyright (c) 2008 Geoffrey Sneddon
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 from anolislib.processes import outliner
 
 # These are just the non-interactive elements to be removed
-remove_elements_from_toc = frozenset([u"dfn",])
+remove_elements_from_toc = frozenset([u"dfn", ])
 # These are, however, all the attributes to be removed
-remove_attributes_from_toc = frozenset([u"id",])
+remove_attributes_from_toc = frozenset([u"id", ])
+
 
 class toc(object):
     """Build and add TOC."""
-    
+
     toc = None
-    
+
     def __init__(self, ElementTree, **kwargs):
         self.toc = etree.Element(u"ol", {u"class": u"toc"})
         self.buildToc(ElementTree, **kwargs)
         self.addToc(ElementTree, **kwargs)
-    
-    def buildToc(self, ElementTree, min_depth = 2, max_depth = 6, w3c_compat = False, w3c_compat_class_toc = False, **kwargs):
+
+    def buildToc(self, ElementTree, min_depth=2, max_depth=6, w3c_compat=False,
+                 w3c_compat_class_toc=False, **kwargs):
         # Build the outline of the document
         outline_creator = outliner.Outliner(ElementTree, **kwargs)
         outline = outline_creator.build(**kwargs)
-        
+
         # Get a list of all the top level sections, and their depth (0)
         sections = [(section, 0) for section in reversed(outline)]
-        
+
         # Numbering
         num = []
-        
-        # Set of elements to remove (due to odd behaviour of Element.iter() this has to be done afterwards)
+
+        # Set of elements to remove (removing elements being iterated over is
+        # undefined).
         to_remove = set()
-        
+
         # Loop over all sections in a DFS
         while sections:
             # Get the section and depth at the end of list
             section, depth = sections.pop()
-                    
+
             # If we have a header, regardless of how deep we are
             if section.header is not None:
                 # Get the element that represents the section header's text
                 if section.header.tag == u"header":
                     i = 1
                     while i <= 6:
-                        section_header_text_element = section.header.find(u"h" + unicode(i))
-                        if section_header_text_element is not None:
+                        header_text = section.header \
+                                                      .find(u"h" + unicode(i))
+                        if header_text is not None:
                             break
                     else:
-                        section_header_text_element = None
+                        header_text = None
                 else:
-                    section_header_text_element = section.header
+                    header_text = section.header
             else:
-                section_header_text_element = None
-            
+                header_text = None
+
             # If we have a section heading text element, regardless of depth
-            if section_header_text_element is not None:
+            if header_text is not None:
                 # Remove any existing number
-                for element in section_header_text_element.iter(u"span"):
+                for element in header_text.iter(u"span"):
                     if utils.elementHasClass(element, u"secno"):
                         # Preserve the element tail
                         if element.tail is not None:
                                     element.getparent().text += element.tail
                         # Remove the element
                         to_remove.add(element)
-            
-            # Check we're in the valid depth range (min/max_depth are 1 based, depth is 0 based)
+
+            # Check we're in the valid depth range (min/max_depth are 1 based,
+            # depth is 0 based)
             if depth >= min_depth - 1 and depth <= max_depth - 1:
-                # Calculate the corrected depth (i.e., the actual depth within the numbering/TOC)
+                # Calculate the corrected depth (i.e., the actual depth within
+                # the numbering/TOC)
                 corrected_depth = depth - min_depth + 1
-                
+
                 # Numbering:
                 # No children, no sibling, move back to parent's sibling
                 if corrected_depth + 1 < len(num):
                 # Children
                 elif corrected_depth == len(num):
                     num.append(0)
-                
+
                 # Increment the current section's number
-                if section_header_text_element is not None and not utils.elementHasClass(section_header_text_element, u"no-num") or section_header_text_element is None and section:
+                if header_text is not None and \
+                   not utils.elementHasClass(header_text, u"no-num") or \
+                   header_text is None and section:
                     num[-1] += 1
-                
-                # Get the current TOC section for this depth, and add another item to it
-                if section_header_text_element is not None and not utils.elementHasClass(section_header_text_element, u"no-toc") or section_header_text_element is None and section:
-                    # Find the appropriate section of the TOC 
+
+                # Get the current TOC section for this depth, and add another
+                # item to it
+                if header_text is not None and \
+                   not utils.elementHasClass(header_text, u"no-toc") or \
+                   header_text is None and section:
+                    # Find the appropriate section of the TOC
                     i = 0
                     toc_section = self.toc
                     while i < corrected_depth:
                         try:
-                            # If the final li has no children, or the last children isn't an ol element
-                            if len(toc_section[-1]) == 0 or toc_section[-1][-1].tag != u"ol":
+                            # If the final li has no children, or the last
+                            # child isn't an ol element
+                            if len(toc_section[-1]) == 0 or \
+                               toc_section[-1][-1].tag != u"ol":
                                 toc_section[-1].append(etree.Element(u"ol"))
-                                self.indentNode(toc_section[-1][-1], (i + 1) * 2, **kwargs)
+                                self.indentNode(toc_section[-1][-1],
+                                                (i + 1) * 2, **kwargs)
                                 if w3c_compat or w3c_compat_class_toc:
                                     toc_section[-1][-1].set(u"class", u"toc")
                         except IndexError:
                             # If the current ol has no li in it
                             toc_section.append(etree.Element(u"li"))
-                            self.indentNode(toc_section[0], (i + 1) * 2 - 1, **kwargs)
+                            self.indentNode(toc_section[0], (i + 1) * 2 - 1,
+                                            **kwargs)
                             toc_section[0].append(etree.Element(u"ol"))
-                            self.indentNode(toc_section[0][0], (i + 1) * 2, **kwargs)
+                            self.indentNode(toc_section[0][0], (i + 1) * 2,
+                                            **kwargs)
                             if w3c_compat or w3c_compat_class_toc:
                                 toc_section[0][0].set(u"class", u"toc")
-                        # TOC Section is now the final child (ol) of the final item (li) in the previous section
+                        # TOC Section is now the final child (ol) of the final
+                        # item (li) in the previous section
                         assert toc_section[-1].tag == u"li"
                         assert toc_section[-1][-1].tag == u"ol"
                         toc_section = toc_section[-1][-1]
                     item = etree.Element(u"li")
                     toc_section.append(item)
                     self.indentNode(item, (i + 1) * 2 - 1, **kwargs)
-                    
+
                 # If we have a header
-                if section_header_text_element is not None:
-                    # Remove all the elements in the list of nodes to remove (so that the removal of existing numbers doesn't lead to crazy IDs)
+                if header_text is not None:
+                    # Remove all the elements in the list of nodes to remove
+                    # (so that the removal of existing numbers doesn't lead to
+                    # crazy IDs)
                     for element in to_remove:
                         element.getparent().remove(element)
                     to_remove = set()
-                    
+
                     # Add ID to header
-                    id = utils.generateID(section_header_text_element, **kwargs)
-                    if section_header_text_element.get(u"id") is not None:
-                        del section_header_text_element.attrib[u"id"]
+                    id = utils.generateID(header_text, **kwargs)
+                    if header_text.get(u"id") is not None:
+                        del header_text.attrib[u"id"]
                     section.header.set(u"id", id)
-                    
+
                     # Add number, if @class doesn't contain no-num
-                    if not utils.elementHasClass(section_header_text_element, u"no-num"):
-                        section_header_text_element[0:0] = [etree.Element(u"span", {u"class": u"secno"})]
-                        section_header_text_element[0].tail = section_header_text_element.text
-                        section_header_text_element.text = None
-                        section_header_text_element[0].text = u".".join(map(unicode, num))
-                        section_header_text_element[0].text += u" "
+                    if not utils.elementHasClass(header_text, u"no-num"):
+                        header_text[0:0] = [etree.Element(u"span", {u"class":
+                                                                    u"secno"})]
+                        header_text[0].tail = header_text.text
+                        header_text.text = None
+                        header_text[0].text = u".".join(map(unicode, num))
+                        header_text[0].text += u" "
                     # Add to TOC, if @class doesn't contain no-toc
-                    if not utils.elementHasClass(section_header_text_element, u"no-toc"):
-                        link = deepcopy(section_header_text_element)
+                    if not utils.elementHasClass(header_text, u"no-toc"):
+                        link = deepcopy(header_text)
                         item.append(link)
                         # Make it link to the header
                         link.tag = u"a"
                         utils.removeInteractiveContentChildren(link)
                         # Remove other child elements
                         for element_name in remove_elements_from_toc:
-                            # Iterate over all the desendants of the new link with that element name
+                            # Iterate over all the descendants of the new link
+                            # with that element name
                             for element in link.iterdescendants(element_name):
-                                # Copy content, to prepare for the node being removed
+                                # Copy content, to prepare for the node being
+                                # removed
                                 utils.copyContentForRemoval(element)
-                                # Add the element of the list of elements to remove
+                                # Add the element to the list of elements to
+                                # remove
                                 to_remove.add(element)
                         # Remove unwanted attributes
                         for element in link.iter(tag=etree.Element):
                             for attribute_name in remove_attributes_from_toc:
                                 if element.get(attribute_name) is not None:
                                     del element.attrib[attribute_name]
-                        # We don't want the old tail (or any tail, for that matter)
+                        # We don't want the old tail
                         link.tail = None
-            # Add subsections in reverse order (so the next one is executed next) with a higher depth value
-            sections.extend([(child_section, depth + 1) for child_section in reversed(section)])
+            # Add subsections in reverse order (so the next one is executed
+            # next) with a higher depth value
+            sections.extend([(child_section, depth + 1)
+                             for child_section in reversed(section)])
         # Remove all the elements in the list of nodes to remove
         for element in to_remove:
             element.getparent().remove(element)
-    
+
     def addToc(self, ElementTree, **kwargs):
         to_remove = set()
         in_toc = False
         for node in ElementTree.iter():
             if in_toc:
-                if node.tag is etree.Comment and node.text.strip(utils.spaceCharacters) == u"end-toc":
+                if node.tag is etree.Comment and \
+                   node.text.strip(utils.spaceCharacters) == u"end-toc":
                     if node.getparent() is not toc_parent:
-                        raise DifferentParentException, u"begin-toc and end-toc have different parents"
+                        raise DifferentParentException(u"begin-toc and end-toc have different parents")
                     in_toc = False
                 else:
                     to_remove.add(node)
                     to_remove.add(node)
         for node in to_remove:
             node.getparent().remove(node)
-    
-    def indentNode(self, node, indent=0, newline_char=u"\n", indent_char=u"\t", **kwargs):
+
+    def indentNode(self, node, indent=0, newline_char=u"\n", indent_char=u"\t",
+                   **kwargs):
         whitespace = newline_char + indent_char * indent
         if node.getprevious() is not None:
             if node.getprevious().tail is None:
             else:
                 node.getparent().text += whitespace
 
+
 class DifferentParentException(utils.AnolisException):
     """begin-toc and end-toc do not have the same parent."""
     pass

anolislib/processes/xref.py

 # coding=UTF-8
 # Copyright (c) 2008 Geoffrey Sneddon
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 from anolislib import utils
 
 instance_elements = frozenset([u"span", u"abbr", u"code", u"var", u"i"])
-w3c_instance_elements = frozenset([u"abbr", u"acronym", u"b", u"bdo", u"big", u"code", u"del", u"em", u"i", u"ins", u"kbd", u"label", u"legend", u"q", u"samp", u"small", u"span", u"strong", u"sub", u"sup", u"tt", u"var"])
+w3c_instance_elements = frozenset([u"abbr", u"acronym", u"b", u"bdo", u"big",
+                                   u"code", u"del", u"em", u"i", u"ins",
+                                   u"kbd", u"label", u"legend", u"q", u"samp",
+                                   u"small", u"span", u"strong", u"sub",
+                                   u"sup", u"tt", u"var"])
 
-# Instances cannot be in the stack with any of these element, or with interactive elements
-instance_not_in_stack_with = frozenset([u"dfn",])
+# Instances cannot be in the stack with any of these elements, or with
+# interactive elements
+instance_not_in_stack_with = frozenset([u"dfn", ])
 
 non_alphanumeric_spaces = re.compile(r"[^a-zA-Z0-9 \-]+")
 
+
 class xref(object):
     """Add cross-references."""
-    
+
     def __init__(self, ElementTree, **kwargs):
         self.dfns = {}
         self.buildReferences(ElementTree, **kwargs)
         self.addReferences(ElementTree, **kwargs)
-    
-    def buildReferences(self, ElementTree, allow_duplicate_dfns=False, **kwargs):
+
+    def buildReferences(self, ElementTree, allow_duplicate_dfns=False,
+                        **kwargs):
         for dfn in ElementTree.iter(u"dfn"):
             term = self.getTerm(dfn, **kwargs)
-            
+
             if len(term) > 0:
                 if not allow_duplicate_dfns and term in self.dfns:
-                    raise DuplicateDfnException, u'The term "%s" is defined more than once' % term
-                
+                    raise DuplicateDfnException(u'The term "%s" is defined more than once' % term)
+
                 link_to = dfn
-                
+
                 for parent_element in dfn.iterancestors(tag=etree.Element):
                     if parent_element.tag in utils.heading_content:
                         link_to = parent_element
                         break
-                
+
                 id = utils.generateID(link_to, **kwargs)
-                
+
                 link_to.set(u"id", id)
-                
+
                 self.dfns[term] = id
-    
-    def addReferences(self, ElementTree, w3c_compat = False, w3c_compat_xref_elements = False, w3c_compat_xref_a_placement = False, **kwargs):
+
+    def addReferences(self, ElementTree, w3c_compat=False,
+                      w3c_compat_xref_elements=False,
+                      w3c_compat_xref_a_placement=False, **kwargs):
         for element in ElementTree.iter(tag=etree.Element):
-            if element.tag in instance_elements or (w3c_compat or w3c_compat_xref_elements) and element.tag in w3c_instance_elements:
+            if element.tag in instance_elements or \
+               (w3c_compat or w3c_compat_xref_elements) and \
+               element.tag in w3c_instance_elements:
                 term = self.getTerm(element, w3c_compat=w3c_compat, **kwargs)
-                
+
                 if term in self.dfns:
                     goodParentingAndChildren = True
-                    
-                    for parent_element in element.iterancestors(tag=etree.Element):
-                        if parent_element.tag in instance_not_in_stack_with or utils.isInteractiveContent(parent_element):
+
+                    for parent_element in \
+                        element.iterancestors(tag=etree.Element):
+                        if (parent_element.tag in instance_not_in_stack_with or
+                            utils.isInteractiveContent(parent_element)):
                             goodParentingAndChildren = False
                             break
                     else:
-                        for child_element in element.iterdescendants(tag=etree.Element):
-                            if child_element.tag in instance_not_in_stack_with or utils.isInteractiveContent(child_element):
+                        for child_element in \
+                            element.iterdescendants(tag=etree.Element):
+                            if child_element.tag in instance_not_in_stack_with\
+                               or utils.isInteractiveContent(child_element):
                                 goodParentingAndChildren = False
                                 break
-                    
+
                     if goodParentingAndChildren:
                         if element.tag == u"span":
                             element.tag = u"a"
                             element.set(u"href", u"#" + self.dfns[term])
                         else:
-                            link = etree.Element(u"a", {u"href": u"#" + self.dfns[term]})
+                            link = etree.Element(u"a",
+                                                 {u"href":
+                                                  u"#" + self.dfns[term]})
                             if w3c_compat or w3c_compat_xref_a_placement:
                                 for node in element:
                                     link.append(node)
                                 link.append(element)
                                 link.tail = link[0].tail
                                 link[0].tail = None
-    
-    def getTerm(self, element, w3c_compat = False, w3c_compat_xref_normalization = False, **kwargs):
+
+    def getTerm(self, element, w3c_compat=False,
+                w3c_compat_xref_normalization=False, **kwargs):
         if element.get(u"title") is not None:
             term = element.get(u"title")
         else:
             term = utils.textContent(element)
-        
+
         term = term.strip(utils.spaceCharacters).lower()
-        
+
         term = utils.spacesRegex.sub(u" ", term)
-        
+
         if w3c_compat or w3c_compat_xref_normalization:
             term = non_alphanumeric_spaces.sub(u"", term)
-        
+
         return term
 
+
 class DuplicateDfnException(utils.AnolisException):
     """Term already defined."""
     pass