Commits

Geoffrey Sneddon committed ca7e147

Make the generator just take a tree and run all the processes against it, moving everything elsewhere.

  • Participants
  • Parent commits 6b0cdc9

Comments (0)

Files changed (3)

 import os
 import unittest
 
+import html5lib
+from html5lib import treebuilders, treewalkers, serializer
+
+from lxml import etree
+
 from specGen import generator
 
 def get_files(*args):
 			try:
 				# Get the input
 				input = open(file_name, "r")
-				
-				# Prepare the output
-				output = StringIO.StringIO()
+				parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
+				tree = parser.parse(input)
 				
 				# Get the expected result
 				expected = open(file_name[:-9] + ".html", "r")
 				
 				# Run the spec-gen
 				gen = generator.generator()
-				gen.process(input, output)
+				gen.process(tree)
+				
+				# Get the output
+				walker = treewalkers.getTreeWalker("lxml")
+				s = serializer.htmlserializer.HTMLSerializer()
+				output = s.render(walker(tree), encoding="utf-8")
 				
 				# Run the test
-				self.assertEquals(output.getvalue(), expected.read())
+				self.assertEquals(output, expected.read())
 				
 				# Close the files
 				input.close()
 import cProfile
 from optparse import OptionParser
 import sys
+import html5lib
+from html5lib import treebuilders, treewalkers, serializer
+import lxml.html
+from lxml import etree
 
 from specGen import generator
 
 def main():
+	""" Parse the input file into a tree (XML, lxml.html or html5lib,
+	depending on the options), run the generator over that tree, then
+	serialize it with the matching serializer and write it to the output
+	file. Exits with status 1 if fewer than two arguments are given. """
+	
+	# Create the options parser
 	optParser = getOptParser()
 	opts, args = optParser.parse_args()
 	
+	# Check we have enough arguments
 	if len(args) >= 2:
-		gen = generator.generator()
+		# Get input/output
 		input = file(args[0], "r")
 		output = file(args[1], "w")
+		
+		# Parse as XML:
+		if opts.xml:
+			tree = etree.parse(input)
+		# Parse as HTML using lxml.html
+		elif opts.lxml_html:
+			tree = lxml.html.parse(input)
+		# Parse as HTML using html5lib
+		else:
+			parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
+			tree = parser.parse(input)
+		
+		# Create the generator
+		gen = generator.generator()
+		
+		# Turn the options into a dict
 		kwargs = vars(opts)
+		
+		# Run the generator, and profile, or not, as the case may be
 		if kwargs["profile"]:
-			cProfile.runctx("gen.process(input, output, **kwargs)", {}, {"gen": gen, "input": input, "output": output, "kwargs": kwargs})
+			# The profiled statement can only see the names in the locals
+			# dict passed below, so it must match the non-profiled call.
+			cProfile.runctx("gen.process(tree, **kwargs)", {}, {"gen": gen, "tree": tree, "kwargs": kwargs})
 		else:
-			gen.process(input, output, **kwargs)
+			gen.process(tree, **kwargs)
+		
+		# Serialize to XML
+		if opts.xml:
+			rendered = etree.tostring(tree, encoding="utf-8")
+		# Serialize to HTML using lxml.html
+		elif opts.lxml_html:
+			rendered = lxml.html.tostring(tree, encoding="utf-8")
+		# Serialize to HTML using html5lib
+		else:
+			walker = treewalkers.getTreeWalker("lxml")
+			s = serializer.htmlserializer.HTMLSerializer(**kwargs)
+			rendered = s.render(walker(tree), encoding="utf-8")
+		
+		# Write to the output
+		output.write(rendered)
+		
+		# Close the files so the output is flushed to disk
+		input.close()
+		output.close()
 	else:
 		sys.stderr.write("spec-gen expects two arguments. Use -h for help\n")
 		sys.exit(1)
 def getOptParser():
 	parser = OptionParser(usage = __doc__)
 	
-	parser.add_option("", "--xml-input", action="store_true",
-		default=False, dest="xml_input", help="Use an XML parser to parse the input.")
+	parser.add_option("", "--xml", action="store_true",
+		default=False, dest="xml", help="Use an XML parser/serializer.")
 	
-	parser.add_option("", "--xml-output", action="store_true",
-		default=False, dest="xml_output", help="Serialize the output to XML.")
+	parser.add_option("", "--lxml.html", action="store_true",
+		default=False, dest="lxml_html", help="Use lxml's HTML parser/serializer.")
 	
 	parser.add_option("", "--min-depth", action="store", type="int",
 		default=2, dest="min_depth", help="Highest ranking header to number/insert into TOC.")

specGen/generator.py

 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-from collections import deque
-
-import html5lib
-from html5lib import treebuilders, treewalkers, serializer
-from lxml import etree
-
-#import processes.index, processes.num, processes.substitutions, processes.toc, processes.xref
-
 from processes import xref, toc, sub
 
 class generator(object):
 	""" This oversees all the actual work done """
 	
-	def process(self, input, output, processes = [xref.xref, toc.toc, sub.sub], xml_input = False, xml_output = False, **kwargs):
+	def process(self, tree, processes = [xref.xref, toc.toc, sub.sub], **kwargs):
 		""" Run each callable in "processes", in order, against "tree",
 		passing any extra keyword arguments through to every one of them.
 		Parsing the input and serializing the result are left to the
 		caller; nothing is returned.
 		NOTE(review): an earlier docstring said preconditions for each
 		process are checked here; none are visible in this method. """
 		
-		if xml_input:
-			# Parse the XML
-			tree = etree.parse(input)
-		else:
-			# Parse the HTML
-			parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
-			tree = parser.parse(input)
-		
 		# Run every process against the tree, in the order given
 		for process in processes:
-			process(tree, **kwargs)
-		
-		if xml_output:
-			# Serialize to XML
-			rendered = etree.tostring(tree, encoding="utf-8")
-		else:
-			# Serialize to HTML
-			walker = treewalkers.getTreeWalker("lxml")
-			s = serializer.htmlserializer.HTMLSerializer(**kwargs)
-			rendered = s.render(walker(tree), encoding="utf-8")
-		
-		# Write to the output
-		output.write(rendered)
+			process(tree, **kwargs)