Commits

Geoffrey Sneddon committed cf47703

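Add fromFile, toFile, and fromToFile functions to anolislib/generator.py. This simplifies the command-line script and the test suite, which previously each duplicated the parsing and serialization logic.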

Comments (0)

Files changed (3)

 Post-process a document, adding cross-references, table of contents, etc.
 """
 
-import cProfile
 from optparse import OptionParser, SUPPRESS_HELP
 import sys
-import html5lib
-from html5lib import treebuilders, treewalkers, serializer
-import lxml.html
-from lxml import etree
 
 from anolislib import generator, utils
 
 	# Check we have enough arguments
 	if len(args) >= 2:
 		try:
-			# Get input
-			input = file(args[0], "r")
+			# Get options
+			kwargs = vars(opts)
 			
-			# Parse as XML:
-			#if opts.xml:
-			if False:
-				tree = etree.parse(input)
-			# Parse as HTML using lxml.html
-			elif opts.lxml_html:
-				tree = lxml.html.parse(input)
-			# Parse as HTML using html5lib
-			else:
-				parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
-				tree = parser.parse(input)
-			
-			# Close the input file
+			# Get input and generate
+			input = open(args[0], "rb")
+			tree = generator.fromFile(input, **kwargs)
 			input.close()
 			
-			# Remove the option we pass as an argument
-			processes = opts.processes
-			del opts.processes
-			
-			# Turn the options into a dict
-			kwargs = vars(opts)
-			
-			# Run the generator, and profile, or not, as the case may be
-			if kwargs["profile"]:
-				cProfile.runctx("gen.process(tree, processes, **kwargs)", {}, {"gen": generator, "tree": tree, "processes": processes, "kwargs": kwargs})
-			else:
-				generator.process(tree, processes, **kwargs)
-			
-			# Serialize to XML
-			#if opts.xml:
-			if False:
-				rendered = etree.tostring(tree, encoding="utf-8")
-			# Serialize to HTML using lxml.html
-			elif opts.lxml_html:
-				rendered = lxml.html.tostring(tree, encoding="utf-8")
-			# Serialize to HTML using html5lib
-			else:
-				walker = treewalkers.getTreeWalker("lxml")
-				s = serializer.htmlserializer.HTMLSerializer(**kwargs)
-				rendered = s.render(walker(tree), encoding="utf-8")
-			
-			# Get the output
-			output = file(args[1], "w")
-			
-			# Write to the output
-			output.write(rendered)
-			
-			# Close the output
+			# Write output
+			output = open(args[1], "wb")
+			generator.toFile(tree, output, **kwargs)
 			output.close()
 		except (utils.AnolisException, IOError, etree.XMLSyntaxError), e:
 			sys.stderr.write(unicode(e) + u"\n")

anolislib/generator.py

 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
+import hotshot
+import hotshot.stats
+import os
+import tempfile
+
+import html5lib
+from html5lib import treebuilders, treewalkers, serializer
+import lxml.html
+from lxml import etree
+
 def process(tree, processes=set(["sub", "toc", "xref"]), **kwargs):
 	""" Process the given tree. """
 	
 			process_module = __import__(process, globals(), locals(), [], -1)
 		
 		getattr(process_module, process)(tree, **kwargs)
+
+def fromFile(input, processes=set(["sub", "toc", "xref"]), xml=False, lxml_html=False, profile=False, **kwargs):
+	# Parse as XML:
+	#if xml:
+	if False:
+		tree = etree.parse(input)
+	# Parse as HTML using lxml.html
+	elif lxml_html:
+		tree = lxml.html.parse(input)
+	# Parse as HTML using html5lib
+	else:
+		parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
+		tree = parser.parse(input)
+	
+	# Close the input file
+	input.close()
+	
+	# Run the generator, and profile, or not, as the case may be
+	if profile:
+		statfile = tempfile.mkstemp()[1]
+		prof = hotshot.Profile(statfile)
+		prof.runcall(process, tree, processes, **kwargs)
+		prof.close()
+		stats = hotshot.stats.load(statfile)
+		stats.strip_dirs()
+		stats.sort_stats('time')
+		stats.print_stats()
+		os.remove(statfile)
+	else:
+		process(tree, processes, **kwargs)
+	
+	# Return the tree
+	return tree
+
+def toFile(tree, output, xml=False, lxml_html=False, **kwargs):			
+	# Serialize to XML
+	#if xml:
+	if False:
+		rendered = etree.tostring(tree, encoding="utf-8")
+	# Serialize to HTML using lxml.html
+	elif lxml_html:
+		rendered = lxml.html.tostring(tree, encoding="utf-8")
+	# Serialize to HTML using html5lib
+	else:
+		walker = treewalkers.getTreeWalker("lxml")
+		s = serializer.htmlserializer.HTMLSerializer(**kwargs)
+		rendered = s.render(walker(tree), encoding="utf-8")
+	
+	# Write to the output
+	output.write(rendered)
+
+def fromToFile(input, output, **kwargs):
+	tree = fromFile(input, **kwargs)
+	toFile(tree, output, **kwargs)
 # THE SOFTWARE.
 
 import glob
-import re
 import StringIO
 import os
 import unittest
 
-import html5lib
-from html5lib import treebuilders, treewalkers, serializer
-
-from lxml import etree
-
 from anolislib import generator
 
 def get_files(*args):
 		def testFunc(self, file_name=file_name):
 			try:
 				# Get the input
-				input = open(file_name, "r")
-				parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml", etree))
-				tree = parser.parse(input)
+				input = open(file_name, "rb")
+				tree = generator.fromFile(input)
+				input.close()
+				
+				# Get the output
+				output = StringIO.StringIO()
+				generator.toFile(tree, output)
 				
 				# Get the expected result
-				expected = open(file_name[:-9] + ".html", "r")
-				
-				# Run anolis
-				generator.process(tree)
-				
-				# Get the output
-				walker = treewalkers.getTreeWalker("lxml")
-				s = serializer.htmlserializer.HTMLSerializer()
-				output = s.render(walker(tree), encoding="utf-8")
+				expected = open(file_name[:-9] + ".html", "rb")
 				
 				# Run the test
-				self.assertEquals(output, expected.read())
+				self.assertEquals(output.getvalue(), expected.read())
 				
 				# Close the files
-				input.close()
+				output.close()
 				expected.close()
 			except IOError, err:
 				self.fail(err)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.