Commits

jgraham committed 5145fbc

Refactor options for parsing and serializing

  • Participants
  • Parent commits cfe00c4

Comments (0)

Files changed (2)

     #parser.add_option("", "", action="store_true",
     #                  dest="xml", help="Use an XML parser/serializer.")
 
-    parser.add_option("", "--lxml.html", action="store_true",
-                      dest="lxml_html",
-                      help="Use lxml's HTML parser/serializer.")
+    parser.add_option("", "--parser", type="choice",
+                      choices=("html5lib", "lxml"))
+
+    parser.add_option("", "--serializer", type="choice",
+                      choices=("html5lib", "lxml"))
+
 
     parser.add_option("", "--newline-char", action="store", type="string",
                       dest="newline_char",
     parser.add_option("", "--escape-rcdata", action="store_true",
                       dest="escape_rcdata", help=SUPPRESS_HELP)
 
+    parser.add_option("", "--encoding", action="store", type=str,
+                      dest="encoding", help="Output encoding")
+
     parser.set_defaults(
         processes=set(["sub", "xref", "toc"]),
         xml=False,
-        lxml_html=False,
+        parser="html5lib",
+        serializer="html5lib",
         newline_char=u"\n",
         indent_char=u" ",
         force_html4_id=False,
         use_trailing_solidus=False,
         space_before_trailing_solidus=False,
         escape_lt_in_attrs=False,
-        escape_rcdata=False
+        escape_rcdata=False,
+        encoding="utf-8"
     )
 
     return parser

File anolislib/generator.py

 # THE SOFTWARE.
 
 import html5lib
-from html5lib import treebuilders, treewalkers, serializer
+from html5lib import treebuilders, treewalkers
+from html5lib.serializer import htmlserializer
+
 import lxml.html
 from lxml import etree
 
         getattr(process_module, process)(tree, **kwargs)
 
 
-def fromFile(input, processes=set(["sub", "toc", "xref"]), xml=False,
-             lxml_html=False, profile=False, **kwargs):
+def fromFile(input, processes=set(["sub", "toc", "xref"]), parser="html5lib",
+             profile=False, **kwargs):
     # Parse as XML:
     #if xml:
     if False:
         tree = etree.parse(input)
     # Parse as HTML using lxml.html
-    elif lxml_html:
+    elif parser == "lxml":
         tree = lxml.html.parse(input)
     # Parse as HTML using html5lib
     else:
-        parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("lxml",
-                                                                      etree))
-        tree = parser.parse(input)
+        tree = html5lib.parse(input, treebuilder="lxml")
 
     # Close the input file
     input.close()
     return tree
 
 
-def toFile(tree, output, xml=False, lxml_html=False, **kwargs):
+def toString(tree, encoding="utf-8", serializer="html5lib", **kwargs):
     # Serialize to XML
     #if xml:
     if False:
-        rendered = etree.tostring(tree, encoding="utf-8")
+        rendered = etree.tostring(tree, encoding=encoding)
     # Serialize to HTML using lxml.html
-    elif lxml_html:
-        rendered = lxml.html.tostring(tree, encoding="utf-8")
+    elif serializer == "lxml":
+        rendered = lxml.html.tostring(tree, encoding=encoding)
     # Serialize to HTML using html5lib
     else:
         walker = treewalkers.getTreeWalker("lxml")
-        s = serializer.htmlserializer.HTMLSerializer(**kwargs)
-        rendered = s.render(walker(tree), encoding="utf-8")
+        s = htmlserializer.HTMLSerializer(**kwargs)
+        rendered = s.render(walker(tree), encoding=encoding)
+    return rendered
+
+def toFile(tree, output, encoding="utf-8", serializer="html5lib", **kwargs):
+    
+    rendered = toString(tree, encoding=encoding, serializer=serializer,
+                        **kwargs)
 
     # Write to the output
     output.write(rendered)