Commits

Luke Plant committed 04c203d

Made extract_presentation return stripped HTML as well as presentation info

  • Participants
  • Parent commits 50ffce1

Comments (0)

Files changed (2)

File semanticeditor/tests.py

 class TestExtractPresentation(TestCase):
     def test_extract_presentation(self):
         html = "<div class=\"foo\"><h1>Heading 1</h1><div class=\"bar baz\"><h2>Heading 2</h2></div></div>"
-        pres = extract_presentation(html)
+        pres, html2 = extract_presentation(html)
         self.assertEqual({'Heading 1':set([PC('foo')]),
                           'Heading 2':set([PC('bar'), PC('baz')])
                           }, pres)
+        self.assertEqual("<h1>Heading 1</h1><h2>Heading 2</h2>", html2)
 
     # Lazy method - assume that combine works and check the round-trip.
     # This only works currently if we 'normalise' the presentation dict.
                         '4':set([NEWCOL]),
                         }
         combined = format_html(html, presentation)
-        pres2 = extract_presentation(combined)
+        pres2, html2 = extract_presentation(combined)
         self.assertEqual(presentation, pres2)
+        self.assertEqual(html, html2)

File semanticeditor/utils/presentation.py

 class TooManyColumns(BadStructure):
     pass
 
+AllUserErrors = (InvalidHtml, IncorrectHeadings, BadStructure, TooManyColumns)
+
 ### Definitions ###
 
 headingdef = ['h1','h2','h3','h4','h5','h6']
     # there are other block level elements that mess things up, we
     # raise BadStructure later, but divs have no semantics so can just
     # be removed.
-    cleanup(root, lambda t: t.tag != 'div')
+    _strip_presentation(root)
 
     headers = get_heading_nodes(root)
 
 
     _apply_commands(root, section_nodes, styleinfo, headers)
 
+    return _html_extract(root)
+
+def _html_extract(root):
     return ET.tostring(root).replace('<html>','').replace('</html>','')
 
+def _strip_presentation(tree):
+    cleanup(tree, lambda t: t.tag != 'div')
+
+
 def _sanitise_styleinfo(styleinfo, headingnames):
     # Replace lists with sets
     out = {}
                     pres[name].add(NEWROW)
                     pres[name].remove(NEWCOL) # not technically necessary
 
-    return pres
+    _strip_presentation(root)
+    out_html = _html_extract(root)
 
+    return (pres, out_html)
+