Commits

Matt Chaput committed 8a0af86

Added promote_elements function, cleaned up use of etree API.

Comments (0)

Files changed (2)

tests/test_filters.py

 import re
 
 from wikiparser.markup import fromstring, tostring, WIKI, dump_etree
-from wikiparser.filters.trees import group_blocks, sort_headings
+from wikiparser.filters.trees import group_blocks, sort_headings, promote_elements
 
 
 class TestBlockFilters(unittest.TestCase):
         #print "\no=", output, "\nt=", target
         self.assertEqual(output, target)
 
+    def test_promoteelements(self):
+        input = """
+        <ns0:root xmlns:ns0="http://whoosh.ca/ns/wiki/1.2/">
+            <ns0:b>
+                <ns0:bc>
+                    <ns0:t>Alfa </ns0:t>
+                    <strong>bravo</strong>
+                    <ns0:t> charlie</ns0:t>
+                </ns0:bc>
+            </ns0:b>
+            <ns0:b>
+                <ns0:bc><ns0:include href="test1">foo</ns0:include></ns0:bc>
+            </ns0:b>
+            <ns0:b>
+                <ns0:bc><ns0:t>delta</ns0:t></ns0:bc>
+                <ns0:indent>
+                    <ns0:b>
+                        <ns0:bc>
+                            <ns0:t> </ns0:t>
+                            <ns0:include href="test2" />
+                            <ns0:t> </ns0:t>
+                        </ns0:bc>
+                    </ns0:b>
+                </ns0:indent>
+            </ns0:b>
+        </ns0:root>
+        """
+        input = self._stripxml(input)
+        
+        target = """
+        <ns0:root xmlns:ns0="http://whoosh.ca/ns/wiki/1.2/">
+            <ns0:b>
+                <ns0:bc>
+                    <ns0:t>Alfa </ns0:t>
+                    <strong>bravo</strong>
+                    <ns0:t> charlie</ns0:t>
+                </ns0:bc>
+            </ns0:b>
+            <ns0:include href="test1">foo</ns0:include>
+            <ns0:b>
+                <ns0:bc><ns0:t>delta</ns0:t></ns0:bc>
+                <ns0:indent>
+                    <ns0:include href="test2"/>
+                </ns0:indent>
+            </ns0:b>
+        </ns0:root>
+        """
+        target = self._stripxml(target)
+        
+        element = fromstring(input)
+        promote_elements(element, frozenset([WIKI.include]))
+        output = tostring(element)
+        output = re.sub(" />", "/>", output)
+        #print "\no=", output, "\nt=", target
+        self.assertEqual(output, target)
 
 
 

wikiparser/filters/trees.py

 # limitations under the License.
 #===============================================================================
 
-from wikiparser.markup import Element, WIKI
+from wikiparser.markup import Element, WIKI, tostring
 
 
 def group_blocks(parent, function, recursive=True, elementclass=Element):
     # Keep track of the new list of children
     newkids = []
     for node in parent.getchildren():
-        # Remove the node for now... we'll put it back below
-        # when we flush "newkids"
-        parent.remove(node)
-        
         # If this node has indented blocks, call this function
         # recursively on them
         if recursive:
             newkids.append(node)
             currenttype = None
     
-    # Put the newly re-arranged children back into the parent
-    for node in newkids:
-        parent.append(node)
+    # Replace the parent's contents with the newly re-arranged children
+    parent[:] = newkids
 
 
 def group_by_type(parent, types):
     """
     
     group = None
-    newkids = []
-    for node in list(parent):
-        parent.remove(node)
-        
+    kids = parent.getchildren()
+    parent.clear()
+    for node in kids:
         if recursive:
             indent = node.find(WIKI.indent)
             if indent is not None:
             if t == lead:
                 group = elementclass(WIKI.group, type = grouptype)
                 group.append(node)
-                newkids.append(group)
+                parent.append(group)
             elif t == follow and group:
                 group.append(node)
             else:
                 group = None
-                newkids.append(node)
+                parent.append(node)
     
-    for node in newkids:
-        parent.append(node)
-
 
 def sort_headings(parent, relevel=1, elementclass=Element):
     """Converts a linear list of blocks into a heading hierarchy using
     # Level of the last heading seen for grabbing
     lastlevel = -1
     
-    
     for node in parent.getchildren():
         role = node.get("role") or node.get("type")
         if node.tag == WIKI.b and role == "heading":
                 for kid in indent:
                     element.append(kid)
             
-            parent.insert(i, element)
-            parent.remove(node)
-            
-
-def extract_elements(parent, tagset):
-    """Finds blocks that contain a single element in their content and no
-    indent, and if the element is in `tagset`, extracts the element up to
-    the block level.
-    
-    For example, the block::
-    
-        <b><bc><include /></bc></b>
-        
-    ...would become::
-    
-        <include />
-    """
-    
-    for i, node in enumerate(parent):
-        if node.tag == WIKI.b:
-            indent = node.find(WIKI.indent)
-            if indent is not None:
-                extract_elements(indent, tagset)
-                continue
-            bc = node.find(WIKI.bc)
-            if bc is None:
-                continue
-            kids = list(bc)
-            if len(kids) == 1:
-                element = kids[0]
-                if element.tag in tagset:
-                    parent.insert(i, element)
-                    parent.remove(node)
+            parent[i] = element
 
 
 def block_attribute_elements(parent, remove=True):
     """
     
     if parent.tag == WIKI.b:
-        indent = parent.find(WIKI.indent)
-        if indent is not None:
-            for node in indent.getchildren():
-                if node.tag == WIKI.b and node.get("type") == "prop":
-                    parent.set(unicode(node.get("name")), unicode(node.get("value")))
-                    if remove: indent.remove(node)
-    elif parent.tag == WIKI.document:
-        for node in parent.getchildren():
-            if node.tag == WIKI.b and node.get("type") == "prop":
-                parent.set(unicode(node.get("name")), unicode(node.get("value")))
-                if remove: parent.remove(node)
-
+        source = parent.find(WIKI.indent)
+        if source is None: return
+    else:
+        source = parent
+    
+    for node in parent.getchildren():
+        if node.tag == WIKI.b:
+            if node.get("type") == "prop":
+                parent.set(node.get("name"), node.get("value"))
+                if remove: source.remove(node)
+            else:
+                block_attribute_elements(node, remove=remove)
+    
 
 def flatten_text(parent):
     """Because the etree API has absolutely no sane support for working with mixed-content
             flatten_text(node)
 
 
-def pull_elements(parent):
-    for node in parent.getchildren():
-        if node.tag == WIKI.b:
+def promote_elements(parent, tagset):
+    t_tag = WIKI.t
+    b_tag = WIKI.b
+    for i, node in enumerate(parent.getchildren()):
+        if node.tag == b_tag:
             bc = node.find(WIKI.bc)
             indent = node.find(WIKI.indent)
+            # If this block has only a bc and no indent...
             if bc is not None and indent is None:
-                pass
+                c = None
+                # And the bc has only one element and no text inside...
+                for node in bc.getchildren():
+                    if node.tag == t_tag:
+                        # Ignore text elements that contain only whitespace
+                        if node.text and not node.text.isspace():
+                            c = None
+                            break
+                        else:
+                            continue
+                    elif c is not None:
+                        # We've already seen one non-text element, so we
+                        # know this isn't going to work
+                        c = None
+                        break
+                    else:
+                        # Remember this node
+                        c = node
+                
+                # If we found one and only one element inside and no
+                # (non-whitespace) text, check if this is one of the
+                # tags we should be promoting
+                if c is not None and c.tag in tagset:
+                    # Replace the block with the element
+                    parent[i] = c
+            
+            elif indent is not None:
+                promote_elements(indent, tagset)