Commits

Geoffrey Sneddon  committed d91fd1a

Fix bug where removing directly-adjacent or parent-child elements removed too much.

  • Participants
  • Parent commits 7281ef5

Comments (0)

Files changed (14)

File anolislib/processes/toc.py

         # Numbering
         num = []
 
-        # Set of elements to remove (removing elements being iterated over is
-        # undefined).
-        to_remove = set()
-
         # Loop over all sections in a DFS
         while sections:
             # Get the section and depth at the end of list
             # If we have a section heading text element, regardless of depth
             if header_text is not None:
                 # Remove any existing number
-                for element in header_text.iter(u"span"):
+                for element in header_text.findall(u".//span"):
                     if utils.elementHasClass(element, u"secno"):
-                        # Preserve the element tail
-                        if element.tail is not None:
-                            if element.getprevious() is not None:
-                                if element.getprevious().tail is None:
-                                    element.getprevious().tail = element.tail
-                                else:
-                                    element.getprevious().tail += element.tail
-                            else:
-                                if element.getparent().text is None:
-                                    element.getparent().text = element.tail
-                                else:
-                                    element.getparent().text += element.tail
-                        # Remove the element
-                        to_remove.add(element)
+                        # Copy content, to prepare for the node being
+                        # removed
+                        utils.copyContentForRemoval(element, text=False,
+                                                    children=False)
+                        # Remove the element (we can do this as we're not
+                        # iterating over the elements, but over a list)
+                        element.getparent().remove(element)
 
             # Check we're in the valid depth range (min/max_depth are 1 based,
             # depth is 0 based)
 
                 # If we have a header
                 if header_text is not None:
-                    # Remove all the elements in the list of nodes to remove
-                    # (so that the removal of existing numbers doesn't lead to
-                    # crazy IDs)
-                    for element in to_remove:
-                        element.getparent().remove(element)
-                    to_remove = set()
-
                     # Add ID to header
                     id = utils.generateID(header_text, **kwargs)
                     if header_text.get(u"id") is not None:
                         for element_name in remove_elements_from_toc:
                             # Iterate over all the descendants of the new link
                             # with that element name
-                            for element in link.iterdescendants(element_name):
+                            for element in link.findall(u".//" + element_name):
                                 # Copy content, to prepare for the node being
                                 # removed
                                 utils.copyContentForRemoval(element)
-                                # Add the element of the list of elements to
-                                # remove
-                                to_remove.add(element)
+                                # Remove the element (we can do this as we're
+                                # not iterating over the elements, but over a
+                                # list)
+                                element.getparent().remove(element)
                         # Remove unwanted attributes
                         for element in link.iter(tag=etree.Element):
                             for attribute_name in remove_attributes_from_toc:
             # next) with a higher depth value
             sections.extend([(child_section, depth + 1)
                              for child_section in reversed(section)])
-        # Remove all the elements in the list of nodes to remove
-        for element in to_remove:
-            element.getparent().remove(element)
 
     def addToc(self, ElementTree, **kwargs):
         to_remove = set()

File anolislib/utils.py

 
 
 def removeInteractiveContentChildren(element):
-    # Set of elements to remove
-    to_remove = set()
-
-    # Iter over decendants of element
-    for child in element.iterdescendants(etree.Element):
+    # Iter over list of descendants of element
+    for child in element.findall(u".//*"):
         if isInteractiveContent(child):
             # Copy content, to prepare for the node being removed
             copyContentForRemoval(child)
-            # Add the element of the list of elements to remove
-            to_remove.add(child)
-
-    # Remove all elements to be removed
-    for element in to_remove:
-        element.getparent().remove(element)
+            # Remove element
+            child.getparent().remove(child)
 
 
 def isInteractiveContent(element):
         return False
 
 
-def copyContentForRemoval(node):
+def copyContentForRemoval(node, text=True, children=True, tail=True):
     # Preserve the text, if it is an element
-    if isinstance(node.tag, basestring) and node.text is not None:
+    if isinstance(node.tag, basestring) and node.text is not None and text:
         if node.getprevious() is not None:
             if node.getprevious().tail is None:
                 node.getprevious().tail = node.text
             else:
                 node.getparent().text += node.text
     # Re-parent all the children of the element we're removing
-    for child in node:
-        node.addprevious(child)
+    if children:
+        for child in node:
+            node.addprevious(child)
     # Preserve the element tail
-    if node.tail is not None:
+    if node.tail is not None and tail:
         if node.getprevious() is not None:
             if node.getprevious().tail is None:
                 node.getprevious().tail = node.tail

File tests/basic/multi-num-pre-existing.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+<h2 id=bar><span class=secno>1 </span>Bar</h2>

File tests/basic/multi-num-pre-existing.src.html

+<!doctype html>
+<h1>Foo</h1>
+<h2><span class="secno">0 </span><span class="secno">1 </span>Bar</h2>

File tests/basic/num-within-em.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+<h2 id=bar><span class=secno>1 </span><em>Bar</em></h2>

File tests/basic/num-within-em.src.html

+<!doctype html>
+<h1>Foo</h1>
+<h2><em><span class="secno">0 </span>Bar</em></h2>

File tests/basic/toc-dfn-within-em.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+
+<!--begin-toc-->
+<ol class=toc>
+ <li><a href=#bar><span class=secno>1 </span><em>Bar</em></a></ol>
+<!--end-toc-->
+<h2 id=bar><span class=secno>1 </span><em><dfn>Bar</dfn></em></h2>

File tests/basic/toc-dfn-within-em.src.html

+<!doctype html>
+<h1>Foo</h1>
+<!--toc-->
+<h2><em><dfn>Bar</dfn></em></h2>

File tests/basic/toc-multi-a.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+
+<!--begin-toc-->
+<ol class=toc>
+ <li><a href=#bar-lol><span class=secno>1 </span>Bar LOL</a></ol>
+<!--end-toc-->
+<h2 id=bar-lol><span class=secno>1 </span><a href=http://example.com>Bar</a> <a href=http://example.net>LOL</a></h2>

File tests/basic/toc-multi-a.src.html

+<!doctype html>
+<h1>Foo</h1>
+<!--toc-->
+<h2><a href=http://example.com>Bar</a> <a href=http://example.net>LOL</a></h2>

File tests/basic/toc-multi-dfn-children.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+
+<!--begin-toc-->
+<ol class=toc>
+ <li><a href=#text-state-and-search-state><span class=secno>1 </span>Text state and Search state</a></ol>
+<!--end-toc-->
+<h6 id=text-state-and-search-state><span class=secno>1 </span><dfn title=attr-input-type-text>Text</dfn> state and <dfn title=attr-input-type-search>Search</dfn> state</h6>

File tests/basic/toc-multi-dfn-children.src.html

+<!doctype html>
+<h1>Foo</h1>
+<!--toc-->
+<h6><dfn title="attr-input-type-text">Text</dfn> state and <dfn title="attr-input-type-search">Search</dfn> state</h6>

File tests/basic/toc-nested-dfn.html

+<!DOCTYPE html><head><meta charset=utf-8><h1>Foo</h1>
+
+<!--begin-toc-->
+<ol class=toc>
+ <li><a href=#barlol><span class=secno>1 </span>BarLOL</a></ol>
+<!--end-toc-->
+<h2 id=barlol><span class=secno>1 </span><dfn><dfn>Bar</dfn>LOL</dfn></h2>

File tests/basic/toc-nested-dfn.src.html

+<!doctype html>
+<h1>Foo</h1>
+<!--toc-->
+<h2><dfn><dfn>Bar</dfn>LOL</dfn></h2>