Commits

Ed Brannin committed 2f2bf00

collapse() is starting to work in simple cases, but its debug print output is spammy.

Comments (0)

Files changed (2)

   return soup
 
 def collapse(soup):
+  print "======="
   did_stuff = False
   for tag in soup.findAll(None):
+    print "Looking at tag: %s" % tag
+    if tag.parent.name == '[document]':
+      # print "Ignore root"
+      next
+    elif tag.nextSibling == None and tag.previousSibling == None:
+      print "Only child!"
+      if tag.name not in ('html', 'a'):
+        for child in tag.contents:
+          tag.parent.insert(len(tag.parent.contents), child)
+        tag.extract()
+
     # if the tag is the only child of its parent and isn't a protected tag (like <html> or <a>) then we want to replace the tag with its contents.
     pass #FIXME
     #if tag
+  return soup
   
 
 if __name__ == '__main__':
   """.strip()
   assert observed_answer == desired_answer
 
+def test_collapse_simple():
+  soup = BeautifulSoup("<div><p>Hi</p></div>")
+  actual = str(collapse(soup)).strip() 
+  assert actual == "<div>Hi</div>"
 
-def test_collapse():
+def test_collapse_nested():
+  soup = BeautifulSoup("<div><p><b><i>Hi</i></b></p></div>")
+  actual = str(collapse(soup)).strip() 
+  assert actual == "<div>Hi</div>"
+
+def test_collapse_irl():
   soup = BeautifulSoup("""
   <div id="Oobj19" >
   <div id="Ggeo47" class="dfltt">