Commits

Ed Brannin committed d971586

Working a bit more on soupify

  • Participants
  • Parent commits 2f2bf00

Comments (0)

Files changed (2)

     if tag.parent.name == '[document]':
       # print "Ignore root"
       next
-    elif tag.nextSibling == None and tag.previousSibling == None:
+    #elif tag.nextSibling == None and tag.previousSibling == None:
+    elif len(tag.parent.contents) == 1:
       print "Only child!"
       if tag.name not in ('html', 'a'):
         for child in tag.contents:
           tag.parent.insert(len(tag.parent.contents), child)
         tag.extract()
+        did_stuff = True
+      else:
+        print "Tag is exempt: %s" % tag.name
 
     # if the tag is the only child of its parent and isn't a protected tag (like <html> or <a>) then we want to replace the tag with its contents.
     pass #FIXME
-    #if tag
-  return soup
+    if tag.name in ('br'):
+      tag.extract()
+
+  if did_stuff:
+    return collapse(BeautifulSoup(str(soup)))
+  else:
+    return soup
   
 
 if __name__ == '__main__':

File soupify_tests.py

   <font face="Times New Roman" class="fsx01"><B><I><a href="about.html" title="ABOUT">ABOUT</a><br></I></B></font></div>
   </div>
   """)
-  observed_answer = collapse(soup).prettify().strip()
+  observed_answer = str(collapse(soup)).strip()
   desired_answer = """
   <div><a href="about.html" title="ABOUT">ABOUT</a></div>
   """.strip()