Commits

Ed Brannin committed feb0d39

strip_spans works!

  • Participants
  • Parent commits 64977ce

Comments (0)

Files changed (2)

 #!/usr/bin/python
 
-from BeautifulSoup import BeautifulSoup, HTMLParser, NavigableString
+from BeautifulSoup import BeautifulSoup, NavigableString
 import os, sys
 
 def handle(path):
 def strip_spans(soup):
   for tag in ('font', 'span', 'b', 'i', 'B', 'I'):
     for s in soup.findAll(tag):
+      parent = s.parent
       s.replaceWith(NavigableString(s.renderContents()))
+      print parent.prettify()
+    soup = BeautifulSoup(soup.prettify())
+  print "Soup is: %s" % soup
   return soup
   
 

File soupify_tests.py

+from BeautifulSoup import BeautifulSoup, NavigableString
 from soupify import *
 
 def test_strip_spans():
   <font face="Times New Roman" class="fsx01"><B><I><a href="about.html" title="ABOUT">ABOUT</a><br></I></B></font></div>
   </div>
   """)
- print strip_spans(soup).prettify()
+  desired_answer = """
+<div id="Oobj19" style="position:absolute; z-index:2; visibility:visible; left:359px; top:86px; width:60px; height:20px;">
+ <div id="Ggeo47" class="dfltt">
+  <a href="about.html" title="ABOUT">
+   ABOUT
+  </a>
+  <br />
+ </div>
+</div>
+""".strip()
+  actual_answer = strip_spans(soup).prettify().strip()
+  assert actual_answer == desired_answer