Commits

Ed Brannin committed 80956cd

Now able to strip id/class/style attributes from tags

  • Participants
  • Parent commits feb0d39

Comments (0)

Files changed (2)

 
   raise Exception("stop after one file for now")
 
+def strip_styles(soup):
+  # Remove the id, class and style attributes from every tag in `soup`.
+  # Tags are edited in place and the same soup object is returned.
+  for attr in ('id', 'class', 'style'):
+    # Name filter None + attrs={attr: True}: select any tag, whatever its
+    # name, that carries this attribute (with any value).
+    for t in soup.findAll(None, attrs={attr: True}):
+      del(t[attr])
+  return soup
+
 def strip_spans(soup):
+  # Unwrap font/span/b/i tags (both lower- and upper-case names are listed):
+  # each matching tag is replaced by its own rendered contents.
+  # Returns a freshly parsed BeautifulSoup object, not the one passed in,
+  # because `soup` is rebound to a re-parse on every pass of the outer loop.
   for tag in ('font', 'span', 'b', 'i', 'B', 'I'):
     for s in soup.findAll(tag):
+      # `parent` is only referenced by the debug print that follows.
       parent = s.parent
       s.replaceWith(NavigableString(s.renderContents()))
-      print parent.prettify()
+      # print parent.prettify()
+    # Re-parse the prettified document after each tag name. The contents were
+    # inserted as a single NavigableString, so presumably this turns any
+    # markup inside them back into real tags -- TODO confirm intent.
     soup = BeautifulSoup(soup.prettify())
-  print "Soup is: %s" % soup
+  # print "Soup is: %s" % soup
   return soup
   
 
 """.strip()
   actual_answer = strip_spans(soup).prettify().strip()
   assert actual_answer == desired_answer
+
+def test_strip_styles():
+  # Checks that strip_styles drops the id and class attributes while leaving
+  # other attributes (face, href, title) and the tag nesting untouched.
+  # NOTE(review): the fixture has no style attribute, so that case is not
+  # exercised here.
+  soup = BeautifulSoup("""
+  <div id="Oobj19" >
+  <div id="Ggeo47" class="dfltt">
+  <font face="Times New Roman" class="fsx01"><B><I><a href="about.html" title="ABOUT">ABOUT</a><br></I></B></font></div>
+  </div>
+  """)
+  # Compare against the prettify() rendering with outer whitespace stripped;
+  # the expected text below therefore uses lower-case tag names and <br />.
+  observed_answer = strip_styles(soup).prettify().strip()
+  desired_answer = """
+<div>
+ <div>
+  <font face="Times New Roman">
+   <b>
+    <i>
+     <a href="about.html" title="ABOUT">
+      ABOUT
+     </a>
+     <br />
+    </i>
+   </b>
+  </font>
+ </div>
+</div>
+  """.strip()
+  assert observed_answer == desired_answer
+
+
+