Brad Montgomery committed a484f1d Draft

no more scraping so no need for a parser

Comments (0)

Files changed (1)


-from HTMLParser import HTMLParser
-import re 
class HTMLStripperParser(HTMLParser):
    """
    Simple, stupid parser to remove all HTML tags from
    a document. The point is to just get the data.

    Usage::

        parser = HTMLStripperParser()
        parser.feed(string_with_html)
        parser.get_content()
    """

    # Class-level default so that ``content`` exists even before any text
    # has been seen (empty input, or markup that contains only tags).
    # ``handle_data`` rebinds the attribute on the instance, so this shared
    # default is never modified.
    content = ""

    def get_content(self):
        """Return the collected text with whitespace runs collapsed to one space."""
        return re.sub(r"\s+", " ", self.content)

    def handle_data(self, data):
        """Append a chunk of text reported by the parser to ``content``."""
        self.content += data
def strip_tags(html_content):
    """
    Return the text of ``html_content`` with its HTML tags removed.

    Runs the markup through a fresh ``HTMLStripperParser`` and hands back
    whatever its ``get_content()`` reports.
    """
    stripper = HTMLStripperParser()
    stripper.feed(html_content)
    text_only = stripper.get_content()
    return text_only
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.