Commits

Ian Lewis committed 48c68d7

Added tests for HTML sanitization

Comments (0)

Files changed (3)

lifestream/tests/__init__.py

 #:coding=utf-8:
 
 from feed_tests import * 
+from util_tests import * 

lifestream/tests/util_tests.py

+#!/usr/bin/env python
+#:coding=utf-8:
+
+from django.test import TransactionTestCase as DjangoTestCase
+
+from lifestream.util import * 
+
+class TagStrippingTest(DjangoTestCase):
+    valid_tags = VALID_TAGS
+    test_html = (
+        (u'<b>This is a test</b>', u'<b>This is a test</b>'),
+        (u'<script type="text/javascript">alert("DANGER!!");</script> Will Robinson', u'alert(&quot;DANGER!!&quot;); Will Robinson'),
+        (u'<a href="http://www.ianlewis.org/" rel="me" onclick="alert(\'woah!!\')">This is a test</a>', u'<a href="http://www.ianlewis.org/" rel="me">This is a test</a>'),
+    )
+
+    def test_tag_stripping(self):
+        for html in self.test_html:
+            stripped_html = sanitize_html(html[0], valid_tags=self.valid_tags)
+            self.assertEqual(stripped_html, html[1])
+
+    

lifestream/util/__init__.py

     "font-weight",
 )
 
-def sanitize_html(htmlSource, encoding=None):
+def sanitize_html(htmlSource, encoding=None, valid_tags=None, valid_styles=None):
     """
     Clean bad html content. Currently this simply strips tags that
     are not in the VALID_TAGS setting.
 
     Returns the sanitized html content.
     """
-    valid_tags = getattr(settings, "LIFESTREAM_VALID_TAGS", VALID_TAGS)
-    valid_styles = getattr(settings, "LIFESTREAM_VALID_STYLES", VALID_STYLES)
+    if valid_tags is None:
+        valid_tags = getattr(settings, "LIFESTREAM_VALID_TAGS", VALID_TAGS)
+    if valid_styles is None:
+        valid_styles = getattr(settings, "LIFESTREAM_VALID_STYLES", VALID_STYLES)
+
 
     js_regex = re.compile(r'[\s]*(&#x.{1,7})?'.join(list('javascript')))
     css_regex = re.compile(r' *(%s): *([^;]*);?' % '|'.join(valid_styles), re.IGNORECASE)