Commits

Anonymous committed 57da1cf

added HTML field

Comments (0)

Files changed (3)

form_extensions/fields.py

 from django.forms.fields import Field, CharField, FileField, ImageField
+from django.forms.widgets import Textarea
 from form_extensions.widgets import HoneypotWidget, MultiFileInput
 from django.core.exceptions import ValidationError
 from django.core import validators
+from form_extensions.utils import bleach, replace_elements, remove_empty_paragraphs
 import re
 
 
         raise ValidationError('Anti-spam field changed in value.')
 
 
+class HTMLField(Field):
+    """Textarea-backed form field that sanitises submitted HTML.
+
+    ``clean`` passes the value through ``replace_elements``,
+    ``remove_empty_paragraphs`` and ``bleach`` (in that order) and then hands
+    the result to ``Field.clean`` for the standard field validation.
+
+    ``allowed_tags`` is required. ``allowed_attributes``, ``allowed_styles``
+    and ``element_replacements`` are optional and are passed unchanged to the
+    helpers imported from ``form_extensions.utils``.
+    """
+    widget = Textarea
+
+    def __init__(self, allowed_tags, allowed_attributes=None, allowed_styles=None,
+                 element_replacements=None, *args, **kwargs):
+        super(HTMLField, self).__init__(*args, **kwargs)
+        self.allowed_tags = allowed_tags
+        # Build a fresh container per instance: a ``{}`` default in the
+        # signature would be one object shared by every field relying on it.
+        self.allowed_attributes = {} if allowed_attributes is None else allowed_attributes
+        self.allowed_styles = {} if allowed_styles is None else allowed_styles
+        self.element_replacements = {} if element_replacements is None else element_replacements
+
+    def clean(self, value):
+        """Return the sanitised HTML after running the base ``Field`` validation."""
+        # Nothing to sanitise: let Field.clean() apply its own handling of
+        # empty input instead of handing None/'' to the HTML parser.
+        if value in validators.EMPTY_VALUES:
+            return super(HTMLField, self).clean(value)
+        # The helpers are imported by name at the top of this module; no
+        # ``utils`` name is bound here.
+        value = replace_elements(value, self.element_replacements)
+        value = remove_empty_paragraphs(value)
+        value = bleach(value, self.allowed_tags, self.allowed_attributes, self.allowed_styles)
+        return super(HTMLField, self).clean(value)
+
+
 class USCurrencyField(CharField):
     def clean(self, value):
         if value in validators.EMPTY_VALUES:

form_extensions/utils.py

+from lxml import html, etree
+import cgi
+import re
+
+
+# Elements that never have a closing tag. bleach() uses this to decide whether
+# a disallowed element is escaped as "<tag />" or as an open/close pair.
+# 'img' and the newer void elements are included; 'image' is kept for
+# backward compatibility.
+SELF_CLOSING_TAGS = (
+    'area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr', 'image',
+    'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
+)
+
+
+def bleach(text, allowed_tags, allowed_attributes={}, allowed_styles={}, strip=False):
+    """Sanitise an HTML fragment against whitelists of tags, attributes and styles.
+
+    ``text`` is parsed as an HTML fragment. Elements whose tag is in
+    ``allowed_tags`` are kept, minus any attribute or inline style property
+    that is not whitelisted. Every other element is either removed together
+    with its content (``strip=True``) or replaced by its markup as plain text
+    (``strip=False``). Nodes without a string tag (comments and the like) are
+    always removed.
+
+    ``allowed_attributes`` and ``allowed_styles`` are either a dict mapping a
+    tag name (or ``'*'`` for every tag) to a sequence of names, or a plain
+    sequence of names applied to every tag. A name containing ``'*'`` is a
+    wildcard in which ``'*'`` matches any run of characters.
+
+    Returns the sanitised fragment as serialised by ``_tostring``.
+    """
+    def wildcard_match(patterns, string):
+        # Only entries containing '*' are wildcards. Everything around the
+        # '*' is matched literally and the pattern must cover the whole
+        # string, so regex metacharacters in a pattern cannot widen the match.
+        for pattern in patterns:
+            if '*' not in pattern:
+                continue
+            regex = '.*'.join(re.escape(part) for part in pattern.split('*'))
+            if re.match(regex + r'\Z', string):
+                return True
+        return False
+
+    def allowed_for(rules, tag):
+        # Dict rules combine the '*' entry with the tag's own entry; any
+        # other value is used as-is for every tag.
+        if isinstance(rules, dict):
+            return list(rules.get('*', [])) + list(rules.get(tag, []))
+        return rules
+
+    def is_allowed(name, names):
+        return name in names or wildcard_match(names, name)
+
+    def clean_style(style, tag):
+        # Returns the whitelisted declarations of an inline style, or u''.
+        # NOTE(review): property *values* are not inspected here, only
+        # property names are checked against the whitelist.
+        props = allowed_for(allowed_styles, tag)
+        kept = {}
+        order = []
+        for declaration in style.split(';'):
+            if ':' not in declaration:
+                continue
+            # Split on the first colon only so values that contain one, such
+            # as "url(http://example.com/a.png)", stay in one piece.
+            name, value = [part.strip().lower() for part in declaration.split(':', 1)]
+            if is_allowed(name, props):
+                if name not in kept:
+                    order.append(name)
+                # A repeated property keeps its first position, last value.
+                kept[name] = value
+        return u' '.join(u'%s: %s;' % (name, kept[name]) for name in order)
+
+    doc = html.fragment_fromstring(text, create_parent=True)
+    # Walk a snapshot of the tree: drop_tree()/drop_tag() below restructure
+    # it, and a snapshot guarantees every original element is still visited.
+    for element in list(doc.iter()):
+        if not isinstance(element.tag, basestring):
+            element.drop_tree()
+            continue
+        tag = element.tag.lower()
+        if tag in allowed_tags:
+            names = allowed_for(allowed_attributes, tag)
+            for attrib in element.attrib.keys():
+                lowered = attrib.lower()
+                if not is_allowed(lowered, names):
+                    del element.attrib[attrib]
+                elif lowered == 'style':
+                    style = clean_style(element.attrib[attrib], tag)
+                    if style:
+                        element.attrib[attrib] = style
+                    else:
+                        del element.attrib[attrib]
+        elif element.getparent() is not None:
+            # The synthetic root created by create_parent=True has no parent
+            # and is left alone; _tostring() never emits it anyway.
+            if strip:
+                element.drop_tree()
+            else:
+                # Re-emit the element's own markup as text around its content,
+                # then remove the tag so children move up to the parent.
+                attribs = "".join(u' %s="%s"' % (attrib, value) for attrib, value in element.attrib.items())
+                text = element.text.strip() if element.text else ''
+                tail = element.tail.strip() if element.tail else ''
+                text_dict = {'tag': tag, 'attribs': attribs, 'text': text, 'tail': tail}
+                if tag in SELF_CLOSING_TAGS:
+                    element.text = u'<%(tag)s%(attribs)s />' % text_dict
+                else:
+                    element.text = u'<%(tag)s%(attribs)s>%(text)s' % text_dict
+                    element.tail = u'</%(tag)s>%(tail)s' % text_dict
+                element.drop_tag()
+    return _tostring(doc)
+
+
+def replace_elements(text, element_replacements={}):
+    """Rename elements in an HTML fragment.
+
+    ``element_replacements`` maps a CSS selector to the tag name given to
+    every element matching it, e.g. ``{'b': 'strong'}``. With an empty mapping
+    ``text`` is returned untouched; otherwise the fragment is parsed, the
+    matching elements are renamed and the result is serialised with
+    ``_tostring``.
+    """
+    if not element_replacements:
+        return text
+    doc = html.fragment_fromstring(text, create_parent=True)
+    # Resolve every selector before renaming anything, so one rename cannot
+    # make an element match another selector (e.g. swapping 'b' and 'i').
+    # Taking the new tag from the selector that matched, rather than looking
+    # it up by element.tag, lets keys be selectors other than bare tag names.
+    pending = []
+    for selector, new_tag in element_replacements.items():
+        for element in doc.cssselect(selector):
+            pending.append((element, new_tag))
+    for element, new_tag in pending:
+        element.tag = new_tag
+    return _tostring(doc)
+
+
+def remove_empty_paragraphs(text):
+    """Drop every ``<p>`` whose text content is empty or whitespace only.
+
+    Emptiness is judged on ``text_content()`` alone, so a paragraph that
+    holds only non-text children also counts as empty. The remaining
+    fragment is serialised with ``_tostring``.
+    """
+    root = html.fragment_fromstring(text, create_parent=True)
+    blank = [paragraph for paragraph in root.cssselect('p')
+             if not paragraph.text_content().strip()]
+    for paragraph in blank:
+        paragraph.drop_tree()
+    return _tostring(root)
+
+
+def _tostring(doc):
+    """Serialise the contents of ``doc``, leaving out the element itself.
+
+    The result is the element's leading text followed by each child
+    serialised with ``etree.tounicode``, after which carriage returns, tabs
+    and the literal tokens ``&#13;``, ``&nbsp;`` and ``&#160;`` are removed.
+    """
+    markup = doc.text or u''
+    children = [etree.tounicode(child) for child in doc.getchildren()]
+    # Concatenate only when there is something to add, so a childless
+    # element yields its leading text exactly as stored.
+    if children:
+        markup += u''.join(children)
+    for token in ('\r', '\t', '&#13;', '&nbsp;', '&#160;'):
+        markup = markup.replace(token, '')
+    return markup
 Django>=1.1
+lxml>=2.0