Source

django-article / article / signals.py

Full commit
from lxml.html import fromstring, tostring

from django.db.models.signals import pre_save
from django.dispatch import receiver

from article.models import Article

def normalize_html(html):
    """Strip leading empty ``<p>`` elements from an HTML snippet.

    The snippet is parsed with ``lxml.html.fromstring`` and the direct
    children of the parsed root are scanned from the start.  Every ``<p>``
    that has no child elements and no non-whitespace text is removed; the
    scan stops at the first child that is not such an empty paragraph.

    :param html: HTML markup to normalize.  ``None`` and empty or
        whitespace-only values are returned unchanged, because lxml refuses
        to parse an empty document.
    :returns: the serialized tree as produced by
        ``tostring(tree, encoding='utf-8')``, or ``html`` itself when there
        was nothing to parse.
    """
    # NOTE(review): tostring(..., encoding='utf-8') yields a byte string; on
    # Python 3 callers that store the result in a text field may want
    # encoding='unicode' instead -- confirm against the supported versions.
    if html is None or not html.strip():
        return html

    tree = fromstring(html)
    # xpath() returns a list, so removing children while looping is safe.
    for elem in tree.xpath('./*'):
        if elem.tag != 'p':
            break
        # elem.text is None for "<p></p>", so guard before calling strip().
        if len(elem) or (elem.text or "").strip():
            break
        if (elem.tail or "").strip():
            # Real text follows this paragraph.  drop_tree() removes the
            # element but keeps its tail, whereas remove() would silently
            # discard that text.  Whatever comes next is no longer leading.
            elem.drop_tree()
            break
        tree.remove(elem)
    return tostring(tree, encoding='utf-8')


@receiver(pre_save, sender=Article)
def article_pre_save(instance, **kwargs):
    """Clean up an ``Article`` right before it is saved.

    Runs ``normalize_html`` over the ``teaser`` and ``content`` fields (in
    that order) and replaces an empty-string ``alias`` with ``None``.

    :param instance: the ``Article`` being saved; modified in place.
    :param kwargs: remaining signal arguments, unused here.
    """
    for field_name in ("teaser", "content"):
        cleaned = normalize_html(getattr(instance, field_name))
        setattr(instance, field_name, cleaned)

    # Only the exact empty string is converted; other values are kept as-is.
    if instance.alias == "":
        instance.alias = None