Oben Sonne avatar Oben Sonne committed 6e99a0d

Better filter implementations

Use xml.etree instead of REs whenever possible. Highlighting uses a more
sane tag detection regexp.

Comments (0)

Files changed (3)

 
 import markdown
 
-from django.core.exceptions import ObjectDoesNotExist
-
 from diggie.urls import RE_PAGE_NAME
 from django.template.defaultfilters import slugify
 
 RE_IMG_LINK = r'\(\(([\w\.-]+)( .*?)?\)\)'
 
 # expression for page link CSS classes
-RX_PAGE_LINK_CLASS = re.compile(r'class="DIGGIE_PLC(%s)DIGGIE_PLC"' %
-                                RE_PAGE_NAME, re.UNICODE|re.DOTALL)
+RX_PAGE_LINK_CLASS = re.compile(r'^PAGELINK:(%s)$' % RE_PAGE_NAME, re.UNICODE)
 
 class LinkPattern(markdown.inlinepatterns.Pattern):
 
 
         el = markdown.etree.Element('a')
         el.set("href", "/page/%s%s" % (name, anchor))
-        el.set("class", "DIGGIE_PLC%sDIGGIE_PLC" % name)
+        el.set("class", "PAGELINK:%s" % name)
         el.text = alt or name
         return el
 

diggie/templates/delete.html

 <p>
 The safe way is to set the page deprecated.
 Deprecated pages are exlcuded from regular page listings. They only appear in
-the list of <a href="{% url diggie.views.pages label="Deprecated" %}">pages labeled deprecated</a>.
+the list of <a href="{% url diggie.views.pages label='Deprecated' %}">pages labeled deprecated</a>.
 </p>
 {% endif %}
 <form action="{% url diggie.views.delete name=page.tip.name %}" method="POST">

diggie/templatetags/diffies.py

 import re
+import urllib
+from xml.parsers.expat import ExpatError
 
 from django import template
 from django.core.exceptions import ObjectDoesNotExist
 from django.template.defaultfilters import stringfilter, striptags
-from django.template.defaultfilters import force_escape, urlencode
+from django.template.defaultfilters import force_escape
 from django.utils.safestring import mark_safe
 from django.core.urlresolvers import reverse
 
 from diggie.models import Label, Page
+from xml.etree import ElementTree as et
 
 register = template.Library()
 
-# -----------------------------------------------------------------------------
+# =============================================================================
+# utility functions
+# =============================================================================
+
+def vtree_fromstring(html):
+    """Create a virtual ElementTree from a string which may have ...
+
+    ... multiple root elements and plain text at the top level.
+
+    """
+    html = isinstance(html, unicode) and html.encode("UTF8") or html
+    return et.fromstring("<vroot>%s</vroot>" % html)
+
+def vtree_tostring(el):
+    """Convert a virtual ElementTree back to a string.
+
+    Strips any virtual data created by vtree_fromstring().
+
+    """
+    roots = [el.text or ""]
+    for child in el.getchildren():
+        roots.append(et.tostring(child))
+        if child.tail:
+            roots.append(child.tail)
+    return "".join(roots)
+
+# =============================================================================
 # filter to truncate and annotate text
-# -----------------------------------------------------------------------------
+# =============================================================================
 
 @register.filter("truncate")
 @stringfilter
 
     return mark_safe(span)
 
-# -----------------------------------------------------------------------------
+# =============================================================================
 # filter to generate links
-# -----------------------------------------------------------------------------
+# =============================================================================
 
 @register.filter("link")
 def link(obj, arg=None):
         text = trunc and truncraw(_name, trunc) or _name
 
     title = title.replace("~", _name)
-    tmpl='<a href="%s" title="%s" class="%s">%s</a>'
-    link = tmpl % (url, title, css, text)
+    el = et.Element("a")
+    el.set("href", url)
+    el.set("title", title)
+    el.set("class", css)
+    el.text = text
 
-    return mark_safe(link)
+    return mark_safe(et.tostring(el))
+
+# -----------------------------------------------------------------------------
 
 RX_GETPARAMS = re.compile(r'(<a.*?href=".*?)(".*?>)', re.DOTALL)
 
 def getparams(value, params):
     """Append GET params to an URL.
 
-    `value` either may be an URL or HTML code containing an link tag.
-
-    Note: This filter assumes the URL has not yet any GET params attached.
+    `value` either may be an URL or HTML code containing a link tag.
 
     """
-    kvpairs = []
-    for k, v in params.items():
-        kvpairs.append("%s=%s" % (urlencode(k), urlencode(v)))
-    params = kvpairs and "?%s" % "&".join(kvpairs) or ""
-    if RX_GETPARAMS.search(value):
-        value = RX_GETPARAMS.sub(r'\g<1>%s\g<2>' % params, value)
-    else: # plain URL
-        value += params
+    def addparams(url):
+        connector = "" if url.endswith("&") else "&" if "?" in url else "?"
+        return "%s%s%s" % (url, connector, params)
+
+    params = urllib.urlencode(params)
+    try:
+        tree = vtree_fromstring(value)
+        try:
+            link = tree.getiterator(tag="a")[0]
+        except IndexError:
+            raise StandardError("no link to append params to")
+        link.set("href", addparams(link.get("href", "")))
+        value = vtree_tostring(tree)
+    except ExpatError:
+        value = addparams(value)
+
     return mark_safe(value)
 
-# -----------------------------------------------------------------------------
+# =============================================================================
 # filter to manipulate HTML code
-# -----------------------------------------------------------------------------
+# =============================================================================
 
-from diggie.mdx import RX_PAGE_LINK_CLASS
-
-RX_CONTENT = re.compile(r'(>)(.*?)(<)', re.DOTALL)
+RE_TAG = r'''</?\s*\w+(?:(?:\s+\w+(?:\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>'''
+RX_TAG = re.compile(RE_TAG, re.DOTALL)
+RX_CONTENT = re.compile(r'(%s)(.+?)(%s)' % (RE_TAG, RE_TAG), re.DOTALL)
 REPL_HL = r'<span class="highlight">\1</span>'
 
 @register.filter("highlight")
     should work as expected.
 
     """
-    # TODO: maybe we should use this more sane RE:
-    # </?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>
     def textmatch(match):
         prefix, content, suffix = match.groups()
         content = hlrx.sub(REPL_HL, content)
     if not hlrx:
         return html
 
-    if ">" in html: # HTML tags
+    if RX_TAG.search(html): # HTML tags
         html = RX_CONTENT.sub(textmatch, html)
     else:
         html = hlrx.sub(REPL_HL, html)
 
     return mark_safe(html)
 
+# -----------------------------------------------------------------------------
+
+ERROR_TMPL = """
+<p class="error">%s</p>
+%s
+"""
+
+from diggie.mdx import RX_PAGE_LINK_CLASS
+
 @register.filter("pagelinks")
 @stringfilter
 def pagelinks(html, pages):
     Class and title are set according to the status of the linked page.
 
     """
-    def plmatch(match):
-        name, = match.groups()
+    try:
+        print type(html)
+        tree = vtree_fromstring(html)
+    except ExpatError:
+        msg = "The page\'s source contains invalid XML constructs."
+        return mark_safe(ERROR_TMPL % (msg , html))
+
+    for link in tree.getiterator(tag="a"):
+        if not RX_PAGE_LINK_CLASS.match(link.get("class", "")):
+            continue
+        name = link.get("class").split(":", 1)[1]
         try:
             page = pages.get(tip__name=name)
             if page.isdeprecated():
-                css = " deprecated"
-                summary ="deprecated"
+                css = "deprecated"
+                summary = "deprecated"
             else:
                 css = ""
                 summary = force_escape(page.tip.summary)
         except ObjectDoesNotExist:
-            css = " create"
+            css = "create"
             summary = "does not exist"
-        title = summary and "%s (%s)" % (name, summary) or name
-        return 'class="diggie%s" title="%s"' % (css, title)
+        link.set("class", "diggie %s" % css)
+        link.set("title", summary and "%s (%s)" % (name, summary) or name)
 
-    html = RX_PAGE_LINK_CLASS.sub(plmatch, html)
-    return mark_safe(html)
+    return mark_safe(vtree_tostring(tree))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.