another-pypy / py /

module for generating and serializing xml and html structures
by using simple python objects.

(c) holger krekel, holger at merlinux eu. 2009
import py
import sys, re

if sys.version_info >= (3,0):
    def u(s):
        return s
    def unicode(x):
        if hasattr(x, '__unicode__'):
            return x.__unicode__()
        return str(x)
    def u(s):
        return unicode(s)
    unicode = unicode

class NamespaceMetaclass(type):
    def __getattr__(self, name):
        if name[:1] == '_':
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        tagspec = self.__tagspec__
        if tagspec is not None and name not in tagspec:
            raise AttributeError(name)
        classattr = {}
        if self.__stickyname__:
            classattr['xmlname'] = name
        cls = type(name, (self.__tagclass__,), classattr)
        setattr(self, name, cls)
        return cls

class Tag(list):
    class Attr(object):
        def __init__(self, **kwargs):

    def __init__(self, *args, **kwargs):
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        return self.unicode(indent=0)
    __str__ = __unicode__

    def unicode(self, indent=2):
        l = []
        SimpleUnicodeVisitor(l.append, indent).visit(self)
        return "".join(l)

    def __repr__(self):
        name = self.__class__.__name__
        return "<%r tag object %d>" % (name, id(self))

Namespace = NamespaceMetaclass('Namespace', (object, ), {
    '__tagspec__': None,
    '__tagclass__': Tag,
    '__stickyname__': False,

class HtmlTag(Tag):
    def unicode(self, indent=2):
        l = []
        HtmlVisitor(l.append, indent, shortempty=False).visit(self)
        return u("").join(l)

# exported plain html namespace
class html(Namespace):
    __tagclass__ = HtmlTag
    __stickyname__ = True
    __tagspec__ = dict([(x,1) for x in (
    ).split(',') if x])

    class Style(object):
        def __init__(self, **kw):
            for x, y in kw.items():
                x = x.replace('_', '-')
                setattr(self, x, y)

class raw(object):
    """just a box that can contain a unicode string that will be
    included directly in the output"""
    def __init__(self, uniobj):
        self.uniobj = uniobj

class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode. """
    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        self.write = write
        self.cache = {}
        self.visited = {} # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []
        self.shortempty = shortempty  # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name. """
        cls = node.__class__
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                visitmethod = self.object
            self.cache[cls] = visitmethod

    def object(self, obj):

    def raw(self, obj):

    def list(self, obj):
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        map(self.visit, obj)

    def Tag(self, tag):
        assert id(tag) not in self.visited
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            for x in tag:
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
            nameattr = tagname+self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        # serialize attributes
        attrlist = dir(tag.attr)
        l = []
        for name in attrlist:
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            return ' %s="%s"' % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
            styledict =
        except AttributeError:
            return []
            stylelist = [x+': ' + y for x,y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False

class HtmlVisitor(SimpleUnicodeVisitor):

    single = dict([(x, 1) for x in
    inline = dict([(x, 1) for x in
                ('a abbr acronym b basefont bdo big br cite code dfn em font '
                 'i img input kbd label q s samp select small span strike '
                 'strong sub sup textarea tt u var'.split(' '))])

    def repr_attribute(self, attrs, name):
        if name == 'class_':
            value = getattr(attrs, name)
            if value is None:
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        return tagname in self.single

    def _isinline(self, tagname):
        return tagname in self.inline

class _escape:
    def __init__(self):
        self.escape = {
            u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'),
            u('&') : u('&amp;'), u("'") : u('&apos;'),
        self.charef_rex = re.compile(u("|").join(self.escape.keys()))

    def _replacer(self, match):
        return self.escape[]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        ustring = unicode(ustring)
        return self.charef_rex.sub(self._replacer, ustring)

escape = _escape()