pypy / py / _xmlgen.py

"""
module for generating and serializing xml and html structures
by using simple python objects.

(c) holger krekel, holger at merlinux eu. 2009
"""
import py
import sys, re

if sys.version_info >= (3,0):
    def u(s):
        return s
    def unicode(x):
        if hasattr(x, '__unicode__'):
            return x.__unicode__()
        return str(x)
else:
    def u(s):
        return unicode(s)
    unicode = unicode


class NamespaceMetaclass(type):
    def __getattr__(self, name):
        if name[:1] == '_':
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        tagspec = self.__tagspec__
        if tagspec is not None and name not in tagspec:
            raise AttributeError(name)
        classattr = {}
        if self.__stickyname__:
            classattr['xmlname'] = name
        cls = type(name, (self.__tagclass__,), classattr)
        setattr(self, name, cls)
        return cls

class Tag(list):
    class Attr(object):
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

    def __init__(self, *args, **kwargs):
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        return self.unicode(indent=0)
    __str__ = __unicode__

    def unicode(self, indent=2):
        l = []
        SimpleUnicodeVisitor(l.append, indent).visit(self)
        return u("").join(l)

    def __repr__(self):
        name = self.__class__.__name__
        return "<%r tag object %d>" % (name, id(self))

Namespace = NamespaceMetaclass('Namespace', (object, ), {
    '__tagspec__': None,
    '__tagclass__': Tag,
    '__stickyname__': False,
})

class HtmlTag(Tag):
    def unicode(self, indent=2):
        l = []
        HtmlVisitor(l.append, indent, shortempty=False).visit(self)
        return u("").join(l)

# exported plain html namespace
class html(Namespace):
    __tagclass__ = HtmlTag
    __stickyname__ = True
    __tagspec__ = dict([(x,1) for x in (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',') if x])

    class Style(object):
        def __init__(self, **kw):
            for x, y in kw.items():
                x = x.replace('_', '-')
                setattr(self, x, y)


class raw(object):
    """just a box that can contain a unicode string that will be
    included directly in the output"""
    def __init__(self, uniobj):
        self.uniobj = uniobj

class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode. """
    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        self.write = write
        self.cache = {}
        self.visited = {} # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []
        self.shortempty = shortempty  # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name. """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.__object
            self.cache[cls] = visitmethod
        visitmethod(node)

    # the default fallback handler is marked private
    # to avoid clashes with the tag name object
    def __object(self, obj):
        #self.write(obj)
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        self.write(obj.uniobj)

    def list(self, obj):
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        for elem in obj:
            self.visit(elem)

    def Tag(self, tag):
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            nameattr = tagname+self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        # serialize attributes
        attrlist = dir(tag.attr)
        attrlist.sort()
        l = []
        for name in attrlist:
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                l.append(res)
        l.extend(self.getstyle(tag))
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            if isinstance(value, raw):
                insert = value.uniobj
            else:
                insert = escape(unicode(value))
            return ' %s="%s"' % (name, insert)

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            return []
        else:
            stylelist = [x+': ' + y for x,y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False

class HtmlVisitor(SimpleUnicodeVisitor):

    single = dict([(x, 1) for x in
                ('br,img,area,param,col,hr,meta,link,base,'
                    'input,frame').split(',')])
    inline = dict([(x, 1) for x in
                ('a abbr acronym b basefont bdo big br cite code dfn em font '
                 'i img input kbd label q s samp select small span strike '
                 'strong sub sup textarea tt u var'.split(' '))])

    def repr_attribute(self, attrs, name):
        if name == 'class_':
            value = getattr(attrs, name)
            if value is None:
                return
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        return tagname in self.single

    def _isinline(self, tagname):
        return tagname in self.inline


class _escape:
    def __init__(self):
        self.escape = {
            u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'),
            u('&') : u('&amp;'), u("'") : u('&apos;'),
            }
        self.charef_rex = re.compile(u("|").join(self.escape.keys()))

    def _replacer(self, match):
        return self.escape[match.group(0)]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        ustring = unicode(ustring)
        return self.charef_rex.sub(self._replacer, ustring)

escape = _escape()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.