# Source
#
# psilib / psiutils.py

#! /usr/bin/env python

'''Shared utilities for psilib'''

# A future statement must come before every other statement in the module
# (only the module docstring and comments may precede it); placed any later
# it is rejected at compile time.
from __future__ import generators

__title__ = 'utils'
__version__ = '0.03-test'
__author__ = 'L. C. Rees'

# NOTE(review): presumably the XML namespace URI of the psilib PSI core
# vocabulary -- confirm against the callers that use it.
psins = u'urn:psilib:PSI:0:4:core'


class Utils:

    '''Shared utilities: archive detection, DOM export and DOM searching.

    The modules the methods need are imported in the class body, so they
    are reached as class attributes (self.zipfile, self.path, self.Node,
    and self.tarfile when that module is available).'''

    import zipfile
    from os import path
    # tarfile may be missing on old interpreters; isarchive() copes with
    # the attribute being absent.
    try:
        import tarfile
    except ImportError:
        pass
    from xml.dom import Node

    def isarchive(self, name):
        '''Detects if file is a ZIP or TAR archive

        Arguments:
        name -- file name

        Returns True when name is an existing file recognised by zipfile
        or (if available) tarfile, otherwise False.'''
        if not self.path.isfile(name):
            return False
        if self.zipfile.is_zipfile(name):
            return True
        # Process tarballs only if tarfile could be imported. A missing
        # module shows up as a missing class attribute, not a NameError.
        tarfile = getattr(self, 'tarfile', None)
        if tarfile is None:
            return False
        return bool(tarfile.is_tarfile(name))

    def toxmlfile(self, ip, op):
        '''Exports DOM tree to file

        ip -- input source
        op -- output path'''
        from Ft.Xml.Domlette import Print
        stream = open(op, 'wb')
        # Close the file even if serialisation fails
        try:
            Print(ip, stream)
        finally:
            stream.close()

    def toprettyxml(self, ip, op):
        '''Pretty prints DOM tree to file

        ip -- input source
        op -- output path'''
        from Ft.Xml.Domlette import PrettyPrint
        stream = open(op, 'wb')
        # Close the file even if serialisation fails
        try:
            PrettyPrint(ip, stream)
        finally:
            stream.close()

    def tostring(self, node):
        '''Returns the string version of a DOM

        node -- node to export to string'''
        from Ft.Xml.Domlette import Print
        from cStringIO import StringIO
        stringbuffer = StringIO()
        Print(node, stringbuffer)
        return stringbuffer.getvalue()

    def _first(self, iterator):
        '''Returns the first item produced by an iterator.

        Raises StopIteration when the iterator is empty, exactly as calling
        the iterator's own next method would, but without depending on
        whether that method is spelled next() or __next__().

        iterator -- iterator to take the first item from'''
        for item in iterator:
            return item
        raise StopIteration

    def _domwalk(self, node, func):
        '''Walks a DOM tree and yields nodes following a pattern.
        Originated by Uche Ogbuji.

        The node itself is tested first, then its descendants in document
        order (depth first).

        node -- node to search
        func -- filter defining which node to find'''
        # Run filter on node
        if func(node):
            yield node
        # Run filter on node's children
        for child in node.childNodes:
            for cn in self._domwalk(child, func):
                yield cn

    def tagsbyAttr(self, node, attr):
        '''Finds elements by attribute; returns a generator of elements

        node -- node to search
        attr -- local attribute name'''
        func = lambda n: self.iselement(n) and self.hasatt(n, attr)
        return self._domwalk(node, func)

    def firstTagByAttr(self, node, attr):
        '''Finds the first element with an attribute

        Raises StopIteration if no element carries the attribute.

        node -- node to search
        attr -- local attribute name'''
        # Must go through self: a bare tagsbyAttr only resolves when a
        # module-level alias of that name happens to exist.
        return self._first(self.tagsbyAttr(node, attr))

    def tagsinList(self, node, taglist):
        '''Finds elements by local tag name in list; returns a generator

        node -- node to search
        taglist -- list of local tag names'''
        func = lambda n: self.iselement(n) and n.localName in taglist
        return self._domwalk(node, func)

    def firstTagByList(self, node, taglist):
        '''Finds first element in a node by local tag name in a list

        Raises StopIteration if no element matches.

        node -- node to search
        taglist -- list of local tag names'''
        return self._first(self.tagsinList(node, taglist))

    def tags(self, node, ns, local):
        '''Finds elements by namespace and local tag name; returns a
        generator of elements

        node -- node to search
        ns -- namespace
        local -- local tag name'''
        func = lambda n: (self.iselement(n) and self.isns(n, ns) and
                          self.isname(n, local))
        return self._domwalk(node, func)

    def firstTag(self, node, ns, local):
        '''Finds the first element with a namespace and local tag name

        Raises StopIteration if no element matches.

        node -- node to search
        ns -- namespace
        local -- local tag name'''
        return self._first(self.tags(node, ns, local))

    def tagsbyName(self, node, ns, local, name):
        '''Returns a list of the elements with a namespace and local tag
        name whose name attribute equals a given value

        node -- node to search
        ns -- namespace
        local -- local tag name
        name -- value the element's name attribute must have'''
        return [i for i in self.tags(node, ns, local)
                if self.getname(i) == name]

    def tagsbyNS(self, ns, node):
        '''Finds all elements in a namespace; returns a generator

        Note the argument order: the namespace comes before the node.

        ns -- namespace
        node -- node to search'''
        func = lambda n: self.iselement(n) and self.isns(n, ns)
        return self._domwalk(node, func)

    def getText(self, node):
        '''Finds text nodes under a node; returns a generator

        node -- node to search'''
        return self._domwalk(node, self.istext)

    def stripws(self, node):
        '''Removes text nodes that contain nothing but whitespace.

        node -- node to search'''
        # Gather the whitespace-only nodes first so the tree is not
        # modified while it is still being walked
        ws = [i for i in self.getText(node) if not i.data.strip()]
        # Delete useless whitespace nodes; a detached node has no parent
        # to be removed from
        for text in ws:
            if text.parentNode is not None:
                text.parentNode.removeChild(text)

    def getname(self, node):
        '''Returns a node's name attribute.

        node -- node to get the name of'''
        return node.getAttributeNS(None, 'name')

    def getclass(self, node):
        '''Returns a node's class attribute.

        node -- node to get the class of'''
        return node.getAttributeNS(None, 'class')

    def attget(self, node, attr):
        '''Returns the value of a node's attribute (no namespace).

        node -- node to read
        attr -- local attribute name'''
        return node.getAttributeNS(None, attr)

    def hasatt(self, node, attr):
        '''Tells whether a node has an attribute (no namespace).

        node -- node to test
        attr -- local attribute name'''
        return node.hasAttributeNS(None, attr)

    def attdel(self, node, attr):
        '''Removes an attribute (no namespace) from a node.

        node -- node to change
        attr -- local attribute name'''
        node.removeAttributeNS(None, attr)

    def attset(self, node, attr, value):
        '''Sets an attribute (no namespace) on a node.

        node -- node to change
        attr -- attribute name
        value -- new attribute value'''
        node.setAttributeNS(None, attr, value)

    def isname(self, node, name):
        '''Tells whether a node's local name equals name.'''
        return node.localName == name

    def isns(self, node, ns):
        '''Tells whether a node's namespace URI equals ns.'''
        return node.namespaceURI == ns

    def iselement(self, node):
        '''Tells whether a node is an element node.'''
        return node.nodeType == self.Node.ELEMENT_NODE

    def istext(self, node):
        '''Tells whether a node is a text node.'''
        return node.nodeType == self.Node.TEXT_NODE

    def iscomment(self, node):
        '''Tells whether a node is a comment node.'''
        return node.nodeType == self.Node.COMMENT_NODE

# Public API: every name listed here must be bound below, otherwise
# "from psiutils import *" fails with AttributeError. The attribute-removal
# helper is exported as attdel (there is no attremove).
__all__ = ['isns', 'attdel', 'attget', 'attset', 'hasatt', 'isname', 'istext',
    'getname', 'tags', 'getText', 'stripws', 'tostring', 'getclass',
    'iselement', 'iscomment', 'isarchive', 'toxmlfile', 'toprettyxml',
    'firstTag', 'tagsbyNS', 'tagsinList', 'tagsbyName',
    'firstTagByList', 'tagsbyAttr', 'firstTagByAttr']

# One shared instance whose bound methods are re-exported as plain
# module-level functions.
_inst = Utils()
tags = _inst.tags
isns = _inst.isns
attdel = _inst.attdel
attget = _inst.attget
attset = _inst.attset
hasatt = _inst.hasatt
isname = _inst.isname
istext = _inst.istext
getname = _inst.getname
getText = _inst.getText
stripws = _inst.stripws
tostring = _inst.tostring
getclass = _inst.getclass
firstTag = _inst.firstTag
tagsbyNS = _inst.tagsbyNS
iselement = _inst.iselement
iscomment = _inst.iscomment
isarchive = _inst.isarchive
toxmlfile = _inst.toxmlfile
tagsinList = _inst.tagsinList
tagsbyName = _inst.tagsbyName
tagsbyAttr = _inst.tagsbyAttr
toprettyxml = _inst.toprettyxml
firstTagByList = _inst.firstTagByList
firstTagByAttr = _inst.firstTagByAttr