# django-syncr / syncr / brightkite / xml2dict.py

# Thunder Chen <nkchenz@gmail.com> 2007.9.1

# Prefer the ElementTree bundled with the standard library; fall back to the
# standalone cElementTree package on interpreters that do not ship it.
try:
    import xml.etree.ElementTree as ET
except ImportError:
    import cElementTree as ET  # for 2.4

from object_dict import object_dict 
import re

class XML2Dict(object):
    """Convert an XML document into nested ``object_dict`` structures.

    Every element is turned into an ``object_dict`` that receives:

    * ``value`` -- the element text, when it is non-empty;
    * one entry per attribute, each wrapped as ``object_dict({'value': ...})``;
    * one entry per distinct child tag.  A tag that occurs once maps to the
      parsed child; a tag that occurs several times maps to a list of the
      parsed children in document order.

    Names written as ``{namespace}local`` are stored under ``local`` and the
    namespace is recorded on the stored node's ``namespace`` attribute.

    NOTE(review): ``object_dict`` comes from the project-local ``object_dict``
    module; it is presumably a dict with attribute-style access -- confirm.
    """

    # Matches '{namespace}name'; compiled once instead of on every call.
    _NAMESPACE_RE = re.compile(r"\{(.*)\}(.*)")

    def __init__(self):
        """Create a converter; no instance state is needed."""
        pass

    def _parse_node(self, node):
        """Recursively convert the ElementTree element ``node``.

        Returns the ``object_dict`` built from the element's text, attributes
        and children.
        """
        node_tree = object_dict()
        # Save attrs and text, hope there will not be a child with same name
        if node.text:
            node_tree.value = node.text
        for name, raw in node.attrib.items():
            name, wrapped = self._namespace_split(
                name, object_dict({'value': raw}))
            node_tree[name] = wrapped
        # Save children.  Iterating the element directly replaces
        # Element.getchildren(), which newer ElementTree versions removed.
        for child in node:
            tag, tree = self._namespace_split(
                child.tag, self._parse_node(child))
            if tag not in node_tree:
                # The first time, so store it as-is and move on; falling
                # through here would wrap it in a list and append it again.
                node_tree[tag] = tree
                continue
            existing = node_tree[tag]
            if isinstance(existing, list):
                existing.append(tree)  # add the new one
            else:
                # Seen a second time, so change the old node to a list.
                node_tree[tag] = [existing, tree]

        return node_tree

    def _namespace_split(self, tag, value):
        """Split a namespaced name and record the namespace on ``value``.

        For the tag ``'{http://cs.sfsu.edu/csc867/myscheduler}patients'``::

            ns = http://cs.sfsu.edu/csc867/myscheduler
            name = patients

        Returns the tuple ``(name, value)``.  When ``tag`` carries a
        ``{...}`` part, ``value.namespace`` is set to it; otherwise ``tag``
        and ``value`` are returned unchanged.
        """
        result = self._NAMESPACE_RE.search(tag)
        if result:
            value.namespace, tag = result.groups()
        return (tag, value)

    def parse(self, file):
        """Parse the XML file at path ``file`` to a dict.

        Returns the same structure as ``fromstring``.
        """
        f = open(file, 'r')
        # try/finally (rather than ``with``) closes the handle even on a
        # parse error and stays valid on the old interpreters this module
        # still targets.
        try:
            return self.fromstring(f.read())
        finally:
            f.close()

    def fromstring(self, s):
        """Parse the XML document contained in the string ``s``.

        Returns an ``object_dict`` with a single entry that maps the root
        element's name (namespace removed) to the parsed root node.
        """
        t = ET.fromstring(s)
        root_tag, root_tree = self._namespace_split(t.tag, self._parse_node(t))
        return object_dict({root_tag: root_tree})

if __name__ == '__main__':
    # Demo: parse a small document and read it back via attribute access.
    # The fragments are wrapped in a single <result> root because an XML
    # document has exactly one root element and the lookups below go
    # through r.result.
    s = """<?xml version="1.0" encoding="utf-8" ?>
    <result>
        <count n="1">10</count>
        <data><id>503938</id><name>hello, world</name></data>
    </result>"""

    xml = XML2Dict()
    r = xml.fromstring(s)
    from pprint import pprint
    print(r.result.count.value)
    print(r.result.count.n)

    # r.result.data may be a single node or a list of nodes depending on how
    # many <data> elements were parsed, so normalise before iterating.
    records = r.result.data
    if not isinstance(records, list):
        records = [records]
    for data in records:
        print("%s %s" % (data.id, data.name))

    # Test file parsing