Source

osa / osa / xmlschema.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
# xmlschema.py - XMLSchemaParser class, part of osa.
# Copyright 2013 Sergey Bozhenkov, boz at ipp.mpg.de
# Licensed under GPLv3 or later, see the COPYING file.

"""
    Conversion of XML Schema types into Python classes.
"""
import xmlnamespace
import xmltypes
import xmlparser
import xml.etree.cElementTree as etree

class XMLSchemaParser(object):
    """
        Parser to get types from an XML Schema.
    """
    def __init__(self, root, _ancestors=None):
        """
            Initialize parser.

            self.schema - the root node of the schema
            self.tns - target namespace
            self.imported - a list of parsers for imported schemas

            Parameters
            ----------
            root : xml element, i.e. <schema ...> ... </schema>
                The root node of the schema to parse.
            _ancestors : frozenset of str, optional
                Internal use only. Schema locations that are being loaded
                further up the current import chain. A location found in
                this set is not loaded again, which breaks circular
                imports/includes.

            Raises
            ------
            ValueError
                If root is not an XML Schema <schema> node.
        """
        #check we have a schema
        if root.tag != "{%s}schema" %xmlnamespace.NS_XSD:
            raise ValueError("Supplied root node '%s'"\
                             " is not of XML Schema type." %root.tag)

        if _ancestors is None:
            _ancestors = frozenset()

        #set schema parameters
        self.schema = root
        self.tns = self.schema.get("targetNamespace", "")

        #namespaces that are known by default and are never downloaded
        standard = (xmlnamespace.NS_SOAP, xmlnamespace.NS_SOAP12,
                    xmlnamespace.NS_SOAP_ENC, xmlnamespace.NS_SOAP_ENV,
                    xmlnamespace.NS_WSDL, xmlnamespace.NS_XSD,
                    xmlnamespace.NS_XSI)

        #find and initialize imported ones
        self.imported = []
        imports = self.schema.findall('.//{%s}import' %xmlnamespace.NS_XSD)
        imports.extend(self.schema.findall('.//{%s}include' %xmlnamespace.NS_XSD))
        for schema in imports:
            loc = schema.get("schemaLocation", None)
            #basically says that types from that namespace will be used, no real
            #import, i.e. the real schema was defined already
            if loc is None:
                continue
            #standard namespace we know by default
            if loc in standard:
                continue
            #schemas are allowed to import each other; loading a location
            #that is already being loaded up the chain would never terminate
            if loc in _ancestors:
                continue
            #try getting the schema
            parser = XMLSchemaParser(xmlparser.parse_qualified_from_url(loc),
                                     _ancestors | frozenset([loc]))
            #check if want to change the schema namespace
            ns = schema.get("namespace", None)
            if ns is not None:
                parser.tns = ns
            self.imported.append(parser)

    def generate_classes(self):
        """
            Generate Python classes from this schema.

            Returns
            -------
            out : dictionary
                Dictionary of types {ns}name -> Python class
        """
        xlist = self.get_list_of_defined_types()
        types = XMLSchemaParser.convert_xmltypes_to_python(xlist)
        return types

    def get_list_of_defined_types(self):
        """
            Collect the xml nodes that define types: type name -> xml node

            Top level complexType, simpleType and element nodes that carry
            a "name" attribute are considered. The keys are built as
            {target namespace}name. The result of every imported parser
            is merged in at the end.

            Returns
            -------
            out : dict
                A dictionary of defined types.
        """
        xsd = xmlnamespace.NS_XSD
        found = {}

        #named type definitions: all complexType nodes, then all simpleType
        #nodes; unnamed nodes are skipped
        for tag in ("complexType", "simpleType"):
            for node in self.schema.findall('./{%s}%s' %(xsd, tag)):
                local = node.get("name", None)
                if local is not None:
                    found["{%s}%s" %(self.tns, local)] = node

        #<element> entries are usually used in the message section of a
        #wsdl. Such an entry either defines its type in place or is an
        #alias to a defined type. An element name can coincide with a type
        #name, so an element is registered only if its name is still free.
        for node in self.schema.findall('./{%s}element' %xsd):
            local = node.get("name", None)
            if local is None:
                continue
            key = "{%s}%s" %(self.tns, local)
            if key not in found:
                found[key] = node

        #merge in everything the imported schemas define
        for child in self.imported:
            found.update(child.get_list_of_defined_types())

        return found

    @staticmethod
    def convert_xmltypes_to_python(xtypes):
        """
            Convert xml types definitions in the dictionary
            into Python classes.
            
            Parameters
            ----------
            xtypes : dictionary name -> xml element
                A dictionary as returned by get_list_of_defined_types.

            Returns
            -------
            out : dictionary name -> Python class
        """
        types = {} 
        for k in xtypes.keys():
            #if the class was already created as a parent of another class, do nothing
            if types.has_key(k):
                continue
            x = xtypes[k]
            XMLSchemaParser.create_type(k, x, xtypes, types)
        return types

    @staticmethod
    def create_type(name, element, xtypes, types):
        """
            Creates proper type for the element.

            The created type is appended to the types. What is created
            depends on the node:

            element with in place complexType/simpleType - handled as
                that complexType/simpleType
            element with type = xx and no in place definition - alias to xx
            element without type and in place definition - empty class
            complexType - complex class
            simpleType.restriction of string with enumeration children -
                string enumeration
            simpleType.restriction with any other base - alias to the base

            Any other node is silently ignored and nothing is appended.

            Parameters
            ----------
            name : str
                Class name
            element : xml element
                Class node.
            xtypes : dictionary class name -> xml node
            types : dictionary class name -> Python class
                The result is appended here.
        """
        #I need this as a separate function to be able to call
        #it recursively for parents and children
        xsd = xmlnamespace.NS_XSD
        complex_tag = "{%s}complexType" %xsd
        simple_tag = "{%s}simpleType" %xsd

        if element.tag == "{%s}element" %xsd:
            #the in place definition is looked up by tag and not taken as
            #the first child, since e.g. <annotation> may precede it
            inplace = element.find("./" + complex_tag)
            if inplace is None:
                inplace = element.find("./" + simple_tag)
            if inplace is not None:
                element = inplace
            else:
                alias_type = element.get("type", None)
                if alias_type is not None:
                    #alias
                    XMLSchemaParser.create_alias(name, alias_type, xtypes, types)
                else:
                    #empty class
                    XMLSchemaParser.create_empty_class(name, types)
                return

        if element.tag == complex_tag:
            #complex class
            XMLSchemaParser.create_complex_class(name, element, xtypes, types)
        elif element.tag == simple_tag:
            #same reasoning as above: find the restriction by tag, it
            #may be missing or preceded by <annotation>
            restriction = element.find("./{%s}restriction" %xsd)
            if restriction is None:
                return
            base_type = restriction.get("base", None)
            if base_type == "{%s}string" %xsd \
                    and restriction.find("./{%s}enumeration" %xsd) is not None:
                XMLSchemaParser.create_string_enumeration(name, element, types)
            elif base_type is not None:
                XMLSchemaParser.create_alias(name, base_type, xtypes, types)

    @staticmethod
    def get_doc(x):
        """
            Get the documentation text of an element.

            The first <documentation> node below x is used. A node in the
            XML Schema namespace is looked for first, then one without
            a namespace.

            Parameters
            -----------
            x : xml element

            Returns
            -------
            out : str
                Text of the found <documentation> node, or the string
                "no documentation" if there is no such node.
        """
        candidates = ('.//{%s}documentation' %xmlnamespace.NS_XSD,
                      './/documentation')
        for path in candidates:
            node = x.find(path)
            if node is not None:
                return node.text
        return "no documentation"

    @staticmethod
    def create_alias(name, alias_type, xtypes, types):
        """
            Create a copy of known class with proper namespace.

            Parameters
            ----------
            name : str
                Name of the new class.
            alias_type : str
                The target alias
            xtypes : dictionary class name -> xml node
            types : dictionary of classes
                The new aliases is appended here.
        """
        alias = None
        if alias_type is None:
            return
        elif xmltypes.primmap.has_key(alias_type):
            alias = xmltypes.primmap[alias_type]
        elif types.has_key(alias_type):
            alias = types[alias_type]
        elif not(xtypes.has_key(alias_type)):
            raise ValueError(" Alias class %s not found in schema" %(alias_type))
        else:
            XMLSchemaParser.create_type(alias_type,
                            xtypes[alias_type], xtypes, types)
            alias = types[alias_type]
        cls_name = xmlnamespace.get_local_name(name)
        cls_ns = xmlnamespace.get_ns(name)
        #create new type since the namespace may be different
        cls = type(cls_name, (alias,), {"__doc__":"no documentation",
                                  "_namespace":cls_ns})
        types[name] = cls

    @staticmethod
    def create_empty_class(name, types):
        """
            Register a complex class that has no children and no parents.

            Parameters
            ----------
            name : str
                Name of the new class.
            types : dictionary of classes
                The new class is appended here.
        """
        local_name = xmlnamespace.get_local_name(name)
        namespace = xmlnamespace.get_ns(name)
        attributes = {"_children":[], "__doc__":"no documentation",
                      "_namespace":namespace}
        types[name] = xmltypes.ComplexTypeMeta(local_name, [], attributes)
    

    @staticmethod
    def create_string_enumeration(name, element, types):
        """
            Derive a class from XMLStringEnumeration whose allowed
            values are taken from the <enumeration> nodes below element.

            The created class is attached to types.

            Parameters
            ----------
            name : str
                Name of the new class.
            element : `etree.Element`
                XML description of the enumeration
            types : dictionary of classes
        """
        #a node without "value" attribute contributes None
        allowed = [node.get("value", None) for node in
                   element.findall('.//{%s}enumeration' %xmlnamespace.NS_XSD)]
        documentation = XMLSchemaParser.get_doc(element)
        #the class gets the short name, i.e. without the namespace;
        #the namespace is kept in the _namespace attribute
        local_name = xmlnamespace.get_local_name(name)
        namespace = xmlnamespace.get_ns(name)
        attributes = {"_allowedValues":allowed, "__doc__":documentation,
                      "_namespace":namespace}
        types[name] = type(local_name, (xmltypes.XMLStringEnumeration,),
                           attributes)
        
    @staticmethod
    def create_complex_class(name, element, xtypes, types):
        """
            Build a complex class from a complexType node.

            Parents are taken from complexContent/extension nodes.
            Children are taken from the first found sequence, all or
            choice node (searched in this order). Parents and children
            types that are not available yet are created on the way.

            Parameters
            ----------
            name : str
                Class name
            element : xml element
                Class node.
            xtypes : dictionary class name -> xml node
            types : dictionary class name -> Python class
                The result is appended here.

            Raises
            ------
            ValueError
                If a parent or a child type is neither known nor
                defined in xtypes.
        """
        xsd = xmlnamespace.NS_XSD

        #parents have to exist before the class itself can be created
        bases = []
        extension_path = "./{%s}complexContent/{%s}extension" %(xsd, xsd)
        for extension in element.findall(extension_path):
            base_name = extension.get("base", None)
            if base_name is None:
                continue
            if base_name not in types:
                if base_name not in xtypes:
                    raise ValueError(" Parent class not found in schema for:\n %s" %(etree.tostring(element)))
                XMLSchemaParser.create_type(base_name,
                                xtypes[base_name], xtypes, types)
            bases.append(types[base_name])

        #locate the node that lists the children; the search is deep,
        #since for extensions the node sits at
        #complexType->complexContent->extension->sequence
        container = None
        for kind in ("sequence", "all", "choice"):
            container = element.find(".//{%s}%s" %(xsd, kind))
            if container is not None:
                break

        members = []
        if container is not None:
            for item in container:
                #a "ref" points to another element and gives both the
                #type and the name of the child
                ref = item.get("ref", None)
                if ref is None:
                    member_type = item.get('type', None)
                    member_name = item.get('name', 'unknown')
                else:
                    member_type = ref
                    member_name = xmlnamespace.get_local_name(ref)

                if member_type is None:
                    #no type given: use an in place definition, complexType
                    #is preferred over simpleType; skip the item if neither
                    #is present
                    inplace = item.find("./{%s}complexType" %(xsd))
                    if inplace is None:
                        inplace = item.find("./{%s}simpleType" %(xsd))
                    if inplace is None:
                        continue
                    XMLSchemaParser.create_type(member_name, inplace, xtypes, types)
                    member_type = member_name

                #resolve the type name into a class, creating it if needed
                if member_type in xmltypes.primmap:
                    member_class = xmltypes.primmap[member_type]
                else:
                    if member_type not in types:
                        if member_type not in xtypes:
                            raise ValueError("Type %s not found for:\n %s" %(member_type,etree.tostring(element)))
                        XMLSchemaParser.create_type(member_type,
                                        xtypes[member_type], xtypes, types)
                    member_class = types[member_type]

                lower = int(item.get('minOccurs', 1))
                #'unbounded' is kept as a string, anything else is a number
                upper = item.get('maxOccurs', 1)
                if upper != 'unbounded':
                    upper = int(upper)

                members.append({"name":member_name,
                                'type':member_class,
                                'min':lower,
                                'max':upper})

        documentation = XMLSchemaParser.get_doc(element)

        #the class gets the short name, i.e. without the namespace;
        #the namespace is kept in the _namespace attribute
        local_name = xmlnamespace.get_local_name(name)
        namespace = xmlnamespace.get_ns(name)
        attributes = {"_children":members, "__doc__":documentation,
                      "_namespace":namespace}
        types[name] = xmltypes.ComplexTypeMeta(local_name, bases, attributes)