# 1. Kirill Simonov
#   2. pyyaml-legacy
#
# Source
#
# pyyaml-legacy / yaml / tests / testValidatingParser.py

import YamlTest
from here import flushLeft
from test import assertEquals, assertError
from testPullParser import mockParser, Loader
from yaml import load

"""
Part of the parse/pull experimental code.  This does a schema-driven
validating parse of YAML documents, but it's built on top of a 
crufty interim solution.

The schema-driven parser requires a pull parser interface.  Ideally 
a pull-parser would be pulling nodes from a YAML document on an
as-needed basis, and this is the eventual goal.  But, I don't have
a pull parser yet, so I simulate one by reading in the entire YAML
document into a Python data structure, then I do a push-based dump 
of the data structure to a mock emitter that stores up a list of
parser events that a mock parser then serves up to the schema-driven
parser on an as-needed basis.  Sounds complex, but there's really not
much code involved.

Nothing fancy is supported yet--just lists, dictionaries, and scalars; 
no aliases, class transformations, multiple docs, etc.  Also, we lose
the sort order on map keys, so you will notice that all the examples
have alphabetically sorted keys.

Also, once we go to a true pull parser, we could have more metadata,
such as line numbers for nodes, attached comments, etc., that can 
help with error reporting and round-tripping issues.
"""

# Table of test cases, written as a YAML sequence.  Each entry is a map with:
#   data   - the YAML document to load (as a literal block string)
#   schema - the schema to validate it against
#   error  - (optional) the error message the load is expected to fail with
# Every top-level "-" must sit at column 0 so that all entries belong to the
# same block sequence.
testCases = """
-
    data: |
        --- foo
    schema:
        type: scalar
-
    data: |
        --- foo
    schema:
        type: seq
    error: |
        Wanted seq, got scalar
-
    data: &list123 |
        ---
        - 1
        - 2
        - 3
    schema:
        type: seq
        child:
            type: scalar
-
    data: *list123
    schema:
        type: seq
        max: 2
        child:
            type: scalar
    error: |
        Seq has max 2 elements
-
    data: |
        ---
        city: New Orleans
        state: LA
        street: Bourbon
    schema: &StreetCityState
        type: map
        items:
            - name: city
              value:
                  type: scalar
            - name: state
              value:
                  type: scalar
            - name: street
              value:
                  type: scalar
-
    data: |
        ---
        city: New Orleans
        state: LA
        where ya got ya shoes: on ya feet, on Bourbon St.
    schema: *StreetCityState
    error: |
        Expected key 'street', got 'where ya got ya shoes'
-
    data: |
        ---
        banana: yellow
        carrot: orange
        people:
            - fname: al
              salary: 44
            - fname: bob
              salary: 33
    schema:
        type: map
        items:
            - name: banana
              value:
                type: scalar
            - name: carrot
              value:
                type: scalar
            - name: people
              value:
                type: seq
                child:
                    type: map
                    items:
                        - name: fname
                          value:
                            type: scalar
                        - name: salary
                          value:
                            type: scalar
"""

class ValidatingLoader:
    """Load a YAML document while validating it against a schema.

    Nodes are pulled from ``self.parser``, which must provide ``getType()``
    and ``getScalar()``.  A schema is a dict with a ``'type'`` entry; a
    ``'seq'`` schema also carries a ``'child'`` schema and an optional
    ``'max'`` element count, and a ``'map'`` schema carries an ``'items'``
    list of ``{'name': ..., 'value': <schema>}`` dicts in the expected
    key order.
    """

    def load(self, data, schema):
        """Parse ``data`` and return the loaded value, validated by ``schema``."""
        self.simulateParser(data)
        loaded = self.loadData(schema)
        return loaded

    def loadData(self, schema):
        """Pull the next node from the parser and load it under ``schema``."""
        return self._load(self.parser.getType(), schema)

    def _load(self, typ, schema):
        """Load a node whose type ``typ`` has already been pulled.

        Raises Exception when ``typ`` differs from the schema's type.
        """
        wanted = schema['type']
        if typ != wanted:
            raise Exception("Wanted %s, got %s\n" % (wanted, typ))
        if typ == 'seq':
            return self._loadSeq(schema)
        if typ == 'map':
            return self._loadMap(schema)
        # Anything that is neither a seq nor a map is read as a scalar.
        return self.parser.getScalar()

    def _loadSeq(self, schema):
        """Load sequence elements until the parser reports a type of None."""
        limit = schema.get('max')
        childSchema = schema['child']
        elements = []
        typ = self.parser.getType()
        while typ is not None:
            # Check the limit before loading, counting the element at hand.
            self.checkMax(len(elements) + 1, limit)
            elements.append(self._load(typ, childSchema))
            typ = self.parser.getType()
        return elements

    def _loadMap(self, schema):
        """Load one key/value pair per schema item, in schema order."""
        loaded = {}
        for entry in schema['items']:
            # Pull (and ignore) the type of the key node, then read the key.
            self.parser.getType()
            key = self.parser.getScalar()
            self.checkName(key, entry)
            loaded[key] = self.loadData(entry['value'])
        # NOTE(review): presumably this consumes the end-of-map marker; its
        # result is not inspected, so extra keys would go unnoticed -- confirm
        # against mockParser.
        self.parser.getType()
        return loaded

    def checkMax(self, cnt, max):
        """Raise Exception if ``cnt`` exceeds ``max``; ``max`` of None means no limit."""
        if max is None:
            return
        if cnt > max:
            raise Exception("Seq has max %d elements\n" % max)

    def checkName(self, name, item):
        """Raise Exception unless ``name`` equals the schema item's ``'name'``."""
        expected = item['name']
        if name != expected:
            raise Exception("Expected key '%s', got '%s'\n" % (expected, name))

    def simulateParser(self, data):
        """Install ``self.parser`` for ``data``.

        This is the huge hack to work around not having a true pull parser:
        the whole document is loaded first and then handed to mockParser.
        """
        document = oldYamlLoad(data)
        self.parser = mockParser(document)

def testRoundTrip(data, schema):
    """Check that a validating load of ``data`` equals a plain load of it."""
    expected = oldYamlLoad(data)
    loader = ValidatingLoader()
    actual = loader.load(data, schema)
    assertEquals(expected, actual)

def oldYamlLoad(data):
    """Return the first document produced by the regular ``load`` of ``data``."""
    documents = load(data)
    return documents.next()

def testOneCase(test):
    """Run a single table-driven case.

    ``test`` is a mapping with ``'data'`` and ``'schema'`` entries and an
    optional ``'error'`` entry.  When ``'error'`` is present, the round trip
    is handed to ``assertError`` together with the expected message
    (presumably it must fail with that message -- see ``assertError``);
    otherwise the round trip is simply run.
    """
    data = test['data']
    schema = test['schema']
    # Membership test instead of the deprecated dict.has_key().
    if 'error' in test:
        assertError(lambda: testRoundTrip(data, schema),
            test['error'])
    else:
        testRoundTrip(data, schema)

class Test(YamlTest.YamlTest):
    """Runs every case listed in the ``testCases`` YAML table."""

    def testFromYaml(self):
        """Load the first document of ``testCases`` and run each entry."""
        cases = load(testCases).next()
        for case in cases:
            testOneCase(case)

# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    from unittest import main as runUnitTests
    runUnitTests()