Kirill Simonov avatar Kirill Simonov committed 5d8e6ae

Update the Pyrex-based bindings for libyaml to include the Parser functions.

Comments (0)

Files changed (4)

 
-#include <yaml/yaml.h>
+#include <yaml.h>
 
         YAML_ANCHOR_TOKEN
         YAML_TAG_TOKEN
         YAML_SCALAR_TOKEN
+    # Event kinds stored in yaml_event_t.type and dispatched on by
+    # ScannerAndParser._convert_event.
+    # NOTE(review): presumably mirrors yaml_event_type_t from libyaml's
+    # yaml.h -- confirm the member list against the installed header.
+    cdef enum yaml_event_type_t:
+        YAML_STREAM_START_EVENT
+        YAML_STREAM_END_EVENT
+        YAML_DOCUMENT_START_EVENT
+        YAML_DOCUMENT_END_EVENT
+        YAML_ALIAS_EVENT
+        YAML_SCALAR_EVENT
+        YAML_SEQUENCE_START_EVENT
+        YAML_SEQUENCE_END_EVENT
+        YAML_MAPPING_START_EVENT
+        YAML_MAPPING_END_EVENT
 
     ctypedef int yaml_read_handler_t(void *data, char *buffer,
             int size, int *size_read)
         int index
         int line
         int column
+    # Major/minor version pair; pointed to by the document-start event data
+    # (_yaml_event_document_start_data_t.version_directive).
+    ctypedef struct yaml_version_directive_t:
+        int major
+        int minor
+    # Handle/prefix pair; the document-start event data holds an array of
+    # pointers to these (tag_directives).
+    ctypedef struct yaml_tag_directive_t:
+        char *handle
+        char *prefix
+
+    # Per-token payload structs. Each one is a member of the
+    # _yaml_token_data_t union (stream_start, alias and anchor respectively).
+    ctypedef struct _yaml_token_stream_start_data_t:
+        yaml_encoding_t encoding
+    # Payload of an alias token; read as token.data.alias.value.
+    ctypedef struct _yaml_token_alias_data_t:
+        char *value
+    # Payload of an anchor token; read as token.data.anchor.value.
+    ctypedef struct _yaml_token_anchor_data_t:
+        char *value
     ctypedef struct _yaml_token_tag_data_t:
         char *handle
         char *suffix
         char *handle
         char *prefix
     ctypedef union _yaml_token_data_t:
-        yaml_encoding_t encoding
-        char *anchor
+        _yaml_token_stream_start_data_t stream_start
+        _yaml_token_alias_data_t alias
+        _yaml_token_anchor_data_t anchor
         _yaml_token_tag_data_t tag
         _yaml_token_scalar_data_t scalar
         _yaml_token_version_directive_data_t version_directive
         _yaml_token_data_t data
         yaml_mark_t start_mark
         yaml_mark_t end_mark
+
+    # Per-event payload structs. Which one is valid depends on
+    # yaml_event_t.type; they are overlaid in the _yaml_event_data_t union.
+    ctypedef struct _yaml_event_stream_start_data_t:
+        yaml_encoding_t encoding
+    ctypedef struct _yaml_event_document_start_data_t:
+        yaml_version_directive_t *version_directive
+        yaml_tag_directive_t **tag_directives
+        # Zero means the document start was explicit (see _convert_event).
+        int implicit
+    ctypedef struct _yaml_event_document_end_data_t:
+        # Zero means the document end was explicit (see _convert_event).
+        int implicit
+    ctypedef struct _yaml_event_alias_data_t:
+        char *anchor
+    ctypedef struct _yaml_event_scalar_data_t:
+        # anchor and tag may be NULL; _convert_event maps NULL to None.
+        char *anchor
+        char *tag
+        # value is read together with length (not as a NUL-terminated string).
+        char *value
+        int length
+        int plain_implicit
+        int quoted_implicit
+        yaml_scalar_style_t style
+    ctypedef struct _yaml_event_sequence_start_data_t:
+        # anchor and tag may be NULL; _convert_event maps NULL to None.
+        char *anchor
+        char *tag
+        int implicit
+        yaml_sequence_style_t style
+    ctypedef struct _yaml_event_mapping_start_data_t:
+        # anchor and tag may be NULL; _convert_event maps NULL to None.
+        char *anchor
+        char *tag
+        int implicit
+        yaml_mapping_style_t style
+    # Union of all event payloads; only the member matching the event type
+    # should be read.
+    ctypedef union _yaml_event_data_t:
+        _yaml_event_stream_start_data_t stream_start
+        _yaml_event_document_start_data_t document_start
+        _yaml_event_document_end_data_t document_end
+        _yaml_event_alias_data_t alias
+        _yaml_event_scalar_data_t scalar
+        _yaml_event_sequence_start_data_t sequence_start
+        _yaml_event_mapping_start_data_t mapping_start
+    # A single parser event: its kind, kind-specific payload, and the source
+    # positions where it starts and ends.
+    ctypedef struct yaml_event_t:
+        yaml_event_type_t type
+        _yaml_event_data_t data
+        yaml_mark_t start_mark
+        yaml_mark_t end_mark
+
+
     ctypedef struct yaml_parser_t:
         yaml_error_type_t error
         char *problem
     char *yaml_get_version_string()
     void yaml_get_version(int *major, int *minor, int *patch)
     void yaml_token_delete(yaml_token_t *token)
+    void yaml_event_delete(yaml_event_t *event)
     yaml_parser_t *yaml_parser_new()
     void yaml_parser_delete(yaml_parser_t *parser)
     void yaml_parser_set_input_string(yaml_parser_t *parser,
             yaml_encoding_t encoding)
     yaml_token_t *yaml_parser_get_token(yaml_parser_t *parser)
     yaml_token_t *yaml_parser_peek_token(yaml_parser_t *parser)
+    yaml_event_t *yaml_parser_get_event(yaml_parser_t *parser)
+    yaml_event_t *yaml_parser_peek_event(yaml_parser_t *parser)
 
         yaml_token_delete(token)
     yaml_parser_delete(parser)
 
-cdef class Scanner:
+def test_parser(data):
+    # Smoke test for the libyaml parser bindings: feed `data` to a fresh
+    # parser, then fetch and discard events until the stream-end event.
+    #
+    # Raises TypeError if `data` is not an exact string object, and
+    # MemoryError if the parser cannot be allocated or returns no event.
+    # NOTE(review): a NULL event may also signal a scanner/parser error
+    # rather than memory exhaustion -- confirm against parser.error.
+    cdef yaml_parser_t *parser
+    cdef yaml_event_t *event
+    cdef int done
+    if PyString_CheckExact(data) == 0:
+        raise TypeError("string input required")
+    parser = yaml_parser_new()
+    if parser == NULL:
+        raise MemoryError
+    # try/finally guarantees the parser is released even when the loop
+    # below raises; previously the error path leaked it.
+    try:
+        yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data))
+        done = 0
+        while done == 0:
+            event = yaml_parser_get_event(parser)
+            if event == NULL:
+                raise MemoryError
+            # Read the type before the event is freed.
+            if event.type == YAML_STREAM_END_EVENT:
+                done = 1
+            yaml_event_delete(event)
+    finally:
+        yaml_parser_delete(parser)
+
+cdef class ScannerAndParser:
 
     cdef yaml_parser_t *parser
     cdef int eof
     cdef object stream
     cdef yaml_token_t *cached_token
+    cdef yaml_event_t *cached_event
     cdef object cached_obj
 
     def __init__(self, stream):
             yaml_parser_delete(self.parser)
             self.parser = NULL
 
-    cdef object _convert(self, yaml_token_t *token):
+    cdef object _convert_token(self, yaml_token_t *token):
         if token == NULL:
             if self.parser.error == YAML_MEMORY_ERROR:
                 raise MemoryError
         elif token.type == YAML_VALUE_TOKEN:
             return yaml.ValueToken(start_mark, end_mark)
         elif token.type == YAML_ALIAS_TOKEN:
-            return yaml.AliasToken(token.data.anchor,
+            return yaml.AliasToken(token.data.alias.value,
                     start_mark, end_mark)
         elif token.type == YAML_ANCHOR_TOKEN:
-            return yaml.AnchorToken(token.data.anchor,
+            return yaml.AnchorToken(token.data.anchor.value,
                     start_mark, end_mark)
         elif token.type == YAML_TAG_TOKEN:
             handle = token.data.tag.handle
         else:
             raise RuntimeError("unknown token type")
 
+    # Convert a libyaml event into the matching yaml.*Event object.
+    #
+    # `event` is the pointer returned by yaml_parser_get_event() or
+    # yaml_parser_peek_event(). When it is NULL the parser's error state is
+    # translated into a Python exception instead:
+    #   YAML_MEMORY_ERROR  -> MemoryError
+    #   YAML_READER_ERROR  -> yaml.reader.ReaderError
+    #   YAML_SCANNER_ERROR -> yaml.scanner.ScannerError
+    #   YAML_PARSER_ERROR  -> yaml.parser.ParserError
+    #   anything else      -> RuntimeError
+    # An unrecognised event type also raises RuntimeError.
+    # The event is only read here; freeing it is left to the caller.
+    cdef object _convert_event(self, yaml_event_t *event):
+        if event == NULL:
+            if self.parser.error == YAML_MEMORY_ERROR:
+                raise MemoryError
+            elif self.parser.error == YAML_READER_ERROR:
+                raise yaml.reader.ReaderError("<input>",
+                        self.parser.problem_offset,
+                        self.parser.problem_value,
+                        '?', self.parser.problem)
+            elif self.parser.error == YAML_SCANNER_ERROR:
+                # The context (what was being scanned) is optional; the
+                # problem description and its mark are always passed.
+                if self.parser.context != NULL:
+                    raise yaml.scanner.ScannerError(
+                            self.parser.context,
+                            yaml.Mark("<input>",
+                                self.parser.context_mark.index,
+                                self.parser.context_mark.line,
+                                self.parser.context_mark.column,
+                                None, None),
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+                else:
+                    raise yaml.scanner.ScannerError(None, None,
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+            elif self.parser.error == YAML_PARSER_ERROR:
+                # Same shape as the scanner case, different exception class.
+                if self.parser.context != NULL:
+                    raise yaml.parser.ParserError(
+                            self.parser.context,
+                            yaml.Mark("<input>",
+                                self.parser.context_mark.index,
+                                self.parser.context_mark.line,
+                                self.parser.context_mark.column,
+                                None, None),
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+                else:
+                    raise yaml.parser.ParserError(None, None,
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+            else:
+                raise RuntimeError("neither error nor event produced")
+        # Source positions shared by every event kind.
+        start_mark = yaml.Mark("<input>",
+                event.start_mark.index,
+                event.start_mark.line,
+                event.start_mark.column,
+                None, None)
+        end_mark = yaml.Mark("<input>",
+                event.end_mark.index,
+                event.end_mark.line,
+                event.end_mark.column,
+                None, None)
+        if event.type == YAML_STREAM_START_EVENT:
+            return yaml.StreamStartEvent(start_mark, end_mark)
+        elif event.type == YAML_STREAM_END_EVENT:
+            return yaml.StreamEndEvent(start_mark, end_mark)
+        elif event.type == YAML_DOCUMENT_START_EVENT:
+            # libyaml reports "implicit"; the third argument here is the
+            # inverse ("explicit") flag.
+            return yaml.DocumentStartEvent(start_mark, end_mark,
+                    (event.data.document_start.implicit == 0))
+        elif event.type == YAML_DOCUMENT_END_EVENT:
+            return yaml.DocumentEndEvent(start_mark, end_mark,
+                    (event.data.document_end.implicit == 0))
+        elif event.type == YAML_SCALAR_EVENT:
+            # NULL anchor/tag pointers become None rather than being
+            # converted to Python strings.
+            if event.data.scalar.anchor == NULL:
+                anchor = None
+            else:
+                anchor = event.data.scalar.anchor
+            if event.data.scalar.tag == NULL:
+                tag = None
+            else:
+                tag = event.data.scalar.tag
+            implicit = (event.data.scalar.plain_implicit == 1, event.data.scalar.quoted_implicit == 1)
+            # Build the value from pointer + length, then decode as UTF-8.
+            value = PyString_FromStringAndSize(event.data.scalar.value, event.data.scalar.length)
+            return yaml.ScalarEvent(anchor, tag, implicit, unicode(value, 'utf-8'),
+                    start_mark, end_mark)
+        elif event.type == YAML_ALIAS_EVENT:
+            if event.data.alias.anchor == NULL:
+                anchor = None
+            else:
+                anchor = event.data.alias.anchor
+            return yaml.AliasEvent(anchor, start_mark, end_mark)
+        elif event.type == YAML_SEQUENCE_START_EVENT:
+            if event.data.sequence_start.anchor == NULL:
+                anchor = None
+            else:
+                anchor = event.data.sequence_start.anchor
+            if event.data.sequence_start.tag == NULL:
+                tag = None
+            else:
+                tag = event.data.sequence_start.tag
+            implicit = (event.data.sequence_start.implicit == 1)
+            flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE)
+            return yaml.SequenceStartEvent(anchor, tag, implicit,
+                    start_mark, end_mark, flow_style)
+        elif event.type == YAML_MAPPING_START_EVENT:
+            if event.data.mapping_start.anchor == NULL:
+                anchor = None
+            else:
+                anchor = event.data.mapping_start.anchor
+            if event.data.mapping_start.tag == NULL:
+                tag = None
+            else:
+                tag = event.data.mapping_start.tag
+            implicit = (event.data.mapping_start.implicit == 1)
+            flow_style = (event.data.mapping_start.style == YAML_FLOW_MAPPING_STYLE)
+            return yaml.MappingStartEvent(anchor, tag, implicit,
+                    start_mark, end_mark, flow_style)
+        elif event.type == YAML_SEQUENCE_END_EVENT:
+            return yaml.SequenceEndEvent(start_mark, end_mark)
+        elif event.type == YAML_MAPPING_END_EVENT:
+            return yaml.MappingEndEvent(start_mark, end_mark)
+        else:
+            raise RuntimeError("unknown event type")
+
     def get_token(self):
         cdef yaml_token_t *token
         if self.cached_token != NULL:
         if self.eof != 0:
             return None
         token = yaml_parser_get_token(self.parser)
-        obj = self._convert(token)
+        obj = self._convert_token(token)
         if token.type == YAML_STREAM_END_TOKEN:
             self.eof = 1
         yaml_token_delete(token)
         if self.eof != 0:
             return None
         token = yaml_parser_peek_token(self.parser)
-        obj = self._convert(token)
+        obj = self._convert_token(token)
         if token.type == YAML_STREAM_END_TOKEN:
             self.eof = 1
         self.cached_token = token
             return False
         else:
             token = yaml_parser_peek_token(self.parser)
-            obj = self._convert(token)
+            obj = self._convert_token(token)
             if token.type == YAML_STREAM_END_TOKEN:
                 self.eof = 1
             self.cached_token = token
                 return True
         return False
 
-class Loader(Scanner,
-        yaml.parser.Parser,
+    def get_event(self):
+        # Return the next event as a yaml event object and consume it.
+        # Returns None once the stream-end event has already been seen.
+        # Parser failures surface as the exceptions raised by _convert_event.
+        cdef yaml_event_t *event
+        if self.cached_event != NULL:
+            # A previous peek_event()/check_event() already converted this
+            # event. Fetch it from the parser (presumably the same event the
+            # peek returned -- TODO confirm) just to free it, then hand out
+            # the cached Python object and clear the cache.
+            yaml_event_delete(yaml_parser_get_event(self.parser))
+            obj = self.cached_obj
+            self.cached_event = NULL
+            self.cached_obj = None
+            return obj
+        if self.eof != 0:
+            return None
+        event = yaml_parser_get_event(self.parser)
+        # Raises if event is NULL, so event is non-NULL past this line.
+        obj = self._convert_event(event)
+        # The type must be read before the event is deleted.
+        if event.type == YAML_STREAM_END_EVENT:
+            self.eof = 1
+        yaml_event_delete(event)
+        return obj
+
+    def peek_event(self):
+        # Return the next event as a yaml event object without consuming it.
+        # The converted object is cached so that repeated peeks and the
+        # following get_event() return the same object.
+        # Returns None once the stream-end event has been seen and consumed.
+        cdef yaml_event_t *event
+        if self.cached_event != NULL:
+            return self.cached_obj
+        if self.eof != 0:
+            return None
+        event = yaml_parser_peek_event(self.parser)
+        # Raises if event is NULL, so event is non-NULL past this line.
+        obj = self._convert_event(event)
+        if event.type == YAML_STREAM_END_EVENT:
+            self.eof = 1
+        # The event is not deleted here; the pointer is kept only as a
+        # "cache is populated" marker and is freed later by get_event().
+        self.cached_event = event
+        self.cached_obj = obj
+        return obj
+
+    def check_event(self, *choices):
+        # Report whether a next event is available and, when `choices`
+        # (event classes) are given, whether it is an instance of any of them.
+        # Does not consume the event; it is peeked and cached like
+        # peek_event(). Returns False when the stream has ended.
+        cdef yaml_event_t *event
+        if self.cached_event != NULL:
+            obj = self.cached_obj
+        elif self.eof != 0:
+            return False
+        else:
+            event = yaml_parser_peek_event(self.parser)
+            # Raises if event is NULL, so event is non-NULL past this line.
+            obj = self._convert_event(event)
+            if event.type == YAML_STREAM_END_EVENT:
+                self.eof = 1
+            self.cached_event = event
+            self.cached_obj = obj
+        # With no classes to match against, any available event qualifies.
+        if not choices:
+            return True
+        for choice in choices:
+            if isinstance(obj, choice):
+                return True
+        return False
+
+class Loader(ScannerAndParser,
         yaml.composer.Composer,
         yaml.constructor.Constructor,
         yaml.resolver.Resolver):
 
     def __init__(self, stream):
-        Scanner.__init__(self, stream)
-        yaml.parser.Parser.__init__(self)
+        ScannerAndParser.__init__(self, stream)
         yaml.composer.Composer.__init__(self)
         yaml.constructor.Constructor.__init__(self)
         yaml.resolver.Resolver.__init__(self)

tests/test_yaml_ext.py

     def testExtVersion(self):
         self.failUnlessEqual("%s.%s.%s" % _yaml.get_version(), _yaml.get_version_string())
 
-class TestExtScanner(test_appliance.TestAppliance):
+class TestExtLoader(test_appliance.TestAppliance):
 
     def _testExtScanner(self, test_name, data_filename, canonical_filename):
         data = file(data_filename, 'r').read()
             print "EXT_TOKENS:", ext_tokens
             raise
 
-TestExtScanner.add_tests('testExtScanner', '.data', '.canonical')
+    def _testExtParser(self, test_name, data_filename, canonical_filename):
+        data = file(data_filename, 'r').read()
+        events = list(yaml.parse(data))
+        ext_events = []
+        try:
+            for event in yaml.parse(data, Loader=yaml.ExtLoader):
+                ext_events.append(event)
+                #print "EVENT:", event
+            self.failUnlessEqual(len(events), len(ext_events))
+            for event, ext_event in zip(events, ext_events):
+                self.failUnlessEqual(event.__class__, ext_event.__class__)
+                if hasattr(event, 'anchor'):
+                    self.failUnlessEqual(event.anchor, ext_event.anchor)
+                if hasattr(event, 'tag'):
+                    self.failUnlessEqual(event.tag, ext_event.tag)
+                if hasattr(event, 'implicit'):
+                    self.failUnlessEqual(event.implicit, ext_event.implicit)
+                if hasattr(event, 'value'):
+                    self.failUnlessEqual(event.value, ext_event.value)
+        except:
+            print
+            print "DATA:"
+            print file(data_filename, 'rb').read()
+            print "EVENTS:", events
+            print "EXT_EVENTS:", ext_events
+            raise
+
+TestExtLoader.add_tests('testExtScanner', '.data', '.canonical')
+TestExtLoader.add_tests('testExtParser', '.data', '.canonical')
 
 def main(module='__main__'):
     unittest.main(module)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.