Kirill Simonov committed 9972e25

Update libyaml bindings.

Comments (0)

Files changed (3)

 
 cdef extern from "_yaml.h":
 
+    void memcpy(char *d, char *s, int l)
+    int strlen(char *s)
     int PyString_CheckExact(object o)
     int PyUnicode_CheckExact(object o)
     char *PyString_AS_STRING(object o)
     int PyString_GET_SIZE(object o)
     object PyString_FromStringAndSize(char *v, int l)
+    object PyUnicode_DecodeUTF8(char *s, int s, char *e)
+    object PyUnicode_AsUTF8String(object o)
 
     cdef enum yaml_encoding_t:
         YAML_ANY_ENCODING
         YAML_BLOCK_MAPPING_STYLE
         YAML_FLOW_MAPPING_STYLE
     cdef enum yaml_token_type_t:
+        YAML_NO_TOKEN
         YAML_STREAM_START_TOKEN
         YAML_STREAM_END_TOKEN
         YAML_VERSION_DIRECTIVE_TOKEN
         YAML_TAG_TOKEN
         YAML_SCALAR_TOKEN
     cdef enum yaml_event_type_t:
+        YAML_NO_EVENT
         YAML_STREAM_START_EVENT
         YAML_STREAM_END_EVENT
         YAML_DOCUMENT_START_EVENT
         YAML_MAPPING_END_EVENT
 
     ctypedef int yaml_read_handler_t(void *data, char *buffer,
-            int size, int *size_read)
+            int size, int *size_read) except 0
 
     ctypedef struct yaml_mark_t:
         int index
     void yaml_get_version(int *major, int *minor, int *patch)
     void yaml_token_delete(yaml_token_t *token)
     void yaml_event_delete(yaml_event_t *event)
-    yaml_parser_t *yaml_parser_new()
+    int yaml_parser_initialize(yaml_parser_t *parser)
     void yaml_parser_delete(yaml_parser_t *parser)
     void yaml_parser_set_input_string(yaml_parser_t *parser,
             char *input, int size)
             yaml_read_handler_t *handler, void *data)
     void yaml_parser_set_encoding(yaml_parser_t *parser,
             yaml_encoding_t encoding)
-    yaml_token_t *yaml_parser_get_token(yaml_parser_t *parser)
-    yaml_token_t *yaml_parser_peek_token(yaml_parser_t *parser)
-    yaml_event_t *yaml_parser_get_event(yaml_parser_t *parser)
-    yaml_event_t *yaml_parser_peek_event(yaml_parser_t *parser)
+    int yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) except *
+    int yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event) except *
 
     yaml_get_version(&major, &minor, &patch)
     return (major, minor, patch)
 
-def test_scanner(data):
-    cdef yaml_parser_t *parser
-    cdef yaml_token_t *token
+def test_scanner(stream):
+    cdef yaml_parser_t parser
+    cdef yaml_token_t token
     cdef int done
-    if PyString_CheckExact(data) == 0:
-        raise TypeError("string input required")
-    parser = yaml_parser_new()
-    if parser == NULL:
-        raise MemoryError
-    yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data))
+    cdef int count
+    if hasattr(stream, 'read'):
+        stream = stream.read()
+    if PyUnicode_CheckExact(stream) != 0:
+        stream = stream.encode('utf-8')
+    if PyString_CheckExact(stream) == 0:
+        raise TypeError("a string or stream input is required")
+    if yaml_parser_initialize(&parser) == 0:
+        raise RuntimeError("cannot initialize parser")
+    yaml_parser_set_input_string(&parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream))
     done = 0
+    count = 0
     while done == 0:
-        token = yaml_parser_get_token(parser)
-        if token == NULL:
-            raise MemoryError
-        if token.type == YAML_STREAM_END_TOKEN:
+        if yaml_parser_scan(&parser, &token) == 0:
+            raise RuntimeError("cannot get next token: #%s" % count)
+        if token.type == YAML_NO_TOKEN:
             done = 1
-        yaml_token_delete(token)
-    yaml_parser_delete(parser)
+        else:
+            count = count+1
+        yaml_token_delete(&token)
+    yaml_parser_delete(&parser)
+    dummy = len(stream)
+    return count
 
-def test_parser(data):
-    cdef yaml_parser_t *parser
-    cdef yaml_event_t *event
+def test_parser(stream):
+    cdef yaml_parser_t parser
+    cdef yaml_event_t event
     cdef int done
-    if PyString_CheckExact(data) == 0:
-        raise TypeError("string input required")
-    parser = yaml_parser_new()
-    if parser == NULL:
-        raise MemoryError
-    yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data))
+    cdef int count
+    if hasattr(stream, 'read'):
+        stream = stream.read()
+    if PyUnicode_CheckExact(stream) != 0:
+        stream = stream.encode('utf-8')
+    if PyString_CheckExact(stream) == 0:
+        raise TypeError("a string or stream input is required")
+    if yaml_parser_initialize(&parser) == 0:
+        raise RuntimeError("cannot initialize parser")
+    yaml_parser_set_input_string(&parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream))
     done = 0
+    count = 0
     while done == 0:
-        event = yaml_parser_get_event(parser)
-        if event == NULL:
-            raise MemoryError
-        if event.type == YAML_STREAM_END_EVENT:
+        if yaml_parser_parse(&parser, &event) == 0:
+            raise RuntimeError("cannot get next event: #%s" % count)
+        if event.type == YAML_NO_EVENT:
             done = 1
-        yaml_event_delete(event)
-    yaml_parser_delete(parser)
+        else:
+            count = count+1
+        yaml_event_delete(&event)
+    yaml_parser_delete(&parser)
+    dummy = len(stream)
+    return count
 
 cdef class ScannerAndParser:
 
-    cdef yaml_parser_t *parser
-    cdef int eof
+    cdef yaml_parser_t parser
+
     cdef object stream
-    cdef yaml_token_t *cached_token
-    cdef yaml_event_t *cached_event
-    cdef object cached_obj
+    cdef object current_token
+    cdef object current_event
+
+    cdef object cached_input
+    cdef object cached_YAML
+    cdef object cached_TAG
+    cdef object cached_question
+    cdef object cached_Mark
+    cdef object cached_ReaderError
+    cdef object cached_ScannerError
+    cdef object cached_ParserError
+    cdef object cached_StreamStartToken
+    cdef object cached_StreamEndToken
+    cdef object cached_DirectiveToken
+    cdef object cached_DocumentStartToken
+    cdef object cached_DocumentEndToken
+    cdef object cached_BlockSequenceStartToken
+    cdef object cached_BlockMappingStartToken
+    cdef object cached_BlockEndToken
+    cdef object cached_FlowSequenceStartToken
+    cdef object cached_FlowMappingStartToken
+    cdef object cached_FlowSequenceEndToken
+    cdef object cached_FlowMappingEndToken
+    cdef object cached_BlockEntryToken
+    cdef object cached_FlowEntryToken
+    cdef object cached_KeyToken
+    cdef object cached_ValueToken
+    cdef object cached_AliasToken
+    cdef object cached_AnchorToken
+    cdef object cached_TagToken
+    cdef object cached_ScalarToken
+    cdef object cached_StreamStartEvent
+    cdef object cached_StreamEndEvent
+    cdef object cached_DocumentStartEvent
+    cdef object cached_DocumentEndEvent
+    cdef object cached_AliasEvent
+    cdef object cached_SequenceStartEvent
+    cdef object cached_SequenceEndEvent
+    cdef object cached_MappingStartEvent
+    cdef object cached_MappingEndEvent
 
     def __init__(self, stream):
+        if yaml_parser_initialize(&self.parser) == 0:
+            raise MemoryError
         if hasattr(stream, 'read'):
-            stream = stream.read()
-        if PyUnicode_CheckExact(stream) != 0:
-            stream = stream.encode('utf-8')
-        if PyString_CheckExact(stream) == 0:
-            raise TypeError("a string or stream input is required")
-        self.parser = yaml_parser_new()
-        if self.parser == NULL:
-            raise MemoryError
-        yaml_parser_set_input_string(self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream))
-        self.eof = 0
-        self.stream = stream
-        self.cached_token = NULL
-        self.cached_obj = None
+            self.stream = stream
+            yaml_parser_set_input(&self.parser, input_handler, <void *>self)
+        else:
+            if PyUnicode_CheckExact(stream) != 0:
+                stream = PyUnicode_AsUTF8String(stream)
+            if PyString_CheckExact(stream) == 0:
+                raise TypeError("a string or stream input is required")
+            self.stream = stream
+            yaml_parser_set_input_string(&self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream))
+        self.current_token = None
+        self._cache_names()
+
+    def get_token(self):
+        if self.current_token is not None:
+            value = self.current_token
+            self.current_token = None
+        else:
+            value = self._scan()
+        return value
+
+    def peek_token(self):
+        if self.current_token is None:
+            self.current_token = self._scan()
+        return self.current_token
+
+    def check_token(self, *choices):
+        if self.current_token is None:
+            self.current_token = self._scan()
+        if self.current_token is None:
+            return False
+        if not choices:
+            return True
+        token_class = self.current_token.__class__
+        for choice in choices:
+            if token_class is choice:
+                return True
+        return False
+
+    def get_event(self):
+        if self.current_event is not None:
+            value = self.current_event
+            self.current_event = None
+        else:
+            value = self._parse()
+        return value
+
+    def peek_event(self):
+        if self.current_event is None:
+            self.current_event = self._parse()
+        return self.current_event
+
+    def check_event(self, *choices):
+        if self.current_event is None:
+            self.current_event = self._parse()
+        if self.current_event is None:
+            return False
+        if not choices:
+            return True
+        event_class = self.current_event.__class__
+        for choice in choices:
+            if event_class is choice:
+                return True
+        return False
 
     def __dealloc__(self):
-        if self.parser != NULL:
-            yaml_parser_delete(self.parser)
-            self.parser = NULL
+        yaml_parser_delete(&self.parser)
 
-    cdef object _convert_token(self, yaml_token_t *token):
-        if token == NULL:
+    cdef object _cache_names(self):
+        self.cached_input = '<input>'
+        self.cached_YAML = 'YAML'
+        self.cached_TAG = 'TAG'
+        self.cached_question = '?'
+        self.cached_Mark = yaml.Mark
+        self.cached_ReaderError = yaml.reader.ReaderError
+        self.cached_ScannerError = yaml.scanner.ScannerError
+        self.cached_ParserError = yaml.parser.ParserError
+        self.cached_StreamStartToken = yaml.StreamStartToken
+        self.cached_StreamEndToken = yaml.StreamEndToken
+        self.cached_DirectiveToken = yaml.DirectiveToken
+        self.cached_DocumentStartToken = yaml.DocumentStartToken
+        self.cached_DocumentEndToken = yaml.DocumentEndToken
+        self.cached_BlockSequenceStartToken = yaml.BlockSequenceStartToken
+        self.cached_BlockMappingStartToken = yaml.BlockMappingStartToken
+        self.cached_BlockEndToken = yaml.BlockEndToken
+        self.cached_FlowSequenceStartToken = yaml.FlowSequenceStartToken
+        self.cached_FlowMappingStartToken = yaml.FlowMappingStartToken
+        self.cached_FlowSequenceEndToken = yaml.FlowSequenceEndToken
+        self.cached_FlowMappingEndToken = yaml.FlowMappingEndToken
+        self.cached_BlockEntryToken = yaml.BlockEntryToken
+        self.cached_FlowEntryToken = yaml.FlowEntryToken
+        self.cached_KeyToken = yaml.KeyToken
+        self.cached_ValueToken = yaml.ValueToken
+        self.cached_AliasToken = yaml.AliasToken
+        self.cached_AnchorToken = yaml.AnchorToken
+        self.cached_TagToken = yaml.TagToken
+        self.cached_ScalarToken = yaml.ScalarToken
+        self.cached_StreamStartEvent = yaml.StreamStartEvent
+        self.cached_StreamEndEvent = yaml.StreamEndEvent
+        self.cached_DocumentStartEvent = yaml.DocumentStartEvent
+        self.cached_DocumentEndEvent = yaml.DocumentEndEvent
+        self.cached_AliasEvent = yaml.AliasEvent
+        self.cached_ScalarEvent = yaml.ScalarEvent
+        self.cached_SequenceStartEvent = yaml.SequenceStartEvent
+        self.cached_SequenceEndEvent = yaml.SequenceEndEvent
+        self.cached_MappingStartEvent = yaml.MappingStartEvent
+        self.cached_MappingEndEvent = yaml.MappingEndEvent
+
+    cdef object _scan(self):
+        cdef yaml_token_t token
+        if yaml_parser_scan(&self.parser, &token) == 0:
             if self.parser.error == YAML_MEMORY_ERROR:
                 raise MemoryError
             elif self.parser.error == YAML_READER_ERROR:
-                raise yaml.reader.ReaderError("<input>",
+                raise self.cached_ReaderError(self.cached_input,
                         self.parser.problem_offset,
                         self.parser.problem_value,
-                        '?', self.parser.problem)
+                        self.cached_question, self.parser.problem)
             elif self.parser.error == YAML_SCANNER_ERROR:
+                context_mark = None
+                problem_mark = None
                 if self.parser.context != NULL:
-                    raise yaml.scanner.ScannerError(
-                            self.parser.context,
-                            yaml.Mark("<input>",
-                                self.parser.context_mark.index,
-                                self.parser.context_mark.line,
-                                self.parser.context_mark.column,
-                                None, None),
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
+                    context_mark = self.cached_Mark(self.cached_input,
+                            self.parser.context_mark.index,
+                            self.parser.context_mark.line,
+                            self.parser.context_mark.column,
+                            None, None)
+                if self.parser.problem != NULL:
+                    problem_mark = self.cached_Mark(self.cached_input,
+                            self.parser.problem_mark.index,
+                            self.parser.problem_mark.line,
+                            self.parser.problem_mark.column,
+                            None, None)
+                if self.parser.context != NULL:
+                    raise self.cached_ScannerError(
+                            self.parser.context, context_mark,
+                            self.parser.problem, problem_mark)
                 else:
                     raise yaml.scanner.ScannerError(None, None,
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
-            else:
-                raise RuntimeError("neither error nor token produced")
-        start_mark = yaml.Mark("<input>",
+                            self.parser.problem, problem_mark)
+        start_mark = yaml.Mark(self.cached_input,
                 token.start_mark.index,
                 token.start_mark.line,
                 token.start_mark.column,
                 None, None)
-        end_mark = yaml.Mark("<input>",
+        end_mark = yaml.Mark(self.cached_input,
                 token.end_mark.index,
                 token.end_mark.line,
                 token.end_mark.column,
                 None, None)
-        if token.type == YAML_STREAM_START_TOKEN:
-            return yaml.StreamStartToken(start_mark, end_mark)
+        if token.type == YAML_NO_TOKEN:
+            return None
+        elif token.type == YAML_STREAM_START_TOKEN:
+            return self.cached_StreamStartToken(start_mark, end_mark)
         elif token.type == YAML_STREAM_END_TOKEN:
-            return yaml.StreamEndToken(start_mark, end_mark)
+            return self.cached_StreamEndToken(start_mark, end_mark)
         elif token.type == YAML_VERSION_DIRECTIVE_TOKEN:
-            return yaml.DirectiveToken('YAML',
+            return self.cached_DirectiveToken(self.cached_YAML,
                     (token.data.version_directive.major,
                         token.data.version_directive.minor),
                     start_mark, end_mark)
         elif token.type == YAML_TAG_DIRECTIVE_TOKEN:
-            return yaml.DirectiveToken('TAG',
+            return self.cached_DirectiveToken(self.cached_TAG,
                     (token.data.tag_directive.handle,
                         token.data.tag_directive.prefix),
                     start_mark, end_mark)
         elif token.type == YAML_DOCUMENT_START_TOKEN:
-            return yaml.DocumentStartToken(start_mark, end_mark)
+            return self.cached_DocumentStartToken(start_mark, end_mark)
         elif token.type == YAML_DOCUMENT_END_TOKEN:
-            return yaml.DocumentEndToken(start_mark, end_mark)
+            return self.cached_DocumentEndToken(start_mark, end_mark)
         elif token.type == YAML_BLOCK_SEQUENCE_START_TOKEN:
-            return yaml.BlockSequenceStartToken(start_mark, end_mark)
+            return self.cached_BlockSequenceStartToken(start_mark, end_mark)
         elif token.type == YAML_BLOCK_MAPPING_START_TOKEN:
-            return yaml.BlockMappingStartToken(start_mark, end_mark)
+            return self.cached_BlockMappingStartToken(start_mark, end_mark)
         elif token.type == YAML_BLOCK_END_TOKEN:
-            return yaml.BlockEndToken(start_mark, end_mark)
+            return self.cached_BlockEndToken(start_mark, end_mark)
         elif token.type == YAML_FLOW_SEQUENCE_START_TOKEN:
-            return yaml.FlowSequenceStartToken(start_mark, end_mark)
+            return self.cached_FlowSequenceStartToken(start_mark, end_mark)
         elif token.type == YAML_FLOW_SEQUENCE_END_TOKEN:
-            return yaml.FlowSequenceEndToken(start_mark, end_mark)
+            return self.cached_FlowSequenceEndToken(start_mark, end_mark)
         elif token.type == YAML_FLOW_MAPPING_START_TOKEN:
-            return yaml.FlowMappingStartToken(start_mark, end_mark)
+            return self.cached_FlowMappingStartToken(start_mark, end_mark)
         elif token.type == YAML_FLOW_MAPPING_END_TOKEN:
-            return yaml.FlowMappingEndToken(start_mark, end_mark)
+            return self.cached_FlowMappingEndToken(start_mark, end_mark)
         elif token.type == YAML_BLOCK_ENTRY_TOKEN:
-            return yaml.BlockEntryToken(start_mark, end_mark)
+            return self.cached_BlockEntryToken(start_mark, end_mark)
         elif token.type == YAML_FLOW_ENTRY_TOKEN:
-            return yaml.FlowEntryToken(start_mark, end_mark)
+            return self.cached_FlowEntryToken(start_mark, end_mark)
         elif token.type == YAML_KEY_TOKEN:
-            return yaml.KeyToken(start_mark, end_mark)
+            return self.cached_KeyToken(start_mark, end_mark)
         elif token.type == YAML_VALUE_TOKEN:
-            return yaml.ValueToken(start_mark, end_mark)
+            return self.cached_ValueToken(start_mark, end_mark)
         elif token.type == YAML_ALIAS_TOKEN:
-            return yaml.AliasToken(token.data.alias.value,
-                    start_mark, end_mark)
+            value = PyUnicode_DecodeUTF8(token.data.alias.value,
+                    strlen(token.data.alias.value), 'strict')
+            return self.cached_AliasToken(value, start_mark, end_mark)
         elif token.type == YAML_ANCHOR_TOKEN:
-            return yaml.AnchorToken(token.data.anchor.value,
-                    start_mark, end_mark)
+            value = PyUnicode_DecodeUTF8(token.data.anchor.value,
+                    strlen(token.data.anchor.value), 'strict')
+            return self.cached_AnchorToken(value, start_mark, end_mark)
         elif token.type == YAML_TAG_TOKEN:
-            handle = token.data.tag.handle
-            if handle == '':
+            handle = PyUnicode_DecodeUTF8(token.data.tag.handle,
+                    strlen(token.data.tag.handle), 'strict')
+            suffix = PyUnicode_DecodeUTF8(token.data.tag.suffix,
+                    strlen(token.data.tag.suffix), 'strict')
+            if not handle:
                 handle = None
-            return yaml.TagToken((handle, token.data.tag.suffix),
-                    start_mark, end_mark)
+            return self.cached_TagToken((handle, suffix), start_mark, end_mark)
         elif token.type == YAML_SCALAR_TOKEN:
-            value = PyString_FromStringAndSize(token.data.scalar.value, token.data.scalar.length)
-            return yaml.ScalarToken(unicode(value, 'utf-8'),
-                    bool(token.data.scalar.style == YAML_PLAIN_SCALAR_STYLE),
-                    start_mark, end_mark)
+            value = PyUnicode_DecodeUTF8(token.data.scalar.value,
+                    token.data.scalar.length, 'strict')
+            plain = False
+            style = None
+            if token.data.scalar.style == YAML_PLAIN_SCALAR_STYLE:
+                plain = True
+                style = ''
+            elif token.data.scalar.style == YAML_SINGLE_QUOTED_SCALAR_STYLE:
+                style = '\''
+            elif token.data.scalar.style == YAML_DOUBLE_QUOTED_SCALAR_STYLE:
+                style = '"'
+            elif token.data.scalar.style == YAML_LITERAL_SCALAR_STYLE:
+                style = '|'
+            elif token.data.scalar.style == YAML_FOLDED_SCALAR_STYLE:
+                style = '>'
+            return self.cached_ScalarToken(value, plain,
+                    start_mark, end_mark, style)
         else:
             raise RuntimeError("unknown token type")
 
-    cdef object _convert_event(self, yaml_event_t *event):
-        if event == NULL:
+    cdef object _parse(self):
+        cdef yaml_event_t event
+        if yaml_parser_parse(&self.parser, &event) == 0:
             if self.parser.error == YAML_MEMORY_ERROR:
                 raise MemoryError
             elif self.parser.error == YAML_READER_ERROR:
-                raise yaml.reader.ReaderError("<input>",
+                raise self.cached_ReaderError(self.cached_input,
                         self.parser.problem_offset,
                         self.parser.problem_value,
-                        '?', self.parser.problem)
-            elif self.parser.error == YAML_SCANNER_ERROR:
+                        self.cached_question, self.parser.problem)
+            elif self.parser.error == YAML_SCANNER_ERROR    \
+                    or self.parser.error == YAML_PARSER_ERROR:
+                context_mark = None
+                problem_mark = None
                 if self.parser.context != NULL:
-                    raise yaml.scanner.ScannerError(
-                            self.parser.context,
-                            yaml.Mark("<input>",
-                                self.parser.context_mark.index,
-                                self.parser.context_mark.line,
-                                self.parser.context_mark.column,
-                                None, None),
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
+                    context_mark = self.cached_Mark(self.cached_input,
+                            self.parser.context_mark.index,
+                            self.parser.context_mark.line,
+                            self.parser.context_mark.column,
+                            None, None)
+                if self.parser.problem != NULL:
+                    problem_mark = self.cached_Mark(self.cached_input,
+                            self.parser.problem_mark.index,
+                            self.parser.problem_mark.line,
+                            self.parser.problem_mark.column,
+                            None, None)
+                if self.parser.error == YAML_SCANNER_ERROR:
+                    if self.parser.context != NULL:
+                        raise self.cached_ScannerError(
+                                self.parser.context, context_mark,
+                                self.parser.problem, problem_mark)
+                    else:
+                        raise self.cached_ScannerError(None, None,
+                                self.parser.problem, problem_mark)
                 else:
-                    raise yaml.scanner.ScannerError(None, None,
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
-            elif self.parser.error == YAML_PARSER_ERROR:
-                if self.parser.context != NULL:
-                    raise yaml.parser.ParserError(
-                            self.parser.context,
-                            yaml.Mark("<input>",
-                                self.parser.context_mark.index,
-                                self.parser.context_mark.line,
-                                self.parser.context_mark.column,
-                                None, None),
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
-                else:
-                    raise yaml.parser.ParserError(None, None,
-                            self.parser.problem,
-                            yaml.Mark("<input>",
-                                self.parser.problem_mark.index,
-                                self.parser.problem_mark.line,
-                                self.parser.problem_mark.column,
-                                None, None))
-            else:
-                raise RuntimeError("neither error nor event produced")
-        start_mark = yaml.Mark("<input>",
+                    if self.parser.context != NULL:
+                        raise self.cached_ParserError(
+                                self.parser.context, context_mark,
+                                self.parser.problem, problem_mark)
+                    else:
+                        raise self.cached_ParserError(None, None,
+                                self.parser.problem, problem_mark)
+        start_mark = yaml.Mark(self.cached_input,
                 event.start_mark.index,
                 event.start_mark.line,
                 event.start_mark.column,
                 None, None)
-        end_mark = yaml.Mark("<input>",
+        end_mark = yaml.Mark(self.cached_input,
                 event.end_mark.index,
                 event.end_mark.line,
                 event.end_mark.column,
                 None, None)
-        if event.type == YAML_STREAM_START_EVENT:
-            return yaml.StreamStartEvent(start_mark, end_mark)
+        if event.type == YAML_NO_EVENT:
+            return None
+        elif event.type == YAML_STREAM_START_EVENT:
+            return self.cached_StreamStartEvent(start_mark, end_mark)
         elif event.type == YAML_STREAM_END_EVENT:
-            return yaml.StreamEndEvent(start_mark, end_mark)
+            return self.cached_StreamEndEvent(start_mark, end_mark)
         elif event.type == YAML_DOCUMENT_START_EVENT:
-            return yaml.DocumentStartEvent(start_mark, end_mark,
-                    (event.data.document_start.implicit == 0))
+            return self.cached_DocumentStartEvent(start_mark, end_mark)
         elif event.type == YAML_DOCUMENT_END_EVENT:
-            return yaml.DocumentEndEvent(start_mark, end_mark,
-                    (event.data.document_end.implicit == 0))
+            return self.cached_DocumentEndEvent(start_mark, end_mark)
+        elif event.type == YAML_ALIAS_EVENT:
+            anchor = PyUnicode_DecodeUTF8(event.data.alias.anchor,
+                    strlen(event.data.alias.anchor), 'strict')
+            return self.cached_AliasEvent(anchor, start_mark, end_mark)
         elif event.type == YAML_SCALAR_EVENT:
-            if event.data.scalar.anchor == NULL:
-                anchor = None
-            else:
-                anchor = event.data.scalar.anchor
-            if event.data.scalar.tag == NULL:
-                tag = None
-            else:
-                tag = event.data.scalar.tag
-            implicit = (event.data.scalar.plain_implicit == 1, event.data.scalar.quoted_implicit == 1)
-            flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE)
-            value = PyString_FromStringAndSize(event.data.scalar.value, event.data.scalar.length)
-            return yaml.ScalarEvent(anchor, tag, implicit, unicode(value, 'utf-8'),
-                    start_mark, end_mark)
-        elif event.type == YAML_ALIAS_EVENT:
-            if event.data.alias.anchor == NULL:
-                anchor = None
-            else:
-                anchor = event.data.alias.anchor
-            return yaml.AliasEvent(anchor, start_mark, end_mark)
+            anchor = None
+            if event.data.scalar.anchor != NULL:
+                anchor = PyUnicode_DecodeUTF8(event.data.scalar.anchor,
+                        strlen(event.data.scalar.anchor), 'strict')
+            tag = None
+            if event.data.scalar.tag != NULL:
+                tag = PyUnicode_DecodeUTF8(event.data.scalar.tag,
+                        strlen(event.data.scalar.tag), 'strict')
+            value = PyUnicode_DecodeUTF8(event.data.scalar.value,
+                    event.data.scalar.length, 'strict')
+            plain_implicit = (event.data.scalar.plain_implicit == 1)
+            quoted_implicit = (event.data.scalar.quoted_implicit == 1)
+            style = None
+            if event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE:
+                style = ''
+            elif event.data.scalar.style == YAML_SINGLE_QUOTED_SCALAR_STYLE:
+                style = '\''
+            elif event.data.scalar.style == YAML_DOUBLE_QUOTED_SCALAR_STYLE:
+                style = '"'
+            elif event.data.scalar.style == YAML_LITERAL_SCALAR_STYLE:
+                style = '|'
+            elif event.data.scalar.style == YAML_FOLDED_SCALAR_STYLE:
+                style = '>'
+            return self.cached_ScalarEvent(anchor, tag,
+                    (plain_implicit, quoted_implicit),
+                    value, start_mark, end_mark, style)
         elif event.type == YAML_SEQUENCE_START_EVENT:
-            if event.data.sequence_start.anchor == NULL:
-                anchor = None
-            else:
-                anchor = event.data.sequence_start.anchor
-            if event.data.sequence_start.tag == NULL:
-                tag = None
-            else:
-                tag = event.data.sequence_start.tag
+            anchor = None
+            if event.data.sequence_start.anchor != NULL:
+                anchor = PyUnicode_DecodeUTF8(event.data.sequence_start.anchor,
+                        strlen(event.data.sequence_start.anchor), 'strict')
+            tag = None
+            if event.data.sequence_start.tag != NULL:
+                tag = PyUnicode_DecodeUTF8(event.data.sequence_start.tag,
+                        strlen(event.data.sequence_start.tag), 'strict')
             implicit = (event.data.sequence_start.implicit == 1)
-            flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE)
-            return yaml.SequenceStartEvent(anchor, tag, implicit,
+            flow_style = None
+            if event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE:
+                flow_style = True
+            elif event.data.sequence_start.style == YAML_BLOCK_SEQUENCE_STYLE:
+                flow_style = False
+            return self.cached_SequenceStartEvent(anchor, tag, implicit,
                     start_mark, end_mark, flow_style)
         elif event.type == YAML_MAPPING_START_EVENT:
-            if event.data.mapping_start.anchor == NULL:
-                anchor = None
-            else:
-                anchor = event.data.mapping_start.anchor
-            if event.data.mapping_start.tag == NULL:
-                tag = None
-            else:
-                tag = event.data.mapping_start.tag
+            anchor = None
+            if event.data.mapping_start.anchor != NULL:
+                anchor = PyUnicode_DecodeUTF8(event.data.mapping_start.anchor,
+                        strlen(event.data.mapping_start.anchor), 'strict')
+            tag = None
+            if event.data.mapping_start.tag != NULL:
+                tag = PyUnicode_DecodeUTF8(event.data.mapping_start.tag,
+                        strlen(event.data.mapping_start.tag), 'strict')
             implicit = (event.data.mapping_start.implicit == 1)
-            flow_style = (event.data.mapping_start.style == YAML_FLOW_MAPPING_STYLE)
-            return yaml.MappingStartEvent(anchor, tag, implicit,
+            flow_style = None
+            if event.data.mapping_start.style == YAML_FLOW_SEQUENCE_STYLE:
+                flow_style = True
+            elif event.data.mapping_start.style == YAML_BLOCK_SEQUENCE_STYLE:
+                flow_style = False
+            return self.cached_MappingStartEvent(anchor, tag, implicit,
                     start_mark, end_mark, flow_style)
         elif event.type == YAML_SEQUENCE_END_EVENT:
-            return yaml.SequenceEndEvent(start_mark, end_mark)
+            return self.cached_SequenceEndEvent(start_mark, end_mark)
         elif event.type == YAML_MAPPING_END_EVENT:
-            return yaml.MappingEndEvent(start_mark, end_mark)
+            return self.cached_MappingEndEvent(start_mark, end_mark)
         else:
             raise RuntimeError("unknown event type")
 
-    def get_token(self):
-        cdef yaml_token_t *token
-        if self.cached_token != NULL:
-            yaml_token_delete(yaml_parser_get_token(self.parser))
-            obj = self.cached_obj
-            self.cached_token = NULL
-            self.cached_obj = None
-            return obj
-        if self.eof != 0:
-            return None
-        token = yaml_parser_get_token(self.parser)
-        obj = self._convert_token(token)
-        if token.type == YAML_STREAM_END_TOKEN:
-            self.eof = 1
-        yaml_token_delete(token)
-        return obj
-
-    def peek_token(self):
-        cdef yaml_token_t *token
-        if self.cached_token != NULL:
-            return self.cached_obj
-        if self.eof != 0:
-            return None
-        token = yaml_parser_peek_token(self.parser)
-        obj = self._convert_token(token)
-        if token.type == YAML_STREAM_END_TOKEN:
-            self.eof = 1
-        self.cached_token = token
-        self.cached_obj = obj
-        return obj
-
-    def check_token(self, *choices):
-        cdef yaml_token_t *token
-        if self.cached_token != NULL:
-            obj = self.cached_obj
-        elif self.eof != 0:
-            return False
-        else:
-            token = yaml_parser_peek_token(self.parser)
-            obj = self._convert_token(token)
-            if token.type == YAML_STREAM_END_TOKEN:
-                self.eof = 1
-            self.cached_token = token
-            self.cached_obj = obj
-        if not choices:
-            return True
-        for choice in choices:
-            if isinstance(obj, choice):
-                return True
-        return False
-
-    def get_event(self):
-        cdef yaml_event_t *event
-        if self.cached_event != NULL:
-            yaml_event_delete(yaml_parser_get_event(self.parser))
-            obj = self.cached_obj
-            self.cached_event = NULL
-            self.cached_obj = None
-            return obj
-        if self.eof != 0:
-            return None
-        event = yaml_parser_get_event(self.parser)
-        obj = self._convert_event(event)
-        if event.type == YAML_STREAM_END_EVENT:
-            self.eof = 1
-        yaml_event_delete(event)
-        return obj
-
-    def peek_event(self):
-        cdef yaml_event_t *event
-        if self.cached_event != NULL:
-            return self.cached_obj
-        if self.eof != 0:
-            return None
-        event = yaml_parser_peek_event(self.parser)
-        obj = self._convert_event(event)
-        if event.type == YAML_STREAM_END_EVENT:
-            self.eof = 1
-        self.cached_event = event
-        self.cached_obj = obj
-        return obj
-
-    def check_event(self, *choices):
-        cdef yaml_event_t *event
-        if self.cached_event != NULL:
-            obj = self.cached_obj
-        elif self.eof != 0:
-            return False
-        else:
-            event = yaml_parser_peek_event(self.parser)
-            obj = self._convert_event(event)
-            if event.type == YAML_STREAM_END_EVENT:
-                self.eof = 1
-            self.cached_event = event
-            self.cached_obj = obj
-        if not choices:
-            return True
-        for choice in choices:
-            if isinstance(obj, choice):
-                return True
-        return False
+cdef int input_handler(void *data, char *buffer, int size, int *read) except 0:
+    # Read callback with the yaml_read_handler_t signature.
+    #
+    # `data` is cast back to a ScannerAndParser; up to `size` bytes are
+    # requested from its `stream` attribute via read(), copied into `buffer`,
+    # and the number of bytes copied is stored in `read[0]`.
+    #
+    # Returns 1 on success.  Raises TypeError when read() does not return an
+    # exact string object and ValueError when it returns more than `size`
+    # bytes; a raised exception is reported to the caller as 0 (`except 0`).
+    cdef ScannerAndParser parser
+    cdef int length
+    parser = <ScannerAndParser>data
+    value = parser.stream.read(size)
+    if PyString_CheckExact(value) == 0:
+        raise TypeError("a string value is expected")
+    length = PyString_GET_SIZE(value)
+    # Refuse to copy more than the destination buffer can hold.
+    if length > size:
+        raise ValueError("a string value is too long")
+    memcpy(buffer, PyString_AS_STRING(value), length)
+    read[0] = length
+    return 1
 
 class Loader(ScannerAndParser,
         yaml.composer.Composer,

tests/test_yaml_ext.py

 
 class TestExtLoader(test_appliance.TestAppliance):
 
-    def _testExtScanner(self, test_name, data_filename, canonical_filename):
-        data = file(data_filename, 'r').read()
+    def _testExtScannerFileInput(self, test_name, data_filename, canonical_filename):
+        self._testExtScanner(test_name, data_filename, canonical_filename, True)
+
+    def _testExtScanner(self, test_name, data_filename, canonical_filename, file_input=False):
+        if file_input:
+            data = file(data_filename, 'r')
+        else:
+            data = file(data_filename, 'r').read()
         tokens = list(yaml.scan(data))
         ext_tokens = []
         try:
+            if file_input:
+                data = file(data_filename, 'r')
             for token in yaml.scan(data, Loader=yaml.ExtLoader):
                 ext_tokens.append(token)
             self.failUnlessEqual(len(tokens), len(ext_tokens))
             raise
 
 TestExtLoader.add_tests('testExtScanner', '.data', '.canonical')
+TestExtLoader.add_tests('testExtScannerFileInput', '.data', '.canonical')
 TestExtLoader.add_tests('testExtParser', '.data', '.canonical')
 
 def main(module='__main__'):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.