Kirill Simonov committed 349cb4f

Add Pyrex-based bindings for the libyaml scanner.
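
As a usage sketch for orientation (not part of the commit): once the extension is built, for instance with the new buildext target, and the resulting _yaml module is importable, yaml.ExtLoader becomes available and can be passed to yaml.scan, just as the new test below does. The document literal and the checks here are illustrative only.

    # Illustrative sketch: compare tokens from the default pure-Python scanner
    # with the libyaml-backed scanner added by this commit. Assumes the
    # extension has been built (e.g. `make buildext`) and is on sys.path.
    import yaml

    document = "- alpha\n- beta\n- {gamma: delta}\n"

    py_tokens = list(yaml.scan(document))
    ext_tokens = list(yaml.scan(document, Loader=yaml.ExtLoader))

    # Token classes and positions should agree between the two scanners,
    # which is what tests/test_yaml_ext.py verifies below.
    assert len(py_tokens) == len(ext_tokens)
    for py_token, ext_token in zip(py_tokens, ext_tokens):
        assert py_token.__class__ == ext_token.__class__
        assert py_token.start_mark.index == ext_token.start_mark.index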


Files changed (8)

Makefile

-.PHONY: default build force install test dist clean
+.PHONY: default build buildext force forceext install installext test testext dist clean
 
 PYTHON=/usr/bin/python
 TEST=
 build:
 	${PYTHON} setup.py build ${PARAMETERS}
 
+buildext:
+	${PYTHON} setup_ext.py build ${PARAMETERS}
+
 force:
 	${PYTHON} setup.py build -f ${PARAMETERS}
 
+forceext:
+	${PYTHON} setup_ext.py build -f ${PARAMETERS}
+
 install: build
 	${PYTHON} setup.py install ${PARAMETERS}
 
+installext: buildext
+	${PYTHON} setup_ext.py install ${PARAMETERS}
+
 test: build
 	${PYTHON} tests/test_build.py ${TEST}
 
-dist: build
+testext: buildext
+	${PYTHON} tests/test_build_ext.py ${TEST}
+
+dist:
 	${PYTHON} setup.py sdist --formats=zip,gztar
 
-windist: build
+windist:
 	${PYTHON} setup.py bdist_wininst
 
 clean:

ext/_yaml.h

+#include <yaml/yaml.h>

ext/_yaml.pyx

+cdef extern from "_yaml.h":
+
+    int PyString_CheckExact(object o)
+    int PyUnicode_CheckExact(object o)
+    char *PyString_AS_STRING(object o)
+    int PyString_GET_SIZE(object o)
+    object PyString_FromStringAndSize(char *v, int l)
+
+    cdef enum yaml_encoding_t:
+        YAML_ANY_ENCODING
+        YAML_UTF8_ENCODING
+        YAML_UTF16LE_ENCODING
+        YAML_UTF16BE_ENCODING
+    cdef enum yaml_error_type_t:
+        YAML_NO_ERROR
+        YAML_MEMORY_ERROR
+        YAML_READER_ERROR
+        YAML_SCANNER_ERROR
+        YAML_PARSER_ERROR
+        YAML_WRITER_ERROR
+        YAML_EMITTER_ERROR
+    cdef enum yaml_scalar_style_t:
+        YAML_ANY_SCALAR_STYLE
+        YAML_PLAIN_SCALAR_STYLE
+        YAML_SINGLE_QUOTED_SCALAR_STYLE
+        YAML_DOUBLE_QUOTED_SCALAR_STYLE
+        YAML_LITERAL_SCALAR_STYLE
+        YAML_FOLDED_SCALAR_STYLE
+    cdef enum yaml_sequence_style_t:
+        YAML_ANY_SEQUENCE_STYLE
+        YAML_BLOCK_SEQUENCE_STYLE
+        YAML_FLOW_SEQUENCE_STYLE
+    cdef enum yaml_mapping_style_t:
+        YAML_ANY_MAPPING_STYLE
+        YAML_BLOCK_MAPPING_STYLE
+        YAML_FLOW_MAPPING_STYLE
+    cdef enum yaml_token_type_t:
+        YAML_STREAM_START_TOKEN
+        YAML_STREAM_END_TOKEN
+        YAML_VERSION_DIRECTIVE_TOKEN
+        YAML_TAG_DIRECTIVE_TOKEN
+        YAML_DOCUMENT_START_TOKEN
+        YAML_DOCUMENT_END_TOKEN
+        YAML_BLOCK_SEQUENCE_START_TOKEN
+        YAML_BLOCK_MAPPING_START_TOKEN
+        YAML_BLOCK_END_TOKEN
+        YAML_FLOW_SEQUENCE_START_TOKEN
+        YAML_FLOW_SEQUENCE_END_TOKEN
+        YAML_FLOW_MAPPING_START_TOKEN
+        YAML_FLOW_MAPPING_END_TOKEN
+        YAML_BLOCK_ENTRY_TOKEN
+        YAML_FLOW_ENTRY_TOKEN
+        YAML_KEY_TOKEN
+        YAML_VALUE_TOKEN
+        YAML_ALIAS_TOKEN
+        YAML_ANCHOR_TOKEN
+        YAML_TAG_TOKEN
+        YAML_SCALAR_TOKEN
+
+    ctypedef int yaml_read_handler_t(void *data, char *buffer,
+            int size, int *size_read)
+
+    ctypedef struct yaml_mark_t:
+        int index
+        int line
+        int column
+    ctypedef struct _yaml_token_tag_data_t:
+        char *handle
+        char *suffix
+    ctypedef struct _yaml_token_scalar_data_t:
+        char *value
+        int length
+        yaml_scalar_style_t style
+    ctypedef struct _yaml_token_version_directive_data_t:
+        int major
+        int minor
+    ctypedef struct _yaml_token_tag_directive_data_t:
+        char *handle
+        char *prefix
+    ctypedef union _yaml_token_data_t:
+        yaml_encoding_t encoding
+        char *anchor
+        _yaml_token_tag_data_t tag
+        _yaml_token_scalar_data_t scalar
+        _yaml_token_version_directive_data_t version_directive
+        _yaml_token_tag_directive_data_t tag_directive
+    ctypedef struct yaml_token_t:
+        yaml_token_type_t type
+        _yaml_token_data_t data
+        yaml_mark_t start_mark
+        yaml_mark_t end_mark
+    ctypedef struct yaml_parser_t:
+        yaml_error_type_t error
+        char *problem
+        int problem_offset
+        int problem_value
+        yaml_mark_t problem_mark
+        char *context
+        yaml_mark_t context_mark
+
+    char *yaml_get_version_string()
+    void yaml_get_version(int *major, int *minor, int *patch)
+    void yaml_token_delete(yaml_token_t *token)
+    yaml_parser_t *yaml_parser_new()
+    void yaml_parser_delete(yaml_parser_t *parser)
+    void yaml_parser_set_input_string(yaml_parser_t *parser,
+            char *input, int size)
+    void yaml_parser_set_input(yaml_parser_t *parser,
+            yaml_read_handler_t *handler, void *data)
+    void yaml_parser_set_encoding(yaml_parser_t *parser,
+            yaml_encoding_t encoding)
+    yaml_token_t *yaml_parser_get_token(yaml_parser_t *parser)
+    yaml_token_t *yaml_parser_peek_token(yaml_parser_t *parser)
+
+
+import yaml
+
+def get_version_string():
+    return yaml_get_version_string()
+
+def get_version():
+    cdef int major, minor, patch
+    yaml_get_version(&major, &minor, &patch)
+    return (major, minor, patch)
+
+cdef class Scanner:
+
+    cdef yaml_parser_t *parser
+    cdef int eof
+    cdef object stream
+
+    def __init__(self, stream):
+        cdef char *input
+        cdef int size
+        if hasattr(stream, 'read'):
+            stream = stream.read()
+        if PyUnicode_CheckExact(stream) != 0:
+            stream = stream.encode('utf-8')
+        if PyString_CheckExact(stream) == 0:
+            raise TypeError("a string or stream input is required")
+        self.parser = yaml_parser_new()
+        if self.parser == NULL:
+            raise MemoryError
+        yaml_parser_set_input_string(self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream))
+        self.eof = 0
+        self.stream = stream
+
+    def __dealloc__(self):
+        if self.parser != NULL:
+            yaml_parser_delete(self.parser)
+            self.parser = NULL
+
+    cdef object _convert(self, yaml_token_t *token):
+        if token == NULL:
+            if self.parser.error == YAML_MEMORY_ERROR:
+                raise MemoryError
+            elif self.parser.error == YAML_READER_ERROR:
+                raise yaml.reader.ReaderError("<input>",
+                        self.parser.problem_offset,
+                        self.parser.problem_value,
+                        '?', self.parser.problem)
+            elif self.parser.error == YAML_SCANNER_ERROR:
+                if self.parser.context != NULL:
+                    raise yaml.scanner.ScannerError(
+                            self.parser.context,
+                            yaml.Mark("<input>",
+                                self.parser.context_mark.index,
+                                self.parser.context_mark.line,
+                                self.parser.context_mark.column,
+                                None, None),
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+                else:
+                    raise yaml.scanner.ScannerError(None, None,
+                            self.parser.problem,
+                            yaml.Mark("<input>",
+                                self.parser.problem_mark.index,
+                                self.parser.problem_mark.line,
+                                self.parser.problem_mark.column,
+                                None, None))
+            else:
+                raise RuntimeError("neither error nor token produced")
+        start_mark = yaml.Mark("<input>",
+                token.start_mark.index,
+                token.start_mark.line,
+                token.start_mark.column,
+                None, None)
+        end_mark = yaml.Mark("<input>",
+                token.end_mark.index,
+                token.end_mark.line,
+                token.end_mark.column,
+                None, None)
+        if token.type == YAML_STREAM_START_TOKEN:
+            return yaml.StreamStartToken(start_mark, end_mark)
+        elif token.type == YAML_STREAM_END_TOKEN:
+            return yaml.StreamEndToken(start_mark, end_mark)
+        elif token.type == YAML_VERSION_DIRECTIVE_TOKEN:
+            return yaml.DirectiveToken('YAML',
+                    (token.data.version_directive.major,
+                        token.data.version_directive.minor),
+                    start_mark, end_mark)
+        elif token.type == YAML_TAG_DIRECTIVE_TOKEN:
+            return yaml.DirectiveToken('TAG',
+                    (token.data.tag_directive.handle,
+                        token.data.tag_directive.prefix),
+                    start_mark, end_mark)
+        elif token.type == YAML_DOCUMENT_START_TOKEN:
+            return yaml.DocumentStartToken(start_mark, end_mark)
+        elif token.type == YAML_DOCUMENT_END_TOKEN:
+            return yaml.DocumentEndToken(start_mark, end_mark)
+        elif token.type == YAML_BLOCK_SEQUENCE_START_TOKEN:
+            return yaml.BlockSequenceStartToken(start_mark, end_mark)
+        elif token.type == YAML_BLOCK_MAPPING_START_TOKEN:
+            return yaml.BlockMappingStartToken(start_mark, end_mark)
+        elif token.type == YAML_BLOCK_END_TOKEN:
+            return yaml.BlockEndToken(start_mark, end_mark)
+        elif token.type == YAML_FLOW_SEQUENCE_START_TOKEN:
+            return yaml.FlowSequenceStartToken(start_mark, end_mark)
+        elif token.type == YAML_FLOW_SEQUENCE_END_TOKEN:
+            return yaml.FlowSequenceEndToken(start_mark, end_mark)
+        elif token.type == YAML_FLOW_MAPPING_START_TOKEN:
+            return yaml.FlowMappingStartToken(start_mark, end_mark)
+        elif token.type == YAML_FLOW_MAPPING_END_TOKEN:
+            return yaml.FlowMappingEndToken(start_mark, end_mark)
+        elif token.type == YAML_BLOCK_ENTRY_TOKEN:
+            return yaml.BlockEntryToken(start_mark, end_mark)
+        elif token.type == YAML_FLOW_ENTRY_TOKEN:
+            return yaml.FlowEntryToken(start_mark, end_mark)
+        elif token.type == YAML_KEY_TOKEN:
+            return yaml.KeyToken(start_mark, end_mark)
+        elif token.type == YAML_VALUE_TOKEN:
+            return yaml.ValueToken(start_mark, end_mark)
+        elif token.type == YAML_ALIAS_TOKEN:
+            return yaml.AliasToken(token.data.anchor,
+                    start_mark, end_mark)
+        elif token.type == YAML_ANCHOR_TOKEN:
+            return yaml.AnchorToken(token.data.anchor,
+                    start_mark, end_mark)
+        elif token.type == YAML_TAG_TOKEN:
+            handle = token.data.tag.handle
+            if handle == '':
+                handle = None
+            return yaml.TagToken((handle, token.data.tag.suffix),
+                    start_mark, end_mark)
+        elif token.type == YAML_SCALAR_TOKEN:
+            value = PyString_FromStringAndSize(token.data.scalar.value, token.data.scalar.length)
+            return yaml.ScalarToken(unicode(value, 'utf-8'),
+                    bool(token.data.scalar.style == YAML_PLAIN_SCALAR_STYLE),
+                    start_mark, end_mark)
+        else:
+            raise RuntimeError("unknown token type")
+
+    def get_token(self):
+        cdef yaml_token_t *token
+        if self.eof != 0:
+            return None
+        token = yaml_parser_get_token(self.parser)
+        obj = self._convert(token)
+        if token.type == YAML_STREAM_END_TOKEN:
+            self.eof = 1
+        yaml_token_delete(token)
+        return obj
+
+    def peek_token(self):
+        cdef yaml_token_t *token
+        if self.eof != 0:
+            return None
+        token = yaml_parser_peek_token(self.parser)
+        return self._convert(token)
+
+    def check_token(self, *choices):
+        cdef yaml_token_t *token
+        if self.eof != 0:
+            return False
+        token = yaml_parser_peek_token(self.parser)
+        obj = self._convert(token)
+        if not choices:
+            return True
+        for choice in choices:
+            if isinstance(obj, choice):
+                return True
+        return False
+
+class Loader(Scanner,
+        yaml.parser.Parser,
+        yaml.composer.Composer,
+        yaml.constructor.Constructor,
+        yaml.resolver.Resolver):
+
+    def __init__(self, stream):
+        Scanner.__init__(self, stream)
+        yaml.parser.Parser.__init__(self)
+        yaml.composer.Composer.__init__(self)
+        yaml.constructor.Constructor.__init__(self)
+        yaml.resolver.Resolver.__init__(self)
+
+yaml.ExtLoader = Loader
+
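
The Loader class above mixes the libyaml-backed Scanner into PyYAML's pure-Python parser, composer, constructor and resolver and publishes it as yaml.ExtLoader; only scanning is delegated to libyaml. A minimal sketch of driving the scanner through the token interface defined above (illustrative, not part of the commit; the input document is arbitrary):

    # Illustrative sketch: pull tokens out of the extension scanner directly,
    # using the check_token/get_token methods defined in _yaml.pyx above.
    import yaml

    loader = yaml.ExtLoader("- alpha\n- beta\n")
    while loader.check_token():
        token = loader.get_token()
        print token.__class__.__name__, token.start_mark.index, token.end_mark.index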

lib/yaml/constructor.py

 
 from error import *
 from nodes import *
-from composer import *
 
 try:
     import datetime
 class ConstructorError(MarkedYAMLError):
     pass
 
-class BaseConstructor(Composer):
+class BaseConstructor:
 
     yaml_constructors = {}
     yaml_multi_constructors = {}

setup_ext.py

+from distutils.core import setup
+from distutils.extension import Extension
+from Pyrex.Distutils import build_ext
+
+setup(
+    name = '_yaml',
+    ext_modules=[
+        Extension("_yaml", ["ext/_yaml.pyx"], libraries=['yaml']),
+    ],
+    cmdclass = {'build_ext': build_ext}
+)
+

tests/test_build_ext.py

+
+
+def main():
+    import sys, os, distutils.util
+    build_lib = 'build/lib'
+    build_lib_ext = os.path.join('build', 'lib.%s-%s' % (distutils.util.get_platform(), sys.version[0:3]))
+    sys.path.insert(0, build_lib)
+    sys.path.insert(0, build_lib_ext)
+    import test_yaml_ext
+    test_yaml_ext.main('test_yaml_ext')
+
+if __name__ == '__main__':
+    main()
+
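
The driver above locates the compiled module by prepending both build directories to sys.path before importing the test suite. A similar snippet (illustrative, not part of the commit) gives a quick smoke test of a freshly built extension, assuming it is run from the project root after make buildext:

    # Illustrative sketch: make the platform-specific build directory
    # importable, then query the wrapped libyaml version.
    import sys, os, distutils.util

    build_lib_ext = os.path.join('build',
            'lib.%s-%s' % (distutils.util.get_platform(), sys.version[0:3]))
    sys.path.insert(0, build_lib_ext)

    import _yaml
    print _yaml.get_version_string()          # libyaml version as a string
    print "%s.%s.%s" % _yaml.get_version()    # same version as a (major, minor, patch) tuple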

tests/test_yaml_ext.py

+
+import unittest, test_appliance
+
+import _yaml, yaml
+
+class TestExtVersion(unittest.TestCase):
+
+    def testExtVersion(self):
+        self.failUnlessEqual("%s.%s.%s" % _yaml.get_version(), _yaml.get_version_string())
+
+class TestExtScanner(test_appliance.TestAppliance):
+
+    def _testExtScanner(self, test_name, data_filename, canonical_filename):
+        data = file(data_filename, 'r').read()
+        tokens = list(yaml.scan(data))
+        ext_tokens = []
+        try:
+            for token in yaml.scan(data, Loader=yaml.ExtLoader):
+                ext_tokens.append(token)
+            self.failUnlessEqual(len(tokens), len(ext_tokens))
+            for token, ext_token in zip(tokens, ext_tokens):
+                self.failUnlessEqual(token.__class__, ext_token.__class__)
+                self.failUnlessEqual((token.start_mark.index, token.start_mark.line, token.start_mark.column),
+                        (ext_token.start_mark.index, ext_token.start_mark.line, ext_token.start_mark.column))
+                self.failUnlessEqual((token.end_mark.index, token.end_mark.line, token.end_mark.column),
+                        (ext_token.end_mark.index, ext_token.end_mark.line, ext_token.end_mark.column))
+                if hasattr(token, 'value'):
+                    self.failUnlessEqual(token.value, ext_token.value)
+        except:
+            print
+            print "DATA:"
+            print file(data_filename, 'rb').read()
+            print "TOKENS:", tokens
+            print "EXT_TOKENS:", ext_tokens
+            raise
+
+TestExtScanner.add_tests('testExtScanner', '.data', '.canonical')
+
+def main(module='__main__'):
+    unittest.main(module)
+
+if __name__ == '__main__':
+    main()
+