Kirill Simonov committed ed172c1

Move pyyaml3000 to a separate directory.


Files changed (445)

LICENSE

+Copyright (c) 2006 Kirill Simonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

MANIFEST.in

+include README LICENSE
+

Makefile

+.PHONY: default build force install test dist-src clean
+
+PYTHON=/usr/bin/python
+TEST=
+PARAMETERS=
+
+build:
+	${PYTHON} setup.py build ${PARAMETERS}
+
+force:
+	${PYTHON} setup.py build -f ${PARAMETERS}
+
+install: build
+	${PYTHON} setup.py install ${PARAMETERS}
+
+test: build
+	${PYTHON} tests/test_build.py ${TEST}
+
+dist-src:
+	${PYTHON} setup.py sdist --formats=zip,gztar
+
+clean:
+	${PYTHON} setup.py clean -a
+

README

+PyYAML3000 - The next generation YAML parser for Python.
+
+To install, type 'python setup.py install'.
+
+For more information, check 'http://trac.xitology.org/pysyck/wiki/PyYAML3000'.
+
+Post your questions and opinions to the YAML-Core mailing list:
+'http://lists.sourceforge.net/lists/listinfo/yaml-core'.
+
+PyYAML3000 is written by Kirill Simonov <xi@resolvent.net>. It is released
+under the MIT license. See the file LICENSE for more details.

lib/yaml/__init__.py

+
+from error import *
+from reader import *
+from scanner import *
+from parser import *
+from composer import *
+from resolver import *
+from constructor import *
+
+from tokens import *
+from events import *
+from nodes import *
+
+def parse(data, Reader=Reader, Scanner=Scanner, Parser=Parser):
+    reader = Reader(data)
+    scanner = Scanner(reader)
+    parser = Parser(scanner)
+    return parser
+
+def load(data, Reader=Reader, Scanner=Scanner, Parser=Parser,
+        Composer=Composer, Resolver=Resolver, Constructor=Constructor):
+    reader = Reader(data)
+    scanner = Scanner(reader)
+    parser = Parser(scanner)
+    composer = Composer(parser)
+    resolver = Resolver(composer)
+    constructor = Constructor(resolver)
+    return constructor
+
+def load_document(*args, **kwds):
+    for document in load(*args, **kwds):
+        return document
+
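The `load` chain above wires the stages Reader → Scanner → Parser → Composer → Resolver → Constructor into a single iterable of documents. A minimal usage sketch (Python 2, assuming `lib` is on `sys.path` so the package imports as `yaml`):

    import yaml

    # load() returns an iterable of documents; load_document() returns the first.
    print yaml.load_document("- one\n- two: 2\n")
    # ['one', {'two': 2}]

    for document in yaml.load("--- 1\n--- 2\n"):
        print document    # prints 1, then 2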

lib/yaml/composer.py

+
+__all__ = ['Composer', 'ComposerError']
+
+from error import MarkedYAMLError
+from events import *
+from nodes import *
+
+class ComposerError(MarkedYAMLError):
+    pass
+
+class Composer:
+
+    def __init__(self, parser):
+        self.parser = parser
+        self.all_anchors = {}
+        self.complete_anchors = {}
+
+    def check(self):
+        # Check if more documents are available.
+        return not self.parser.check(StreamEndEvent)
+
+    def get(self):
+        # Get the root node of the next document.
+        if not self.parser.check(StreamEndEvent):
+            return self.compose_document()
+
+    def __iter__(self):
+        # Iterator protocol.
+        while not self.parser.check(StreamEndEvent):
+            yield self.compose_document()
+
+    def compose_document(self):
+        node = self.compose_node()
+        self.all_anchors = {}
+        self.complete_anchors = {}
+        return node
+
+    def compose_node(self):
+        if self.parser.check(AliasEvent):
+            event = self.parser.get()
+            anchor = event.anchor
+            if anchor not in self.all_anchors:
+                raise ComposerError(None, None, "found undefined alias %r"
+                        % anchor.encode('utf-8'), event.start_marker)
+            if anchor not in self.complete_anchors:
+                collection_event = self.all_anchors[anchor]
+                raise ComposerError("while composing a collection",
+                        collection_event.start_marker,
+                        "found recursive anchor %r" % anchor.encode('utf-8'),
+                        event.start_marker)
+            return self.complete_anchors[anchor]
+        event = self.parser.peek()
+        anchor = event.anchor
+        if anchor is not None:
+            if anchor in self.all_anchors:
+                raise ComposerError("found duplicate anchor %r; first occurrence"
+                        % anchor.encode('utf-8'), self.all_anchors[anchor].start_marker,
+                        "second occurrence", event.start_marker)
+            self.all_anchors[anchor] = event
+        if self.parser.check(ScalarEvent):
+            node = self.compose_scalar_node()
+        elif self.parser.check(SequenceEvent):
+            node = self.compose_sequence_node()
+        elif self.parser.check(MappingEvent):
+            node = self.compose_mapping_node()
+        if anchor is not None:
+            self.complete_anchors[anchor] = node
+        return node
+
+    def compose_scalar_node(self):
+        event = self.parser.get()
+        return ScalarNode(event.tag, event.value,
+                event.start_marker, event.end_marker)
+
+    def compose_sequence_node(self):
+        start_event = self.parser.get()
+        value = []
+        while not self.parser.check(CollectionEndEvent):
+            value.append(self.compose_node())
+        end_event = self.parser.get()
+        return SequenceNode(start_event.tag, value,
+                start_event.start_marker, end_event.end_marker)
+
+    def compose_mapping_node(self):
+        start_event = self.parser.get()
+        value = {}
+        while not self.parser.check(CollectionEndEvent):
+            key_event = self.parser.peek()
+            item_key = self.compose_node()
+            item_value = self.compose_node()
+            if item_key in value:
+                raise ComposerError("while composing a mapping", start_event.start_marker,
+                        "found duplicate key", key_event.start_marker)
+            value[item_key] = item_value
+        end_event = self.parser.get()
+        return MappingNode(start_event.tag, value,
+                start_event.start_marker, end_event.end_marker)
+
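Because `compose_node` returns the previously composed node for an alias, anchored content is shared rather than copied, and the constructor later caches one object per node. A small sketch of the effect (assuming the package imports as `yaml`):

    import yaml

    document = yaml.load_document("left:  &seq [1, 2, 3]\nright: *seq\n")
    # The alias maps to the same composed node, so both keys hold
    # the very same list object.
    print document['left'] is document['right']    # True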

lib/yaml/constructor.py

+
+__all__ = ['BaseConstructor', 'Constructor', 'ConstructorError',
+    'YAMLObject', 'YAMLObjectMetaclass']
+
+from error import *
+from nodes import *
+
+try:
+    import datetime
+    datetime_available = True
+except ImportError:
+    datetime_available = False
+
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+import binascii, re
+
+class ConstructorError(MarkedYAMLError):
+    pass
+
+class BaseConstructor:
+
+    def __init__(self, resolver):
+        self.resolver = resolver
+        self.constructed_objects = {}
+
+    def check(self):
+        # Check if more documents are available.
+        return self.resolver.check()
+
+    def get(self):
+        # Construct and return the next document.
+        if self.resolver.check():
+            return self.construct_document(self.resolver.get())
+
+    def __iter__(self):
+        # Iterator protocol.
+        while self.resolver.check():
+            yield self.construct_document(self.resolver.get())
+
+    def construct_document(self, node):
+        native = self.construct_object(node)
+        self.constructed_objects = {}
+        return native
+
+    def construct_object(self, node):
+        if node in self.constructed_objects:
+            return self.constructed_objects[node]
+        if node.tag in self.yaml_constructors:
+            native = self.yaml_constructors[node.tag](self, node)
+        elif None in self.yaml_constructors:
+            native = self.yaml_constructors[None](self, node)
+        elif isinstance(node, ScalarNode):
+            native = self.construct_scalar(node)
+        elif isinstance(node, SequenceNode):
+            native = self.construct_sequence(node)
+        elif isinstance(node, MappingNode):
+            native = self.construct_mapping(node)
+        self.constructed_objects[node] = native
+        return native
+
+    def construct_scalar(self, node):
+        if not isinstance(node, ScalarNode):
+            if isinstance(node, MappingNode):
+                for key_node in node.value:
+                    if key_node.tag == u'tag:yaml.org,2002:value':
+                        return self.construct_scalar(node.value[key_node])
+            raise ConstructorError(None, None,
+                    "expected a scalar node, but found %s" % node.id,
+                    node.start_marker)
+        return node.value
+
+    def construct_sequence(self, node):
+        if not isinstance(node, SequenceNode):
+            raise ConstructorError(None, None,
+                    "expected a sequence node, but found %s" % node.id,
+                    node.start_marker)
+        return [self.construct_object(child) for child in node.value]
+
+    def construct_mapping(self, node):
+        if not isinstance(node, MappingNode):
+            raise ConstructorError(None, None,
+                    "expected a mapping node, but found %s" % node.id,
+                    node.start_marker)
+        mapping = {}
+        merge = None
+        for key_node in node.value:
+            if key_node.tag == u'tag:yaml.org,2002:merge':
+                if merge is not None:
+                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                            "found duplicate merge key", key_node.start_marker)
+                value_node = node.value[key_node]
+                if isinstance(value_node, MappingNode):
+                    merge = [self.construct_mapping(value_node)]
+                elif isinstance(value_node, SequenceNode):
+                    merge = []
+                    for subnode in value_node.value:
+                        if not isinstance(subnode, MappingNode):
+                            raise ConstructorError("while constructing a mapping",
+                                    node.start_marker,
+                                    "expected a mapping for merging, but found %s"
+                                    % subnode.id, subnode.start_marker)
+                        merge.append(self.construct_mapping(subnode))
+                    merge.reverse()
+                else:
+                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                            "expected a mapping or list of mappings for merging, but found %s"
+                            % value_node.id, value_node.start_marker)
+            elif key_node.tag == u'tag:yaml.org,2002:value':
+                if '=' in mapping:
+                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                            "found duplicate value key", key_node.start_marker)
+                value = self.construct_object(node.value[key_node])
+                mapping['='] = value
+            else:
+                key = self.construct_object(key_node)
+                try:
+                    duplicate_key = key in mapping
+                except TypeError, exc:
+                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                            "found unacceptable key (%s)" % exc, key_node.start_marker)
+                if duplicate_key:
+                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                            "found duplicate key", key_node.start_marker)
+                value = self.construct_object(node.value[key_node])
+                mapping[key] = value
+        if merge is not None:
+            merge.append(mapping)
+            mapping = {}
+            for submapping in merge:
+                mapping.update(submapping)
+        return mapping
+
+    def construct_pairs(self, node):
+        if not isinstance(node, MappingNode):
+            raise ConstructorError(None, None,
+                    "expected a mapping node, but found %s" % node.id,
+                    node.start_marker)
+        pairs = []
+        for key_node in node.value:
+            key = self.construct_object(key_node)
+            value = self.construct_object(node.value[key_node])
+            pairs.append((key, value))
+        return pairs
+
+    def add_constructor(cls, tag, constructor):
+        if not 'yaml_constructors' in cls.__dict__:
+            cls.yaml_constructors = cls.yaml_constructors.copy()
+        cls.yaml_constructors[tag] = constructor
+    add_constructor = classmethod(add_constructor)
+
+    yaml_constructors = {}
+
+class Constructor(BaseConstructor):
+
+    def construct_yaml_null(self, node):
+        self.construct_scalar(node)
+        return None
+
+    bool_values = {
+        u'yes':     True,
+        u'no':      False,
+        u'true':    True,
+        u'false':   False,
+        u'on':      True,
+        u'off':     False,
+    }
+
+    def construct_yaml_bool(self, node):
+        value = self.construct_scalar(node)
+        return self.bool_values[value.lower()]
+
+    def construct_yaml_int(self, node):
+        value = str(self.construct_scalar(node))
+        value = value.replace('_', '')
+        sign = +1
+        if value[0] == '-':
+            sign = -1
+        if value[0] in '+-':
+            value = value[1:]
+        if value == '0':
+            return 0
+        elif value.startswith('0b'):
+            return sign*int(value[2:], 2)
+        elif value.startswith('0x'):
+            return sign*int(value[2:], 16)
+        elif value[0] == '0':
+            return sign*int(value, 8)
+        elif ':' in value:
+            digits = [int(part) for part in value.split(':')]
+            digits.reverse()
+            base = 1
+            value = 0
+            for digit in digits:
+                value += digit*base
+                base *= 60
+            return sign*value
+        else:
+            return sign*int(value)
+
+    inf_value = 1e300000
+    nan_value = inf_value/inf_value
+
+    def construct_yaml_float(self, node):
+        value = str(self.construct_scalar(node))
+        value = value.replace('_', '')
+        sign = +1
+        if value[0] == '-':
+            sign = -1
+        if value[0] in '+-':
+            value = value[1:]
+        if value.lower() == '.inf':
+            return sign*self.inf_value
+        elif value.lower() == '.nan':
+            return self.nan_value
+        elif ':' in value:
+            digits = [float(part) for part in value.split(':')]
+            digits.reverse()
+            base = 1
+            value = 0.0
+            for digit in digits:
+                value += digit*base
+                base *= 60
+            return sign*value
+        else:
+            return float(value)
+
+    def construct_yaml_binary(self, node):
+        value = self.construct_scalar(node)
+        try:
+            return str(value).decode('base64')
+        except (binascii.Error, UnicodeEncodeError), exc:
+            raise ConstructorError(None, None,
+                    "failed to decode base64 data: %s" % exc, node.start_marker) 
+
+    timestamp_regexp = re.compile(
+            ur'''^(?P<year>[0-9][0-9][0-9][0-9])
+                -(?P<month>[0-9][0-9]?)
+                -(?P<day>[0-9][0-9]?)
+                (?:(?:[Tt]|[ \t]+)
+                (?P<hour>[0-9][0-9]?)
+                :(?P<minute>[0-9][0-9])
+                :(?P<second>[0-9][0-9])
+                (?:\.(?P<fraction>[0-9]*))?
+                (?:[ \t]*(?:Z|(?P<tz_hour>[-+][0-9][0-9]?)
+                (?::(?P<tz_minute>[0-9][0-9])?)?))?)?$''', re.X)
+
+    def construct_yaml_timestamp(self, node):
+        value = self.construct_scalar(node)
+        match = self.timestamp_regexp.match(value)
+        values = match.groupdict()
+        for key in values:
+            if values[key]:
+                values[key] = int(values[key])
+            else:
+                values[key] = 0
+        fraction = values['fraction']
+        if fraction:
+            while 10*fraction < 1000000:
+                fraction *= 10
+            values['fraction'] = fraction
+        stamp = datetime.datetime(values['year'], values['month'], values['day'],
+                values['hour'], values['minute'], values['second'], values['fraction'])
+        diff = datetime.timedelta(hours=values['tz_hour'], minutes=values['tz_minute'])
+        return stamp-diff
+
+    def construct_yaml_omap(self, node):
+        # Note: we do not check for duplicate keys, because it's too
+        # CPU-expensive.
+        if not isinstance(node, SequenceNode):
+            raise ConstructorError("while constructing an ordered map", node.start_marker,
+                    "expected a sequence, but found %s" % node.id, node.start_marker)
+        omap = []
+        for subnode in node.value:
+            if not isinstance(subnode, MappingNode):
+                raise ConstructorError("while constructing an ordered map", node.start_marker,
+                        "expected a mapping of length 1, but found %s" % subnode.id,
+                        subnode.start_marker)
+            if len(subnode.value) != 1:
+                raise ConstructorError("while constructing an ordered map", node.start_marker,
+                        "expected a single mapping item, but found %d items" % len(subnode.value),
+                        subnode.start_marker)
+            key_node = subnode.value.keys()[0]
+            key = self.construct_object(key_node)
+            value = self.construct_object(subnode.value[key_node])
+            omap.append((key, value))
+        return omap
+
+    def construct_yaml_pairs(self, node):
+        # Note: the same code as `construct_yaml_omap`.
+        if not isinstance(node, SequenceNode):
+            raise ConstructorError("while constructing pairs", node.start_marker,
+                    "expected a sequence, but found %s" % node.id, node.start_marker)
+        pairs = []
+        for subnode in node.value:
+            if not isinstance(subnode, MappingNode):
+                raise ConstructorError("while constructing pairs", node.start_marker,
+                        "expected a mapping of length 1, but found %s" % subnode.id,
+                        subnode.start_marker)
+            if len(subnode.value) != 1:
+                raise ConstructorError("while constructing pairs", node.start_marker,
+                        "expected a single mapping item, but found %d items" % len(subnode.value),
+                        subnode.start_marker)
+            key_node = subnode.value.keys()[0]
+            key = self.construct_object(key_node)
+            value = self.construct_object(subnode.value[key_node])
+            pairs.append((key, value))
+        return pairs
+
+    def construct_yaml_set(self, node):
+        value = self.construct_mapping(node)
+        return set(value)
+
+    def construct_yaml_str(self, node):
+        value = self.construct_scalar(node)
+        try:
+            return str(value)
+        except UnicodeEncodeError:
+            return value
+
+    def construct_yaml_seq(self, node):
+        return self.construct_sequence(node)
+
+    def construct_yaml_map(self, node):
+        return self.construct_mapping(node)
+
+    def construct_undefined(self, node):
+        raise ConstructorError(None, None,
+                "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
+                node.start_marker)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:null',
+        Constructor.construct_yaml_null)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:bool',
+        Constructor.construct_yaml_bool)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:int',
+        Constructor.construct_yaml_int)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:float',
+        Constructor.construct_yaml_float)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:binary',
+        Constructor.construct_yaml_binary)
+
+if datetime_available:
+    Constructor.add_constructor(
+            u'tag:yaml.org,2002:timestamp',
+            Constructor.construct_yaml_timestamp)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:omap',
+        Constructor.construct_yaml_omap)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:pairs',
+        Constructor.construct_yaml_pairs)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:set',
+        Constructor.construct_yaml_set)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:str',
+        Constructor.construct_yaml_str)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:seq',
+        Constructor.construct_yaml_seq)
+
+Constructor.add_constructor(
+        u'tag:yaml.org,2002:map',
+        Constructor.construct_yaml_map)
+
+Constructor.add_constructor(None,
+        Constructor.construct_undefined)
+
+class YAMLObjectMetaclass(type):
+
+    def __init__(cls, name, bases, kwds):
+        super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
+        if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
+            cls.yaml_constructor.add_constructor(cls.yaml_tag, cls.from_yaml)
+
+class YAMLObject(object):
+
+    __metaclass__ = YAMLObjectMetaclass
+
+    yaml_constructor = Constructor
+
+    yaml_tag = None
+
+    def from_yaml(cls, constructor, node):
+        raise ConstructorError(None, None,
+                "found undefined constructor for the tag %r"
+                % node.tag.encode('utf-8'), node.start_marker)
+    from_yaml = classmethod(from_yaml)
+
+    def to_yaml(self):
+        assert False    # needs dumper
+
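The metaclass registers `from_yaml` under `yaml_tag` whenever a subclass is defined, so application types plug into construction by subclassing `YAMLObject`. A sketch with a hypothetical `Monster` class:

    import yaml

    class Monster(yaml.YAMLObject):
        yaml_tag = u'!monster'

        def from_yaml(cls, constructor, node):
            monster = cls()
            monster.__dict__.update(constructor.construct_mapping(node))
            return monster
        from_yaml = classmethod(from_yaml)

    monster = yaml.load_document("!monster {name: Dragon, hp: 40}")
    print monster.name, monster.hp    # Dragon 40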

lib/yaml/error.py

+
+__all__ = ['Marker', 'YAMLError', 'MarkedYAMLError']
+
+class Marker:
+
+    def __init__(self, name, line, column, buffer, pointer):
+        self.name = name
+        self.line = line
+        self.column = column
+        self.buffer = buffer
+        self.pointer = pointer
+
+    def get_snippet(self, indent=4, max_length=75):
+        if self.buffer is None:
+            return None
+        head = ''
+        start = self.pointer
+        while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':
+            start -= 1
+            if self.pointer-start > max_length/2-1:
+                head = ' ... '
+                start += 5
+                break
+        tail = ''
+        end = self.pointer
+        while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':
+            end += 1
+            if end-self.pointer > max_length/2-1:
+                tail = ' ... '
+                end -= 5
+                break
+        snippet = self.buffer[start:end].encode('utf-8')
+        return ' '*indent + head + snippet + tail + '\n'  \
+                + ' '*(indent+self.pointer-start+len(head)) + '^'
+
+    def __str__(self):
+        snippet = self.get_snippet()
+        where = "  in \"%s\", line %d, column %d"   \
+                % (self.name, self.line+1, self.column+1)
+        if snippet is not None:
+            where += ":\n"+snippet
+        return where
+
+class YAMLError(Exception):
+    pass
+
+class MarkedYAMLError(YAMLError):
+
+    def __init__(self, context=None, context_marker=None,
+            problem=None, problem_marker=None):
+        self.context = context
+        self.context_marker = context_marker
+        self.problem = problem
+        self.problem_marker = problem_marker
+
+    def __str__(self):
+        lines = []
+        #for (place, marker) in [(self.context, self.context_marker),
+        #                        (self.problem, self.problem_marker)]:
+        #    if place is not None:
+        #        lines.append(place)
+        #        if marker is not None:
+        #            lines.append(str(marker))
+        if self.context is not None:
+            lines.append(self.context)
+        if self.context_marker is not None  \
+            and (self.problem is None or self.problem_marker is None
+                    or self.context_marker.name != self.problem_marker.name
+                    or self.context_marker.line != self.problem_marker.line
+                    or self.context_marker.column != self.problem_marker.column):
+            lines.append(str(self.context_marker))
+        if self.problem is not None:
+            lines.append(self.problem)
+        if self.problem_marker is not None:
+            lines.append(str(self.problem_marker))
+        return '\n'.join(lines)
+
+
+
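`Marker.__str__` combines the position with a caret snippet from `get_snippet`. An illustration with a made-up buffer and position:

    from yaml.error import Marker

    buffer = u'key: [1, 2\n'
    marker = Marker("<example>", 0, 10, buffer, 10)
    print marker
    #   in "<example>", line 1, column 11:
    #     key: [1, 2
    #               ^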

lib/yaml/events.py

+
+class Event:
+    def __init__(self, start_marker, end_marker):
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+    def __repr__(self):
+        attributes = [key for key in self.__dict__
+                if not key.endswith('_marker')]
+        attributes.sort()
+        arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
+                for key in attributes])
+        return '%s(%s)' % (self.__class__.__name__, arguments)
+
+class NodeEvent(Event):
+    def __init__(self, anchor, start_marker, end_marker):
+        self.anchor = anchor
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+
+class AliasEvent(NodeEvent):
+    pass
+
+class ScalarEvent(NodeEvent):
+    def __init__(self, anchor, tag, value, start_marker, end_marker):
+        self.anchor = anchor
+        self.tag = tag
+        self.value = value
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+
+class CollectionEvent(NodeEvent):
+    def __init__(self, anchor, tag, start_marker, end_marker):
+        self.anchor = anchor
+        self.tag = tag
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+
+class SequenceEvent(CollectionEvent):
+    pass
+
+class MappingEvent(CollectionEvent):
+    pass
+
+class CollectionEndEvent(Event):
+    pass
+
+class StreamEndEvent(Event):
+    pass
+
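`yaml.parse()` exposes this event stream directly. For a two-item block sequence the stream looks roughly like this (markers are omitted by `__repr__`):

    import yaml

    for event in yaml.parse("- 1\n- 2\n"):
        print event
    # SequenceEvent(anchor=None, tag=u'!')
    # ScalarEvent(anchor=None, tag=None, value=u'1')
    # ScalarEvent(anchor=None, tag=None, value=u'2')
    # CollectionEndEvent()
    # StreamEndEvent()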

lib/yaml/nodes.py

+
+class Node:
+    def __init__(self, tag, value, start_marker, end_marker):
+        self.tag = tag
+        self.value = value
+        self.start_marker = start_marker
+        self.end_marker = end_marker
+    def __repr__(self):
+        value = self.value
+        if isinstance(value, list):
+            if len(value) == 0:
+                value = '<empty>'
+            elif len(value) == 1:
+                value = '<1 item>'
+            else:
+                value = '<%d items>' % len(value)
+        else:
+            if len(value) > 75:
+                value = repr(value[:70]+u' ... ')
+            else:
+                value = repr(value)
+        return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
+
+class ScalarNode(Node):
+    id = 'scalar'
+
+class CollectionNode(Node):
+    pass
+
+class SequenceNode(CollectionNode):
+    id = 'sequence'
+
+class MappingNode(CollectionNode):
+    id = 'mapping'
+
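Nodes are what the resolver hands to the constructor; `__repr__` abbreviates long or collection values. A sketch that composes and resolves a single scalar document by chaining the stages manually:

    from yaml import Reader, Scanner, Parser, Composer, Resolver

    resolver = Resolver(Composer(Parser(Scanner(Reader("hello")))))
    print resolver.get()
    # ScalarNode(tag=u'tag:yaml.org,2002:str', value=u'hello')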

lib/yaml/parser.py

+
+# YAML can be parsed by an LL(1) parser!
+#
+# We use the following production rules:
+# stream            ::= implicit_document? explicit_document* STREAM-END
+# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
+# implicit_document ::= block_node DOCUMENT-END?
+# block_node    ::= ALIAS | properties? block_content
+# flow_node     ::= ALIAS | properties? flow_content
+# properties    ::= TAG ANCHOR? | ANCHOR TAG?
+# block_content     ::= block_collection | flow_collection | SCALAR
+# flow_content      ::= flow_collection | SCALAR
+# block_collection  ::= block_sequence | block_mapping
+# block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+# block_mapping     ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
+# block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
+# indentless_block_sequence         ::= (BLOCK-ENTRY block_node?)+
+# flow_collection   ::= flow_sequence | flow_mapping
+# flow_sequence     ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
+# flow_mapping      ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
+# flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+# flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+
+# TODO: support for BOM within a stream.
+# stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END
+
+# Note that there is a slight deviation from the specification. We require
+# non-empty node content if ANCHOR or TAG is specified. This disallows
+# documents such as
+#
+#   key:    !!str   # empty value
+#
+# This is done to prevent ambiguity in parsing tags and aliases:
+#
+#   {   !!perl/YAML::Parser:    value }
+#
+# What is it? Should it be interpreted as
+#   {   ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value }
+# or
+#   {   ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' }
+# Since we disallow empty node content after node properties, a tag is always
+# followed by a space or a line break.
+
+# FIRST sets:
+# stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
+# explicit_document: { DIRECTIVE DOCUMENT-START }
+# implicit_document: FIRST(block_node)
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_sequence: { BLOCK-SEQUENCE-START }
+# block_mapping: { BLOCK-MAPPING-START }
+# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
+# indentless_sequence: { BLOCK-ENTRY }
+# flow_sequence: { FLOW-SEQUENCE-START }
+# flow_mapping: { FLOW-MAPPING-START }
+# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+
+__all__ = ['Parser', 'ParserError']
+
+from error import MarkedYAMLError
+from tokens import *
+from events import *
+
+class ParserError(MarkedYAMLError):
+    pass
+
+class Parser:
+    # Since writing an LL(1) parser is a straightforward task, we do not give
+    # many comments here.
+    # Note that we use Python generators. If you rewrite the parser in another
+    # language, you may replace all 'yield'-s with event handler calls.
+
+    DEFAULT_TAGS = {
+        u'!':   u'!',
+        u'!!':  u'tag:yaml.org,2002:',
+    }
+
+    def __init__(self, scanner):
+        self.scanner = scanner
+        self.current_event = None
+        self.yaml_version = None
+        self.tag_handles = {}
+        self.event_generator = self.parse_stream()
+
+    def check(self, *choices):
+        # Check the type of the next event.
+        if self.current_event is None:
+            try:
+                self.current_event = self.event_generator.next()
+            except StopIteration:
+                pass
+        if self.current_event is not None:
+            for choice in choices:
+                if isinstance(self.current_event, choice):
+                    return True
+        return False
+
+    def peek(self):
+        # Get the next event.
+        if self.current_event is None:
+            try:
+                self.current_event = self.event_generator.next()
+            except StopIteration:
+                pass
+        return self.current_event
+
+    def get(self):
+        # Get the next event.
+        if self.current_event is None:
+            try:
+                self.current_event = self.event_generator.next()
+            except StopIteration:
+                pass
+        value = self.current_event
+        self.current_event = None
+        return value
+
+    def __iter__(self):
+        # Iterator protocol.
+        return self.event_generator
+
+    def parse_stream(self):
+        # implicit_document? explicit_document* STREAM-END
+
+        # Parse implicit document.
+        if not self.scanner.check(DirectiveToken, DocumentStartToken,
+                StreamEndToken):
+            self.tag_handles = self.DEFAULT_TAGS
+            for event in self.parse_block_node():
+                yield event
+
+        # Parse explicit documents.
+        while not self.scanner.check(StreamEndToken):
+            self.process_directives()
+            if not self.scanner.check(DocumentStartToken):
+                raise ParserError(None, None,
+                        "expected '<document start>', but found %r"
+                        % self.scanner.peek().id,
+                        self.scanner.peek().start_marker)
+            token = self.scanner.get()
+            if self.scanner.check(DirectiveToken,
+                    DocumentStartToken, DocumentEndToken, StreamEndToken):
+                yield self.process_empty_scalar(token.end_marker)
+            else:
+                for event in self.parse_block_node():
+                    yield event
+            while self.scanner.check(DocumentEndToken):
+                self.scanner.get()
+
+        # Parse end of stream.
+        token = self.scanner.get()
+        yield StreamEndEvent(token.start_marker, token.end_marker)
+
+    def process_directives(self):
+        # DIRECTIVE*
+        self.yaml_version = None
+        self.tag_handles = {}
+        while self.scanner.check(DirectiveToken):
+            token = self.scanner.get()
+            if token.name == u'YAML':
+                if self.yaml_version is not None:
+                    raise ParserError(None, None,
+                            "found duplicate YAML directive", token.start_marker)
+                major, minor = token.value
+                if major != 1:
+                    raise ParserError(None, None,
+                            "found incompatible YAML document (version 1.* is required)",
+                            token.start_marker)
+                self.yaml_version = token.value
+            elif token.name == u'TAG':
+                handle, prefix = token.value
+                if handle in self.tag_handles:
+                    raise ParserError(None, None,
+                            "duplicate tag handle %r" % handle.encode('utf-8'),
+                            token.start_marker)
+                self.tag_handles[handle] = prefix
+        for key in self.DEFAULT_TAGS:
+            if key not in self.tag_handles:
+                self.tag_handles[key] = self.DEFAULT_TAGS[key]
+
+    def parse_block_node(self):
+        return self.parse_node(block=True)
+
+    def parse_flow_node(self):
+        return self.parse_node()
+
+    def parse_block_node_or_indentless_sequence(self):
+        return self.parse_node(block=True, indentless_sequence=True)
+
+    def parse_node(self, block=False, indentless_sequence=False):
+        # block_node    ::= ALIAS | properties? block_content
+        # flow_node     ::= ALIAS | properties? flow_content
+        # properties    ::= TAG ANCHOR? | ANCHOR TAG?
+        # block_content     ::= block_collection | flow_collection | SCALAR
+        # flow_content      ::= flow_collection | SCALAR
+        # block_collection  ::= block_sequence | block_mapping
+        # block_node_or_indentless_sequence ::= ALIAS | properties?
+        #                                       (block_content | indentless_block_sequence)
+        if self.scanner.check(AliasToken):
+            token = self.scanner.get()
+            yield AliasEvent(token.value, token.start_marker, token.end_marker)
+        else:
+            anchor = None
+            tag = None
+            start_marker = end_marker = tag_marker = None
+            if self.scanner.check(AnchorToken):
+                token = self.scanner.get()
+                start_marker = end_marker = token.start_marker
+                anchor = token.value
+                if self.scanner.check(TagToken):
+                    token = self.scanner.get()
+                    end_marker = tag_marker = token.start_marker
+                    tag = token.value
+            elif self.scanner.check(TagToken):
+                token = self.scanner.get()
+                start_marker = end_marker = tag_marker = token.start_marker
+                tag = token.value
+                if self.scanner.check(AnchorToken):
+                    token = self.scanner.get()
+                    end_marker = token.start_marker
+                    anchor = token.value
+            if tag is not None:
+                handle, suffix = tag
+                if handle is not None:
+                    if handle not in self.tag_handles:
+                        raise ParserError("while parsing a node", start_marker,
+                                "found undefined tag handle %r" % handle.encode('utf-8'),
+                                tag_marker)
+                    tag = self.tag_handles[handle]+suffix
+                else:
+                    tag = suffix
+            if tag is None:
+                if not (self.scanner.check(ScalarToken) and
+                        self.scanner.peek().plain):
+                    tag = u'!'
+            if start_marker is None:
+                start_marker = self.scanner.peek().start_marker
+            event = None
+            collection_events = None
+            if indentless_sequence and self.scanner.check(BlockEntryToken):
+                end_marker = self.scanner.peek().end_marker
+                event = SequenceEvent(anchor, tag, start_marker, end_marker)
+                collection_events = self.parse_indentless_sequence()
+            else:
+                if self.scanner.check(ScalarToken):
+                    token = self.scanner.get()
+                    end_marker = token.end_marker
+                    event = ScalarEvent(anchor, tag, token.value,
+                            start_marker, end_marker)
+                elif self.scanner.check(FlowSequenceStartToken):
+                    end_marker = self.scanner.peek().end_marker
+                    event = SequenceEvent(anchor, tag, start_marker, end_marker)
+                    collection_events = self.parse_flow_sequence()
+                elif self.scanner.check(FlowMappingStartToken):
+                    end_marker = self.scanner.peek().end_marker
+                    event = MappingEvent(anchor, tag, start_marker, end_marker)
+                    collection_events = self.parse_flow_mapping()
+                elif block and self.scanner.check(BlockSequenceStartToken):
+                    end_marker = self.scanner.peek().start_marker
+                    event = SequenceEvent(anchor, tag, start_marker, end_marker)
+                    collection_events = self.parse_block_sequence()
+                elif block and self.scanner.check(BlockMappingStartToken):
+                    end_marker = self.scanner.peek().start_marker
+                    event = MappingEvent(anchor, tag, start_marker, end_marker)
+                    collection_events = self.parse_block_mapping()
+                else:
+                    if block:
+                        node = 'block'
+                    else:
+                        node = 'flow'
+                    token = self.scanner.peek()
+                    raise ParserError("while scanning a %s node" % node, start_marker,
+                            "expected the node content, but found %r" % token.id,
+                            token.start_marker)
+            yield event
+            if collection_events is not None:
+                for event in collection_events:
+                    yield event
+
+    def parse_block_sequence(self):
+        # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+        token = self.scanner.get()
+        start_marker = token.start_marker
+        while self.scanner.check(BlockEntryToken):
+            token = self.scanner.get()
+            if not self.scanner.check(BlockEntryToken, BlockEndToken):
+                for event in self.parse_block_node():
+                    yield event
+            else:
+                yield self.process_empty_scalar(token.end_marker)
+        if not self.scanner.check(BlockEndToken):
+            token = self.scanner.peek()
+            raise ParserError("while scanning a block collection", start_marker,
+                    "expected <block end>, but found %r" % token.id, token.start_marker)
+        token = self.scanner.get()
+        yield CollectionEndEvent(token.start_marker, token.end_marker)
+
+    def parse_indentless_sequence(self):
+        # (BLOCK-ENTRY block_node?)+
+        while self.scanner.check(BlockEntryToken):
+            token = self.scanner.get()
+            if not self.scanner.check(BlockEntryToken,
+                    KeyToken, ValueToken, BlockEndToken):
+                for event in self.parse_block_node():
+                    yield event
+            else:
+                yield self.process_empty_scalar(token.end_marker)
+        token = self.scanner.peek()
+        yield CollectionEndEvent(token.start_marker, token.start_marker)
+
+    def parse_block_mapping(self):
+        # BLOCK-MAPPING-START
+        #   ((KEY block_node_or_indentless_sequence?)?
+        #   (VALUE block_node_or_indentless_sequence?)?)*
+        # BLOCK-END
+        token = self.scanner.get()
+        start_marker = token.start_marker
+        while self.scanner.check(KeyToken, ValueToken):
+            if self.scanner.check(KeyToken):
+                token = self.scanner.get()
+                if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+                    for event in self.parse_block_node_or_indentless_sequence():
+                        yield event
+                else:
+                    yield self.process_empty_scalar(token.end_marker)
+            if self.scanner.check(ValueToken):
+                token = self.scanner.get()
+                if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+                    for event in self.parse_block_node_or_indentless_sequence():
+                        yield event
+                else:
+                    yield self.process_empty_scalar(token.end_marker)
+            else:
+                token = self.scanner.peek()
+                yield self.process_empty_scalar(token.start_marker)
+        if not self.scanner.check(BlockEndToken):
+            token = self.scanner.peek()
+            raise ParserError("while scanning a block mapping", start_marker,
+                    "expected <block end>, but found %r" % token.id, token.start_marker)
+        token = self.scanner.get()
+        yield CollectionEndEvent(token.start_marker, token.end_marker)
+
+    def parse_flow_sequence(self):
+        # flow_sequence     ::= FLOW-SEQUENCE-START
+        #                       (flow_sequence_entry FLOW-ENTRY)*
+        #                       flow_sequence_entry?
+        #                       FLOW-SEQUENCE-END
+        # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+        #
+        # Note that the production rules for flow_sequence_entry and
+        # flow_mapping_entry are identical, but their interpretations differ.
+        # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+        # generates an inline mapping (set syntax).
+        token = self.scanner.get()
+        start_marker = token.start_marker
+        while not self.scanner.check(FlowSequenceEndToken):
+            if self.scanner.check(KeyToken):
+                token = self.scanner.get()
+                yield MappingEvent(None, u'!',
+                        token.start_marker, token.end_marker)
+                if not self.scanner.check(ValueToken,
+                        FlowEntryToken, FlowSequenceEndToken):
+                    for event in self.parse_flow_node():
+                        yield event
+                else:
+                    yield self.process_empty_scalar(token.end_marker)
+                if self.scanner.check(ValueToken):
+                    token = self.scanner.get()
+                    if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
+                        for event in self.parse_flow_node():
+                            yield event
+                    else:
+                        yield self.process_empty_scalar(token.end_marker)
+                else:
+                    token = self.scanner.peek()
+                    yield self.process_empty_scalar(token.start_marker)
+                token = self.scanner.peek()
+                yield CollectionEndEvent(token.start_marker, token.start_marker)
+            else:
+                for event in self.parse_flow_node():
+                    yield event
+            if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
+                token = self.scanner.peek()
+                raise ParserError("while scanning a flow sequence", start_marker,
+                        "expected ',' or ']', but got %r" % token.id, token.start_marker)
+            if self.scanner.check(FlowEntryToken):
+                self.scanner.get()
+        token = self.scanner.get()
+        yield CollectionEndEvent(token.start_marker, token.end_marker)
+
+    def parse_flow_mapping(self):
+        # flow_mapping      ::= FLOW-MAPPING-START
+        #                       (flow_mapping_entry FLOW-ENTRY)*
+        #                       flow_mapping_entry?
+        #                       FLOW-MAPPING-END
+        # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+        token = self.scanner.get()
+        start_marker = token.start_marker
+        while not self.scanner.check(FlowMappingEndToken):
+            if self.scanner.check(KeyToken):
+                token = self.scanner.get()
+                if not self.scanner.check(ValueToken,
+                        FlowEntryToken, FlowMappingEndToken):
+                    for event in self.parse_flow_node():
+                        yield event
+                else:
+                    yield self.process_empty_scalar(token.end_marker)
+                if self.scanner.check(ValueToken):
+                    token = self.scanner.get()
+                    if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
+                        for event in self.parse_flow_node():
+                            yield event
+                    else:
+                        yield self.process_empty_scalar(token.end_marker)
+                else:
+                    token = self.scanner.peek()
+                    yield self.process_empty_scalar(token.start_marker)
+            else:
+                for event in self.parse_flow_node():
+                    yield event
+                yield self.process_empty_scalar(self.scanner.peek().start_marker)
+            if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
+                token = self.scanner.peek()
+                raise ParserError("while scanning a flow mapping", start_marker,
+                        "expected ',' or '}', but got %r" % token.id, token.start_marker)
+            if self.scanner.check(FlowEntryToken):
+                self.scanner.get()
+        if not self.scanner.check(FlowMappingEndToken):
+            token = self.scanner.peek()
+            raise ParserError("while scanning a flow mapping", start_marker,
+                    "expected '}', but found %r" % token.id, token.start_marker)
+        token = self.scanner.get()
+        yield CollectionEndEvent(token.start_marker, token.end_marker)
+
+    def process_empty_scalar(self, marker):
+        return ScalarEvent(None, None, u'', marker, marker)
+
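The deviation from the specification described at the top of this file is easy to observe: a tag with no following node content is rejected. A sketch (Python 2):

    import yaml

    try:
        for event in yaml.parse("key: !!str\n"):
            pass
    except yaml.ParserError, exc:
        # "while scanning a block node ... expected the node content ..."
        print exc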

lib/yaml/reader.py

+# This module contains abstractions for the input stream. You don't have to
+# look further, there is no pretty code here.
+#
+# We define two classes here.
+#
+#   Marker(name, line, column, buffer, pointer)
+# It's just a record; its only use is producing nice error messages. The
+# parser does not use it for any other purpose.
+#
+#   Reader(data)
+# Reader determines the encoding of `data` and converts it to unicode.
+# Reader provides the following methods and attributes:
+#   reader.peek(index=0) - return the character `index` positions ahead of the current one
+#   reader.prefix(length=1) - return the next `length` characters
+#   reader.forward(length=1) - move the current position `length` characters forward
+#   reader.index - the number of the current character
+#   reader.line, reader.column - the line and the column of the current character
+
+__all__ = ['Reader', 'ReaderError']
+
+from error import YAMLError, Marker
+
+import codecs, re
+
+# Unfortunately, codec functions in Python 2.3 do not support the `finish`
+# argument, so we have to write our own wrappers.
+
+try:
+    codecs.utf_8_decode('', 'strict', False)
+    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
+
+except TypeError:
+
+    def utf_16_le_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:
+            data = data[:-1]
+        return codecs.utf_16_le_decode(data, errors)
+
+    def utf_16_be_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:
+            data = data[:-1]
+        return codecs.utf_16_be_decode(data, errors)
+
+    def utf_8_decode(data, errors, finish=False):
+        if not finish:
+            # We are trying to remove a possible incomplete multibyte character
+            # from the suffix of the data.
+            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
+            # All further bytes are in the range 0x80 to 0xbf.
+            # UTF-8 encoded UCS characters may be up to six bytes long.
+            count = 0
+            while count < 5 and count < len(data)   \
+                    and '\x80' <= data[-count-1] <= '\xBF':
+                count += 1
+            if count < 5 and count < len(data)  \
+                    and '\xC0' <= data[-count-1] <= '\xFD':
+                data = data[:-count-1]
+        return codecs.utf_8_decode(data, errors)
+
+class ReaderError(YAMLError):
+
+    def __init__(self, name, position, character, encoding, reason):
+        self.name = name
+        self.character = character
+        self.position = position
+        self.encoding = encoding
+        self.reason = reason
+
+    def __str__(self):
+        if isinstance(self.character, str):
+            return "'%s' codec can't decode byte #x%02x: %s\n"  \
+                    "  in \"%s\", position %d"    \
+                    % (self.encoding, ord(self.character), self.reason,
+                            self.name, self.position)
+        else:
+            return "unacceptable character #x%04x: %s\n"    \
+                    "  in \"%s\", position %d"    \
+                    % (ord(self.character), self.reason,
+                            self.name, self.position)
+
+class Reader:
+    # Reader:
+    # - determines the data encoding and converts it to unicode,
+    # - checks if characters are in allowed range,
+    # - adds '\0' to the end.
+
+    # Reader accepts
+    #  - a `str` object,
+    #  - a `unicode` object,
+    #  - a file-like object with its `read` method returning `str`,
+    #  - a file-like object with its `read` method returning `unicode`.
+
+    # Yeah, it's ugly and slow.
+
+    def __init__(self, data):
+        self.name = None
+        self.stream = None
+        self.stream_pointer = 0
+        self.eof = True
+        self.buffer = u''
+        self.pointer = 0
+        self.raw_buffer = None
+        self.raw_decode = None
+        self.index = 0
+        self.line = 0
+        self.column = 0
+        if isinstance(data, unicode):
+            self.name = "<unicode string>"
+            self.check_printable(data)
+            self.buffer = data+u'\0'
+        elif isinstance(data, str):
+            self.name = "<string>"
+            self.raw_buffer = data
+            self.determine_encoding()
+        else:
+            self.stream = data
+            self.name = getattr(data, 'name', "<file>")
+            self.eof = False
+            self.raw_buffer = ''
+            self.determine_encoding()
+
+    def peek(self, index=0):
+        if self.pointer+index+1 >= len(self.buffer):
+            self.update(index+1)
+        return self.buffer[self.pointer+index]
+
+    def prefix(self, length=1):
+        if self.pointer+length >= len(self.buffer):
+            self.update(length)
+        return self.buffer[self.pointer:self.pointer+length]
+
+    def forward(self, length=1):
+        if self.pointer+length+1 >= len(self.buffer):
+            self.update(length+1)
+        for k in range(length):
+            ch = self.buffer[self.pointer]
+            self.pointer += 1
+            self.index += 1
+            if ch in u'\n\x85\u2028\u2029'  \
+                    or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
+                self.line += 1
+                self.column = 0
+            elif ch != u'\uFEFF':
+                self.column += 1
+
+    def get_marker(self):
+        if self.stream is None:
+            return Marker(self.name, self.line, self.column,
+                    self.buffer, self.pointer)
+        else:
+            return Marker(self.name, self.line, self.column, None, None)
+
+    def determine_encoding(self):
+        while not self.eof and len(self.raw_buffer) < 2:
+            self.update_raw()
+        if not isinstance(self.raw_buffer, unicode):
+            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
+                self.raw_decode = utf_16_le_decode
+            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
+                self.raw_decode = utf_16_be_decode
+            else:
+                self.raw_decode = utf_8_decode
+        self.update(1)
+
+    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    def check_printable(self, data):
+        match = self.NON_PRINTABLE.search(data)
+        if match:
+            character = match.group()
+            position = self.index+(len(self.buffer)-self.pointer)+match.start()
+            raise ReaderError(self.name, position, character,
+                    'unicode', "special characters are not allowed")
+
+    def update(self, length):
+        if self.raw_buffer is None:
+            return
+        self.buffer = self.buffer[self.pointer:]
+        self.pointer = 0
+        while len(self.buffer) < length:
+            if not self.eof:
+                self.update_raw()
+            if self.raw_decode is not None:
+                try:
+                    data, converted = self.raw_decode(self.raw_buffer,
+                            'strict', self.eof)
+                except UnicodeDecodeError, exc:
+                    character = exc.object[exc.start]
+                    if self.stream is not None:
+                        position = self.stream_pointer-len(self.raw_buffer)+exc.start
+                    else:
+                        position = exc.start
+                    raise ReaderError(self.name, position, character,
+                            exc.encoding, exc.reason)
+            else:
+                data = self.raw_buffer
+                converted = len(data)
+            self.check_printable(data)
+            self.buffer += data
+            self.raw_buffer = self.raw_buffer[converted:]
+            if self.eof:
+                self.buffer += u'\0'
+                self.raw_buffer = None
+                break
+
+    def update_raw(self, size=1024):
+        data = self.stream.read(size)
+        if data:
+            self.raw_buffer += data
+            self.stream_pointer += len(data)
+        else:
+            self.eof = True
+
+#try:
+#    import psyco
+#    psyco.bind(Reader)
+#except ImportError:
+#    pass
+
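A small sketch of the Reader interface on a plain `str` input:

    from yaml.reader import Reader

    reader = Reader("- item\n")
    print reader.peek()      # u'-' (the current character)
    reader.forward(2)        # step over '- '
    print reader.prefix(4)   # u'item'
    print reader.index, reader.line, reader.column    # 2 0 2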

lib/yaml/resolver.py

+
+__all__ = ['BaseResolver', 'Resolver', 'ResolverError']
+
+from error import MarkedYAMLError
+from nodes import *
+
+import re
+
+# Not really used.
+class ResolverError(MarkedYAMLError):
+    pass
+
+class BaseResolver:
+
+    DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
+    DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
+    DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+
+    def __init__(self, composer):
+        self.composer = composer
+        self.resolved_nodes = {}
+
+    def check(self):
+        # Check if more documents are available.
+        return self.composer.check()
+
+    def get(self):
+        # Resolve and return the root node of the next document.
+        if self.composer.check():
+            return self.resolve_document(self.composer.get())
+
+    def __iter__(self):
+        # Iterator protocol.
+        while self.composer.check():
+            yield self.resolve_document(self.composer.get())
+
+    def resolve_document(self, node):
+        self.resolve_node([], node)
+        self.resolved_nodes = {}
+        return node
+
+    def resolve_node(self, path, node):
+        if node in self.resolved_nodes:
+            return
+        self.resolved_nodes[node] = None
+        if isinstance(node, ScalarNode):
+            self.resolve_scalar(path, node)
+        elif isinstance(node, SequenceNode):
+            self.resolve_sequence(path, node)
+            for index in range(len(node.value)):
+                self.resolve_node(path+[(node, index)], node.value[index])
+        elif isinstance(node, MappingNode):
+            self.resolve_mapping(path, node)
+            for key in node.value:
+                self.resolve_node(path+[(node, None)], key)
+                self.resolve_node(path+[(node, key)], node.value[key])
+
+    def resolve_scalar(self, path, node):
+        if node.tag is None:
+            node.tag = self.detect_scalar(node.value)
+        if node.tag is None or node.tag == u'!':
+            node.tag = self.DEFAULT_SCALAR_TAG
+
+    def resolve_sequence(self, path, node):
+        if node.tag is None or node.tag == u'!':
+            node.tag = self.DEFAULT_SEQUENCE_TAG
+
+    def resolve_mapping(self, path, node):
+        if node.tag is None or node.tag == u'!':
+            node.tag = self.DEFAULT_MAPPING_TAG
+
+    def detect_scalar(self, value):
+        if value == u'':
+            detectors = self.yaml_detectors.get(u'', [])
+        else:
+            detectors = self.yaml_detectors.get(value[0], [])
+        detectors = detectors + self.yaml_detectors.get(None, [])
+        for tag, regexp in detectors:
+            if regexp.match(value):
+                return tag
+
+    def add_detector(cls, tag, regexp, first):
+        if not 'yaml_detectors' in cls.__dict__:
+            cls.yaml_detectors = cls.yaml_detectors.copy()
+        for ch in first:
+            cls.yaml_detectors.setdefault(ch, []).append((tag, regexp))
+    add_detector = classmethod(add_detector)
+
+    yaml_detectors = {}
+
+class Resolver(BaseResolver):
+    pass
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:bool',
+        re.compile(ur'''^(?:yes|Yes|YES|n|N|no|No|NO
+                    |true|True|TRUE|false|False|FALSE
+                    |on|On|ON|off|Off|OFF)$''', re.X),
+        list(u'yYnNtTfFoO'))
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:float',
+        re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)?\.[0-9_]*(?:[eE][-+][0-9]+)?
+                    |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
+                    |[-+]?\.(?:inf|Inf|INF)
+                    |\.(?:nan|NaN|NAN))$''', re.X),
+        list(u'-+0123456789.'))
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:int',
+        re.compile(ur'''^(?:[-+]?0b[0-1_]+
+                    |[-+]?0[0-7_]+
+                    |[-+]?(?:0|[1-9][0-9_]*)
+                    |[-+]?0x[0-9a-fA-F_]+
+                    |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
+        list(u'-+0123456789'))
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:merge',
+        re.compile(ur'^(?:<<)$'),
+        ['<'])
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:null',
+        re.compile(ur'''^(?: ~
+                    |null|Null|NULL
+                    | )$''', re.X),
+        [u'~', u'n', u'N', u''])
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:timestamp',
+        re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
+                    |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
+                     (?:[Tt]|[ \t]+)[0-9][0-9]?
+                     :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
+                     (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
+        list(u'0123456789'))
+
+Resolver.add_detector(
+        u'tag:yaml.org,2002:value',
+        re.compile(ur'^(?:=)$'),
+        ['='])
+
+# The following detector is only for documentation purposes. It cannot work
+# because plain scalars cannot start with '!', '&', or '*'.
+Resolver.add_detector(
+        u'tag:yaml.org,2002:yaml',
+        re.compile(ur'^(?:!|&|\*)$'),
+        list(u'!&*'))
+
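`detect_scalar` consults the detectors registered above, indexed by the first character of the value. A sketch (the composer argument is not used by `detect_scalar`, so `None` is passed here):

    from yaml.resolver import Resolver

    resolver = Resolver(None)
    print resolver.detect_scalar(u'-12')     # tag:yaml.org,2002:int
    print resolver.detect_scalar(u'3.14')    # tag:yaml.org,2002:float
    print resolver.detect_scalar(u'<<')      # tag:yaml.org,2002:merge
    print resolver.detect_scalar(u'plain')   # None; the default tag applies later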

lib/yaml/scanner.py

+
+# Scanner produces tokens of the following types:
+# DIRECTIVE(name, value)
+# DOCUMENT-START
+# DOCUMENT-END
+# STREAM-END
+# BLOCK-SEQUENCE-START
+# BLOCK-MAPPING-START
+# BLOCK-END
+# FLOW-SEQUENCE-START
+# FLOW-MAPPING-START
+# FLOW-SEQUENCE-END
+# FLOW-MAPPING-END
+# BLOCK-ENTRY
+# FLOW-ENTRY
+# KEY
+# VALUE
+# ALIAS(value)
+# ANCHOR(value)
+# TAG(value)
+# SCALAR(value, plain)
+#
+# Read comments in the Scanner code for more details.
+#
+
+__all__ = ['Scanner', 'ScannerError']
+
+from error import MarkedYAMLError
+from tokens import *
+
+class ScannerError(MarkedYAMLError):
+    pass
+
+class SimpleKey:
+    # See the description of simple keys below.
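Although the Scanner implementation is truncated here, the Parser above relies on its `check`/`peek`/`get` interface. A sketch of dumping the token stream under that assumption (token reprs come from tokens.py, which is not shown in this changeset):

    from yaml import Reader, Scanner
    from yaml.tokens import StreamEndToken

    scanner = Scanner(Reader("- a\n- b\n"))
    while not scanner.check(StreamEndToken):
        print scanner.get()    # BLOCK-SEQUENCE-START, BLOCK-ENTRY, SCALAR(u'a', True), ...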
+