Commits

Kirill Simonov committed a4348b1

* Add the token STREAM-START.
* Add parsing events: STREAM-START, DOCUMENT-START, DOCUMENT-END.
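
A hedged sketch (not part of the commit) of what a consumer of the parser now
sees, using the check()/get() interface exercised in tests/test_structure.py
below. Module paths follow the lib/yaml layout; passing a plain string to
Reader and the ScalarEvent class are assumptions based on the existing code
(the tests pass file objects).

    from yaml.reader import Reader
    from yaml.scanner import Scanner
    from yaml.parser import Parser
    from yaml.events import StreamEndEvent

    parser = Parser(Scanner(Reader("foo\n--- bar\n")))
    events = []
    while not parser.check(StreamEndEvent):
        events.append(parser.get())
    events.append(parser.get())
    # Expected event classes, in order:
    #   StreamStartEvent, DocumentStartEvent, ScalarEvent, DocumentEndEvent,
    #   DocumentStartEvent, ScalarEvent, DocumentEndEvent, StreamEndEvent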

  • Parent commits d6d2073

Files changed (8)

lib/yaml/composer.py

         self.all_anchors = {}
         self.complete_anchors = {}
 
+        # Drop the STREAM-START event.
+        self.parser.get()
+
     def check(self):
         # Check if there are more documents available.
         return not self.parser.check(StreamEndEvent)
             yield self.compose_document()
 
     def compose_document(self):
+
+        # Drop the DOCUMENT-START event.
+        self.parser.get()
+
+        # Compose the root node.
         node = self.compose_node()
+
+        # Drop the DOCUMENT-END event.
+        self.parser.get()
+
         self.all_anchors = {}
         self.complete_anchors = {}
         return node
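
For reference, a sketch of which component consumes each structural event
after this change (inferred from this hunk; node-level events are handled by
compose_node, which is unchanged here):

    StreamStartEvent    -- dropped once, in Composer.__init__
    DocumentStartEvent  -- dropped at the top of each compose_document() call
    (node events)       -- consumed by compose_node()
    DocumentEndEvent    -- dropped at the end of each compose_document() call
    StreamEndEvent      -- never removed by the composer; check() tests for it
                           to detect the end of the stream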

lib/yaml/events.py

 class CollectionEndEvent(Event):
     pass
 
+class DocumentStartEvent(Event):
+    pass
+
+class DocumentEndEvent(Event):
+    pass
+
+class StreamStartEvent(Event):
+    pass
+
 class StreamEndEvent(Event):
     pass
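
The Event base class is outside this diff. A minimal sketch of what it
presumably provides, judging from how the new event classes are constructed
with (start_mark, end_mark) pairs in lib/yaml/parser.py and with (None, None)
in tests/test_appliance.py:

    class Event:
        def __init__(self, start_mark, end_mark):
            self.start_mark = start_mark
            self.end_mark = end_mark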
 

lib/yaml/parser.py

 # YAML can be parsed by an LL(1) parser!
 #
 # We use the following production rules:
-# stream            ::= implicit_document? explicit_document* STREAM-END
+# stream            ::= STREAM-START implicit_document? explicit_document* STREAM-END
 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
 # implicit_document ::= block_node DOCUMENT-END?
 # block_node    ::= ALIAS | properties? block_content
 # or line breaks.
 
 # FIRST sets:
-# stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
+# stream: { STREAM-START }
 # explicit_document: { DIRECTIVE DOCUMENT-START }
 # implicit_document: FIRST(block_node)
 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
         return self.event_generator
 
     def parse_stream(self):
-        # implicit_document? explicit_document* STREAM-END
+        # STREAM-START implicit_document? explicit_document* STREAM-END
+
+        # Parse start of stream.
+        token = self.scanner.get()
+        yield StreamStartEvent(token.start_mark, token.end_mark)
 
         # Parse implicit document.
         if not self.scanner.check(DirectiveToken, DocumentStartToken,
                 StreamEndToken):
             self.tag_handles = self.DEFAULT_TAGS
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
+            yield DocumentStartEvent(start_mark, end_mark)
             for event in self.parse_block_node():
                 yield event
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
+            while self.scanner.check(DocumentEndToken):
+                token = self.scanner.get()
+                end_mark = token.end_mark
+            yield DocumentEndEvent(start_mark, end_mark)
 
         # Parse explicit documents.
         while not self.scanner.check(StreamEndToken):
+            token = self.scanner.peek()
+            start_mark = token.start_mark
             self.process_directives()
             if not self.scanner.check(DocumentStartToken):
                 raise ParserError(None, None,
                         % self.scanner.peek().id,
                         self.scanner.peek().start_mark)
             token = self.scanner.get()
+            end_mark = token.end_mark
+            yield DocumentStartEvent(start_mark, end_mark)
             if self.scanner.check(DirectiveToken,
                     DocumentStartToken, DocumentEndToken, StreamEndToken):
                 yield self.process_empty_scalar(token.end_mark)
             else:
                 for event in self.parse_block_node():
                     yield event
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
             while self.scanner.check(DocumentEndToken):
-                self.scanner.get()
+                token = self.scanner.get()
+                end_mark = token.end_mark
+            yield DocumentEndEvent(start_mark, end_mark)
 
         # Parse end of stream.
         token = self.scanner.get()
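
A hedged illustration of the DOCUMENT-END loops above: consecutive '...'
markers are folded into a single DocumentEndEvent whose end_mark covers the
last marker consumed (the input and the scalar event are illustrative; the
scalar itself is produced by parse_block_node, outside this hunk):

    # input:
    #   --- foo
    #   ...
    #   ...
    #
    # events for this document:
    #   DocumentStartEvent, <scalar 'foo'>, DocumentEndEvent   (one event, not two)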

lib/yaml/scanner.py

 
 # Scanner produces tokens of the following types:
+# STREAM-START
+# STREAM-END
 # DIRECTIVE(name, value)
 # DOCUMENT-START
 # DOCUMENT-END
-# STREAM-END
 # BLOCK-SEQUENCE-START
 # BLOCK-MAPPING-START
 # BLOCK-END
         # List of processed tokens that are not yet emitted.
         self.tokens = []
 
+        # Add the STREAM-START token.
+        self.fetch_stream_start()
+
         # Number of tokens that were emitted through the `get_token` method.
         self.tokens_taken = 0
 
 
     # Fetchers.
 
+    def fetch_stream_start(self):
+        # We always add STREAM-START as the first token and STREAM-END as the
+        # last token.
+
+        # Read the token.
+        mark = self.reader.get_mark()
+        
+        # Add STREAM-START.
+        self.tokens.append(StreamStartToken(mark, mark))
+        
+
     def fetch_stream_end(self):
 
         # Set the current indentation to -1.
         # Read the token.
         mark = self.reader.get_mark()
         
-        # Add END.
+        # Add STREAM-END.
         self.tokens.append(StreamEndToken(mark, mark))
 
         # The reader is ended.
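
A hedged usage sketch of the new token, assuming Reader accepts a plain string
(the tests below pass file objects) and module paths following the lib/yaml
layout; the Scanner is iterable, as tests/test_tokens.py relies on:

    from yaml.reader import Reader
    from yaml.scanner import Scanner
    from yaml.tokens import StreamStartToken, StreamEndToken

    scanner = Scanner(Reader("foo"))
    tokens = list(scanner)
    assert isinstance(tokens[0], StreamStartToken)   # added by fetch_stream_start in __init__
    assert isinstance(tokens[-1], StreamEndToken)    # added by fetch_stream_end at end of input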

lib/yaml/tokens.py

 class DocumentEndToken(Token):
     id = '<document end>'
 
+class StreamStartToken(Token):
+    id = '<stream start>'
+
 class StreamEndToken(Token):
     id = '<stream end>'
 

tests/test_appliance.py

     def scan(self):
         #print self.data[self.index:]
         tokens = []
+        tokens.append(StreamStartToken(None, None))
         while True:
             self.find_token()
             ch = self.data[self.index]
         self.scanner = CanonicalScanner(data)
         self.events = []
 
-    # stream: document* END
+    # stream: STREAM-START document* STREAM-END
     def parse_stream(self):
+        self.consume_token(StreamStartToken)
+        self.events.append(StreamStartEvent(None, None))
         while not self.test_token(StreamEndToken):
             if self.test_token(DirectiveToken, DocumentStartToken):
                 self.parse_document()
             else:
                 raise Error("document is expected, got "+repr(self.tokens[self.index]))
+        self.consume_token(StreamEndToken)
         self.events.append(StreamEndEvent(None, None))
 
     # document: DIRECTIVE? DOCUMENT-START node
         if self.test_token(DirectiveToken):
             self.consume_token(DirectiveToken)
         self.consume_token(DocumentStartToken)
+        self.events.append(DocumentStartEvent(None, None))
         self.parse_node()
+        self.events.append(DocumentEndEvent(None, None))
 
     # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping)
     def parse_node(self):

tests/test_structure.py

             parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
             node1 = []
             while not parser.check(StreamEndEvent):
-                node1.append(self._convert(parser))
+                if not parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
+                    node1.append(self._convert(parser))
+                else:
+                    parser.get()
             parser.get()
             if len(node1) == 1:
                 node1 = node1[0]

tests/test_tokens.py

             scanner = Scanner(Reader(file(data_filename, 'rb')))
             tokens1 = []
             for token in scanner:
-                if not isinstance(token, StreamEndToken):
+                if not isinstance(token, (StreamStartToken, StreamEndToken)):
                     tokens1.append(token)
             tokens1 = [self.replaces[t.__class__] for t in tokens1]
             self.failUnlessEqual(tokens1, tokens2)
                 scanner = Scanner(Reader(file(filename, 'rb')))
                 tokens = []
                 for token in scanner:
-                    if not isinstance(token, StreamEndToken):
+                    if not isinstance(token, (StreamStartToken, StreamEndToken)):
                         tokens.append(token.__class__.__name__)
             except:
                 print