Commits

Ross Light committed 3a48e07

Move scanner rules into lex.go

Comments (0)

Files changed (3)

     ast.go\
     chars.go\
     constructor.go\
+    lex.go\
     parser.go\
     scanner.go\
     schema.go\
+package yaml
+
+import (
+	"strconv"
+)
+
+const ( // scanning contexts passed as the "context" argument of the try*/parse* methods
+	blockIn = iota // passed by parseBareDocument and parseBlockSequence
+	blockOut // passed by parseBlockMapping for keys/values of block mappings
+	blockKey // implicit block-mapping key: separation and quoted scalars stay on one line
+	flowKey // key inside a flow collection: one line, flow indicators end plain scalars
+	flowIn // inside a flow collection: flow indicators end plain scalars
+	flowOut // flow node directly inside a block node (see parseBlockNode)
+)
+
+// trySeparate consumes separation space between tokens and reports whether
+// any was found.  In key contexts only in-line separation is allowed; in all
+// other contexts a run of comment lines followed by a flow line prefix at the
+// given indent is tried first, falling back to in-line separation.
+func (s *scanner) trySeparate(indent, context int) bool {
+	switch context {
+	case blockKey, flowKey:
+		return s.trySeparateInLine()
+	}
+
+	start := s.index
+	if s.trySLComments() && s.tryFlowLinePrefix(indent) {
+		return true
+	}
+	// The multi-line form did not match: rewind before trying the in-line form.
+	s.index = start
+	return s.trySeparateInLine()
+}
+
+// tryFlowLinePrefix accepts the indentation of a continued flow line plus any
+// optional in-line separation after it.  It reports whether the indentation
+// was accepted.
+func (s *scanner) tryFlowLinePrefix(indent int) bool {
+	if s.acceptIndent(indent) {
+		// Trailing separation is optional, so its result is ignored.
+		s.trySeparateInLine()
+		return true
+	}
+	return false
+}
+
+// trySeparateInLine succeeds when a run of whitespace was consumed, or when
+// nothing was consumed but the scanner sits at the start of a line that is
+// not the end of input.
+func (s *scanner) trySeparateInLine() bool {
+	if s.acceptRun(whitespace) {
+		return true
+	}
+	return s.peek() != eof && s.isStartOfLine()
+}
+
+// trySBComment consumes an optional trailing comment and the line break (or
+// end of input) that ends the current line.  On failure the index is restored
+// to where it was on entry.
+func (s *scanner) trySBComment() (ok bool) {
+	start := s.index
+	defer func() {
+		if !ok {
+			s.index = start
+		}
+	}()
+
+	// Comment text is only looked for after in-line separation.
+	if s.trySeparateInLine() {
+		s.tryCommentText()
+	}
+	ok = s.peek() == eof || s.acceptBreak()
+	return ok
+}
+
+// tryLComment consumes one whole comment line: in-line separation, optional
+// comment text, then a mandatory line break.  On failure the index is
+// restored to where it was on entry.
+func (s *scanner) tryLComment() (ok bool) {
+	start := s.index
+	defer func() {
+		if !ok {
+			s.index = start
+		}
+	}()
+
+	if s.trySeparateInLine() {
+		s.tryCommentText()
+		ok = s.acceptBreak()
+	}
+	return ok
+}
+
+// trySLComments finishes the current line (unless the scanner is already at
+// the start of one) and then consumes every following comment line.  On
+// failure the index is restored to where it was on entry.
+func (s *scanner) trySLComments() (ok bool) {
+	start := s.index
+	defer func() {
+		if !ok {
+			s.index = start
+		}
+	}()
+
+	if !s.isStartOfLine() && !s.trySBComment() {
+		return false
+	}
+	for s.tryLComment() {
+		// Each successful call swallows one more comment line.
+	}
+	return true
+}
+
+// tryCommentText consumes a "#" and everything after it up to, but not
+// including, the next line break or the end of input.  It reports whether a
+// "#" was present.
+func (s *scanner) tryCommentText() bool {
+	if !s.accept("#") {
+		return false
+	}
+	for r := s.next(); r != eof && !containsRune(breakChars, r); r = s.next() {
+	}
+	// The terminating rune is not part of the comment.
+	s.backup()
+	return true
+}
+
+// parseYAMLStream scans an entire stream: an optional first document, then
+// any number of documents introduced by "..." or "---" markers, and finally
+// the end of input, for which TokenStreamEnd is emitted.
+func (s *scanner) parseYAMLStream() {
+	s.parseDocumentPrefix()
+
+	// l-any-document?
+	// A leading marker or an empty stream means there is no first document;
+	// in that case whatever the probes consumed is put back.
+	start := s.index
+	if s.acceptString("...") || s.acceptString("---") || s.peek() == eof {
+		s.index = start
+	} else {
+		s.parseAnyDocument()
+	}
+
+	for s.peek() != eof {
+		s.parseDocumentPrefix()
+
+		if !s.acceptString("...") {
+			// Without a document-end marker only an explicit document may follow.
+			mark := s.index
+			if !s.acceptString("---") {
+				break
+			}
+			s.index = mark
+			s.parseExplicitDocument()
+			continue
+		}
+
+		// One or more consecutive document-end markers, each ending its line.
+		for {
+			s.emit(TokenDocumentEnd)
+			if !s.trySLComments() {
+				s.expected("line break")
+			}
+			s.ignore()
+			if !s.acceptString("...") {
+				break
+			}
+		}
+
+		s.parseDocumentPrefix()
+		if s.peek() != eof {
+			s.parseAnyDocument()
+		}
+	}
+
+	if s.next() != eof {
+		s.backup()
+		s.expected("EOF")
+		return
+	}
+	s.ignore()
+	s.emit(TokenStreamEnd)
+}
+
+// parseAnyDocument dispatches on the upcoming input: "%" starts a directive
+// document, "---" an explicit document, and anything else a bare document.
+func (s *scanner) parseAnyDocument() {
+	start := s.index
+
+	if s.peek() == '%' {
+		s.parseDirectiveDocument()
+		return
+	}
+	if s.acceptString("---") {
+		// The marker was only probed; parseExplicitDocument consumes it itself.
+		s.index = start
+		s.parseExplicitDocument()
+		return
+	}
+	s.parseBareDocument()
+}
+
+// parseDirectiveDocument scans one or more "%" directives followed by an
+// explicit document.
+func (s *scanner) parseDirectiveDocument() {
+	// Require a leading "%" without consuming it.
+	s.match("%")
+	s.backup()
+
+	for {
+		if s.peek() != '%' {
+			break
+		}
+		s.parseDirective()
+	}
+	s.parseExplicitDocument()
+}
+
+// parseExplicitDocument scans a "---" marker, emitting TokenDocumentStart,
+// and then the document body as a bare document.
+func (s *scanner) parseExplicitDocument() {
+	if !s.acceptString("---") {
+		s.expected(`"---"`)
+	} else {
+		s.emit(TokenDocumentStart)
+	}
+	s.parseBareDocument()
+	// TODO: | e-node s-l-comments
+}
+
+func (s *scanner) parseBareDocument() {
+	s.parseBlockNode(-1, blockIn)
+}
+
+// parseDocumentPrefix discards byte order marks and comment lines that come
+// before a document.
+func (s *scanner) parseDocumentPrefix() {
+	bom := string(bomRune)
+
+	if s.acceptRun(bom) {
+		s.ignore()
+	}
+	for s.tryLComment() {
+		// Drop the comment line, then any byte order marks that follow it.
+		s.ignore()
+		s.acceptRun(bom)
+		s.ignore()
+	}
+}
+
+// parseDirective scans one "%" directive (name plus whitespace-separated
+// parameters), emits it as TokenDirective, and consumes the rest of the line
+// together with any following comment lines.
+func (s *scanner) parseDirective() {
+	// skipWord advances up to, but not past, the next whitespace, line break
+	// or end of input.
+	skipWord := func() {
+		for r := s.next(); r != eof && !containsRune(whitespace+breakChars, r); r = s.next() {
+		}
+		s.backup()
+	}
+
+	s.match("%")
+
+	// Directive name
+	skipWord()
+
+	// Parameters
+	for {
+		mark := s.index
+		if !s.acceptRun(whitespace) {
+			break
+		}
+		if c := s.peek(); c == eof || c == '#' {
+			// Trailing whitespace is not part of the directive token.
+			s.index = mark
+			break
+		}
+		skipWord()
+	}
+
+	s.emit(TokenDirective)
+	if !s.trySLComments() {
+		s.expected("line break")
+	}
+	s.ignore()
+}
+
+// parseBlockNode scans a node in block context.  Block scalars are detected
+// and rejected, block sequences and block mappings are delegated to their own
+// parsers, and anything else is scanned as a flow node that must be followed
+// by the end of its line.
+func (s *scanner) parseBlockNode(indent, context int) {
+	start := s.index
+	childIndent := indent + 1
+
+	// TODO: block-scalar
+	if s.trySeparate(childIndent, context) {
+		// TODO: properties
+		if c := s.peek(); c == '|' || c == '>' {
+			s.errorf("cannot handle block scalar")
+		}
+	}
+	s.index = start
+
+	// block-collection
+	// TODO: properties
+	if s.trySLComments() {
+		// detect indent
+		lineStart := s.index
+		if s.acceptIndent(childIndent) {
+			s.acceptRun(" ")
+			detected := s.index - lineStart
+
+			switch c := s.peek(); {
+			case c == '-':
+				// block sequence
+				s.index = lineStart
+				s.ignore()
+				s.parseBlockSequence(detected)
+				return
+			case c == '?' || s.peekImplicitKey():
+				// block mapping
+				s.index = lineStart
+				s.ignore()
+				s.parseBlockMapping(detected)
+				return
+			}
+		}
+	}
+	s.index = start
+
+	// flow-in-block
+	if !s.trySeparate(childIndent, flowOut) {
+		s.expected("space")
+	}
+	s.ignore()
+	s.parseFlowNode(childIndent, flowOut)
+
+	if !s.trySLComments() {
+		s.expected("line break")
+	}
+	s.ignore()
+}
+
+// parseBlockSequence scans "-" entries at the given indent, bracketing them
+// with TokenBlockSequenceStart and TokenBlockEnd.
+func (s *scanner) parseBlockSequence(indent int) {
+	s.emit(TokenBlockSequenceStart)
+
+	for {
+		if s.peek() == eof || !s.acceptIndent(indent) {
+			break
+		}
+		s.ignore()
+
+		s.match("-")
+		// XXX: not followed by ns-char?
+		s.emit(TokenBlockEntry)
+
+		s.parseBlockIndented(indent, blockIn)
+	}
+
+	s.emit(TokenBlockEnd)
+}
+
+const maxImplicitKeySize = 1024 // peekImplicitKey gives up once it has advanced more than this many buffer positions
+
+// peekImplicitKey looks ahead, without consuming input, to decide whether the
+// upcoming text is a single-line key followed by ":".  Plain, single-quoted
+// and double-quoted keys are recognized; flow sequences and mappings are not
+// handled yet and yield false.  The index is always restored on return.
+func (s *scanner) peekImplicitKey() bool {
+	const (
+		inPlain = iota
+		inSingleQuote
+		inDoubleQuote
+		inMapping
+		inSequence
+	)
+
+	start := s.index
+	defer func() {
+		s.index = start
+	}()
+
+	// Get initial state
+	stack := make([]int, 0, 2)
+	first := s.peek()
+	switch {
+	case first == eof:
+		return false
+	case first == '{':
+		stack = append(stack, inMapping)
+	case first == '[':
+		stack = append(stack, inSequence)
+	case first == '\'':
+		stack = append(stack, inSingleQuote)
+	case first == '"':
+		stack = append(stack, inDoubleQuote)
+	case first == '?' || first == ':' || first == '-':
+		// These indicators only start a plain scalar when the rune after
+		// them is plain-safe.
+		s.next()
+		if !isPlainSafeChar(s.peek(), blockKey) {
+			return false
+		}
+		stack = append(stack, inPlain)
+	case containsRune(breakChars+whitespace+indicatorChars, first):
+		return false
+	default:
+		stack = append(stack, inPlain)
+	}
+	s.next()
+
+	for len(stack) > 0 {
+		if s.index-start > maxImplicitKeySize {
+			return false
+		}
+
+		r := s.peek()
+		if r == eof || containsRune(breakChars, r) {
+			// Implicit keys must fit on one line.
+			return false
+		}
+
+		top := stack[len(stack)-1]
+		if top == inSingleQuote {
+			s.next()
+			if r == '\'' {
+				if s.peek() == '\'' {
+					// A doubled quote is an escaped quote.
+					s.next()
+				} else {
+					stack = stack[:len(stack)-1]
+				}
+			}
+		} else if top == inDoubleQuote {
+			s.next()
+			if r == '\\' {
+				escaped := s.next()
+				if containsRune(breakChars, escaped) {
+					return false
+				}
+			} else if r == '"' {
+				stack = stack[:len(stack)-1]
+			}
+		} else if top == inPlain {
+			if r == ':' {
+				stack = stack[:len(stack)-1]
+			} else {
+				s.next()
+			}
+		} else {
+			// TODO: sequences and maps
+			return false
+		}
+	}
+
+	// Finally... is there a colon?
+	s.trySeparateInLine()
+	return s.accept(":")
+}
+
+// parseBlockIndented scans the node that follows a block entry indicator.
+// For now it simply forwards to parseBlockNode.
+func (s *scanner) parseBlockIndented(indent, context int) {
+	// TODO: compact syntax and empty
+	s.parseBlockNode(indent, context)
+	return
+}
+
+// parseBlockMapping scans mapping entries at the given indent, bracketing
+// them with TokenBlockMappingStart and TokenBlockEnd.  Entries starting with
+// "?" are explicit; all others are implicit single-line keys.
+func (s *scanner) parseBlockMapping(indent int) {
+	s.emit(TokenBlockMappingStart)
+
+	for s.peek() != eof && s.acceptIndent(indent) {
+		s.ignore()
+
+		if s.peek() != '?' {
+			// implicit entry
+			s.emit(TokenKey)
+
+			// A leading ":" means the key itself is absent.
+			if s.peek() != ':' {
+				s.parseFlowNode(-1, blockKey)
+
+				s.trySeparateInLine()
+				s.ignore()
+			}
+
+			s.match(":")
+			s.emit(TokenValue)
+
+			// TODO: empty
+			s.parseBlockNode(indent, blockOut)
+			continue
+		}
+
+		// explicit entry
+		s.next()
+		s.emit(TokenKey)
+
+		s.parseBlockIndented(indent, blockOut)
+
+		// TODO: empty
+		s.acceptIndent(indent)
+		s.ignore()
+
+		s.match(":")
+		s.emit(TokenValue)
+
+		s.parseBlockIndented(indent, blockOut)
+	}
+
+	s.emit(TokenBlockEnd)
+}
+
+// parseFlowNode scans an alias ("*name"), or optional node properties
+// followed by flow content.  After properties, content is only scanned when
+// it starts with one of { [ ' " ; otherwise the node is left without content.
+func (s *scanner) parseFlowNode(indent, context int) {
+	switch s.peek() {
+	case '*':
+		s.next()
+		s.parseAnchorName()
+		s.emit(TokenAlias)
+		return
+	case '!', '&':
+		// Properties first; handled below.
+	default:
+		s.parseFlowContent(indent, context)
+		return
+	}
+
+	s.parseProperties(indent, context)
+
+	mark := s.index
+	if s.trySeparate(indent, context) {
+		if c := s.peek(); c != eof && containsRune(`{['"`, c) {
+			s.ignore()
+
+			// TODO: go back to mark if there isn't content
+			s.parseFlowContent(indent, context)
+			return
+		}
+	}
+	// No content follows: give back whatever separation was consumed.
+	s.index = mark
+}
+
+// parseProperties scans a tag and/or an anchor in either order.  The second
+// property is optional; when it is absent the separation consumed while
+// looking for it is given back.
+func (s *scanner) parseProperties(indent, context int) {
+	lead := s.peek()
+	if lead == '!' {
+		s.parseTagProperty()
+	} else if lead == '&' {
+		s.parseAnchorProperty()
+	} else {
+		s.expected("properties")
+	}
+
+	mark := s.index
+	if !s.trySeparate(indent, context) {
+		return
+	}
+
+	follow := s.peek()
+	if lead == '!' && follow == '&' {
+		s.ignore()
+		s.parseAnchorProperty()
+	} else if lead == '&' && follow == '!' {
+		s.ignore()
+		s.parseTagProperty()
+	} else {
+		s.index = mark
+	}
+}
+
+// parseTagProperty scans a tag starting with "!" and emits it as TokenTag.
+// "!<...>" is a verbatim tag; every other form is scanned as word characters,
+// an optional second "!", and tag characters.
+func (s *scanner) parseTagProperty() {
+	s.match("!")
+
+	if !s.accept("<") {
+		// Shorthand or non-specific tag
+		s.acceptRun(wordChars)
+		s.accept("!")
+		s.acceptRun(tagChars)
+	} else {
+		// Verbatim tag
+		s.acceptRun(uriChars)
+		s.match(">")
+	}
+
+	s.emit(TokenTag)
+}
+
+// parseAnchorProperty scans "&" followed by an anchor name and emits the
+// result as TokenAnchor.
+func (s *scanner) parseAnchorProperty() {
+	s.match("&") // anchor indicator
+	s.parseAnchorName()
+
+	s.emit(TokenAnchor)
+}
+
+// parseAnchorName consumes runes up to, but not including, the next
+// whitespace, line break, flow indicator or end of input.  An empty name is
+// reported as an error.
+func (s *scanner) parseAnchorName() {
+	start := s.index
+	stops := whitespace + breakChars + flowIndicatorChars
+
+	for r := s.next(); r != eof && !containsRune(stops, r); r = s.next() {
+	}
+	s.backup()
+
+	if s.index == start {
+		s.expected("anchor")
+	}
+}
+
+// parseFlowContent picks the content parser from the next rune: "[" and "{"
+// open flow collections, quotes open quoted scalars, and everything else is
+// scanned as a plain scalar.
+func (s *scanner) parseFlowContent(indent, context int) {
+	c := s.peek()
+	if c == '[' {
+		s.parseFlowSequence(indent, context)
+		return
+	}
+	if c == '{' {
+		s.parseFlowMapping(indent, context)
+		return
+	}
+	if c == '\'' {
+		s.parseSingleQuoted(indent, context)
+		return
+	}
+	if c == '"' {
+		s.parseDoubleQuoted(indent, context)
+		return
+	}
+	s.parsePlain(indent, context)
+}
+
+// inFlow maps the context surrounding a flow collection to the context used
+// inside it: flowOut and flowIn become flowIn, blockKey and flowKey become
+// flowKey.  Any other context panics.
+func inFlow(context int) int {
+	if context == flowOut || context == flowIn {
+		return flowIn
+	}
+	if context == blockKey || context == flowKey {
+		return flowKey
+	}
+	panic("never reached")
+}
+
+// parseFlowSequence scans "[ entry, entry, ... ]", emitting
+// TokenFlowSequenceStart, one TokenFlowEntry per comma, and
+// TokenFlowSequenceEnd.
+func (s *scanner) parseFlowSequence(indent, context int) {
+	s.match("[")
+	s.emit(TokenFlowSequenceStart)
+
+	// Separation right after "[" still uses the outer context.
+	s.trySeparate(indent, context)
+	s.ignore()
+
+	inner := inFlow(context)
+	// skipSeparation discards optional separation in the inner context.
+	skipSeparation := func() {
+		s.trySeparate(indent, inner)
+		s.ignore()
+	}
+
+	for {
+		c := s.peek()
+		if c == ']' {
+			break
+		}
+		if c == ',' {
+			s.next()
+			s.emit(TokenFlowEntry)
+			continue
+		}
+
+		s.parseFlowSequenceEntry(indent, inner)
+		skipSeparation()
+
+		if s.next() != ',' {
+			s.backup()
+			break
+		}
+		s.emit(TokenFlowEntry)
+		skipSeparation()
+		skipSeparation()
+	}
+
+	s.match("]")
+	s.emit(TokenFlowSequenceEnd)
+}
+
+// parseFlowSequenceEntry scans one entry of a flow sequence.  Only plain flow
+// nodes are supported so far.
+func (s *scanner) parseFlowSequenceEntry(indent, context int) {
+	// TODO: | ns-flow-pair
+	s.parseFlowNode(indent, context)
+	return
+}
+
+// parseFlowMapping scans a flow mapping.  Entries are not implemented yet, so
+// only "{", optional separation and "}" are accepted.
+func (s *scanner) parseFlowMapping(indent, context int) {
+	// Opening brace
+	s.match("{")
+	s.emit(TokenFlowMappingStart)
+
+	// Optional separation, discarded
+	s.trySeparate(indent, context)
+	s.ignore()
+
+	// TODO: ns-flow-map-entries
+
+	// Closing brace
+	s.match("}")
+	s.emit(TokenFlowMappingEnd)
+}
+
+// parseSingleQuoted scans a single-quoted scalar and emits its unescaped
+// value as TokenScalar.  flowOut/flowIn use the multi-line reader, the key
+// contexts use the one-line reader; any other context yields an empty value.
+func (s *scanner) parseSingleQuoted(indent, context int) {
+	s.match(`'`)
+
+	var value string
+	if context == flowOut || context == flowIn {
+		value = s.parseSingleMultiLine(indent)
+	} else if context == blockKey || context == flowKey {
+		value = s.parseSingleOneLine()
+	}
+
+	s.match(`'`)
+	s.emitString(TokenScalar, value)
+}
+
+// parseSingleOneLine collects single-quoted content until parseSingleChar
+// signals the end with -1.
+func (s *scanner) parseSingleOneLine() string {
+	var content []int
+	for r := s.parseSingleChar(); r != -1; r = s.parseSingleChar() {
+		content = append(content, r)
+	}
+	return string(content)
+}
+
+// parseSingleMultiLine skips leading whitespace and then collects
+// single-quoted content until parseSingleChar signals the end with -1.
+// Continuation lines are not supported yet.
+func (s *scanner) parseSingleMultiLine(indent int) string {
+	s.acceptRun(whitespace)
+
+	var content []int
+	for r := s.parseSingleChar(); r != -1; r = s.parseSingleChar() {
+		content = append(content, r)
+	}
+
+	// TODO: additional lines
+
+	return string(content)
+}
+
+// parseSingleChar reads one logical character of a single-quoted scalar.  A
+// doubled quote yields a literal quote.  A lone quote or a rune rejected by
+// isJSONChar is left unread and reported as -1.
+func (s *scanner) parseSingleChar() int {
+	if s.acceptString("''") {
+		return '\''
+	}
+
+	if r := s.next(); r != '\'' && isJSONChar(r) {
+		return r
+	}
+	s.backup()
+	return -1
+}
+
+// parseDoubleQuoted scans a double-quoted scalar and emits its unescaped
+// value as TokenScalar.  flowOut/flowIn use the multi-line reader, the key
+// contexts use the one-line reader; any other context yields an empty value.
+func (s *scanner) parseDoubleQuoted(indent, context int) {
+	s.match(`"`)
+
+	var value string
+	if context == flowOut || context == flowIn {
+		value = s.parseDoubleMultiLine(indent)
+	} else if context == blockKey || context == flowKey {
+		value = s.parseDoubleOneLine()
+	}
+
+	s.match(`"`)
+	s.emitString(TokenScalar, value)
+}
+
+// parseDoubleOneLine collects double-quoted content until parseDoubleChar
+// signals the end with -1.
+func (s *scanner) parseDoubleOneLine() string {
+	var content []int
+	for r := s.parseDoubleChar(); r != -1; r = s.parseDoubleChar() {
+		content = append(content, r)
+	}
+	return string(content)
+}
+
+// parseDoubleMultiLine skips leading whitespace and then collects
+// double-quoted content until parseDoubleChar signals the end with -1.
+// Continuation lines are not supported yet.
+func (s *scanner) parseDoubleMultiLine(indent int) string {
+	s.acceptRun(whitespace)
+
+	var content []int
+	for r := s.parseDoubleChar(); r != -1; r = s.parseDoubleChar() {
+		content = append(content, r)
+	}
+
+	// TODO: additional lines
+
+	return string(content)
+}
+
+// parseDoubleChar reads one logical character of a double-quoted scalar,
+// decoding backslash escapes.  A closing quote or a rune rejected by
+// isJSONChar is left unread and reported as -1.
+func (s *scanner) parseDoubleChar() int {
+	r := s.next()
+
+	if r == '\\' {
+		// Escape code: parseEscapeChar expects to see the backslash itself.
+		s.backup()
+		return s.parseEscapeChar()
+	}
+	if r != '"' && isJSONChar(r) {
+		// Normal character
+		return r
+	}
+
+	// End of string
+	s.backup()
+	return -1
+}
+
+// parseEscapeChar decodes one backslash escape of a double-quoted scalar and
+// returns the rune it stands for.  An unknown escape is reported as an error
+// with the offending rune left unread.
+func (s *scanner) parseEscapeChar() int {
+	s.match("\\")
+
+	code := s.next()
+	switch code {
+	// Numeric escapes: fixed number of hex digits
+	case 'x':
+		return s.parseHexEscape(2)
+	case 'u':
+		return s.parseHexEscape(4)
+	case 'U':
+		return s.parseHexEscape(8)
+
+	// Escapes that stand for themselves
+	case ' ', '"', '/', '\\':
+		return code
+
+	// ASCII control characters
+	case '0':
+		return 0
+	case 'a':
+		return '\a'
+	case 'b':
+		return '\b'
+	case 't', '\t':
+		return '\t'
+	case 'n':
+		return '\n'
+	case 'v':
+		return '\v'
+	case 'f':
+		return '\f'
+	case 'r':
+		return '\r'
+	case 'e':
+		// ASCII escape
+		return 0x1b
+
+	// Unicode characters
+	case 'N':
+		// next line
+		return 0x85
+	case '_':
+		// non-breaking space
+		return 0xA0
+	case 'L':
+		// line separator
+		return 0x2028
+	case 'P':
+		// paragraph separator
+		return 0x2029
+	}
+
+	s.backup()
+	s.expected("escape")
+	return -1
+}
+
+// parseHexEscape consumes exactly n hexadecimal digits and returns the
+// number they encode.
+func (s *scanner) parseHexEscape(n int) int {
+	start := s.index
+	for remaining := n; remaining > 0; remaining-- {
+		// TODO: give a better error message
+		s.match(hexDigits)
+	}
+
+	digits := string(s.buf[start:s.index])
+	value, err := strconv.Btoui64(digits, 16)
+	if err != nil {
+		s.error(err)
+	}
+	return int(value)
+}
+
+// parsePlain scans a single-line plain (unquoted) scalar and emits it as
+// TokenScalar.  The scalar ends before a ":" not followed by a plain-safe
+// rune, before a "#" preceded by whitespace, before any rune that is not
+// plain-safe in the given context, or at the end of input; trailing
+// whitespace is not included.
+func (s *scanner) parsePlain(indent, context int) {
+	// ns-plain-first
+	first := s.next()
+	if first == eof {
+		s.errorf("Unexpected EOF")
+	} else if containsRune("?:-", first) {
+		// These indicators may only start a scalar when a plain-safe rune follows.
+		if !isPlainSafeChar(s.peek(), context) {
+			s.backup()
+			s.expected("scalar")
+		}
+	} else if containsRune(breakChars+whitespace+indicatorChars, first) {
+		s.backup()
+		s.expected("scalar")
+	}
+
+	// nb-ns-plain-in-line
+	for {
+		s.acceptRun("#") // the previous character is guaranteed to be non-whitespace
+		mark := s.index
+		s.acceptRun(whitespace)
+		r := s.next()
+
+		switch {
+		case r == eof:
+		case r == ':' && !isPlainSafeChar(s.peek(), context):
+		case r == '#':
+		case !isPlainSafeChar(r, context):
+		default:
+			// Still inside the scalar.
+			continue
+		}
+
+		// The scalar ended: drop the whitespace and terminator just read.
+		s.index = mark
+		break
+	}
+	// TODO: additional lines
+
+	s.emit(TokenScalar)
+}
+
+// isPlainSafeChar reports whether rune may appear inside a plain scalar in
+// the given context.  Whitespace and line breaks are never safe; in flowIn
+// and flowKey the flow indicators are excluded as well.  Contexts other than
+// flowOut, blockKey, flowIn and flowKey panic.
+func isPlainSafeChar(rune int, context int) bool {
+	if context == flowOut || context == blockKey {
+		return !containsRune(whitespace+breakChars, rune)
+	}
+	if context == flowIn || context == flowKey {
+		return !containsRune(whitespace+breakChars+flowIndicatorChars, rune)
+	}
+	panic("never reached")
+}
+
+// isStartOfLine reports whether the current position is in column 1.
+func (s *scanner) isStartOfLine() bool {
+	pos := s.currentPos()
+	return pos.Column == 1
+}
 	"fmt"
 	"io"
 	"os"
-	"strconv"
 	"strings"
 )
 
 	Scan() (Token, os.Error)
 }
 
-// advancePosition calculates the new position after seeing the given runes.
-// Special cases:
-//
-//		advancePosition(advancePosition(pos, []int{'\r'}), []int{'\n'}) != advancePosition(pos, []int("\r\n"))
-func advancePosition(pos Position, runes []int) Position {
-	for i, r := range runes {
-		switch r {
-		case '\r':
-			pos.Line++
-			pos.Column = 1
-		case '\n':
-			if i == 0 || runes[i-1] != '\r' {
-				pos.Line++
-				pos.Column = 1
-			}
-		case bomRune:
-			// don't advance column
-		default:
-			pos.Column++
-		}
-	}
-
-	pos.Index += len(runes)
-	return pos
-}
-
 type scanner struct {
 	reader io.RuneReader
 	err    os.Error
 	return Token{}, s.err
 }
 
-const eof = -1
-
-type scanError struct {
-	Error    os.Error
-	Position Position
-}
-
-func (err scanError) String() string {
-	if err.Position.Column == 0 {
-		return err.Error.String()
-	}
-	return err.Position.String() + ": " + err.Error.String()
-}
-
 // scan starts the scanning process and catches any errors that occur.
 func (s *scanner) scan() {
 	defer close(s.tokens)
 	s.parseYAMLStream()
 }
 
-// error will make Scan return an error once all of the tokens are flushed.
-func (s *scanner) error(err os.Error) {
-	panic(scanError{err, s.currentPos()})
-}
-
-func (s *scanner) errorf(format string, args ...interface{}) {
-	s.error(fmt.Errorf(format, args...))
-}
-
-func (s *scanner) expected(exp string) {
-	s.errorf("Expected %s, got %q", exp, s.peek())
-}
-
-const (
-	blockIn = iota
-	blockOut
-	blockKey
-	flowKey
-	flowIn
-	flowOut
-)
-
-func (s *scanner) trySeparate(indent, context int) bool {
-	if context == blockKey || context == flowKey {
-		return s.trySeparateInLine()
-	}
-
-	initialIndex := s.index
-	if s.trySLComments() && s.tryFlowLinePrefix(indent) {
-		return true
-	} else {
-		s.index = initialIndex
-	}
-	return s.trySeparateInLine()
-}
-
-func (s *scanner) tryFlowLinePrefix(indent int) bool {
-	if !s.acceptIndent(indent) {
-		return false
-	}
-	s.trySeparateInLine()
-	return true
-}
-
-func (s *scanner) trySeparateInLine() bool {
-	return s.acceptRun(whitespace) || (s.peek() != eof && s.isStartOfLine())
-}
-
-func (s *scanner) trySBComment() (ok bool) {
-	defer func(initIndex int) {
-		if !ok {
-			s.index = initIndex
-		}
-	}(s.index)
-
-	if s.trySeparateInLine() {
-		s.tryCommentText()
-	}
-	return s.peek() == eof || s.acceptBreak()
-}
-
-func (s *scanner) tryLComment() (ok bool) {
-	defer func(initIndex int) {
-		if !ok {
-			s.index = initIndex
-		}
-	}(s.index)
-
-	if !s.trySeparateInLine() {
-		return false
-	}
-	s.tryCommentText()
-	return s.acceptBreak()
-}
-
-func (s *scanner) trySLComments() (ok bool) {
-	defer func(initIndex int) {
-		if !ok {
-			s.index = initIndex
-		}
-	}(s.index)
-
-	if !s.isStartOfLine() {
-		if !s.trySBComment() {
-			return false
-		}
-	}
-	for s.tryLComment() {
-	}
-	return true
-}
-
-func (s *scanner) tryCommentText() bool {
-	if !s.accept("#") {
-		return false
-	}
-	for {
-		rune := s.next()
-		if rune == eof || containsRune(breakChars, rune) {
-			break
-		}
-	}
-	s.backup()
-	return true
-}
-
-func (s *scanner) parseYAMLStream() {
-	s.parseDocumentPrefix()
-	// l-any-document?
-	if initialIndex := s.index; !s.acceptString("...") && !s.acceptString("---") && s.peek() != eof {
-		s.parseAnyDocument()
-	} else {
-		s.index = initialIndex
-	}
-
-	for s.peek() != eof {
-		s.parseDocumentPrefix()
-		if s.acceptString("...") {
-			s.emit(TokenDocumentEnd)
-			if !s.trySLComments() {
-				s.expected("line break")
-			}
-			s.ignore()
-			for s.acceptString("...") {
-				s.emit(TokenDocumentEnd)
-				if !s.trySLComments() {
-					s.expected("line break")
-				}
-				s.ignore()
-			}
-			s.parseDocumentPrefix()
-			if s.peek() != eof {
-				s.parseAnyDocument()
-			}
-		} else {
-			if initialIndex := s.index; s.acceptString("---") {
-				s.index = initialIndex
-				s.parseExplicitDocument()
-			} else {
-				break
-			}
-		}
-	}
-
-	if r := s.next(); r == eof {
-		s.ignore()
-		s.emit(TokenStreamEnd)
-	} else {
-		s.backup()
-		s.expected("EOF")
-	}
-}
-
-func (s *scanner) parseAnyDocument() {
-	initialIndex := s.index
-	switch {
-	case s.peek() == '%':
-		s.parseDirectiveDocument()
-	case s.acceptString("---"):
-		s.index = initialIndex
-		s.parseExplicitDocument()
-	default:
-		s.parseBareDocument()
-	}
-}
-
-func (s *scanner) parseDirectiveDocument() {
-	s.match("%")
-	s.backup()
-	for s.peek() == '%' {
-		s.parseDirective()
-	}
-	s.parseExplicitDocument()
-}
-
-func (s *scanner) parseExplicitDocument() {
-	if s.acceptString("---") {
-		s.emit(TokenDocumentStart)
-	} else {
-		s.expected(`"---"`)
-	}
-	s.parseBareDocument()
-	// TODO: | e-node s-l-comments
-}
-
-func (s *scanner) parseBareDocument() {
-	s.parseBlockNode(-1, blockIn)
-}
-
-func (s *scanner) parseDocumentPrefix() {
-	if s.acceptRun(string(bomRune)) {
-		s.ignore()
-	}
-	for s.tryLComment() {
-		s.ignore()
-		s.acceptRun(string(bomRune))
-		s.ignore()
-	}
-}
-
-func (s *scanner) parseDirective() {
-	s.match("%")
-
-	// Directive name
-	for {
-		rune := s.next()
-		if rune == eof || containsRune(whitespace+breakChars, rune) {
-			break
-		}
-	}
-	s.backup()
-
-	// Parameters
-	for {
-		oldIndex := s.index
-		if !s.acceptRun(whitespace) {
-			break
-		}
-
-		if s.peek() == eof || s.peek() == '#' {
-			s.index = oldIndex
-			break
-		}
-
-		for {
-			rune := s.next()
-			if rune == eof || containsRune(whitespace+breakChars, rune) {
-				break
-			}
-		}
-		s.backup()
-	}
-
-	s.emit(TokenDirective)
-	if !s.trySLComments() {
-		s.expected("line break")
-	}
-	s.ignore()
-}
-
-func (s *scanner) parseBlockNode(indent, context int) {
-	initIndex := s.index
-
-	// TODO: block-scalar
-	if s.trySeparate(indent+1, context) {
-		// TODO: properties
-		if s.peek() == '|' || s.peek() == '>' {
-			s.errorf("cannot handle block scalar")
-		}
-	}
-	s.index = initIndex
-
-	// block-collection
-	// TODO: properties
-	if s.trySLComments() {
-		// detect indent
-		if indentIndex := s.index; s.acceptIndent(indent + 1) {
-			s.acceptRun(" ")
-			newIndent := s.index - indentIndex
-
-			if s.peek() == '-' {
-				// block sequence
-				s.index = indentIndex
-				s.ignore()
-				s.parseBlockSequence(newIndent)
-				return
-			} else if s.peek() == '?' || s.peekImplicitKey() {
-				// block mapping
-				s.index = indentIndex
-				s.ignore()
-				s.parseBlockMapping(newIndent)
-				return
-			}
-		}
-	}
-	s.index = initIndex
-
-	// flow-in-block
-	if !s.trySeparate(indent+1, flowOut) {
-		s.expected("space")
-	}
-	s.ignore()
-	s.parseFlowNode(indent+1, flowOut)
-
-	if !s.trySLComments() {
-		s.expected("line break")
-	}
-	s.ignore()
-}
-
-func (s *scanner) parseBlockSequence(indent int) {
-	s.emit(TokenBlockSequenceStart)
-
-	for s.peek() != eof && s.acceptIndent(indent) {
-		s.ignore()
-
-		s.match("-")
-		// XXX: not followed by ns-char?
-		s.emit(TokenBlockEntry)
-
-		s.parseBlockIndented(indent, blockIn)
-	}
-
-	s.emit(TokenBlockEnd)
-}
-
-const maxImplicitKeySize = 1024
-
-func (s *scanner) peekImplicitKey() bool {
-	const (
-		statePlain = iota
-		stateSingle
-		stateDouble
-		stateMap
-		stateSequence
-	)
-
-	initIndex := s.index
-	defer func() {
-		s.index = initIndex
-	}()
-
-	// Get initial state
-	states := make([]int, 0, 2)
-	switch r := s.peek(); r {
-	case eof:
-		return false
-	case '{':
-		states = append(states, stateMap)
-	case '[':
-		states = append(states, stateSequence)
-	case '\'':
-		states = append(states, stateSingle)
-	case '"':
-		states = append(states, stateDouble)
-	case '?', ':', '-':
-		s.next()
-		if isPlainSafeChar(s.peek(), blockKey) {
-			states = append(states, statePlain)
-		} else {
-			return false
-		}
-	default:
-		if !containsRune(breakChars+whitespace+indicatorChars, r) {
-			states = append(states, statePlain)
-		} else {
-			return false
-		}
-	}
-	s.next()
-
-	for len(states) > 0 {
-		if s.index-initIndex > maxImplicitKeySize {
-			return false
-		}
-
-		r := s.peek()
-		if r == eof || containsRune(breakChars, r) {
-			return false
-		}
-
-		switch states[len(states)-1] {
-		case stateSingle:
-			s.next()
-			if r == '\'' {
-				if s.peek() == '\'' {
-					s.next()
-				} else {
-					states = states[:len(states)-1]
-				}
-			}
-		case stateDouble:
-			s.next()
-			switch r {
-			case '\\':
-				r2 := s.next()
-				if containsRune(breakChars, r2) {
-					return false
-				}
-			case '"':
-				states = states[:len(states)-1]
-			}
-		case statePlain:
-			if r == ':' {
-				states = states[:len(states)-1]
-			} else {
-				s.next()
-			}
-		default:
-			// TODO: sequences and maps
-			return false
-		}
-	}
-
-	// Finally... is there a colon?
-	s.trySeparateInLine()
-	return s.accept(":")
-}
-
-func (s *scanner) parseBlockIndented(indent, context int) {
-	// TODO: compact syntax and empty
-	s.parseBlockNode(indent, context)
-}
-
-func (s *scanner) parseBlockMapping(indent int) {
-	s.emit(TokenBlockMappingStart)
-
-	for s.peek() != eof && s.acceptIndent(indent) {
-		s.ignore()
-
-		if s.peek() == '?' { // explicit entry
-			s.next()
-			s.emit(TokenKey)
-
-			s.parseBlockIndented(indent, blockOut)
-
-			// TODO: empty
-			s.acceptIndent(indent)
-			s.ignore()
-
-			s.match(":")
-			s.emit(TokenValue)
-
-			s.parseBlockIndented(indent, blockOut)
-		} else { // implicit entry
-			s.emit(TokenKey)
-
-			if s.peek() != ':' {
-				s.parseFlowNode(-1, blockKey)
-
-				s.trySeparateInLine()
-				s.ignore()
-			}
-
-			s.match(":")
-			s.emit(TokenValue)
-
-			// TODO: empty
-			s.parseBlockNode(indent, blockOut)
-		}
-	}
-
-	s.emit(TokenBlockEnd)
-}
-
-func (s *scanner) parseFlowNode(indent, context int) {
-	if s.peek() == '*' {
-		s.next()
-		s.parseAnchorName()
-		s.emit(TokenAlias)
-		return
-	}
-
-	switch s.peek() {
-	case '!', '&':
-		s.parseProperties(indent, context)
-		if oldIndex := s.index; s.trySeparate(indent, context) && s.peek() != eof && (containsRune(`{['"`, s.peek())) {
-			s.ignore()
-
-			// TODO: go back to oldIndex if there isn't content
-			s.parseFlowContent(indent, context)
-		} else {
-			s.index = oldIndex
-		}
-	default:
-		s.parseFlowContent(indent, context)
-	}
-}
-
-func (s *scanner) parseProperties(indent, context int) {
-	firstRune := s.peek()
-	switch firstRune {
-	case '!':
-		s.parseTagProperty()
-	case '&':
-		s.parseAnchorProperty()
-	default:
-		s.expected("properties")
-	}
-
-	if oldIndex := s.index; s.trySeparate(indent, context) {
-		switch {
-		case firstRune == '!' && s.peek() == '&':
-			s.ignore()
-			s.parseAnchorProperty()
-		case firstRune == '&' && s.peek() == '!':
-			s.ignore()
-			s.parseTagProperty()
-		default:
-			s.index = oldIndex
-		}
-	}
-}
-
-func (s *scanner) parseTagProperty() {
-	s.match("!")
-	if s.accept("<") {
-		// Verbatim tag
-		s.acceptRun(uriChars)
-		s.match(">")
-	} else {
-		// Shorthand or non-specific tag
-		s.acceptRun(wordChars)
-		s.accept("!")
-		s.acceptRun(tagChars)
-	}
-	s.emit(TokenTag)
-}
-
-func (s *scanner) parseAnchorProperty() {
-	s.match("&")
-	s.parseAnchorName()
-	s.emit(TokenAnchor)
-}
-
-func (s *scanner) parseAnchorName() {
-	init := s.index
-	for {
-		rune := s.next()
-		if rune == eof || containsRune(whitespace+breakChars+flowIndicatorChars, rune) {
-			break
-		}
-	}
-	s.backup()
-	if s.index == init {
-		s.expected("anchor")
-	}
-}
-
-func (s *scanner) parseFlowContent(indent, context int) {
-	switch s.peek() {
-	case '[':
-		s.parseFlowSequence(indent, context)
-	case '{':
-		s.parseFlowMapping(indent, context)
-	case '\'':
-		s.parseSingleQuoted(indent, context)
-	case '"':
-		s.parseDoubleQuoted(indent, context)
-	default:
-		s.parsePlain(indent, context)
-	}
-}
-
-func inFlow(context int) int {
-	switch context {
-	case flowOut, flowIn:
-		return flowIn
-	case blockKey, flowKey:
-		return flowKey
-	}
-	panic("never reached")
-}
-
-func (s *scanner) parseFlowSequence(indent, context int) {
-	s.match("[")
-	s.emit(TokenFlowSequenceStart)
-
-	s.trySeparate(indent, context)
-	s.ignore()
-
-	context = inFlow(context)
-entryLoop:
-	for {
-		switch s.peek() {
-		case ',':
-			s.next()
-			s.emit(TokenFlowEntry)
-			continue entryLoop
-		case ']':
-			break entryLoop
-		}
-
-		s.parseFlowSequenceEntry(indent, context)
-		s.trySeparate(indent, context)
-		s.ignore()
-
-		if s.next() == ',' {
-			s.emit(TokenFlowEntry)
-			s.trySeparate(indent, context)
-			s.ignore()
-		} else {
-			s.backup()
-			break
-		}
-
-		s.trySeparate(indent, context)
-		s.ignore()
-	}
-
-	s.match("]")
-	s.emit(TokenFlowSequenceEnd)
-}
-
-func (s *scanner) parseFlowSequenceEntry(indent, context int) {
-	s.parseFlowNode(indent, context)
-	// TODO: | ns-flow-pair
-}
-
-func (s *scanner) parseFlowMapping(indent, context int) {
-	s.match("{")
-	s.emit(TokenFlowMappingStart)
-
-	s.trySeparate(indent, context)
-	s.ignore()
-
-	// TODO: ns-flow-map-entries
-
-	s.match("}")
-	s.emit(TokenFlowMappingEnd)
-}
-
-func (s *scanner) parseSingleQuoted(indent, context int) {
-	s.match(`'`)
-
-	var value string
-	switch context {
-	case flowOut, flowIn:
-		value = s.parseSingleMultiLine(indent)
-	case blockKey, flowKey:
-		value = s.parseSingleOneLine()
-	}
-
-	s.match(`'`)
-	s.emitString(TokenScalar, value)
-}
-
-func (s *scanner) parseSingleOneLine() string {
-	var runes []int
-	for {
-		if r := s.parseSingleChar(); r != -1 {
-			runes = append(runes, r)
-		} else {
-			break
-		}
-	}
-	return string(runes)
-}
-
-func (s *scanner) parseSingleMultiLine(indent int) string {
-	var runes []int
-
-	s.acceptRun(whitespace)
-	for {
-		if r := s.parseSingleChar(); r != -1 {
-			runes = append(runes, r)
-		} else {
-			break
-		}
-	}
-
-	// TODO: additional lines
-
-	return string(runes)
-}
-
-func (s *scanner) parseSingleChar() int {
-	if s.acceptString("''") {
-		return '\''
-	}
-
-	r := s.next()
-	if r == '\'' || !isJSONChar(r) {
-		s.backup()
-		return -1
-	}
-	return r
-}
-
-func (s *scanner) parseDoubleQuoted(indent, context int) {
-	s.match(`"`)
-
-	var value string
-	switch context {
-	case flowOut, flowIn:
-		value = s.parseDoubleMultiLine(indent)
-	case blockKey, flowKey:
-		value = s.parseDoubleOneLine()
-	}
-
-	s.match(`"`)
-	s.emitString(TokenScalar, value)
-}
-
-func (s *scanner) parseDoubleOneLine() string {
-	var runes []int
-	for {
-		if r := s.parseDoubleChar(); r != -1 {
-			runes = append(runes, r)
-		} else {
-			break
-		}
-	}
-	return string(runes)
-}
-
-func (s *scanner) parseDoubleMultiLine(indent int) string {
-	var runes []int
-
-	s.acceptRun(whitespace)
-	for {
-		if r := s.parseDoubleChar(); r != -1 {
-			runes = append(runes, r)
-		} else {
-			break
-		}
-	}
-
-	// TODO: additional lines
-
-	return string(runes)
-}
-
-func (s *scanner) parseDoubleChar() int {
-	r := s.next()
-	switch {
-	case r == '\\':
-		// Escape code
-		s.backup()
-		return s.parseEscapeChar()
-	case r != '"' && isJSONChar(r):
-		// Normal character
-		return r
-	}
-
-	// End of string
-	s.backup()
-	return -1
-}
-
-func (s *scanner) parseEscapeChar() int {
-	s.match("\\")
-
-	switch r := s.next(); r {
-	case '0':
-		return 0
-	case 'a':
-		return '\a'
-	case 'b':
-		return '\b'
-	case 't', '\t':
-		return '\t'
-	case 'n':
-		return '\n'
-	case 'v':
-		return '\v'
-	case 'f':
-		return '\f'
-	case 'r':
-		return '\r'
-	case 'e':
-		// ASCII escape
-		return 0x1b
-	case ' ', '"', '/', '\\':
-		return r
-	case 'N':
-		// Unicode next line
-		return 0x85
-	case '_':
-		// Unicode non-breaking space
-		return 0xA0
-	case 'L':
-		// Unicode line separator
-		return 0x2028
-	case 'P':
-		// Unicode paragraph separator
-		return 0x2029
-	case 'x':
-		return s.parseHexEscape(2)
-	case 'u':
-		return s.parseHexEscape(4)
-	case 'U':
-		return s.parseHexEscape(8)
-	}
-
-	s.backup()
-	s.expected("escape")
-	return -1
-}
-
-func (s *scanner) parseHexEscape(n int) int {
-	initIndex := s.index
-	for i := 0; i < n; i++ {
-		// TODO: give a better error message
-		s.match(hexDigits)
-	}
-	hex := string(s.buf[initIndex:s.index])
-
-	ui, err := strconv.Btoui64(hex, 16)
-	if err != nil {
-		s.error(err)
-	}
-	return int(ui)
-}
-
-func (s *scanner) parsePlain(indent, context int) {
-	// ns-plain-first
-	firstRune := s.next()
-	switch {
-	case firstRune == eof:
-		s.errorf("Unexpected EOF")
-	case containsRune("?:-", firstRune):
-		if !isPlainSafeChar(s.peek(), context) {
-			s.backup()
-			s.expected("scalar")
-		}
-	case containsRune(breakChars+whitespace+indicatorChars, firstRune):
-		s.backup()
-		s.expected("scalar")
-	}
-
-	// nb-ns-plain-in-line
-	for {
-		s.acceptRun("#") // the previous character is guaranteed to be non-whitespace
-		initialIndex := s.index
-		s.acceptRun(whitespace)
-		rune := s.next()
-		if rune == eof || (rune == ':' && !isPlainSafeChar(s.peek(), context)) || rune == '#' || !isPlainSafeChar(rune, context) {
-			s.index = initialIndex
-			break
-		}
-	}
-	// TODO: additional lines
-
-	s.emit(TokenScalar)
-}
-
-func isPlainSafeChar(rune int, context int) bool {
-	switch context {
-	case flowOut, blockKey:
-		return !containsRune(whitespace+breakChars, rune)
-	case flowIn, flowKey:
-		return !containsRune(whitespace+breakChars+flowIndicatorChars, rune)
-	}
-	panic("never reached")
-}
-
-func (s *scanner) isStartOfLine() bool {
-	return s.currentPos().Column == 1
-}
+const eof = -1 // sentinel compared against the results of next and peek to detect the end of input
 
 // next returns the next rune and updates index.
 func (s *scanner) next() int {
 	return s.buf[s.index]
 }
 
-// rewind moves the index backward
+// backup moves the index backward
 func (s *scanner) backup() {
 	s.index--
 }
 
+// consume returns the buffered characters that lie before the index (the
+// whole buffer when the index is at or past its end) and then discards them
+// through ignore.
+func (s *scanner) consume() (chars string) {
+	end := s.index
+	if end > len(s.buf) {
+		end = len(s.buf)
+	}
+	chars = string(s.buf[:end])
+	s.ignore()
+	return
+}
+
 // ignore advances the buffer and discards the characters read.
 func (s *scanner) ignore() {
 	if s.index < len(s.buf) {
 	return
 }
 
-// consume advances the buffer and returns the characters removed from the
-// buffer.
-func (s *scanner) consume() (chars string) {
-	if s.index < len(s.buf) {
-		chars = string(s.buf[:s.index])
-	} else {
-		chars = string(s.buf)
+// error will make Scan return an error once all of the tokens are flushed.
+func (s *scanner) error(err os.Error) {
+	panic(scanError{err, s.currentPos()})
+}
+
+func (s *scanner) errorf(format string, args ...interface{}) {
+	s.error(fmt.Errorf(format, args...))
+}
+
+func (s *scanner) expected(exp string) {
+	s.errorf("Expected %s, got %q", exp, s.peek())
+}
+
+type scanError struct {
+	Error    os.Error
+	Position Position
+}
+
+func (err scanError) String() string {
+	if err.Position.Column == 0 {
+		return err.Error.String()
 	}
-	s.ignore()
-	return
+	return err.Position.String() + ": " + err.Error.String()
 }
 
 // currentPos returns the file position at the index.
 	}
 	return true
 }
+
+// advancePosition returns pos moved past the given runes.  "\r", and "\n"
+// not directly preceded by "\r" in the same slice, start a new line at
+// column 1; a byte order mark leaves the column alone; every other rune
+// advances the column by one.  Index grows by len(runes).
+//
+// Because "\r\n" is only treated as one line break inside a single call:
+//
+//		advancePosition(advancePosition(pos, []int{'\r'}), []int{'\n'}) != advancePosition(pos, []int("\r\n"))
+func advancePosition(pos Position, runes []int) Position {
+	afterCR := false
+	for _, r := range runes {
+		switch {
+		case r == '\r' || (r == '\n' && !afterCR):
+			pos.Line++
+			pos.Column = 1
+		case r == '\n', r == bomRune:
+			// second half of "\r\n", or a byte order mark: position unchanged
+		default:
+			pos.Column++
+		}
+		afterCR = r == '\r'
+	}
+
+	pos.Index += len(runes)
+	return pos
+}