Commits

Eric Roshan Eisner  committed 585f360

add tokenizer

  • Participants
  • Parent commits 7d09a68

Comments (0)

Files changed (2)

File parse_test.go

 		}
 	}
 }
+
+// tokenTable drives TestTokenizer: each entry pairs a source
+// expression with the exact token sequence tokenize must produce.
+// fail marks inputs whose tokenization should error (none yet —
+// the tokenizer currently never reports an error).
+var tokenTable = []struct {
+	src    string
+	fail   bool
+	tokens []string
+}{
+	{"", false, nil},
+	{"1m", false, []string{"1", "m"}},
+	{"m1", false, []string{"m", "1"}}, // It's the parser's job to reject this
+	{"22nm*(4.5kN+.99N)", false,
+		[]string{"22", "nm", "*", "(", "4.5", "kN", "+", ".99", "N", ")"}},
+	// Whitespace between tokens is skipped, never returned.
+	{" 22 nm * ( 4.5 kN + .99 N ) ", false,
+		[]string{"22", "nm", "*", "(", "4.5", "kN", "+", ".99", "N", ")"}},
+
+	// TODO
+	// {"1.2e-9m", false, []string{"1.2e-9", "m"}},
+}
+
+func TestTokenizer(t *testing.T) {
+	for _, entry := range tokenTable {
+		tokens, err := tokenize(entry.src)
+
+		if entry.fail && err == nil {
+			t.Errorf("Tokenizing %q: expected error, but got no error.")
+		}
+		if !entry.fail && err != nil {
+			t.Errorf("Tokenizing %q: expected no error, got %q",
+				entry.src, err.Error())
+		}
+
+		tokenMatch := true
+		if len(tokens) != len(entry.tokens) {
+			tokenMatch = false
+		} else {
+			for i := 0; i < len(tokens); i += 1 {
+				if tokens[i] != entry.tokens[i] {
+					tokenMatch = false
+				}
+			}
+		}
+		if !tokenMatch {
+			t.Errorf("Tokenizing %q: expected tokens %v, got %v",
+				entry.src, entry.tokens, tokens)
+		}
+	}
+}
+package units
+
// tokenError is a string-backed error type for reporting
// tokenization failures.
type tokenError string

// Error implements the error interface by returning the underlying
// message text.
func (e tokenError) Error() string {
	return string(e)
}
+
+// tokenizer holds the scanning state for a single source string.
+type tokenizer struct {
+	src string // the full expression being tokenized
+	i   int    // byte index of the next unconsumed position in src
+}
+
+// Return the next token, or error, or no tokens left.
+//
+// Tokens are: single-byte operators and parens; runs of digits and
+// '.' (numbers); and runs of any other non-space bytes (unit names).
+// Spaces separate tokens and are never returned. err is currently
+// always nil; done is true once src is exhausted.
+func (t *tokenizer) next() (token string, err error, done bool) {
+	start := t.i
+	inNumber := false
+	for ; t.i < len(t.src); t.i += 1 {
+		switch t.src[t.i] {
+		case ' ':
+			if start == t.i {
+				// Leading space: skip it.
+				start += 1
+			} else {
+				// Space ends the token in progress. t.i is left on
+				// the space; the next call skips over it.
+				token = t.src[start:t.i]
+				return
+			}
+		case '(', ')', '*', '/', '+', '-':
+			if start == t.i {
+				// The operator itself is a one-byte token.
+				t.i += 1
+			}
+			// Otherwise return the token in progress; the operator
+			// is produced by the next call.
+			token = t.src[start:t.i]
+			return
+		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
+			if start == t.i {
+				inNumber = true
+			} else if !inNumber {
+				// A digit ends a unit-name token, e.g. "m1".
+				token = t.src[start:t.i]
+				return
+			}
+		default:
+			// Other bytes are assumed to be part of a unit name.
+			if inNumber {
+				// A number-unit should be tokenized out, e.g. "1m"
+				token = t.src[start:t.i]
+				return
+			}
+		}
+	}
+	// End of input: flush any token still in progress.
+	if start != t.i {
+		token = t.src[start:t.i]
+		return
+	}
+	return "", nil, true
+}
+
+func tokenize(src string) (tokens []string, err error) {
+	tokenizer := &tokenizer{src: src}
+	token, err, done := tokenizer.next()
+	for err == nil && !done {
+		tokens = append(tokens, token)
+		token, err, done = tokenizer.next()
+	}
+	if err != nil {
+		tokens = nil
+	}
+	return
+}