Commits

Eric Roshan Eisner committed 52cf33b Draft

tokenize exponents in numbers and compound units

Comments (0)

Files changed (2)

 	{" 22 nm * ( 4.5 kN + .99 N ) ", false,
 		[]string{"22", "nm", "*", "(", "4.5", "kN", "+", ".99", "N", ")"}},
 
-	// TODO
-	// {"1.2e-9m", false, []string{"1.2e-9", "m"}},
+	{"1.2e9", false, []string{"1.2e9"}},
+	{"1.2e9+.8e9", false, []string{"1.2e9", "+", ".8e9"}},
+	{"1.2e-9m", false, []string{"1.2e-9", "m"}},
+
+	{"10m/s*10N", false, []string{"10", "m/s", "*", "10", "N"}},
 }
 
 func TestTokenizer(t *testing.T) {
 	{"1J + 1N*1m - 1W*1s", false, "1J"},
 	{"3600J/(.5h+1800s)", false, "1W"},
 	{"3+4*2/(1-5)", false, "1"},
+	{"1e-9kg*m/s*s*1e9m/s", false, "1W"},
 
 	{"", true, ""},
 	{"(3+2) (2*3)", true, ""},
 	i   int
 }
 
+func (t *tokenizer) scanDigits() { // Advance t.i past a (possibly empty) run of ASCII digits in t.src.
+	for ; t.i < len(t.src); t.i++ {
+		if c := t.src[t.i]; '0' > c || c > '9' { // c lies outside '0'..'9': stop, leaving t.i on it
+			break
+		}
+	}
+	return
+}
+
+func (t *tokenizer) scanNumber() (token string, err error, done bool) { // Scan a numeric token at t.i: digits, an optional '.' fraction, an optional e/E exponent.
+	start := t.i // the token is sliced from here at exit
+	t.scanDigits() // integer part; may be empty, e.g. for ".99"
+	if t.i >= len(t.src) {
+		goto exit // input ended inside the number
+	}
+
+	if t.src[t.i] == '.' {
+		t.i++
+		t.scanDigits() // fractional digits; may be empty
+		if t.i >= len(t.src) {
+			goto exit
+		}
+	}
+
+	if c := t.src[t.i]; c == 'e' || c == 'E' {
+		t.i++ // NOTE(review): the marker is consumed before any exponent digit is seen, so "1eV" would yield "1e" - confirm this is intended
+		if t.i >= len(t.src) {
+			goto exit
+		}
+
+		if c = t.src[t.i]; c == '-' || c == '+' {
+			t.i++ // optional exponent sign
+		}
+		t.scanDigits() // exponent digits; nothing here checks that at least one is present
+	}
+
+exit:
+	return t.src[start:t.i], nil, false // err is always nil and done always false on this path
+}
+
+func unitByte(b byte) bool { // Report whether b may appear inside a unit token.
+	if '0' <= b && b <= '9' {
+		return false // digits are excluded
+	}
+	switch b {
+	case '(', ')', '+', '-', '.', ' ': // parentheses, '+', '-', '.', and space are excluded
+		return false
+	}
+	return true // every other byte counts, including '/' and '*'
+}
+
+func (t *tokenizer) scanUnit() (token string, err error, done bool) { // Scan a unit token at t.i; '/' and '*' join units into a compound such as "m/s".
+	start := t.i
+	for ; t.i < len(t.src); t.i++ {
+		c := t.src[t.i]
+		if !unitByte(c) {
+			return t.src[start:t.i], nil, false // stop before the first non-unit byte
+		}
+		if c == '/' || c == '*' {
+			if t.i+1 == len(t.src) || !unitByte(t.src[t.i+1]) { // operator is last, or not followed by a unit byte
+				break // leave the '/' or '*' unconsumed, outside the token
+			}
+		}
+	}
+	return t.src[start:t.i], nil, false // reached via the break above or by running out of input
+}
+
 // Return the next token, or error, or no tokens left.
 func (t *tokenizer) next() (token string, err error, done bool) {
 	start := t.i
-	inNumber := false
 	for ; t.i < len(t.src); t.i += 1 {
 		switch t.src[t.i] {
 		case ' ':
 			return
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
 			if start == t.i {
-				inNumber = true
-			} else if !inNumber {
-				token = t.src[start:t.i]
-				return
+				return t.scanNumber()
 			}
+			token = t.src[start:t.i]
+			return
 		default:
-			// Other bytes are assumed to be part of a unit name.
-			if inNumber {
-				// A number-unit should be tokenated out, e.g. "1m"
-				token = t.src[start:t.i]
-				return
-			}
+			return t.scanUnit()
 		}
 	}
 	if start != t.i {