Commits

Anonymous committed b49d902

Make yoob.Scanner parameterized.

  • Participants
  • Parent commits 41572c9

Comments (0)

Files changed (3)

File README.markdown

 *   `yoob.Scanner`, in `yoob/scanner.js`
     
     A simple, inefficient lexical analyzer, parameterized with a table of
-    regexps (TODO).  Can also serve as a starting point for writing your
-    own simple, inefficient lexical analyzer.
+    regexps.  Can also serve as a starting point for writing your own, less
+    simple, inefficient lexical analyzer.
 
 PLANNED:
 

File eg/scanner.html

 
   document.getElementById('scan').onclick = function(e) {
     var s = new yoob.Scanner();
-    s.init(input.value);
+    s.init([
+      ['bracket',         "^(\\(|\\)|\\[|\\])"],
+      ['integer literal', "^(\\d+)"],
+      ['identifier',      "^([a-zA-Z]\\w*)"],
+      ['string literal',  "^\"(.*?)\""]
+    ]);
+    s.reset(input.value);
     while (s.token !== null) {
       alert("'" + s.token + "', type: " + s.type);
       s.scan();

File src/yoob/scanner.js

 
 /*
  * A lexical analyzer.
+ * Create a new yoob.Scanner object, then call init, passing it an
+ * array of two-element arrays.  The first element of each pair is the
+ * type of token; the second is a regular expression (as a String),
+ * anchored with ^, which matches that token at the start of the
+ * remaining text.  Each regular expression should have exactly one
+ * capturing group, whose contents become the token.
+ * Then call reset, passing it the string to be scanned.
+ * 
  */
 yoob.Scanner = function() {
   this.text = undefined;
-  this.index = undefined;
   this.token = undefined;
   this.type = undefined;
   this.error = undefined;
+  this.table = undefined;
+  this.whitespacePattern = "^[ \\t\\n\\r]*";
 
-  this.init = function(text) {
+  this.init = function(table) {
+    this.table = table;
+  };
+
+  this.reset = function(text) {
     this.text = text;
-    this.index = 0;
     this.token = undefined;
     this.type = undefined;
     this.error = undefined;
     this.scan();
   };
   
-  this.scanPattern = function(pattern, type, tokenGroup) {
+  this.scanPattern = function(pattern, type) {
     var re = new RegExp(pattern);
     var match = re.exec(this.text);
     if (match === null) return false;
     this.type = type;
-    if (tokenGroup !== undefined) {
-      this.token = match[tokenGroup];
-    }
+    this.token = match[1];
     this.text = this.text.substr(match[0].length);
     return true;
   };
 
   this.scan = function() {
-    this.scanPattern("^[ \\t\\n\\r]*", "whitespace");
+    this.scanPattern(this.whitespacePattern, "whitespace");
     if (this.text.length === 0) {
       this.token = null;
       this.type = "EOF";
       return;
     }
-    if (this.scanPattern("^(\\(|\\)|\\[|\\])", "bracket", 1)) return;
-    if (this.scanPattern("^(\\d+)", "integer literal", 1)) return;
-    if (this.scanPattern("^([a-zA-Z]\\w*)", "identifier", 1)) return;
-    if (this.scanPattern("^\"(.*?)\"", "string literal", 1)) return;
-    if (this.scanPattern("^([\\s\\S])", "unknown character", 1)) return;
-    // todo insert catchall case
+    for (var i = 0; i < this.table.length; i++) {
+      var type = this.table[i][0];
+      var pattern = this.table[i][1];
+      if (this.scanPattern(pattern, type)) return;
+    }
+    if (this.scanPattern("^([\\s\\S])", "unknown character")) return;
+    // should never get here
   };
 
   this.expect = function(token) {