Commits

Anonymous committed 7c0db83

Now the lexer also returns AST nodes on the stack, just like the parser.

Comments (0)

Files changed (5)

 
 ;; Print an AST node unreadably (type and identity), followed by its VALUE
 ;; and CHILDREN slots when they are non-NIL.
 (defmethod print-object ((obj ast-node) stream)
   (print-unreadable-object (obj stream :type t :identity t)
-    (with-slots (children) obj
-      (when children
-	(format stream "Children=~s" children)))))
+    (with-slots (value children) obj
+      (format stream "~@[Value=~s~] ~@[Children=~s~]" value children)))) ; each ~@[...~] clause is emitted only when its argument is non-NIL
 
 (defgeneric ast-node-type (node)
   (:documentation "Return the symbolic name for the node's type (corresponds to a rule name in 
    the original grammar)")
   (:method ((node ast-node))
     (class-name (class-of node))))
+
+(defgeneric ast-node-value (node)
+  (:documentation "Return the node's value (the contents of its VALUE slot)")
+  (:method ((node ast-node))
+    (slot-value node 'value)))
 
 
 "
-  (let ((token-rules 
+  (let ((token-nodes (loop for token in token-definitions
+			collect (destructuring-bind (state character-test token-type &key ((:state next-state) nil) ((:accumulate accumulation-test) nil)) token
+				  (declare (ignorable state character-test next-state accumulation-test))
+				  `(defclass ,token-type (ast-node) ()))))
+	(token-rules 
 	 (loop for token in token-definitions
 	    collect (destructuring-bind (state character-test token-type &key ((:state next-state) nil) ((:accumulate accumulation-test) nil)) token
 		      (let ((actual-character-test (cond ((equal t character-test)
 			      ,(when next-state `(setf state ,next-state))
 			      (return-token ',token-type))))))))
     `(progn
+
+       ,@token-nodes
+
        (defclass ,name (lexer)
 	 ((state :initform ,initial-state)))
 
 			   do (progn 
 				(accumulate nc)
 				(incf-lex-position source position))
-			   finally (return (list type token-value))))
+			   finally (return (list type (make-instance type :value token-value)))))
 		      (return-token (type)
-			(list type c)))
+			(list type (make-instance type :value c))))
 	       (when c
 		 (cond ,@token-rules)))))))))
 

package-hh-parse.lisp

 
    ;; AST nodes
    #:ast-node-type
+   #:ast-node-value
 
    ;; grammars
    ;; #:+ from CL
 	   (parser (make-parser lexer grammar)))
       (multiple-value-bind (result value) (parse-input parser)
 	(assert-equal :failed result)
-	(assert-equal `(identifier "foobar") value)))
+	(destructuring-bind (node-type node) value
+	  (assert-equal 'identifier node-type)
+	  (assert-true (string= "foobar" (ast-node-value node))))))
 
     (let* ((grammar (html-grammar))
 	   (source (make-source "<foo bar=1>borp whaple</foo"))
 	   (parser (make-parser lexer grammar)))
       (multiple-value-bind (result value) (parse-input parser)
 	(assert-equal :failed result)
-	(assert-equal `(identifier "foo") value)))
+	(destructuring-bind (node-type node) value
+	  (assert-equal 'identifier node-type)
+	  (assert-true (string= "foo" (ast-node-value node))))))
 
     (let* ((grammar (html-grammar))
 	   (source (make-source "<foo b ar=1>borp whaple</foo>"))
 	   (parser (make-parser lexer grammar)))
       (multiple-value-bind (result value) (parse-input parser)
 	(assert-equal :failed result)
-	(assert-equal `(ws " ") value)))
+	(destructuring-bind (node-type node) value
+	  (assert-equal 'ws node-type)
+	  (assert-true (string= " " (ast-node-value node))))))
 
     (let* ((grammar (html-grammar))
 	   (source (make-source "<foo>"))
 	   (parser (make-parser lexer grammar)))
       (multiple-value-bind (result value) (parse-input parser)
 	(assert-equal :failed result)
-	(assert-equal `(gt #\>) value)))
+	(destructuring-bind (node-type node) value
+	  (assert-equal 'gt node-type)
+	  ;; CHAR= must receive both characters: with a single argument it is
+	  ;; always true, so the token value would never actually be checked.
+	  (assert-true (char= #\> (ast-node-value node))))))
 
     )
 
 ;; Grammar
 
 ;; Base class for AST nodes.  VALUE is the payload supplied via :value (the
 ;; lexer passes the token's text or character); both slots default to NIL.
 (defclass ast-node ()
-  ((children :initform () :initarg :children :accessor children)))
+  ((value :initform () :initarg :value :accessor value-of)
+   (children :initform () :initarg :children :accessor children)))
 
 (defclass production ()
   ((rule-name :initarg :rule :accessor rule-name)
 (defclass lalr1-parser ()
   ((grammar :initarg :grammar :accessor grammar)
    (lexer :initarg :lexer :accessor lexer)
-   (stack :initform () :accessor stack)))
+   (stack :initform () :accessor stack 
+	  :documentation "The parser uses a stack, where each entry is a list of length 2.  The 1st item is a number indicating the state
+          (this value in the entry on the top of the stack represents the current state of the parser).  The 2nd item is a token
+          or value (terminology is still messy).  This token or value is itself a 2-item list, with the first item being
+          a symbol in the grammar corresponding to that node that is on the stack (could be a token from a lexer, or a non-terminal
+          after reductions have occurred).  The 2nd item is the actual AST node built by the lexer and parser as parsing progresses. ")))
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.