Phil Hargett  committed 42ba60f

Appear to have a working LALR(1) parser for a basic html grammar

  • Participants
  • Parent commits be83af7
  • Branches parsing

Comments (0)

Files changed (3)

File grammar.lisp

 (defun transform-extended-grammar-to-fundamental (start-rule-name grammar ) 
-  (transform-for-nil start-rule-name
-		     (let ((*new-rules* nil))
-		       (append (for-each-rhs grammar
+  (transform-for-nil 'start
+		     (let ((*new-rules* nil)
+			   (augmented-grammar (cons `(start (,start-rule-name)) grammar)))
+		       (append (for-each-rhs augmented-grammar
 					     #'(lambda (rhs)		      
 						 (transform-rhs (if (listp rhs) rhs (list rhs)))))
 (defclass lr1-item ()
   ((position :initarg :position)
-   (lookahead :initarg :lookahead)
+   (lookahead :initarg :lookahead )
    (production :initarg :production)))
 (defclass lr1-state ()
 (defun initial-lr1-state-for-grammar (specification start-rule-name)
   (make-instance 'lr1-state
 		 :items (closure-of-lr1-items specification 
-					      (loop for rule in specification
-						 if (eql (car rule) start-rule-name)
-						 return (loop for production in (rule-productions rule)
-							   collect (initial-lr1-item-for-production production :eof))))))
+					      (let ((rule (assoc start-rule-name specification)))
+						(loop for production in (rule-productions rule)
+						   collect (initial-lr1-item-for-production production :eof))))))
 (defun rule-productions-in-grammar (specification rule-name)
   (loop for rule in specification
 		     ;; how many items on the stack to pop
 		     (list :reduce production )))
-	     (record-accept (statei next-symbol)
+	     (record-accept (statei next-symbol production)
 	       ;; note we're assuming next-symbol will be :eof
-	       (setf (gethash (list statei next-symbol) (entries action-table)) :accept))
+	       (setf (gethash (list statei next-symbol) (entries action-table)) (list :accept production)))
 	     (record-goto (statei non-terminal)
 	       (let* ((state (elt states statei)) 
       		    (if (shifting-p item)
       			;; shifting
       			(record-shift i next-symbol)
-      			;; reducing or accepting -- check look ahead first
-      			;; record nothing if lookahead not matched
-      			(when (equal next-symbol (slot-value item 'lookahead))
-      			  (if (equal :eof next-symbol)
-      			      ;; accepting
-      			      (record-accept i next-symbol)
-      			      ;; reducing--check lookahead first
-      			      (record-reduce i next-symbol (slot-value item 'production))))))
+      			;; reducing or accepting -- use lookahead to decide
+			(with-slots (lookahead production) item
+			  (if (and (equal :eof lookahead)
+				   (equal 'start (slot-value production 'rule-name)))
+			      ;; accepting
+			      (record-accept i lookahead production)
+			      ;; reducing
+			      (record-reduce i lookahead production)))))
       		 ;; goto table
       		 do (loop for non-terminal in non-terminals
       			 do (record-goto i non-terminal)))))
 (defun parse-token (parser token)
   "Advance the state of the parser by parsing a single token; does not assume token came from lexer"
   (let ((grammar (grammar parser)))
-    (destructuring-bind (token-symbol token-value) token
+    (destructuring-bind (token-symbol token-value) (if token token (list :eof nil))
       (declare (ignorable token-value))
       (loop with continue = t
 	 with result = nil
       (declare (ignore stack-state))
-(defun parse-input (parser input)
+(defun parse-input (parser)
   (let ((lexer (lexer parser)))
-    (loop for token = (funcall lexer input)
-       while token
-       do (parse-token parser token)
-       finally (return (get-parse-result parser)))))
+    (loop for token = (parse-token parser (next-token lexer))
+       while (equal :continue token)
+       finally (return (values token (stack parser)))
+       ;; finally (return (get-parse-result parser))
+	 )))

File samples.lisp

 							       (* (^ tag html-text))
-						    (document (tag :eof))))))
+						    (document (tag))))))
   (defun html-grammar ()