Commits

Anonymous committed 88b24ad

Now building LR1 items

Comments (0)

Files changed (2)

     grammar-419))
 
 (let ((grammar-420 (transform-extended-grammar-to-fundamental `(
-								(start (:bof s :dof))
+								(start (s))
 								(s (l eq r)
 								   (r))
 								(l (star r)
 								    ;; literals are digits, decimal, plus, minus
 								    (integer (digit)
 									     (integer digit))
-								    (numeric-value ( (? (^ plus minus)) (integer (? decimal integer))))
-								    (number (:bof numeric-value :eof))
+								    (numeric-value ( (? (^ plus minus)) integer (? decimal integer)))
+								    (number (numeric-value))
 								    ))))
   (defun numbers-grammar ()
     numbers-grammar))
 							       (* (^ tag html-text))
 							       es 
 							       end-tag)))
-						    (document (:bof tag :eof))))))
+						    (document (tag))))))
   (defun html-grammar ()
     html-grammar))
 
 ;; ---------------------------------------------------------------------------------------------------------------------
 ;; LALR(1) parser construction
 
+;; helper
+(defmacro equal-slots ( (&rest slots) left right &key ((:test test) ''equal)) 
+  `(and ,@(loop for slot in slots
+	     collect `(funcall ,test (slot-value ,left ',slot) (slot-value ,right ',slot)))))
+
 ;; Classes + types
 
 (defclass production ()
 (defmethod print-object ((obj production) stream)
   (print-unreadable-object (obj stream :type t :identity t)
     (with-slots (rule-name rhs) obj
-      (format stream "Rule=~a RHS=~s" rule-name rhs))))
+      (format stream "Rule=~s RHS=~s" rule-name rhs))))
 
 (defclass lr0-item ()
   ((position :initarg :position)
       (with-slots ((right-position position) (right-production production)) right
 	(and (= left-position right-position)
 	     (equal-items left-production right-production)))))
+  (:method ((left lr1-item) (right lr1-item))
+    (with-slots ((left-position position) (left-lookahead lookahead) (left-production production)) left
+      (with-slots ((right-position position) (right-lookahead lookahead) (right-production production)) right
+	(and (= left-position right-position)
+	     (equal left-lookahead right-lookahead)
+	     (equal-items left-production right-production)))))
   (:method ((left list) (right list))
-    (loop for left-item in left
-       for right-item in right
-       with same = t
-       do (setf same (and same (equal-items left-item right-item)))
-       while same
-       finally (return same)))
+    (and (= (length left) (length right)) 
+	 (loop for left-item in left
+	    for right-item in right
+	    with same = t
+	    do (setf same (and same (equal-items left-item right-item)))
+	    while same
+	    finally (return same))))
   (:method ((left t) (right t))
     (equal left right)))
 
 
 (defun initial-lr0-items-for-grammar (grammar start-rule-name)
   "Start rule should have  single production of the form: rule-name (:bof some-other-rule-name :eof);
-  an item's position is an index into the production's rhs of the *next* unread term.  Thus,
-  the starting position is 1, implying the beginning of the file (:bof) has been read,
-  and some-other-rule-name is the next team waiting to be read"
+  an item's position is an index into the production's rhs of the *next* unread term."
   (loop for rule in grammar
      if (eql (car rule) start-rule-name)
      return (loop for production in (rule-productions rule)
 	       collect (initial-lr0-item-for-production production))))
 
+(defun initial-lr1-item-for-production (production lookahead)
+  (make-instance 'lr1-item :position 0 :production production :lookahead lookahead))
+
+(defun initial-lr1-items-for-grammar (grammar start-rule-name)
+  (loop for rule in grammar
+     if (eql (car rule) start-rule-name)
+     return (loop for production in (rule-productions rule)
+	       collect (initial-lr1-item-for-production production :eof))))
+
 (defun rule-productions-in-grammar (grammar rule-name)
   (loop for rule in grammar
      if (eql (car rule) rule-name)
     (with-slots (rule-name rhs) production
       (>= position (length rhs)))))
 
-(defun next-term-for-item (item)
+(defun next-lr0-term-for-item (item)
   (unless (item-finished-p item) 
     (with-slots (position production) item
       (with-slots ( rule-name rhs) production
 	(elt rhs position)))))
 
-(defun increment-item-position (item)
+(defun next-lr1-term-and-lookahead-for-item (item default-lookahead)
+  (unless (item-finished-p item) 
+    (with-slots (position production) item
+      (with-slots ( rule-name rhs) production
+	(values (elt rhs position)
+		(let ((lookahead-position (1+ position)))
+		  (if (< lookahead-position (length rhs))
+		      (elt rhs lookahead-position)
+		      default-lookahead)))))))
+
+(defun increment-lr0-item-position (item)
   (with-slots (position production) item
     (make-instance 'lr0-item :position (1+ position) :production production)))
 
+(defun increment-lr1-item-position (item)
+  (with-slots (position lookahead production) item
+    (make-instance 'lr1-item :position (1+ position) :lookahead lookahead :production production)))
+
 (defun item< (left right)
   (labels ((symbol< (left right)
 	     (string< (symbol-name left) (symbol-name right)))
 	  (loop for production in productions
 	     do (with-slots (rule-name rhs) production
 		  (declare (ignore rule-name))
-		  (loop for first in (first-of-symbol grammar (car rhs))
-		     do (pushnew first firsts))))
+		  (let ((new-first (car rhs)))
+		    (unless (or (equal symbol new-first) (member new-first firsts))
+		      (loop for first in (first-of-symbol grammar new-first)
+			 do (pushnew first firsts))))))
 	  firsts)
 	;; terminal -- no productions
 	(list symbol))))
   (let ((closure items))
     (loop for done = t
        do (loop for item in closure
-	     for productions = (rule-productions-in-grammar grammar (next-term-for-item item))
+	     for productions = (rule-productions-in-grammar grammar (next-lr0-term-for-item item))
 	     when productions
 	     do (loop for production in productions
 		   do (let ((initial-item (initial-lr0-item-for-production production)))
 			  (setf done nil)
 			  (push initial-item closure)))))
        until done
-       ;; note: we sort here, to ensure that we can use #'equal as a canonical
+       ;; note: we sort here, to ensure that we can use #'equal-items as a canonical
        ;; test of uniqueness (and set membership, too)
        finally (return (sort closure #'item<)))))
 
+(defun closure-of-lr1-items (grammar items)
+  (let ((closure items))
+    (loop for done = t
+       do (loop for item in closure
+	     for (next-term lookahead) = (multiple-value-list (next-lr1-term-and-lookahead-for-item item (slot-value item 'lookahead)))
+	     for productions = (rule-productions-in-grammar grammar next-term)
+	     when productions
+	     do (loop for production in productions
+		   do (loop for first in (first-of-symbol grammar lookahead) 
+			 do (let ((initial-item (initial-lr1-item-for-production production first)))
+			      (unless (member initial-item closure :test #'equal-items)
+				(setf done nil)
+				(push initial-item closure))))))
+       until done
+       ;; note: we sort here, to ensure that we can use #'equal-items as a canonical
+       ;; test of uniqueness (and set membership, too)
+       finally (return (sort closure #'item<)))))
+
+(defgeneric equal-cores (left right)
+  (:method ((left lr1-item) (right lr1-item))
+    (and (equal-slots (position) left right)
+	 (equal-slots (production) left right :test 'equal-cores)))
+  (:method ((left production) (right production))
+    (equal-slots (rule-name rhs) left right))
+  (:method ((left list) (right list))
+    (and (= (length left) (length right)) 
+	 (loop for left-item in left
+	    for right-item in right
+	    with same = t
+	    do (setf same (and same (equal-cores left-item right-item)))
+	    while same
+	    finally (return same))))
+  (:method ((left t) (right t))
+    (equal left right)))
+
 (defun lr0-goto-for-symbol (grammar items symbol)
   (closure-of-lr0-items grammar 
 		    (loop for item in items
-		       if (equal symbol (next-term-for-item item))
+		       if (equal symbol (next-lr0-term-for-item item))
 		       unless (item-finished-p item)
-		       collect (increment-item-position item))))
+		       collect (increment-lr0-item-position item))))
+
+(defun lr1-goto-for-symbol (grammar items symbol)
+  (closure-of-lr1-items grammar 
+		    (loop for item in items
+		       if (equal symbol (next-lr1-term-and-lookahead-for-item item (slot-value item 'lookahead)))
+		       unless (item-finished-p item)
+		       collect (increment-lr1-item-position item))))
 
 (defun lr0-items-for-grammar (grammar start-rule-name)
   (let ((states (list (closure-of-lr0-items grammar (initial-lr0-items-for-grammar grammar start-rule-name))))
        until done
        finally (return (reverse states)))))
 
+(defun lr1-items-for-grammar (grammar start-rule-name)
+  (let ((states (list (closure-of-lr1-items grammar (initial-lr1-items-for-grammar grammar start-rule-name))))
+	(symbols (symbols-in-grammar grammar)))
+    (loop for done = t
+       do (loop for state in states
+	     ;; do (format *standard-output* "State: ~a~%" state)
+	     do (loop for symbol in symbols
+		   do (let ((goto (lr1-goto-for-symbol grammar state symbol)))
+			(when (and goto (not (member goto states :test #'equal-items)))
+			  (setf done nil)
+			  (push goto states)))))
+       until done
+       finally (return (reverse states)))))
+