;; HH-Parse / parser.lisp

(in-package :hh-parse)

;; ---------------------------------------------------------------------------------------------------------------------
;; LALR(1) parser construction

;; Source code helper
(defmethod source-text ((parser lalr1-parser))
  "Return the source text currently held by PARSER's lexer."
  (let ((lexer (lexer parser)))
    (source-text lexer)))

(defmethod (setf source-text) (text (parser lalr1-parser))
  "Store TEXT as the source text of PARSER's lexer; the parser itself keeps no copy."
  ;; Delegate to the lexer's own writer
  (let ((lexer (lexer parser)))
    (setf (source-text lexer) text)))

(defun expected-next-symbols (parser)
  "Given a parser's current state, return a list of the symbols for which the grammar's
   action table has an entry in that state (i.e., symbols that would advance the parse).
   Returns NIL when no entry matches, including when the parser's stack is empty."
  (let ((state (caar (stack parser)))
        (actions (entries (actions (grammar parser))))
        (expected ()))
    ;; Keys of the action table are 2-element lists: (state symbol)
    (loop for (expected-state expected-symbol) being the hash-keys of actions
          ;; EQL rather than =, so an empty stack (state is NIL) yields NIL
          ;; instead of signalling a type error
          when (eql state expected-state)
            do (push expected-symbol expected))
    expected))

(defun assimilate-captures (parser reduced-term)
  "Transfer PARSER's pending captures into REDUCED-TERM.  Each capture is a list whose
   first two elements are a slot name and a value.  A capture whose slot exists on
   REDUCED-TERM is stored into that slot; every other capture is kept on the parser.
   Returns the list of captures that remain on the parser."
  (let ((leftovers '()))
    (dolist (capture (captures parser))
      (let ((slot (first capture))
            (value (second capture)))
        (if (slot-exists-p reduced-term slot)
            (setf (slot-value reduced-term slot) value)
            ;; not consumed by this term: keep it for a later reduction
            (push (list slot value) leftovers))))
    (setf (captures parser) leftovers)))

(defun reduce-production (parser production node-values)
  "Apply PRODUCTION's reduction function to PARSER, the production's rule name, and the
   elements of NODE-VALUES (spread as individual arguments).  Pending captures on the
   parser are then offered to the resulting term (see assimilate-captures).
   Returns the reduced term."
  (let ((reduced-term (apply (reduction production) parser (rule-name production) node-values)))
    (assimilate-captures parser reduced-term)
    ;; assimilate-captures returns the leftover captures, so the term must be
    ;; returned explicitly
    reduced-term))

;; parser context helpers

(defun push-parser-context (parser state node)
  "Push a new context onto PARSER's stack and return the updated stack.

   The parser is a table-driven LR parser and tracks its progress with a stack of
   contexts.  A context is a 2-element list: first the state (a number), then a node
   (see current-parser-node for details).  The state of the topmost context is the one
   the LR parsing algorithm uses for table lookups."
  (with-accessors ((contexts stack)) parser
    (push (list state node) contexts)))

(defun current-parser-context (parser)
  "Return the context on the top of PARSER's stack without removing it (NIL if the stack is empty)."
  (first (stack parser)))

(defun pop-parser-context (parser)
  "Remove the context on the top of PARSER's stack and return it (NIL if the stack is empty)."
  (with-accessors ((contexts stack)) parser
    (pop contexts)))

(defun pop-parser-node (parser)
  "Remove the context on the top of PARSER's stack and return its node (the 2nd element
   of the context).  Returns NIL, leaving the stack untouched, when the stack is empty."
  (when (stack parser)
    (destructuring-bind (state node) (pop-parser-context parser)
      (declare (ignorable state))
      node)))

(defun current-parser-state (parser)
  "Return the state (the 1st element) of the context on the top of PARSER's stack,
   without modifying the stack.  Returns NIL when the stack is empty."
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable node))
      state)))

(defun current-parser-node (parser)
  "Return the node in the context on the top of a parser's stack, without modifying the stack.
  The parser understands a node as a 2-element list, where the first element is a symbol
  identifying the node type (and corresponding to a symbol in the underlying grammar),
  and the second element is (usually) an AST node.  Returns NIL when the stack is empty."
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable state))
      node)))

(defun get-parse-result (parser)
  "Return the result of the parse so far: the node on the top of PARSER's stack."
  (let ((top-node (current-parser-node parser)))
    top-node))

;; parsing

(defun parse-token (parser token)
  "Advance the state of the parser by parsing a single token; does not assume token came from lexer.

   TOKEN is a 2-element list (symbol value); a null TOKEN is treated as (:eof nil).
   Reductions are applied repeatedly until the token is shifted, the input is accepted,
   or the tables offer no usable action for the current state and token symbol.

   Returns 2 values: a status keyword (:continue after a shift, :succeeded on accept,
   :failed otherwise) and the context on the top of the parser's stack."
  (let ((grammar (grammar parser)))
    (destructuring-bind (token-symbol token-value) (or token (list :eof nil))
      (declare (ignorable token-value))
      (loop with continue = t
            with result = nil
            while continue
            do (let* ((stack-state (current-parser-state parser))
                      ;; action-table keys are (state symbol) lists
                      (action (gethash (list stack-state token-symbol)
                                       (entries (actions grammar)))))
                 (if action
                     (destructuring-bind (op arg) action
                       (cond ((equal :shift op)
                              ;; consume the token: ARG is the state to enter
                              (push-parser-context parser arg token)
                              (setf continue nil)
                              (setf result :continue))

                             ((equal :reduce op)
                              ;; ARG is the production to reduce by; the loop keeps going
                              ;; afterwards so the same token is examined in the new state
                              (let* ((production arg)
                                     (reduced-term
                                       (reduce-production
                                        parser
                                        production
                                        ;; we reverse the values, because they were on stack in reverse
                                        (reverse
                                         (loop repeat (length (slot-value production 'rhs))
                                               collect (let ((stack-node (pop-parser-node parser)))
                                                         (destructuring-bind (node-type node-value) stack-node
                                                           (declare (ignorable node-type))
                                                           node-value)))))))
                                (let ((new-stack-state (current-parser-state parser)))
                                  (push-parser-context
                                   parser
                                   (gethash (list new-stack-state (slot-value production 'rule-name))
                                            (entries (gotos grammar)))
                                   (list (slot-value production 'rule-name) reduced-term)))))

                             ((equal :accept op)
                              (setf continue nil)
                              (setf result :succeeded))

                             ;; unrecognized operation in the table
                             (t (setf continue nil)
                                (setf result :failed))))
                     ;; no table entry for this state/symbol pair
                     (progn
                       (setf continue nil)
                       (setf result :failed))))
            finally (return (values result (current-parser-context parser)))))))

(defun parse-input (parser &optional input)
  "Run PARSER over the tokens produced by its lexer until parse-token stops reporting
   :continue.  When INPUT is supplied (non-NIL), it is first installed as the lexer's
   source text.  Returns 2 values: the final status from parse-token and the parse
   result (see get-parse-result)."
  (let ((lexer (lexer parser)))
    (unless (null input)
      (setf (source-text lexer) input))
    ;; feed tokens one at a time; any status other than :continue ends the parse
    (loop
      (let ((status (parse-token parser (next-token lexer))))
        (unless (eq status :continue)
          (return (values status (get-parse-result parser))))))))

(defun make-parser (lexer grammar)
  "Create and return an lalr1-parser that reads tokens from LEXER and is driven by
   GRAMMAR's tables.  The parser's stack is seeded with a single context whose state
   is 0 and whose node is the symbol START-RULE."
  (let ((parser (make-instance 'lalr1-parser :lexer lexer :grammar grammar)))
    (push-parser-context parser 0 'start-rule)
    ;; push-parser-context returns the stack, so return the parser explicitly
    parser))

;; ---------------------------------------------------------------------------------------------------------------------

;; Define a parser called NAME.  The macro derives three names from NAME, all interned
;; in NAME's home package: <NAME>-GRAMMAR, <NAME>-LEXER and the factory MAKE-<NAME>-PARSER.
;; The factory takes no arguments, calls the grammar function, instantiates the lexer
;; class, and hands both to make-parser.
(defmacro defparser (name &key grammar lexer)
  (let ((parser-factory (intern (format nil "MAKE-~a-PARSER" name) (symbol-package name)))
	(grammar-name (intern (format nil "~a-GRAMMAR" name) (symbol-package name)))
	(lexer-name (intern (format nil "~a-LEXER" name) (symbol-package name))))

       ;; NOTE(review): the expansion template looks truncated from here on -- no backquoted
       ;; wrapper form opens before the first comma'd form, the two definitions below are
       ;; never closed, and the GRAMMAR and LEXER arguments are never referenced.
       ;; Presumably they are spliced into defgrammar / deflexer respectively; confirm
       ;; against the upstream file before relying on this macro.
       (defgrammar ,grammar-name

       (deflexer ,lexer-name

       ;; Factory function: builds a fresh grammar and lexer for every parser it creates
       (defun ,parser-factory ()
	 (let ((grammar (,grammar-name))
	       (lexer (make-instance ',lexer-name)))
	   (make-parser lexer grammar))))))