Source

HH-Parse / parser.lisp

;; Copyright (c) 2010 Phil Hargett

;; Permission is hereby granted, free of charge, to any person obtaining a copy
;; of this software and associated documentation files (the "Software"), to deal
;; in the Software without restriction, including without limitation the rights
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
;; copies of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:

;; The above copyright notice and this permission notice shall be included in
;; all copies or substantial portions of the Software.

;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
;; THE SOFTWARE.

(in-package :hh-parse)

;; ---------------------------------------------------------------------------------------------------------------------
;; LALR(1) parser construction

;; Source code helper
(defmethod source-text ((parser lalr1-parser))
  (source-text (lexer parser)))

(defmethod (setf source-text) (text (parser lalr1-parser) )
  ;; Drop existing lines
  (setf (source-text (lexer parser)) text))

(defun expected-next-symbols (parser)
  "Given a parser's current state, return a list of valid symbols (terminal and non-terminal) that would advance the parse"
  (let ((state (caar (stack parser)))
	(actions (entries (actions (grammar parser))))
	(expected ()))
    (loop for (expected-state expected-symbol) being the hash-key of actions
	 if (= state expected-state ) 
	 do (push expected-symbol expected))
    expected))

(defun assimilate-captures (parser reduced-term)
  (let ((remaining-captures ()))
    (loop for (slot value) in (captures parser)
       if (slot-exists-p reduced-term slot)
       do (setf (slot-value reduced-term slot) value)
       else do (push (list slot value) remaining-captures))
    (setf (captures parser) remaining-captures)))

(defun reduce-production (parser production node-values)
  (let ((reduced-term (apply (reduction production) parser (rule-name production) node-values)))
    (assimilate-captures parser reduced-term)
    reduced-term))

;; parser context helpers

(defun push-parser-context (parser state node)
  "Because the parser is a table-driven LR parser, the parser uses a stack to manage
   its progress through a parse.  The entries in this stack are contexts, each of which
   is a 2-element list.  The 1st element in a context is the state (a number) associated
   with the context: the state in the context on the top of the stack is the state
   used by the LR parsing algorithm for table lookups.  The 2nd element is a node (see
   current-parser-node for details)."
  (push (list state node) (stack parser)))

(defun current-parser-context (parser)
  (car (stack parser)))

(defun pop-parser-context (parser)
  (pop (stack parser)))

(defun pop-parser-node (parser)
  (when (stack parser)
    (destructuring-bind (state node) (pop-parser-context parser)
      (declare (ignorable state))
      node)))

(defun current-parser-state (parser)
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable node))
      state)))

(defun current-parser-node (parser)
  "Return the node in the context on the top of a parser's stack. The parser understands a node as a 2-element list,
  where the first element is a symbol identifying the node type (and corresponding to a symbol in the underlying grammar),
  and the second element is (usually) an AST node"
  (when (stack parser)
    (destructuring-bind (state node) (current-parser-context parser)
      (declare (ignorable state))
      node)))

(defun get-parse-result (parser)
  (current-parser-node parser))

;; parsing

(defun parse-token (parser token)
  "Advance the state of the parser by parsing a single token; does not assume token came from lexer"
  (let ((grammar (grammar parser)))
    (destructuring-bind (token-symbol token-value) (if token token (list :eof nil))
      (declare (ignorable token-value))
      (loop with continue = t
	 with result = nil
	 while continue
	 do (let ((stack-state (current-parser-state parser)))
	      (let ((action (gethash (list stack-state token-symbol) (entries (actions grammar)))))
		(if action
		    (destructuring-bind (op arg) action

		      (cond ((equal :shift op)
			     (let ((next-state arg))
			       (push-parser-context parser next-state token)
			       (setf continue nil)
			       (setf result :continue)))

			    ((equal :reduce op)
			     (let* ((production arg)
				    (reduced-term (reduce-production parser
								     production 
								     ;; we reverse the values, because they were on stack in reverse
								     (reverse (loop for i from 1 to (length (slot-value production 'rhs))
										 collect (let ((stack-node (pop-parser-node parser)))
											   (destructuring-bind (node-type node-value) stack-node
											     (declare (ignorable node-type))
											     node-value)))))))
			       (let ((new-stack-state (current-parser-state parser)))
				 (push-parser-context parser 
						      (gethash (list new-stack-state (slot-value production 'rule-name )) (entries (gotos grammar)))
						      (list (slot-value production 'rule-name) reduced-term)))))

			    ((equal :accept op)
			     (setf continue nil)
			     (setf result :succeeded))

			    (t (setf continue nil)
			       (setf result :failed))))
		    (progn
		      (setf continue nil)
		      (setf result :failed)))))
	 finally (return (values result (current-parser-context parser)))))))

(defun parse-input (parser &optional input)
  (let ((lexer (lexer parser)))
    (when input (setf (source-text lexer) input))
    (loop for result = (parse-token parser (next-token lexer))
       while (equal :continue result)
       finally (return (values result (get-parse-result parser))))))

(defun make-parser (lexer grammar)
  (let ((parser (make-instance 'lalr1-parser :lexer lexer :grammar grammar)))
    (push-parser-context parser 0 (start grammar))
    parser))

;; ---------------------------------------------------------------------------------------------------------------------
;;

(defmacro defparser (name &key grammar lexer)
  (let ((parser-factory (intern (format nil "MAKE-~a-PARSER" name) (symbol-package name)))
	(grammar-name (intern (format nil "~a-GRAMMAR" name) (symbol-package name)))
	(lexer-name (intern (format nil "~a-LEXER" name) (symbol-package name))))
    `(progn 

       (defgrammar ,grammar-name
	   ,@grammar)

       (deflexer ,lexer-name
	   ,@lexer)

       (defun ,parser-factory ()
	 (let ((grammar (,grammar-name))
	       (lexer (make-instance ',lexer-name)))
	   (make-parser lexer grammar))))))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.