Anonymous avatar Anonymous committed 0d6eb9b

Initial implementation of captures

Comments (0)

Files changed (5)

     (declare (ignorable rule-name options))
     rhss))
 
+(defun rule-spec-rule-slots (rule)
+  "From a rule specification find all of the slots for which there are captures (eg., = or +=)"
+  (let ((slots ()))
+    (labels ((collect-captures (term)
+	       (cond ((and (listp term) (or (eql '= (car term)) (eql '+= (car term))))
+		      (push (cadr term) slots))
+		     ((listp term)
+		      (loop for child-term in term
+			 do (collect-captures child-term)))
+		     (t t))))
+      (loop for rhs in (rule-spec-rule-rhss rule)
+	 do (collect-captures rhs))
+      (reverse slots))))
+
+(defun capture-compoundtermp (term)
+  (and (listp term) (member (car term) '(= +=))))
+
+(defun noncapture-compoundtermp (term)
+  (and (listp term) (member (car term) '(+ ? * ^))))
+
 (defun compoundtermp (term)
-  (and (listp term) (member (car term) '(+ ? * ^))))
+  (or (capture-compoundtermp term) (noncapture-compoundtermp term)))
 
 (defun make-new-named-rule (rule-name options rhss) 	       
   (let ((rule (cons rule-name (cons options rhss))))
 					     (loop for rhs in rhss
 						collect (or (funcall rhs-func rhs) rhs))))))))
 
+(defun set-captured-value (parser slot value)
+  (setf (captures parser) 
+	(cons (list slot value) 
+	      (loop for capture in (captures parser)
+		 for (captured-slot captured-value) = capture
+		 unless (equal captured-slot slot)
+		 collect (list captured-slot captured-value)))))
+
+(defun append-captured-value (parser slot value)
+  (setf (captures parser) 
+	(loop for capture in (captures parser)
+	   for (captured-slot captured-value) = capture
+	   if (equal captured-slot slot)
+	   collect (list captured-slot (if (listp captured-value)
+					   (cons value captured-value)
+					   (list value captured-value)))
+	   else 
+	   collect (list captured-slot (list captured-value)))))
+
 (defun transform-term (term)
   (cond ((null term) term)
 	((not (listp term)) term)
 		 ((eq term-type '*)
 		  ;; cheating here--reusing the ? and + transformations
 		  (transform-term `(? (+ ,@(transform-term (cdr term))))) )
+		 ((eq term-type '=)
+		  ;; TODO here's where we need to figure out the name of the value (e.g., the slot name)
+		  ;; to capture, and set things up so we do--perhaps in reduce?
+		  (let* ((capture-slot (cadr term)) ;; slot name is 1st after the =
+			 (capture-body (transform-term (cddr term))) 
+			 (capture-rule-name (genrulesym "CAPTURE-")))
+		    (make-new-named-rule capture-rule-name 
+					 `(reduction (lambda (parser rule-name arg) 
+						       ;; note that with the above signature we expect only 1 arg
+						       ;; that means terms like (= a b c) are not valid, only (= a b)
+						       (let ((reduced-term (make-instance rule-name)))
+							 (set-captured-value parser ',capture-slot (value-of arg))
+							 reduced-term)))
+					 `(,capture-body))))
+		 ((eq term-type '+=)
+		  (let* ((capture-slot (cadr term)) ;; slot name is 1st after the =
+			 (capture-body (transform-term (cddr term))) 
+			 (capture-rule-name (genrulesym "CAPTURE-")))
+		    (make-new-named-rule capture-rule-name 
+					 `(reduction (lambda (parser rule-name arg) 
+						       ;; note that with the above signature we expect only 1 arg
+						       ;; that means terms like (+= a b c) are not valid, only (+= a b)
+						       (let ((reduced-term (make-instance rule-name)))
+							 (append-captured-value parser ',capture-slot (value-of arg))
+							 reduced-term)))
+					 `(,capture-body))))
 		 ((eq term-type '^)
 		  (let* ((alternate-body (transform-term (cdr term))) 
 			 (alternate-rule-name (genrulesym "ALTERNATE-")))
 		       (let ((rhss (rule-spec-rule-rhss rule)))
 			 (when (member `(:nil) rhss :test #'equal)
 			   (progn
-			     ;; (format *standard-output* "New nilable term: ~s~%" term)
 			     (push term nilable)
 			     ;; setting this, too, in case this changes
 			     ;; how we handle RHSs we have already seen--so
 						     for expanded-rhs = (flatten (expand-term rhs position expansion))
 						     do (progn 
 							  (setf expanded t)
-							  ;; (format *standard-output* "Expanded ~s:~s to ~s~%" rule-name rhs expanded-rhs)
 							  (setf transforming t)
 							  (push expanded-rhs new-rhss))))
 					 unless expanded do (push rhs new-rhss))
       (setf new-grammar (remove-unused-rules start-rule-name new-grammar))
       new-grammar)))
 
+(defun transform-for-captures (grammar)
+  (for-each-rule grammar
+		 #'(lambda (rule)
+		     (let ((slots (rule-spec-rule-slots rule))
+			   (options (rule-spec-rule-options rule))
+			   (rule-name (rule-spec-rule-name rule))
+			   (rhss (rule-spec-rule-rhss rule)))
+		       (cons rule-name (cons (append `(slots ,slots) options) rhss))))))
+
 (defun transform-extended-grammar-to-fundamental (start-rule-name grammar ) 
+  ;; TODO this could be done in a pipeline model where different stages evolve the grammar
+  ;; independently, but use (and depend upon) the input of the prior stage
   (transform-for-nil 'start-rule
 		     (let ((*new-rules* nil)
 			   (augmented-grammar (cons `(start-rule () (,start-rule-name)) grammar)))
-		       (append (for-each-rhs augmented-grammar
-					     #'(lambda (rhs)		      
-						 (transform-rhs (if (listp rhs) rhs (list rhs)))))
+		       (append (for-each-rhs (transform-for-captures augmented-grammar)
+					      #'(lambda (rhs)		      
+						  (transform-rhs (if (listp rhs) rhs (list rhs)))))
 			       *new-rules*))))
 
 ;; ---------------------------------------------------------------------------------------------------------------------
 
 ;; printing
 
+(defgeneric print-rule-slots (rule stream)
+  (:documentation "Print a rule's slots to a stream"))
+
 (defmethod print-object ((obj production) stream)
   (print-unreadable-object (obj stream :type t :identity t)
     (with-slots (rule-name options rhs) obj
       (format stream "Rule=~s Options=~s RHS=~s" rule-name options rhs))))
 
+(defmethod print-object ((obj rule-node) stream)
+  (print-unreadable-object (obj stream :type t :identity t)
+    (with-slots (value children) obj
+      (format stream "~@[Value=~s~] ~@[Children=~s~]" value children)
+      (print-rule-slots obj stream))))
+
 (defmethod print-object ((obj lr-parse-table) stream)
   (print-unreadable-object (obj stream :type t :identity t)
     (format stream "Max states=~a~%" (number-of-states obj))
 
 ;; specifications
 
+;; TODO this routine gets called multiple times during grammar creation, but
+;; the results are not preserved between invocations--so a *lot* of work happens
+;; each time, only to be thrown away
 (defun rule-productions (rule)
   (destructuring-bind (rule-name options &rest rhss) rule
     (loop for rhs in rhss
        collect (let ((production (make-instance 'production :rule rule-name :rhs rhs)))
 		 (loop for (slot value) on options by #'cddr
 		    ;; TODO consider eval'ing the value first; depends on use cases for doing so
+		    when (slot-exists-p production slot) 
 		    do (setf (slot-value production slot) (eval value)))
 		 production))))
 
 ;; ---------------------------------------------------------------------------------------------------------------------
 ;; Reductions
 
-(defun list-terms (rule-name &rest args)
+(defun list-terms (parser rule-name &rest args)
+  (declare (ignorable parser))
   (make-instance rule-name :children args))
 
-(defun ignore-terms (rule-name &rest args)
+(defun ignore-terms (parser rule-name &rest args)
+  (declare (ignorable parser))
   (declare (ignorable args))
   (make-instance rule-name))
 
     short-hand for a set of productions all with the same rule name)
 "
   (let* ((specification (transform-extended-grammar-to-fundamental start-rule-name rules))
-	 (ast-classes (loop for rule in (non-terminals-in-grammar specification)
-			   collect `(defclass ,rule (ast-node) ()))))
+	 (ast-classes (loop for rule-name in (non-terminals-in-grammar specification)
+			 for rule = (assoc rule-name specification)
+			 for rule-options = (rule-spec-rule-options rule)
+			 for rule-slots = (getf rule-options 'slots )
+			 collect `(progn 
+				    (defclass ,rule-name (rule-node) (,@rule-slots))
+				    (defmethod print-rule-slots ((rule ,rule-name) stream)
+				      ,@(loop for slot in rule-slots
+				    	   collect `(when (slot-boundp rule ',slot) 
+				    		      (format stream " ~s=~s" ',slot (slot-value rule ',slot)))))))))
     `(progn
        ,@ast-classes
        (let ((grammar (make-grammar ',specification)))

package-hh-parse.lisp

    ;; grammars
    ;; #:+ from CL
    ;; #:* from CL
+   ;; #:= from CL
+   ;; #:+= from CL
    #:? 
    #:^ 
 
 	 do (push expected-symbol expected))
     expected))
 
-(defun reduce-production (production symbol-values)
-  (apply (reduction production) (rule-name production) symbol-values))
+(defun assimilate-captures (parser reduced-term)
+  (let ((remaining-captures ()))
+    (loop for (slot value) in (captures parser)
+       if (slot-exists-p reduced-term slot)
+       do (setf (slot-value reduced-term slot) value)
+       else do (push (list slot value) remaining-captures))
+    (setf (captures parser) remaining-captures)))
+
+(defun reduce-production (parser production node-values)
+  (let ((reduced-term (apply (reduction production) parser (rule-name production) node-values)))
+    (assimilate-captures parser reduced-term)
+    reduced-term))
 
 ;; parser context helpers
 
 
 			    ((equal :reduce op)
 			     (let* ((production arg)
-				    (reduced-term (reduce-production production 
+				    (reduced-term (reduce-production parser
+								     production 
 								     ;; we reverse the values, because they were on stack in reverse
 								     (reverse (loop for i from 1 to (length (slot-value production 'rhs))
 										 collect (let ((stack-node (pop-parser-node parser)))
   (number-term () (numeric-value)))
 
 (defgrammar html-grammar document
-  ( tag-name ()  (identifier) )
+  ( tag-name ()   (= name identifier) )
   
   ( attribute-name ()  (identifier) )
 
   ((value :initform () :initarg :value :accessor value-of)
    (children :initform () :initarg :children :accessor children)))
 
+(defclass rule-node (ast-node) ())
+
 (defclass production ()
   ((rule-name :initarg :rule :accessor rule-name)
    (reduction :initform #'list-terms :initarg :reduction :accessor reduction)
           (this value in the entry on the top of the stack represents the current state of the parser).  The 2nd item is a token
           or value (terminology is still messy).  This token or value is itself a 2-item list, with the first item being
           a symbol in the grammar corresponding to that node that is on the stack (could be a token from a lexer, or a non-terminal
-          after reductions have occurred).  The 2nd item is the actual AST node built by the lexer and parser as parsing progresses. ")))
+          after reductions have occurred).  The 2nd item is the actual AST node built by the lexer and parser as parsing progresses. ")
+   (captures :initform () :initarg :captures :accessor captures)))
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.