1. Phil Hargett
  2. HH-Parse

Commits

Phil Hargett  committed a445ae1

Splitting out grammar into separate file (and overwriting what we had before)

  • Participants
  • Parent commits 22bec4a
  • Branches parsing

Comments (0)

Files changed (2)

File grammar.lisp

View file
  • Ignore whitespace
 (in-package :hh-parse)
 
-(defmacro parser (&key (counter 0) (threads nil) (results nil) (position nil) (stop nil))
-  `(make-instance 'parser
-		  :counter ,counter
-		  :threads (list ,@threads)
-		  :results (list ,@results)
-		  ,@(when position
-			  `(:position ,position)
-			  )
-		  ,@(when stop
-			  `(:stop ,stop)
-			  )
-		  )
-  )
+ #|
+Notation for extended grammar:
 
-;; macro object factories
+(rule-name alt1 alt2 ...)
 
-(defmacro thread (&key (id nil) (status nil) (stack nil) (value nil) )
-  `(make-instance 'parse-thread
-		  ,@(when id
-			  `(:id ,id)
-			  )
-		  ,@(when status
-			  `(:status ,status)
-			  )
-		  :stack (list ,@stack)
-		  :value ,value
-		  )
-  )
+where each term alt1/alt2 etc.s is a list of either symbols, literals, or lists with any of 
+the following as heads:
 
-(defmacro frame (&key (node nil) (position nil))
-  `(make-instance 'parse-frame 
-		  ,@(when node
-			  `(:node ,node)
-			  )
-		  ,@(when position
-			  `(:position ,position)
-			  )
-		  )
-  )
++ repeat terms 1 or more times
+* repeat terms 0 or more times
+? optional terms: appear 0 or 1 times
+^ alternative terms : choose 1 term to appear
 
-(defmacro seq (&rest body)
-  `(make-instance 'sequence-node
-		  :elements (list ,@body)
-		  )
-  )
+also: 
 
-(defmacro alt (&rest body)
-  `(make-instance 'alternatives-node
-		  :alternatives (list ,@body)
-		  )
-  )
+() around terms represents a logical grouping (that is, if the head
+  is none of the above symbols)
 
-(defmacro opt (&rest body)
-  `(make-instance 'optional-node
-		  :element (seq ,@body)
-		  )
-  )
+All of these reduce to a more fundamental grammar (similar to CL-YACC) of:
 
-(defmacro rep (&rest body)
-  `(make-instance 'repeat-node
-		  ;; :element (seq ,@body)
-		  :element (lambda () (seq ,@body) )
-		  )
-  )
+(rule-name alt1 alt2 ...)
 
-(defmacro lit (literal &key (index nil) (wants nil) )
-  `(make-instance 'literal-node :literal ,literal
-		  ,@(when index
-			  `(:index ,index)
-			  )
-		  ,@(when wants
-			  `(:wants ,wants)
-			  )
-		  )
-  )
+where 
 
-(defmacro ws (&rest characters)
-  "Represents whitespace that must be present (can be any size other than zero)"
-  `(rep (alt ,@(mapcar (lambda (c)
-			 `(lit (string ,c) )
-			 )
-		       (or characters `(#\Newline #\Return #\Tab #\Space #\Page) )
-		)
-	     )
-	)
-  )
+rule = symbol identifying name of rule
+alt1/alt2 = individual terms that are either names of other rules, or lists, where
+  each list is a sequence of other rule names
 
-(defmacro cc ( test )
-  "Tests for whether a character belongs to a specific character class; test should
-   evaluate to a funcallable"
-  `(make-instance 'character-class-node :test ,test)
-  )
+Transforming the extended grammar into the fundamental grammar involves the following:
 
+* Treat the head of each rule as its name; set that aside
+* For each item in the tail of rule, treat each item as a discrete alternate right-hand side that is possible 
+  (e.g., a ;; rhs
+	 )
+* For any RHS that is not a list, make it a list
+* Convert () : For each RHS, walk the RHS and convert any list that does not have a recognized head (eg., see above)
+  into a reference to a new rule that has that list as a sequence in its single RHS
+* Convert * : For each occurence in a RHS of a list with * as the head, replace with (? (+ ...)) instead
+* Convert ? : For each occurrence of ? as the head of a list, convert the containing RHS into 2 separate RHS:
+  one with the rest of the list in place of the term containing ? and another without the ? terms at all (as
+  if it's not there)
+* Convert + : For each occurrence of + , convert to a reference to a new rule that has 2 alternatives, one with
+  repetition, one without
 
+Terminology:
 
-(defmacro es (&rest characters)
-  "Represents optional whitespace; useful for showing that space between nodes is allowed but not required"
-  `(opt (ws ,@characters))
-  )
+* A grammar comprises one or more rules
+* A rule is a list whose car is a symbol for the rule name, and the cdr is a list of right-hand side alternatives
+* A right-hand side (or rhs) is a list, each element of which is either the symbolic name of a rule, a list
+  with one of the extended symbols above as it's head, or a literal (terminal)
+* A production is a logical idea only: it's the pairing of a rule name with 1 rhs (so a rule is just a
+  short-hand for a set of productions all with the same rule name)
 
-(defmacro @ (&rest slots)
-    `(lambda (obj)
-       ,(loop for i in slots
-	    for v = `(slot-value obj (quote,i)) then `(slot-value ,v (quote ,i) )
-	    finally (return v)
-	      )
-       )
-  )
+|#
 
-(defmacro rule (name (&rest args) body
-		&key
-		(slots nil)
-		)
-  (let (
-	(node-class-name (intern (format nil "~a-NODE" 
-					 (symbol-name name)
-					 )
-				 (symbol-package name)
-				 )
-	  )
-	(slot-list (append args slots))
-	)
-    `(progn
+;; ---------------------------------------------------------------------------------------------------------------------
+;; Grammar representations
 
-       (defclass ,node-class-name (rule-node)
-	 ,slot-list
-	 )
+(defvar *grammar* nil "Current grammar under inspection")
+(defvar *new-rules* nil "New rules under construction")
+(defvar *rule-name* "" "Name of current rule--useful for constructing derived rule names")
 
-       (defmacro ,name ,args
-	 (let (
-	       (arg-list (quote ,args) )
-	       (val-list (list ,@args) )
-	       (node-class-name (quote ,node-class-name))
-	       )
-	   `(apply (lambda ()
-		     (let (
-			   (node (make-instance (quote ,node-class-name) ) )
-			   )
-		       ,@(mapcar (lambda (s v)
-				   `(setf (slot-value node (quote ,s)) ,v)
-				   )
-				 arg-list
-				 val-list
-				 )
-		       node
-		       )
-		     )
-		   nil
-		   )
-	   )     
-	 )
+(defun compoundtermp (term)
+  (and (listp term) (member (car term) '(+ ? * ^))))
 
-       (defmethod node-equal ( (left ,node-class-name) (right ,node-class-name) )
-	 (loop for slot in (quote ,slot-list)
-	    with is-equal = t
-	    do (if (and (slot-boundp left slot) (slot-boundp right slot) )
-		   (setf is-equal (let (
-				    (left-slot-value (slot-value left slot) )
-				    (right-slot-value (slot-value right slot) )
-				    )
-				(and is-equal
-				     (node-equal left-slot-value right-slot-value)
-				     )
-				)
-		     )
-		   (setf is-equal
-			 (and is-equal
-			      (not (or (slot-boundp left slot) (slot-boundp right slot)) )
-			      )
-			 )
-		   )
-	    finally (return is-equal)
-	      )
-	 )
+(defun make-new-named-rule (rule-name rhss) 	       
+  (let ((rule (cons rule-name rhss)))
+    (setf *new-rules*
+	  (append *new-rules* (list rule)))
+    rule-name))
 
-       (defmethod copy-node ( (node ,node-class-name) &optional (context-hash nil) )
-       	 (copy-if-needed (context-hash node)
-       			 (let (
-       			       (new-node (make-instance (quote ,node-class-name)) )
-       			       )
-       			   ;; copy all slots
-       			   ;; TODO clean this up
-       			   (loop for slot in `(called results)
-       			      do (when (slot-boundp node slot)
-       				   (setf (slot-value new-node slot)
-       					 (copy-node (slot-value node slot) context-hash)
-       					 )
-       				   )
-       				)
-			   
-       			   (loop for slot in (quote ,slot-list)
-       			      do (when (slot-boundp node slot)
-       				   (setf (slot-value new-node slot)
-       					 (copy-node (slot-value node slot) context-hash)
-       					 )
-       				   )
-       				)
-       			   new-node
-       			   )
-       			 )
-       	 )
+(defun for-each-rule (*grammar* rule-func)
+  (loop for rule in *grammar*
+     collect (or (funcall rule-func rule) rule)))
 
-       (defmethod print-object ( (obj ,node-class-name) stream)
-	 (print-unreadable-object (obj stream :type t :identity t)
-	   (format stream "~{ ~a=~s~}" 
-		   (loop for slot in (quote ,slot-list)
-		      collect slot
-		      if (slot-boundp obj slot)
-		      collect (slot-value obj slot)
-		      else collect "<unbound>"
-			)
-		   )
-	   )
-	 )
+(defun for-each-rhs (*grammar* rhs-func)
+  (for-each-rule *grammar*
+		 #'(lambda (rule)
+		     (destructuring-bind (rule-name &rest rhss) rule
+		       (cons rule-name (loop for rhs in rhss
+					  collect (or (funcall rhs-func rhs) rhs)))))))
 
-       (defmethod continue-parse ( parser thread (node ,node-class-name) )
-	 (macrolet (
-		    (capture (slot value &optional (transform '#'identity))
-		      `(make-instance 'capture-node
-				      :target node
-				      :slot (quote ,slot)
-				      :value ,value
-				      :transform ,transform
-				      )
-		      )
-		    (accumulate (slot value &optional (transform '#'identity))
-		      `(make-instance 'capture-accumulate-node
-				      :target node
-				      :slot (quote ,slot)
-				      :value ,value
-				      :transform ,transform
-				      )
-		      )
-		    )
-	   (with-slots (called ,@args) node
-	     (if called
-		 ;; already called the rule's constructed node
-		 (if (value-of thread)
-		     (return-from-node parser thread node)
-		     (return-from-node parser thread nil)
-		     )
-		 ;; prepare
-		 (progn
-		   (setf called t)
-		   (call-node parser thread (progn ,body) )
-		   )
-		 )
-	     )
-	   )
-	 )
+(defun transform-term (term)
+  (cond ((null term) term)
+	((not (listp term)) term)
+	(t ;; some kind of list
+	 ;; checking compound terms first
+	 (let ((term-type (car term)))
+	   (cond ((eq term-type '+)
+		  (let* ((repeat-body (transform-term (cdr term)))
+			 (repeat-rule-name (gensym "REPEAT-")))
+		    (make-new-named-rule repeat-rule-name `( (,repeat-rule-name ,@repeat-body)
+							     (,@repeat-body)) )))
+		 ((eq term-type '*)
+		  ;; cheating here--reusing the ? and + transformations
+		  (transform-term `(? (+ ,@(transform-term (cdr term))))) )
+		 ((eq term-type '^)
+		  (let* ((alternate-body (transform-term (cdr term))) 
+			 (alternate-rule-name (gensym "ALTERNATE-")))
+		    (make-new-named-rule alternate-rule-name 
+					 (loop for alternate in alternate-body
+					    collect (if (listp alternate) alternate (list alternate))))))
+		 ((eq term-type '?)
+		  (let* ((optional-body (transform-term (cdr term)))
+			 (optional-rule-name (gensym "OPTIONAL-")))
+		    (make-new-named-rule optional-rule-name `( ( ,@optional-body)
+							       ( :nil)) )))
+		 (t ;; ordinary list, term list, or group--take your pick of language
+		  (loop for item in term
+		     collect (transform-term item))))))))
 
-       )
-    )
-  )
+
+(defun transform-rhs (rhs)
+  (let ((new-rhs (cond ((null rhs) rhs)
+		       ((compoundtermp rhs)
+			(transform-term rhs))
+		       (t (loop for term in rhs
+			     ;; this is important--we don't treat
+			     ;; an rhs as a term, because then we couldn't recognize
+			     ;; groups as a simple termlist--couldn't know difference
+			     collect (transform-term term))))))
+    (if (listp new-rhs) new-rhs (list new-rhs)))) ;; make sure it's always a list
+
+(defun transform-extended-grammar-to-fundamental (grammar) 
+  (let ((*grammar* grammar)
+	(*new-rules* nil))
+    (append (for-each-rhs *grammar*
+			  #'(lambda (rhs)		      
+			      (transform-rhs (if (listp rhs) rhs (list rhs)))))
+	    *new-rules*)))
+
+;; ---------------------------------------------------------------------------------------------------------------------
+;; Sample grammars
+
+(let ((grammar-419 (transform-extended-grammar-to-fundamental `((S (^ (:i E :t S SP) :a))
+								(SP (:e (? S)))
+								(E (:b))))))
+  (defun grammar-419 ()
+    grammar-419))
+
+(let ((grammar-420 (transform-extended-grammar-to-fundamental `(
+								(start (:bof s :dof))
+								(s (l eq r)
+								   (r))
+								(l (star r)
+								   (id))
+								(r (l))))))
+  (defun grammar-420 ()
+    grammar-420))
+
+(let ((numbers-grammar (transform-extended-grammar-to-fundamental `(
+								    ;; literals are digits, decimal, plus, minus
+								    (integer (digit)
+									     (integer digit))
+								    (numeric-value ( (? (^ plus minus)) (integer (? decimal integer))))
+								    (number (:bof numeric-value :eof))
+								    ))))
+  (defun numbers-grammar ()
+    numbers-grammar))
+
+(let ((html-grammar 
+       (transform-extended-grammar-to-fundamental `(( tag-name  (identifier) )
+
+						    ( attribute-name  (identifier) )
+
+						    ( quantity-value  (^ number-value
+									 ( number-value pct-symbol)
+									 ( number-value pct)
+									 ( number-value px) ) )
+
+						    ( attribute-value  (^ quantity-value
+									  string-value))
+
+						    ( attribute  (attribute-name 
+								  es 
+								  equal-sign
+								  es 
+								  attribute-value))
+
+						    ( attribute-list  ( attribute (* ws attribute)))
+
+						    ( start-tag  ( lt 
+								   es 
+								   tag-name
+								   (? ws attribute-list)
+								   es 
+								   gt))
+
+						    ( end-tag  ( lt es forward-slash es tag-name  gt))
+
+						    ( single-tag  ( lt 
+								    es 
+								    tag-name
+								    (? ws attribute-list) 
+								    es 
+								    fs 
+								    es 
+								    gt))
+
+						    ( tag  (^ single-tag
+							      (start-tag 
+							       es 
+							       (* (^ tag html-text))
+							       es 
+							       end-tag)))
+						    (document (:bof tag :eof))))))
+  (defun html-grammar ()
+    html-grammar))
+

File hh-parse.asd

View file
  • Ignore whitespace
 	       ;; (:file "types")
 	       ;; (:file "generics")
 	       ;; (:file "copying")
-	       ;; (:file "grammar")
+	       (:file "grammar")
 	       (:file "parser")
 	       ;; (:file "extended")
 	       ;; (:file "continue")