Commits

Anonymous committed 22bec4a

Basic LR0 item-set construction complete, and a suitable extended grammar properly generates fundamental grammar forms.

Comments (0)

Files changed (2)

   :serial t
   :components (
                (:file "package-hh-parse")
-	       (:file "types")
-	       (:file "generics")
-	       (:file "copying")
-	       (:file "grammar")
+	       ;; (:file "types")
+	       ;; (:file "generics")
+	       ;; (:file "copying")
+	       ;; (:file "grammar")
 	       (:file "parser")
-	       (:file "extended")
-	       (:file "continue")
-	       (:file "equality")
-	       (:file "printing")
-	       (:file "tracing")
-	       (:file "source")
+	       ;; (:file "extended")
+	       ;; (:file "continue")
+	       ;; (:file "equality")
+	       ;; (:file "printing")
+	       ;; (:file "tracing")
+	       ;; (:file "source")
 	       )
   :depends-on (
 	       ; external packages
-	       "cl-fad"
+	       :cl-fad
 
 	       ; project packages
-	       "hh-utils"
+	       :hh-utils
 
                )
   )
 (in-package :hh-parse)
 
-;; =====================================================================================================================
-;;
-;; Constants + globals
-;;
-;; =====================================================================================================================
+ #|
+Notation for extended grammar:
 
-(defvar *trace-parser* nil
-  "If true, then parse routines will print tracing information to *trace-output*"
-  )
+(rule-name alt1 alt2 ...)
 
-;; =====================================================================================================================
-;;
-;; Tracing routines
-;;
-;; =====================================================================================================================
+where each term alt1/alt2 etc.s is a list of either symbols, literals, or lists with any of 
+the following as heads:
 
-(defmacro trace-parser (&rest args)
-  `(when *trace-parser*
-     (format *trace-output* ,@args)
-     )
-  )
++ repeat terms 1 or more times
+* repeat terms 0 or more times
+? optional terms: appear 0 or 1 times
+^ alternative terms : choose 1 term to appear
 
-;; =====================================================================================================================
-;;
-;; Implementations
-;;
-;; =====================================================================================================================
+also: 
 
-;; results
+() around terms represents a logical grouping (that is, if the head
+  is none of the above symbols)
 
-(defmacro getf-result (thread node)
-  ;; `(getf (node-results-in ,thread) ,node)
-  (declare (ignorable thread))
-  `(results-of ,node)
-  )
+All of these reduce to a more fundamental grammar (similar to CL-YACC) of:
 
-(defmacro setf-result (thread node result)
-  ;; `(setf (getf-result ,thread ,node) ,result)
-  (declare (ignorable thread))
-  `(setf (results-of ,node) ,result)
-  )
+(rule-name alt1 alt2 ...)
 
-;; top-level algorithm
-(defun parse ( code top-node &key ((:trace *trace-parser*) nil) (stop nil) )
-  (let* (
-	 (parser (make-instance 'parser :code code :stop stop) )
+where 
+
+rule = symbol identifying name of rule
+alt1/alt2 = individual terms that are either names of other rules, or lists, where
+  each list is a sequence of other rule names
+
+Transforming the extended grammar into the fundamental grammar involves the following:
+
+* Treat the head of each rule as its name; set that aside
+* For each item in the tail of rule, treat each item as a discrete alternate right-hand side that is possible 
+  (e.g., a ;; rhs
 	 )
-    (start-thread parser top-node)
-    (parse-input parser)
-    )
-  )
+* For any RHS that is not a list, make it a list
+* Convert () : For each RHS, walk the RHS and convert any list that does not have a recognized head (eg., see above)
+  into a reference to a new rule that has that list as a sequence in its single RHS
+* Convert * : For each occurence in a RHS of a list with * as the head, replace with (? (+ ...)) instead
+* Convert ? : For each occurrence of ? as the head of a list, convert the containing RHS into 2 separate RHS:
+  one with the rest of the list in place of the term containing ? and another without the ? terms at all (as
+  if it's not there)
+* Convert + : For each occurrence of + , convert to a reference to a new rule that has 2 alternatives, one with
+  repetition, one without
 
-(defmethod parse-input ( (parser parser) )
-  (when (slot-boundp parser 'position)
-    (setf (parse-position (code parser) ) (position-of parser) )
-    )
-  (loop until (parse-completed-p parser)
-     do (trace-parser "P: ~s~%" parser)
-     do (progn
-	  ;; get ready for character
-	  (trace-parser "P: Getting ready for character~%")
-	  (loop while (threads-of parser)
-	     do (setf (ready-for-token parser) t)
-	       (loop for thread in (threads-of parser)
-		  do (loop while (and (not (thread-completed-p thread)) (stack-of thread) )
-			;; TODO yeah, still don't have this cleaned up
-			for stack = (stack-of thread)
-			for frame = (first stack)
-			for node = (node-in frame)
-			;; until ready for character
-			until (wants-token-p node)
-			do (continue-parse parser thread node)
-			  )
-		    )
-	     until (ready-for-token parser)
-	       )
+Terminology:
 
-	  ;; handle next character, if any remaining
-	  (loop for thread in (threads-of parser)
-	     ;; theoretically, the nil will cause nodes to start failing
-	     ;; and eventually cause threads to flush, thus exiting the outer loop
-	     ;; also: note that we are passing an eof token (i.e., nil) and allowing
-	     ;; it to be handled before exiting.  This implies end-p can't by itself
-	     ;; be an exit criterion (i.e., checked in parse-completed-p
-	     with character = (if (end-p parser) nil (next-token parser) )
-	     do (trace-parser "P: Reading character ~a~%" character)
-	     do (when character
-		  (setf (last-token-read-successfully parser) nil)
-		  ) 
-	     do (when (and (not (thread-completed-p thread)) (stack-of thread) )
-		  (let*(
-			(stack (stack-of thread) )
-			(frame (first stack))
-			(node (node-in frame))
-			)
-		    (setf (value-of thread) character)
-		    (continue-parse parser thread node)
-		    )
-		  )
-	       )
-	  )
-     finally (return (values (when (parse-results-valid-p parser) 
-			       (parse-result parser)
-			       )
-			     parser
-			     )
-		     )
-       )
-  )
+* A grammar comprises one or more rules
+* A rule is a list whose car is a symbol for the rule name, and the cdr is a list of right-hand side alternatives
+* A right-hand side (or rhs) is a list, each element of which is either the symbolic name of a rule, a list
+  with one of the extended symbols above as it's head, or a literal (terminal)
+* A production is a logical idea only: it's the pairing of a rule name with 1 rhs (so a rule is just a
+  short-hand for a set of productions all with the same rule name)
 
-(defmethod parse-completed-p ( (parser parser) )
-  (or (null (threads-of parser))
-      (and (stop-position-of parser)
-	   (positions-equal (parse-position (code parser) )
-			    (stop-position-of parser)
-			    )
-	   )
-      )
-  )
+|#
 
-(defmethod parse-result ( (parser parser) )
-  (let (
-	(results (results-of parser))
-	)
-    (if (> (length results) 1)
-	results
-	(car results)
-	)
-    )
-  )
+;; ---------------------------------------------------------------------------------------------------------------------
+;; Grammar representations
 
-(defmethod parse-results-valid-p ( (parser parser) )
-  (and (end-p parser) (results-of parser) (last-token-read-successfully parser) )
-  )
+(defvar *grammar* nil "Current grammar under inspection")
+(defvar *new-rules* nil "New rules under construction")
+(defvar *rule-name* "" "Name of current rule--useful for constructing derived rule names")
 
-(defmethod start-thread (parser node &optional (base-thread nil) )
-  (let* (
-	 (copy-context (make-thread-copy-context) )
-	 (thread (if base-thread
-		     (copy-thread base-thread copy-context)
-		     (make-instance 'parse-thread)
-		     )
-	   )
-	 )
-    (setf (id-of thread) (thread-count parser))
-    (incf (thread-count parser))
-    (setf (stack-of thread)
-	  (cons (make-instance 'parse-frame
-			       ;; we have to copy the node "into" the context
-			       ;; of the thread, because the new thread refers
-			       ;; to copies of objects in the base thread--
-			       ;; the node needs to refer to the same copies
-			       :node (copy-node node copy-context)
-			       :position (parse-position (code parser))
-			       )
-		(stack-of thread)
-		)
-	  )
-    (setf (threads-of parser)
-	  (append (threads-of parser)
-		  (list thread)
-		  )
-	  )
-    ;; reset readiness for characters; new thread needs a chance to continue
-    (setf (ready-for-token parser) nil) 
-    thread
-    )
-  )
+(defun compoundtermp (term)
+  (and (listp term) (member (car term) '(+ ? * ^))))
 
-(defmethod thread-completed-p ( (thread parse-thread) )
-  (typep (status-of thread) 'thread-completion-status)
-  )
+(defun make-new-named-rule (rule-name rhss) 	       
+  (let ((rule (cons rule-name rhss)))
+    (setf *new-rules*
+	  (append *new-rules* (list rule)))
+    rule-name))
 
-(defmethod next-token ( (parser parser) )
-  (let (
-	(c (next-parse-token (code parser)) )
-	)
-    (setf (position-of parser) (parse-position (code parser)) )
-    c
-    )
-  )
+(defun for-each-rule (*grammar* rule-func)
+  (loop for rule in *grammar*
+     collect (or (funcall rule-func rule) rule)))
 
-(defmethod end-p ( (parser parser) )
-  (end-parse-tokens-p (code parser) )
-  )
+(defun for-each-rhs (*grammar* rhs-func)
+  (for-each-rule *grammar*
+		 #'(lambda (rule)
+		     (destructuring-bind (rule-name &rest rhss) rule
+		       (cons rule-name (loop for rhs in rhss
+					  collect (or (funcall rhs-func rhs) rhs)))))))
 
-(defmethod call-node (parser thread node)
-  ;; clear before calling, because the value is no longer valid
-  (setf (value-of thread) nil) 
-  (push (make-instance 'parse-frame
-		       :node node
-		       :position (parse-position (code parser) )
-		       )
-	(stack-of thread)
-	)
-  )
+(defun transform-term (term)
+  (cond ((null term) term)
+	((not (listp term)) term)
+	(t ;; some kind of list
+	 ;; checking compound terms first
+	 (let ((term-type (car term)))
+	   (cond ((eq term-type '+)
+		  (let* ((repeat-body (transform-term (cdr term)))
+			 (repeat-rule-name (gensym "REPEAT-")))
+		    (make-new-named-rule repeat-rule-name `( (,repeat-rule-name ,@repeat-body)
+							     (,@repeat-body)) )))
+		 ((eq term-type '*)
+		  ;; cheating here--reusing the ? and + transformations
+		  (transform-term `(? (+ ,@(transform-term (cdr term))))) )
+		 ((eq term-type '^)
+		  (let* ((alternate-body (transform-term (cdr term))) 
+			 (alternate-rule-name (gensym "ALTERNATE-")))
+		    (make-new-named-rule alternate-rule-name 
+					 (loop for alternate in alternate-body
+					    collect (if (listp alternate) alternate (list alternate))))))
+		 ((eq term-type '?)
+		  (let* ((optional-body (transform-term (cdr term)))
+			 (optional-rule-name (gensym "OPTIONAL-")))
+		    (make-new-named-rule optional-rule-name `( ( ,@optional-body)
+							       ( :nil)) )))
+		 (t ;; ordinary list, term list, or group--take your pick of language
+		  (loop for item in term
+		     collect (transform-term item))))))))
 
-(defmethod return-from-node (parser thread value)
-  (setf (value-of thread) value)
-  (setf (last-token-read-successfully parser) 
-	(or (last-token-read-successfully parser)
-	    value
-	    )
-	)
-  (pop (stack-of thread) )
-  (when (null (stack-of thread) )
-    (return-value-to-parser parser thread value)
-    (setf (threads-of parser) (delete thread (threads-of parser)) )
-    (trace-parser "#~a: thread exited with result ~a (~a threads remaining)~%" 
-		  (id-of thread)
-		  value
-		  (length (threads-of parser))
-		  )
-    )
-  )
 
-(defmethod return-value-to-parser (parser thread value)
-  ;; It's possible we do not want this check for empty value,
-  ;; because that implies that grammars that allow empty input
-  ;; should still succeed--but that seems like an uninteresting
-  ;; set to support
-  (if (and (end-p parser) value (not (empty-node-p value) ) )
-      ;; succeeded
-      ;; hmmm....note that an empty-node is a failure
-      (progn
-	(setf (status-of thread) :succeeded)
-	;; several threads may result in the same solution; let's weed those out here
-	(unless (member value (results-of parser) :test #'node-equal)
-	  (setf (results-of parser)
-		(append (results-of parser)
-			(list value)
-			)
-		)
-	  )
-	)
-      ;; failed
-      (setf (status-of thread) :failed)
-      )
-  )
+(defun transform-rhs (rhs)
+  (let ((new-rhs (cond ((null rhs) rhs)
+		       ((compoundtermp rhs)
+			(transform-term rhs))
+		       (t (loop for term in rhs
+			     ;; this is important--we don't treat
+			     ;; an rhs as a term, because then we couldn't recognize
+			     ;; groups as a simple termlist--couldn't know difference
+			     collect (transform-term term))))))
+    (if (listp new-rhs) new-rhs (list new-rhs)))) ;; make sure it's always a list
 
-(defmethod abort-thread ( (parser parser) (thread parse-thread) )
-  (setf (status-of thread) :failed)
-  (setf (threads-of parser) (delete thread (threads-of parser)) )
-  )
+(defun transform-extended-grammar-to-fundamental (grammar) 
+  (let ((*grammar* grammar)
+	(*new-rules* nil))
+    (append (for-each-rhs *grammar*
+			  #'(lambda (rhs)		      
+			      (transform-rhs (if (listp rhs) rhs (list rhs)))))
+	    *new-rules*)))
 
-;; assimilation
+;; ---------------------------------------------------------------------------------------------------------------------
+;; LALR(1) parser construction
 
-(defmethod assimilate-result ( thread (node parse-node) result)
-  (declare (ignorable thread))
-  (setf-result thread node
-	       (append (getf-result thread node)
-		       (list result)
-		       )
-	       )
-  node
-  )
+(defun rule-productions (rule)
+  (destructuring-bind (rule-name &rest rhss) rule
+    (loop for rhs in rhss
+       collect (list rule-name rhs))))
 
-(defmethod assimilate-result ( thread (node parse-node) (result empty-node) )
-  ;; we do nothing
-  node
-  )
+(defun productions-in-grammar (grammar)
+  "Given a grammar, return as rule-name -> rhs pairs (actually, just short lists);
+  because of the way its constructed rule names and rhss are reused, but space for 
+  the each production itself is extra"
+  (loop for rule in  (transform-extended-grammar-to-fundamental grammar)
+       append (rule-productions rule)))
 
-(defmethod assimilate-result ( thread (node parse-node) (result list) )
-  (with-slots (results) node
-    (loop for item in result
-	 do (assimilate-result thread node item)
-	 )
-    )
-  node
-  )
+(defun terminals-in-grammar (grammar)
+  "Given a grammar, find all of the terminals (that is, symbols with no rule) in the grammar"
+  (let ((rules (make-hash-table)))
+    (loop for rule in grammar
+       do (destructuring-bind (rule-name &rest rhss) rule 
+	    (declare (ignore rhss))
+	    (setf (gethash rule-name rules) rule )))))
 
-(defmethod assimilate-result ( thread (node parse-node) (result sequence-node) )
-  (with-slots (results) node
-    (assimilate-result thread node (elements-of result))
-    )
-  node
-  )
+(defun symbols-in-grammar (grammar)
+  (let ((symbols ()))
+    (loop for production in (productions-in-grammar grammar)
+	 do (destructuring-bind (rule-name rhs) production
+	      (pushnew rule-name symbols)
+	      (loop for term in rhs
+		 do (unless (equal term :nil) 
+		      (pushnew term symbols)))))
+    symbols))
 
-(defmethod assimilate-result ( thread (node parse-node) (result character) )
-  (assimilate-result thread node (lit (string result)) )
-  )
+(defun initial-item-for-production (production)
+  (list 0 production))
 
-(defmethod assimilate-result ( thread (node parse-node) (result literal-node) )
-  (symbol-macrolet (
-  		    (results (getf-result thread node) )
-  		    )
-    (let (
-  	  (last-node (car (last results)) )
-  	  )
-      (if (typep last-node 'literal-node)
-  	  ;; append literals together
-  	  (setf (car (last results) )
-  		(lit (concatenate 'string (literal last-node) (literal result)) )
-  		)
-  	  ;; otherwise just add this new result as-is
-  	  (call-next-method)
-  	  )
-      )
-    )
-  node
-  )
+(defun initial-items-for-grammar (grammar start-rule-name)
+  "Start rule should have  single production of the form: rule-name (:bof some-other-rule-name :eof);
+  an item's position is an index into the production's rhs of the *next* unread term.  Thus,
+  the starting position is 1, implying the beginning of the file (:bof) has been read,
+  and some-other-rule-name is the next team waiting to be read"
+  (loop for rule in grammar
+     if (eql (car rule) start-rule-name)
+     return (loop for production in (rule-productions rule)
+	       collect (list 1 production))))
 
-(defmethod consolidated-results (thread node)
-  (let (
-	(results (getf-result thread node) )
-  	)
-    (when results
-      (if (= 1 (length results ) )
-	  (first results )
-	  (make-instance 'sequence-node :elements results )
-	  )
-      )
-    )
-  )
+(defun rule-productions-in-grammar (grammar rule-name)
+  (loop for rule in grammar
+     if (eql (car rule) rule-name)
+     return (rule-productions rule)))
 
-(defmethod empty-node-p ( node )
-  nil
-  )
+(defun item-finished-p (item)
+  (destructuring-bind (position (rule-name rhs)) item
+    (>= position (length rhs))))
 
-(defmethod empty-node-p ( (node empty-node) )
-  t
-  )
+(defun next-term-for-item (item)
+  (unless (item-finished-p item) 
+    (destructuring-bind (position (rule-name rhs)) item
+      (elt rhs position))))
 
-(defmethod wants-token-p ( (node parse-node) )
-  nil
-  )
+(defun increment-item-position (item)
+  (destructuring-bind (position production) item
+    (list (1+ position) production)))
 
-(defmethod wants-token-p ( (node literal-node) )
-  (slot-value node 'called)
-  )
+(defun item< (left right)
+  (labels ((symbol< (left right)
+	     (string< (symbol-name left) (symbol-name right)))
+	   (production< (left right)
+	     (destructuring-bind (left-rule-name left-rhs) left
+	       (destructuring-bind (right-rule-name right-rhs) right
+		 (cond ((symbol< left-rule-name right-rule-name) t)
+		       ((equal left-rule-name right-rule-name)
+			(and (<= (length left-rhs) (length right-rhs))
+			     (loop for left-term in left-rhs
+				for right-term in right-rhs
+				with result = t
+				do (setf result (and result (symbol< left-term right-term)))
+				finally (return result))))
+		       (t nil))))))
+    (destructuring-bind (left-position left-production) left
+      (destructuring-bind (right-position right-production) right
+	(cond ((< left-position right-position) t)
+	      ((> left-position right-position) nil)
+	      (t (production< left-production right-production)))))))
 
-(defmethod wants-token-p ( (node character-class-node) )
-  (slot-value node 'called)
-  )
+(defun first-of-symbol (grammar symbol)
+  "Given a symbol of a grammar, return it's FIRST(); if the symbol is a terminal,
+  then the FIRST is just a list containing the symbol itself.  If it's a 
+  non-terminal, then it's the union of the FIRSTs for the first symbol on
+  the rhs of each of the non-terminals productions"
+  ;; TODO consider whether we need to include non-terminals in our list of firsts,
+  ;; as that may help later when we consider supporting incremental parsing
+  (let ((productions (rule-productions-in-grammar grammar symbol)))
+    (if productions
+	;; non-terminal
+	(let ((firsts ())) 
+	  (loop for production in productions
+	     do (destructuring-bind (rule-name rhs) production
+		  (declare (ignore rule-name))
+		  (loop for first in (first-of-symbol grammar (car rhs))
+		     do (pushnew first firsts))))
+	  firsts)
+	;; terminal -- no productions
+	(list symbol))))
 
-;; capture semantics
+(defun closure-of-items (grammar items)
+  (let ((closure items))
+    (loop for done = t
+       do (loop for item in closure
+	     for productions = (rule-productions-in-grammar grammar (next-term-for-item item))
+	     when productions
+	     do (loop for production in productions
+		   do (let ((initial-item (initial-item-for-production production)))
+			(unless (member initial-item closure :test #'equal)
+			  (setf done nil)
+			  (push initial-item closure)))))
+       until done
+       ;; note: we sort here, to ensure that we can use #'equal as a canonical
+       ;; test of uniqueness (and set membership, too)
+       finally (return (sort closure #'item<)))))
 
-(defmethod capture-value ( (node capture-node) value )
-  (handler-case  (with-slots (slot target transform) node
-		   (setf (slot-value target slot)
-			 (funcall transform value)
-			 )
-		   )
-    (unbound-slot () 
-      ;; just ignore it
-      ;; this is a concession to knowing that most captures 
-      ;; will likely use @ to access slots of another node...a bit of a hack
-      (trace-parser "P: Slot ~a of ~a not bound"
-		    (slot-value node 'slot)
-		    node
-		    )
-      )
-    )
-  )
+(defun goto-for-symbol (grammar items symbol)
+  (closure-of-items grammar 
+		    (loop for item in items
+		       if (equal symbol (next-term-for-item item))
+		       unless (item-finished-p item)
+		       collect (increment-item-position item))))
 
-(defmethod capture-value ( (node capture-accumulate-node) value )
-  (with-slots (slot target transform) node
-    (setf (slot-value target slot)
-	  (append (when (slot-boundp target slot)
-			(slot-value target slot)
-			)
-		  (list (funcall transform value) )
-		  )
-	  )
-    )
-  )
+(defun lr0-items-for-grammar (grammar start-rule-name)
+  (let ((states (list (closure-of-items grammar (initial-items-for-grammar grammar start-rule-name))))
+	(symbols (symbols-in-grammar grammar)))
+    (loop for done = t
+       do (loop for state in states
+	     ;; do (format *standard-output* "State: ~a~%" state)
+	     do (loop for symbol in symbols
+		   do (let ((goto (goto-for-symbol grammar state symbol)))
+			(when (and goto (not (member goto states :test #'equal)))
+			  (setf done nil)
+			  (push goto states)))))
+       until done
+       finally (return (reverse states)))))
 
-;; Implementation for stream-based token streams
+;; ---------------------------------------------------------------------------------------------------------------------
+;; Sample grammars
 
-(defmethod parse-position ( (tokens stream) )
-  (file-position tokens)
-  )
+(let ((grammar-419 (transform-extended-grammar-to-fundamental `((S (^ (:i E :t S SP) :a))
+								(SP (:e (? S)))
+								(E (:b))))))
+  (defun grammar-419 ()
+    grammar-419))
 
-(defmethod (setf parse-position ) ( position (tokens stream) )
-  (file-position tokens position)
-  )
+(let ((grammar-420 (transform-extended-grammar-to-fundamental `(
+								(start (:bof s :dof))
+								(s (l eq r)
+								   (r))
+								(l (star r)
+								   (id))
+								(r (l))))))
+  (defun grammar-420 ()
+    grammar-420))
 
-(defmethod next-parse-token ( (tokens stream) )
-  (read-char tokens nil nil)
-  )
+(let ((numbers-grammar (transform-extended-grammar-to-fundamental `(
+								    ;; literals are digits, decimal, plus, minus
+								    (integer (digit)
+									     (integer digit))
+								    (numeric-value ( (? (^ plus minus)) (integer (? decimal integer))))
+								    (number (:bof numeric-value :eof))
+								    ))))
+  (defun numbers-grammar ()
+    numbers-grammar))
 
-(defmethod end-parse-tokens-p ( (tokens stream) )
-  (equal :eof (peek-char nil tokens nil :eof) )
-  )
+(let ((html-grammar 
+       (transform-extended-grammar-to-fundamental `(( tag-name  (identifier) )
 
-(defmethod positions-equal ( (left t) (right t) )
-  (equal left right)
-  )
+						    ( attribute-name  (identifier) )
 
+						    ( quantity-value  (^ number-value
+									 ( number-value pct-symbol)
+									 ( number-value pct)
+									 ( number-value px) ) )
+
+						    ( attribute-value  (^ quantity-value
+									  string-value))
+
+						    ( attribute  (attribute-name 
+								  es 
+								  equal-sign
+								  es 
+								  attribute-value))
+
+						    ( attribute-list  ( attribute (* ws attribute)))
+
+						    ( start-tag  ( lt 
+								   es 
+								   tag-name
+								   (? ws attribute-list)
+								   es 
+								   gt))
+
+						    ( end-tag  ( lt es forward-slash es tag-name  gt))
+
+						    ( single-tag  ( lt 
+								    es 
+								    tag-name
+								    (? ws attribute-list) 
+								    es 
+								    fs 
+								    es 
+								    gt))
+
+						    ( tag  (^ single-tag
+							      (start-tag 
+							       es 
+							       (* (^ tag html-text))
+							       es 
+							       end-tag)))
+						    (document (:bof tag :eof))))))
+  (defun html-grammar ()
+    html-grammar))
+