Commits

Anonymous committed be6bf27

Added defgrammar and deflexer to streamline new definitions of both. Simplified exports and added to package file. Verifying basic HTML parsing still works.

Comments (0)

Files changed (7)

        until done
        finally (return  states))))
 
-(defun make-grammar (specification start-rule-name)
-  (let* ((states (lr1-states-for-grammar specification start-rule-name))
+(defun make-grammar (specification)
+  (let* ((states (lr1-states-for-grammar specification 'start))
 	 (non-terminals (non-terminals-in-grammar specification))
 	 (action-table (make-instance 'lr-parse-table :states (length states)))
 	 (goto-table (make-instance 'lr-parse-table :states (length states))))
       		 ;; goto table
       		 do (loop for non-terminal in non-terminals
       			 do (record-goto i non-terminal)))))
-    (make-instance 'lalr1-grammar :specification specification :states states :actions action-table :gotos goto-table)))
+    (make-instance 'lalr1-grammar :specification specification :states states :actions action-table :gotos goto-table)))
+
+;; ---------------------------------------------------------------------------------------------------------------------
+;;
+
+(defmacro defgrammar (name start-rule-name &rest rules)
+  `(let* ((specification (transform-extended-grammar-to-fundamental ',start-rule-name ',rules))
+	  (grammar (make-grammar specification)))
+     (defun ,name ()
+       grammar)))
   :serial t
   :components (
                (:file "package-hh-parse")
+	       (:file "lexer")
 	       (:file "grammar")
-	       (:file "lexer")
 	       (:file "parser")
 	       (:file "source")
 	       (:file "samples")
 ;;
 ;; ---------------------------------------------------------------------------------------------------------------------
 
-;; (defmacro deflexer (name char-var &rest body)
-;;   `(progn
-;;      (defclass ,name (lexer))
+(defmacro deflexer (name (&optional (initial-state nil)) &rest token-definitions)
+  "Define a lexer whose class is the provided name, the initial state of the lexer is initial,
+and token definitions are a set of rules defining the tokens recognized by the lexer.  Token definitions should be
+in the following form:
 
-;;      (defmethod next-token ((lexer ,name))
-;;        (with-slots (source position state)
-;; 	   (let ((,char-var (next-character source position state)))
-	     
-;; 	     ,@body)))))
+  ;; (state character-test token-type &key ((:state next-state) nil) ((:accumulate accumulation-test) nil))
+
+where forms WITH an accumulation test expand to:
+
+  ;; ((and (equal ',state state) (funcall ,character-test c))
+  ;;  (return-accumulated-token ',token-type ,accumulation-test))
+
+and forms WITHOUT an accumulation test expand to:
+
+ ;; ((and (equal ',state state) (funcall ,character-test c))
+ ;;  (return-token ',token-type))
+
+In both cases, if next-state is non-nil, then the expanded token rule will cause
+the lexer to change state after recognizing the token.
+
+One helpful feature (for some token types) is that if t is passed for character-test,
+then the test matches any character--no need to pass in a full lambda just to always return t.
+
+An additional helpful feature is that if character-test is a character constant
+and not a function, then the character-test will be a simple lambda:
+
+  ;; (lambda (c) (char= ,character-test c))
+
+For character tests, the single argument passed into the test is the character
+just read from the lexer's source.  For accumulation tests, the single argument
+passed to the test is a lookahead: that is, if the accumulation test returns true, then this
+character will be read from the lexer stream and consumed; if the accumulation
+test returns false, the character is not consumed and accumulation of a token
+completes. 
+
+
+"
+  (let ((token-rules 
+	 (loop for token in token-definitions
+	    collect (destructuring-bind (state character-test token-type &key ((:state next-state) nil) ((:accumulate accumulation-test) nil)) token
+		      (let ((actual-character-test (cond ((equal t character-test)
+							  t)
+							 ((functionp character-test) `(funcall ,character-test c))
+							 ((and (listp character-test) (equal 'lambda (car character-test)))
+							  `(funcall ,character-test c))
+							 ((and (listp character-test) (equal 'function (car character-test)))
+							  `(funcall ,character-test c))
+							 ((characterp character-test)
+							  `(char= c ,character-test))
+							 (t (error "Bad character test ~s in token definition: ~s~%" character-test token)))))
+			(if accumulation-test
+			    ;; with accumulation test
+			    `((and (equal ,state state) ,actual-character-test)
+			      ,(when next-state `(setf state ,next-state))
+			      (return-accumulated-token ',token-type ,accumulation-test))
+
+			    ;; without accumulation test
+			    `((and (equal ,state state) ,actual-character-test)
+			      ,(when next-state `(setf state ,next-state))
+			      (return-token ',token-type))))))))
+    `(progn
+       (defclass ,name (lexer)
+	 ((state :initform ,initial-state)))
+
+       (defmethod next-token ((lexer ,name))
+	 (with-slots (source position state) lexer
+	   (let ((c (next-lex-character source position))
+		 (token-value (make-array `(0) :element-type 'character :adjustable t :fill-pointer t)))
+	     (labels ((accumulate (c)
+			(vector-push-extend c token-value))
+		      (return-accumulated-token (type test)
+			(accumulate c)
+			(loop for nc = (current-lex-character source position)
+			   while (and nc (funcall test nc))
+			   do (progn 
+				(accumulate nc)
+				(incf-lex-position source position))
+			   finally (return (list type token-value))))
+		      (return-token (type)
+			(list type c)))
+	       (when c
+		 (cond ,@token-rules)))))))))
 
 (defmethod copy-lexer ((lexer lexer))
   (with-slots (source position state value) lexer

package-hh-parse.lisp

   (:export
    
    ;; Exported symbols go here
-   #:parse
 
-   #:seq
-   #:alt
-   #:rep
-   #:opt
-   #:lit
-   #:rule
-   #:ws
-   #:es
-   #:cc
-   #:capture
-   #:accumulate
-   #:@
+   ;; source
+   #:source-code-file
+   #:make-source
+  
+   #:source-text
+   #:insert-line
+   #:insert-text
 
-   #:parser
-   #:thread
-   #:frame
+   #:lines-of
+   #:current-position
+   #:current-line
+   #:current-line-position
+   #:current-column-position
 
-   #:node-equal
-   
-   )
-  )
+   ;; grammars
+   #:defgrammar
+
+   ;; lexers
+   #:deflexer
+   #:source
+   #:state
+   ;; #:position already experted by CL-USER
+   #:lexer-error
+   #:next-token
+   #:copy-lexer
+
+   ;; parsers
+   #:make-parser
+   #:parse-token
+   #:parse-input
+   #:get-parse-result
+
+   ;; samples
+   #:html-grammar
+   #:html-lexer
+
+   ))
 
 (defun parse-input (parser)
   (let ((lexer (lexer parser)))
-    (loop for token = (parse-token parser (next-token lexer))
-       while (equal :continue token)
-       finally (return (values token (stack parser)))
-       ;; finally (return (get-parse-result parser))
-	 )))
+    (loop for result = (parse-token parser (next-token lexer))
+       while (equal :continue result)
+       finally (return (values result (get-parse-result parser))))))
 ;; ---------------------------------------------------------------------------------------------------------------------
 ;; Sample grammars
 
-(let ((grammar-419 (transform-extended-grammar-to-fundamental 'S
-							      `((S (^ (:i E :t S SP) :a))
-								(SP (:e (? S)))
-								(E (:b))))))
-  (defun grammar-419 ()
-    grammar-419))
+(defgrammar grammar-419 S
+  (S (^ (:i E :t S SP) :a))
+  (SP (:e (? S)))
+  (E (:b)))
 
-(let ((grammar-420 (transform-extended-grammar-to-fundamental 'start
-							      `(
-								(start (s))
-								(s (l eq r)
-								   (r))
-								(l (star r)
-								   (id))
-								(r (l))))))
-  (defun grammar-420 ()
-    grammar-420))
+(defgrammar grammar-420 s
+  (s (l eq r)
+     (r))
+  (l (star r)
+     (id))
+  (r (l)))
 
-(let ((numbers-grammar (transform-extended-grammar-to-fundamental 'number 
-								  `(
-								    ;; literals are digits, decimal, plus, minus
-								    (integer (digit)
-									     (integer digit))
-								    (numeric-value ( (? (^ plus minus)) integer (? decimal integer)))
-								    (number (numeric-value))
-								    ))))
-  (defun numbers-grammar ()
-    numbers-grammar))
+(defgrammar numbers-grammar number  
+  ;; literals are digits, decimal, plus, minus
+  (integer (digit)
+	   (integer digit))
+  (numeric-value ( (? (^ plus minus)) integer (? decimal integer)))
+  (number (numeric-value)))
 
-(let ((html-grammar 
-       (transform-extended-grammar-to-fundamental 'document 
-						  `(( tag-name  (identifier) )
-						    
-						    ( attribute-name  (identifier) )						  
+(defgrammar html-grammar document
+  ( tag-name  (identifier) )
+  
+  ( attribute-name  (identifier) )
 
-						    (integer (digit)
-							     (integer digit))
-						    (numeric-value ( (? (^ plus minus)) integer (? decimal integer)))
-						    (number (numeric-value))
+  (integer (digit)
+	   (integer digit))
+  (numeric-value ( (? (^ plus minus)) integer (? decimal integer)))
+  (number (numeric-value))
 
-						    (es (? ws))
+  (es (? ws))
 
-						    ( quantity-value  (^ number
-									 ( number pct-symbol)
-									 ( number pct)
-									 ( number px) ) )
+  ( quantity-value  (^ number
+		       ( number pct-symbol)
+		       ( number pct)
+		       ( number px) ) )
 
-						    ( attribute-value  (^ quantity-value
-									  string-value))
+  ( attribute-value  (^ quantity-value
+			string-value))
 
-						    ( attribute  (attribute-name 
-								  es 
-								  equal-sign
-								  es 
-								  attribute-value))
+  ( attribute  (attribute-name 
+		es 
+		equal-sign
+		es 
+		attribute-value))
 
-						    ( attribute-list  ( attribute (* ws attribute)))
+  ( attribute-list  ( attribute (* ws attribute)))
 
-						    ( start-tag  ( lt 
-								   es 
-								   tag-name
-								   (? ws attribute-list)
-								   es 
-								   gt))
+  ( start-tag  ( lt 
+		 es 
+		 tag-name
+		 (? ws attribute-list)
+		 es 
+		 gt))
 
-						    ( end-tag  ( lt es forward-slash es tag-name  gt))
+  ( end-tag  ( lt es forward-slash es tag-name  gt))
 
-						    ( single-tag  ( lt 
-								    es 
-								    tag-name
-								    (? ws attribute-list) 
-								    es 
-								    forward-slash 
-								    es 
-								    gt))
+  ( single-tag  ( lt 
+		  es 
+		  tag-name
+		  (? ws attribute-list) 
+		  es 
+		  forward-slash 
+		  es 
+		  gt))
 
-						    ( tag  (^ single-tag
-							      (start-tag 
-							       es 
-							       (* (^ tag html-text))
-							       es 
-							       end-tag)))
-						    (document (tag))))))
-  (defun html-grammar ()
-    html-grammar))
+  ( tag  (^ single-tag
+	    (start-tag 
+	     es 
+	     (* (^ tag html-text))
+	     es 
+	     end-tag)))
+  (document (tag)))
 
-(defclass html-lexer (lexer) 
-  ((state :initform :text)))
+(deflexer html-lexer (:text)
+  (:tag #'digit-char-p digit)
+  (:tag #'alpha-char-p identifier :accumulate #'(lambda (nc)
+						  (and nc
+						       (or (digit-char-p nc) 
+							   (alpha-char-p nc)
+							   (equal #\_ nc)))))
+  (:text #\< lt :state :tag)
+  (:tag #\> gt :state :text)
+  (:tag #\+ plus)
+  (:tag #\- minus)
+  (:tag #\. decimal)
+  (:tag #\/ forward-slash)
+  (:tag #\% percent-symbol)
+  (:tag #\= equal-sign)
+  (:tag #'whitespace-p ws :accumulate #'whitespace-p)
+  (:text t html-text :accumulate #'(lambda (nc) (char/= #\< nc))))
 
-(defmethod next-token ((lexer html-lexer))
-  (with-slots (source position state) lexer
-    (let ((c (next-lex-character source position))
-	  (token-value (make-array `(0) :element-type 'character :adjustable t :fill-pointer t)))
-      (labels ((accumulate (c)
-		 (vector-push-extend c token-value))
-	       (return-accumulated-token (type test)
-		 (accumulate c)
-		 (loop for nc = (current-lex-character source position)
-		    while (and nc (funcall test nc))
-		    do (progn 
-			 (accumulate nc)
-			 (incf-lex-position source position))
-		    finally (return (list type token-value))))
-	       (return-token (type)
-		 (list type c)))
-	(when c
-	  (cond ((and (equal :tag state) (digit-char-p c)) 
-		 (return-token 'digit))
-		((and (equal :tag state) (alpha-char-p c))
-		 (return-accumulated-token 'identifier #'(lambda (nc)
-						   (and nc
-							(or (digit-char-p nc) 
-							    (alpha-char-p nc)
-							    (equal #\_ nc))))))
-		((and (equal :text state) (char= #\< c))
-		 (setf state :tag)
-		 (return-token 'lt))
-		((and (equal :tag state) (char= #\> c))
-		 (setf state :text)
-		 (return-token 'gt))
-		((and (equal :tag state) (char= #\+ c))
-		 (return-token 'plus))
-		((and (equal :tag state) (char= #\- c))
-		 (return-token 'minus))
-		((and (equal :tag state) (char= #\. c))
-		 (return-token 'decimal))
-		((and (equal :tag state) (char= #\/ c))
-		 (return-token 'forward-slash))
-		((and (equal :tag state) (char= #\% c))
-		 (return-token 'percent-symbol))
-		((and (equal :tag state) (char= #\= c))
-		 (return-token 'equal-sign))
-		((and (equal :tag state) (whitespace-p c))
-		 (return-accumulated-token 'ws #'whitespace-p))
-		((equal :text state)
-		 (return-accumulated-token 'html-text #'(lambda (nc) (char/= #\< nc))))
-		(t (error 'lexer-error :lexer lexer :character c))
-		))))))
 (in-package :hh-parse)
 
-(export
- (list
-
-  'source-code-line
-  'source-code-position
-  'source-code-file
-  
-  'source-text
-  'insert-line
-  'insert-text
-
-  'lines-of
-  'current-position
-  'current-line
-  'current-line-position
-  'current-column-position
-  )
- )
-
 ;; =====================================================================================================================
 ;;
 ;; Helpers
 ;; =====================================================================================================================
 
 (defclass source-code-line ()
-  (
-   (parser :initform nil :initarg :parser :accessor parser-of
+  ((parser :initform nil :initarg :parser :accessor parser-of
 	   :documentation "Parser positioned at end of the line")
    (text :initform "" :initarg :text :accessor text-of)))
 
     (loop for line across (lines-of source)
 	 do (write-string (text-of line) text))))
 
-(defmethod (setf source-text) ( text (source source-code-file) )
+(defmethod (setf source-text) ( (text string) (source source-code-file) )
   ;; Drop existing lines
   (setf (lines-of source) (make-lines) )
   (insert-text source text))
 
+(defmethod (setf source-text) ( (text stream) (source source-code-file) )
+  (loop for line = (read-line text nil)
+       while line
+       do (setf (source-text line) source)))
+
 (defmethod insert-line ( (source source-code-file) text)
   (unless (char= (elt text (- (length text) 1) ) #\Newline)
     (setf text (concatenate 'string text #(#\Newline))))
 (defmethod reparse ( (source source-code-file) top-node)
   t)
 
+(defun make-source (&optional text)
+  (let ((source (make-instance 'source-code-file)))
+    (when text
+      (setf (source-text source) text))
+    source))
+
 ;; =====================================================================================================================
 ;;
 ;; Printing