Anonymous committed 46a7d64

Cleaned up lexer error report function, and streamlined html lexer code more

  • Participants
  • Parent commits 594adda
  • Branches parsing

Comments (0)

Files changed (3)

    (state :initform :any :accessor state)))
 (define-condition lexer-error (error)
-  ((lexer :initarg :lexer :accessor lexer))
+  ((lexer :initarg :lexer :accessor lexer)
+   (unexpected-character :initform nil :initarg :character :accessor unexpected-character))
   (:report (lambda (condition stream)
 	     (with-slots (source position state) (lexer condition)
-	       (format stream "Lexer in state ~a encountered unexpected characters before ~a at position ~a"
+	       (format stream "Unexpected character ~@[~s~] in state ~s at position (line=~a,column=~a)"
+		       (unexpected-character condition)
-		       (current-parse-character source position)
-		       position)))))
+		       (line-at position)
+		       (column-at position))))))
 ;; ---------------------------------------------------------------------------------------------------------------------

File samples.lisp

 (defmethod next-token ((lexer html-lexer))
   (with-slots (source position state) lexer
-    (let ((c (next-parse-character source position))
+    (let ((c (next-lex-character source position))
 	  (token-value (make-array `(0) :element-type 'character :adjustable t :fill-pointer t)))
       (labels ((accumulate (c)
 		 (vector-push-extend c token-value))
-	       (accumulate-while (type test)
+	       (return-accumulated-token (type test)
 		 (accumulate c)
-		 (loop for nc = (current-parse-character source position)
+		 (loop for nc = (current-lex-character source position)
 		    while (and nc (funcall test nc))
 		    do (progn 
 			 (accumulate nc)
-			 (incf-parse-position source position))
-		    finally (return (list type token-value)))))
+			 (incf-lex-position source position))
+		    finally (return (list type token-value))))
+	       (return-token (type)
+		 (list type c)))
 	(when c
-	  ;; TODO factor in state into each test, and
-	  ;; arrange in an order that makes sense (esp. for text)
 	  (cond ((and (equal :tag state) (digit-char-p c)) 
-		 (list 'digit c))
+		 (return-token 'digit))
 		((and (equal :tag state) (alpha-char-p c))
-		 (accumulate-while 'identifier #'(lambda (nc)
+		 (return-accumulated-token 'identifier #'(lambda (nc)
 						   (and nc
 							(or (digit-char-p nc) 
 							    (alpha-char-p nc)
 							    (equal #\_ nc))))))
 		((and (equal :text state) (char= #\< c))
 		 (setf state :tag)
-		 (list 'lt c))
+		 (return-token 'lt))
 		((and (equal :tag state) (char= #\> c))
 		 (setf state :text)
-		 (list 'gt c))
+		 (return-token 'gt))
 		((and (equal :tag state) (char= #\+ c))
-		 (list 'plus c))
+		 (return-token 'plus))
 		((and (equal :tag state) (char= #\- c))
-		 (list 'minus c))
+		 (return-token 'minus))
 		((and (equal :tag state) (char= #\. c))
-		 (list 'decimal c))
+		 (return-token 'decimal))
 		((and (equal :tag state) (char= #\/ c))
-		 (list 'forward-slash c))
+		 (return-token 'forward-slash))
 		((and (equal :tag state) (char= #\% c))
-		 (list 'percent-symbol c))
+		 (return-token 'percent-symbol))
 		((and (equal :tag state) (char= #\= c))
-		 (list 'equal-sign c))
+		 (return-token 'equal-sign))
 		((and (equal :tag state) (whitespace-p c))
-		 (accumulate-while 'ws #'whitespace-p))
+		 (return-accumulated-token 'ws #'whitespace-p))
 		((equal :text state)
-		 (accumulate-while 'html-text #'(lambda (nc) (char/= #\< nc))))
-		(t (error 'lexer-error :lexer lexer))
+		 (return-accumulated-token 'html-text #'(lambda (nc) (char/= #\< nc))))
+		(t (error 'lexer-error :lexer lexer :character c))
 (defgeneric (setf parse-position) ( position code ) )
-(defgeneric current-parse-character ( source position )
+(defgeneric current-lex-character ( source position )
   (:documentation "Return the character at the current position in the source"))
-(defgeneric incf-parse-position (source position)
+(defgeneric incf-lex-position (source position)
   (:documentation "Increment the position to the next character in source, adjusting for line breaks"))
-(defgeneric next-parse-character ( source position)
+(defgeneric next-lex-character ( source position)
   (:documentation "Return the current character, and advance the position in the source"))
 (defgeneric next-column ( position )
   (setf (current-column-position code) (column-at position))
   (setf (current-line-position code) (line-at position)))
-(defmethod current-parse-character ( (source source-code-file) (position source-code-position))
+(defmethod current-lex-character ( (source source-code-file) (position source-code-position))
   (let ((line-pos (current-line-position position)))
     ;; TODO we're only checking line position right now,
     ;; because if using next-character, then should never
 	     (pos (current-column-position position)))
 	(elt text pos)))))
-(defmethod incf-parse-position ( (source source-code-file) (position source-code-position) )
+(defmethod incf-lex-position ( (source source-code-file) (position source-code-position) )
   (let ((line-pos (current-line-position position))
 	(column-pos (current-column-position position))
 	(line (elt (lines-of source) (current-line-position position))))
 	  (setf (current-line-position position) (1+ line-pos))
 	  (setf (current-column-position position) 0)))))
-(defmethod next-parse-character ( (source source-code-file) (position source-code-position))
-  (let ((c (current-parse-character source position)))
+(defmethod next-lex-character ( (source source-code-file) (position source-code-position))
+  (let ((c (current-lex-character source position)))
     (when c
       ;; only increment if wasn't already at end
-      (incf-parse-position source position))
+      (incf-lex-position source position))
 (defmethod flush-parsers ( (source source-code-file) &optional (line nil) )