1. Moritz Heidkamp
  2. comparse


comparse / comparse.scm

;; Based on http://common-lisp.net/~dcrampsie/smug.html
;; Inspired by https://github.com/joshua-choi/fnparse/

(module comparse

 fail result item bind
 satisfies in is char-seq maybe
 sequence sequence*
 repeated zero-or-more one-or-more
 any-of all-of none-of none-of*
 preceded-by followed-by enclosed-by
 as-string recursive-parser
 memoize memo-table)

(import chicken scheme latch)

(use data-structures lazy-seq srfi-1 srfi-14 srfi-69 extras trie)

(define ((result value) input)
  (cons value input))

(define fail
  (constantly #f))

(define (item input)
  (and (not (lazy-null? input))
       (cons (lazy-head input)
             (lazy-tail input))))

(define ((bind parser proc) input)
  (and-let* ((value (parser input)))
    ((proc (car value)) (cdr value))))

(define (satisfies condition . args)
  (bind item (lambda (x)
               (if (apply condition x args)
                   (result x)

(define (args-list parser more-parsers)
  (if (and (list? parser) (null? more-parsers))
      (cons parser more-parsers)))

(define (in collection . items)
  (if (and (null? items) (char-set? collection))
       (lambda (c)
         (and (char? c)
              (char-set-contains? collection c))))
      (satisfies memq (args-list collection items))))

(define (is x)
  (satisfies eq? x))

(define-syntax sequence*
  (syntax-rules ()
    ((_ () body ...)
     (begin body ...))
    ((_ ((binding parser) more-bindings ...) body ...)
     (bind parser
           (lambda (binding)
             (sequence* (more-bindings ...) body ...))))))

(define (sequence parser . parsers)
  (let ((parsers (args-list parser parsers)))
    (lambda (input)
      (let loop ((parsers parsers)
                 (parts '())
                 (input input))
        (if (null? parsers)
            (cons (reverse parts) input)
            (and-let* ((value ((car parsers) input)))
              (loop (cdr parsers)
                    (cons (car value) parts)
                    (cdr value))))))))

(define ((char-seq str) input)
  (let ((len (string-length str)))
    (let loop ((pos 0) (input input))
      (if (= len pos)
          (cons str input)
          (and (< pos len)
               (not (lazy-null? input))
               (eq? (lazy-head input) (string-ref str pos))
               (loop (+ pos 1) (lazy-tail input)))))))

(define ((any-of parser . parsers) input)
  (let loop ((parsers (args-list parser parsers)))
    (and (not (null? parsers))
         (or ((car parsers) input)
             (loop (cdr parsers))))))

(define ((all-of parser . parsers) input)
  (let loop ((parsers (args-list parser parsers)))
    (and-let* ((value ((car parsers) input)))
      (if (null? (cdr parsers))
          (and value (loop (cdr parsers)))))))

(define ((none-of parser . parsers) input)
  (let loop ((parsers (args-list parser parsers)))
    (if (null? parsers)
        (cons #t input)
        (and (not ((car parsers) input))
             (loop (cdr parsers))))))

(define (preceded-by parser . parsers)
  (let loop ((parsers (args-list parser parsers)))
    (bind (car parsers)
          (lambda (value)
            (if (null? (cdr parsers))
                (result value)
                (loop (cdr parsers)))))))

(define (none-of* parser but . parsers)
  (receive (but parsers) (car+cdr (reverse (cons* parser but parsers)))
    (preceded-by (none-of parsers) but)))

(define ((followed-by parser following . more-following) input)
  (and-let* ((value (parser input)))
    (let loop ((following (args-list following more-following))
               (input (cdr value)))
      (if (null? following)
          (and-let* ((value ((car following) input)))
            (loop (cdr following) (cdr value)))))))

(define (enclosed-by open content close)
  (sequence* ((_ open) (value content) (_ close))
    (result value)))

(define (->parser object)
  (cond ((procedure? object) object)
        ((char-set? object) (satisfies object))
        ((char? object) (is object))
        ((string? object) (char-seq object))
        (else (error "Don't know how to turn object into parser" object))))

(define (zero-or-more parser)
  (any-of (sequence* ((x parser)
                      (xs (zero-or-more parser)))
            (result (cons x xs)))
          (result '())))

(define (one-or-more parser)
  (sequence* ((x parser)
              (y (zero-or-more parser)))
    (result (cons x y))))

(define (repeated-until parser end)
  (any-of (all-of end (result '()))
          (sequence* ((x parser)
                      (y (repeated-until parser end)))
            (result (cons x y)))))

(define (repeated* parser min max)
  (let ((min (or min 0)))
    (any-of (sequence* ((x parser)
                        (y (repeated* parser
                                      (- min 1)
                                      (and max (- max 1)))))
              (result (cons x y)))
            (if (and (<= min 0) (or (not max) (>= max 0)))
                (result '())

(define (repeated parser #!rest args #!key min max until)
  (cond (until
         (cond (max
                (followed-by (repeated* parser min max) until))
               ((or (not min) (zero? min))
                (repeated-until parser until))
                (sequence* ((x (repeated* parser min min))
                            (y (repeated-until parser until)))
                  (result (append x y))))))
        ((or min max (null? args))
         (repeated* parser min max))
         (repeated* parser (car args) (car args)))))

(define (maybe parser)
  (any-of parser (result #f)))

(define (xconc x y)
  (conc y x))

(define (as-string parser)
  (sequence* ((parts parser))
    (result (fold xconc "" (remove boolean? (flatten parts))))))

(define memo-table
  (make-parameter #f))

(define (lazy-seq-prefix from to)
  (let loop ((from from))
    (if (or (eq? from to) (lazy-null? from))
        (cons (lazy-head from)
              (loop (lazy-tail from))))))

(define (parser-memo-ref memo input)
  (let loop ((memo memo)
             (input input)
             (length 1))
    (and (not (lazy-null? input))
         (and-let* ((memo  (trie-ref* memo (lazy-head input)))
                    (value (trie-value memo)))
           (if (null? value)
               (loop memo (lazy-tail input) (+ 1 length))
               (cons (car value) length))))))

(define (memo-ref parser input)
  (and-let* ((parser-memo (hash-table-ref/default (memo-table) parser #f))
             (result (parser-memo-ref parser-memo input)))
    (cons (car result)
          (let loop ((n (cdr result)) (input input))
            (if (zero? n)
                (loop (- n 1) (lazy-tail input)))))))

(define (memo-set! parser input)
  (and-let* ((result (parser input)))
    (hash-table-update! (memo-table)
                        (lambda (memo)
                          (trie-insert! memo
                                        (lazy-seq-prefix input (cdr result))
                                        (car result))

(define ((memoize parser) input)
  (if (memo-table)
      (or (memo-ref parser input)
          (memo-set! parser input))
      (parser input)))

(define-syntax recursive-parser
  (syntax-rules ()
    ((_ body ...)
     (lambda ()
       (lambda (input)
          ((parser (begin body ...)))
          (parser input)))))))

(define (->lazy-seq x)
  (cond ((lazy-seq? x) x)
        ((string? x) (list->lazy-seq (string->list x)))
        ((list? x) (list->lazy-seq x))
        ((input-port? x) (input-port->lazy-seq x read-char))
        (else (error "Don't know how to turn object into lazy-seq" x))))

(define (parse parser input #!key memoize)
  (parameterize ((memo-table (if memoize (make-hash-table) (memo-table))))
    (let* ((result (parser (->lazy-seq input))))
      (if result
          (values (car result) (cdr result))
          (values result (->lazy-seq input))))))