comparse / comparse.scm

;; Based on
;; Inspired by

(module comparse

 fail result item bind
 satisfies in is char-seq maybe
 sequence sequence*
 repeated zero-or-more one-or-more
 any-of all-of none-of none-of*
 preceded-by followed-by enclosed-by
 as-string recursive-parser
 memoize memo-table)

(import chicken scheme)

(use data-structures lazy-seq srfi-1 srfi-14 srfi-69 extras latch trie)

(define-type input
  (struct lazy-seq))

(define-type parse
  (or (pair * input) boolean))

(define-type parser
  (input -> parse))

(define-type variadic-parser-combinator
  ((or (list-of parser) parser) #!rest parser --> parser))

;; (: result
;;    (forall (value)
;;            (value --> (input -> (pair value input)))))
(: result (* -> parser))
(define ((result value) input)
  (cons value input))

(define fail
  (constantly #f))

(define (item input)
  (and (not (lazy-null? input))
       (cons (lazy-head input)
             (lazy-tail input))))

(: bind (parser (* -> parser) --> parser))
(define ((bind parser proc) input)
  (and-let* ((value (parser input)))
    ((proc (car value)) (cdr value))))

(: satisfies ((* #!rest * -> boolean) #!rest * --> parser))
(define (satisfies condition . args)
  (bind item (lambda (x)
               (if (apply condition x args)
                   (result x)

(: arg-list (* (list-of *) --> (list-of *)))
(define (arg-list arg more-args)
  (if (and (list? arg) (null? more-args))
      (cons arg more-args)))

(: parser-list (parser (list-of parser) --> (list-of parser)))
(define parser-list arg-list)

(: in (* #!rest * --> parser))
(define (in collection . items)
  (if (and (null? items) (char-set? collection))
       (lambda (c)
         (and (char? c)
              (char-set-contains? collection c))))
      (satisfies memq (arg-list collection items))))

(define (is x)
  (satisfies eq? x))

(define-syntax sequence*
  (syntax-rules ()
    ((_ () body ...)
     (begin body ...))
    ((_ ((binding parser) more-bindings ...) body ...)
     (bind parser
           (lambda (binding)
             (sequence* (more-bindings ...) body ...))))))

(: sequence variadic-parser-combinator)
(define (sequence parser . parsers)
  (let ((parsers (parser-list parser parsers)))
    (lambda (input)
      (let loop ((parsers parsers)
                 (parts '())
                 (input input))
        (if (null? parsers)
            (cons (reverse parts) input)
            (and-let* ((value ((car parsers) input)))
              (loop (cdr parsers)
                    (cons (car value) parts)
                    (cdr value))))))))

(: char-seq (string -> parser))
(define ((char-seq str) input)
  (let ((len (string-length str)))
    (let loop ((pos 0) (input input))
      (if (= len pos)
          (cons str input)
          (and (< pos len)
               (not (lazy-null? input))
               (eq? (lazy-head input) (string-ref str pos))
               (loop (+ pos 1) (lazy-tail input)))))))

(: any-of variadic-parser-combinator)
(define ((any-of parser . parsers) input)
  (let loop ((parsers (parser-list parser parsers)))
    (and (not (null? parsers))
         (or ((car parsers) input)
             (loop (cdr parsers))))))

(: all-of variadic-parser-combinator)
(define ((all-of parser . parsers) input)
  (let loop ((parsers (parser-list parser parsers)))
    (and-let* ((value ((car parsers) input)))
      (if (null? (cdr parsers))
          (and value (loop (cdr parsers)))))))

(: none-of variadic-parser-combinator)
(define ((none-of parser . parsers) input)
  (let loop ((parsers (parser-list parser parsers)))
    (if (null? parsers)
        (cons #t input)
        (and (not ((car parsers) input))
             (loop (cdr parsers))))))

(: preceded-by variadic-parser-combinator)
(define (preceded-by parser . parsers)
  (let loop ((parsers (parser-list parser parsers)))
    (bind (car parsers)
          (lambda (value)
            (if (null? (cdr parsers))
                (result value)
                (loop (cdr parsers)))))))

(: none-of* (parser parser #!rest parser -> parser))
(define (none-of* parser but . parsers)
  (receive (but parsers) (car+cdr (reverse (cons* parser but parsers)))
    (preceded-by (none-of parsers) but)))

(: followed-by variadic-parser-combinator)
(define ((followed-by parser following . more-following) input)
  (and-let* ((value (parser input)))
    (let loop ((following (parser-list following more-following))
               (input (cdr value)))
      (if (null? following)
          (and-let* ((value ((car following) input)))
            (loop (cdr following) (cdr value)))))))

(: enclosed-by (parser parser parser --> parser))
(define (enclosed-by open content close)
  (sequence* ((_ open) (value content) (_ close))
    (result value)))

(define (->parser object)
  (cond ((procedure? object) object)
        ((char-set? object) (in object))
        ((char? object) (is object))
        ((string? object) (char-seq object))
        (else (error "Don't know how to turn object into parser" object))))

(: zero-or-more (parser --> parser))
(define (zero-or-more parser)
  (any-of (sequence* ((x parser)
                      (xs (zero-or-more parser)))
            (result (cons x xs)))
          (result '())))

(: one-or-more (parser --> parser))
(define (one-or-more parser)
  (sequence* ((x parser)
              (y (zero-or-more parser)))
    (result (cons x y))))

(: repeated-until (parser parser --> parser))
(define (repeated-until parser end)
  (any-of (all-of end (result '()))
          (sequence* ((x parser)
                      (y (repeated-until parser end)))
            (result (cons x y)))))

(: repeated* (parser fixnum (or boolean fixnum) --> parser))
(define (repeated* parser min max)
  (any-of (sequence* ((x parser)
                      (y (repeated* parser
                                    (- min 1)
                                    (and max (- max 1)))))
            (result (cons x y)))
          (if (and (<= min 0) (or (not max) (>= max 0)))
              (result '())

(: repeated (parser #!rest * --> parser))
(define (repeated parser #!key (min 0) max until)
  (if until
      (cond (max
             (followed-by (repeated* parser min max) until))
            ((zero? min)
             (repeated-until parser until))
             (sequence* ((x (repeated* parser min min))
                         (y (repeated-until parser until)))
               (result (append x y)))))
      (repeated* parser min max)))

(: maybe (parser --> parser))
(define (maybe parser)
  (any-of parser (result #f)))

(: as-string (parser --> parser))
(define (as-string parser)
  (sequence* ((parts parser))
    (result (apply conc (remove boolean? (flatten parts))))))

(define memo-table
  (make-parameter #f))

(: lazy-seq-prefix (input input -> (or null list)))
(define (lazy-seq-prefix from to)
  (let loop ((from from))
    (if (or (eq? from to) (lazy-null? from))
        (cons (lazy-head from)
              (loop (lazy-tail from))))))

(: parser-memo-ref ((struct trie) input -> (or boolean (pair * fixnum))))
(define (parser-memo-ref memo input)
  (let loop ((memo memo)
             (input input)
             (length 1))
    (and (not (lazy-null? input))
         (and-let* ((memo  (trie-ref* memo (lazy-head input)))
                    (value (trie-value memo)))
           (if (null? value)
               (loop memo (lazy-tail input) (+ 1 length))
               (cons (car value) length))))))

(: memo-ref (parser input -> parse))
(define (memo-ref parser input)
  (and-let* ((parser-memo (hash-table-ref/default (memo-table) parser #f))
             (result (parser-memo-ref parser-memo input)))
    (cons (car result)
          (let loop ((n (cdr result)) (input input))
            (if (zero? n)
                (loop (- n 1) (lazy-tail input)))))))

(: memo-set! (parser input -> parse))
(define (memo-set! parser input)
  (and-let* ((result (parser input)))
    (hash-table-update! (memo-table)
                        (lambda (memo)
                          (trie-insert! memo
                                        (lazy-seq-prefix input (cdr result))
                                        (car result))

(: memoize (parser -> parser))
(define ((memoize parser) input)
  (if (memo-table)
      (or (memo-ref parser input)
          (memo-set! parser input))
      (parser input)))

(define-syntax recursive-parser
  (syntax-rules ()
    ((_ body ...)
     (lambda ()
       (lambda (input)
          ((parser (begin body ...)))
          (parser input)))))))

(define (->lazy-seq x)
  (cond ((lazy-seq? x) x)
        ((string? x) (list->lazy-seq (string->list x)))
        ((list? x) (list->lazy-seq x))
        ((input-port? x) (input-port->lazy-seq x read-char))
        (else (error "Don't know how to turn object into lazy-seq" x))))

(: parse (parser input #!rest * -> * input))
(define (parse parser input #!key memoize)
  (parameterize ((memo-table (if memoize (make-hash-table) (memo-table))))
    (let* ((input (->lazy-seq input))
           (result (parser (->lazy-seq input))))
      (if result
          (values (car result) (cdr result))
          (values result input)))))