Commits

Anonymous committed 2e8cd88

Switch to comparse and complete unicode support

  • Participants

Comments (0)

Files changed (10)

+*.so
+*.import.*
+*.c
+*.o
+**/*.svn
+.svn
+;; ws <- `[ \t\n\r]*
+
+(define ws
+  (zero-or-more (in (string->char-set " \t\n\r"))))
+
+;; begin-array     <- #\[
+
+(define begin-array
+  (is #\[))
+
+;; begin-object    <- #\{
+
+(define begin-object
+  (is #\{))
+
+;; end-array       <- #\]
+
+(define end-array
+  (is #\]))
+
+;; end-object      <- #\}
+
+(define end-object
+  (is #\}))
+
+;; name-separator  <- #\:
+
+(define name-separator
+  (is #\:))
+
+;; value-separator <- #\,
+
+(define value-separator
+  (is #\,))
+
+;; false <- "false" 
+;;       -> handle-false
+
+(define false
+  (bind (char-seq "false") handle-false))
+
+;; true  <- "true"
+;;       -> handle-true
+
+(define true
+  (bind (char-seq "true") handle-true))
+
+;; null  <- "null"
+;;       -> handle-null
+
+(define null
+  (bind (char-seq "null") handle-null))
+
+
+;; escape  <- ["\\/bfnrt]
+;;         -> handle-escape
+
+(define escape
+  (bind (in char-set:json-escaped)
+        handle-escape))
+
+;; unicode <- #\u [[:xdigit:]]{4}
+;;         -> handle-unicode
+
+(define unicode
+  (bind (as-string
+         (preceded-by (is #\u)
+                      (repeated
+                       (in char-set:hex-digit)
+                       min: 4 max: 4)))
+        handle-unicode))
+
+;; char    <- #\\ ( escape / unicode ) / (![[:cntrl:]"\\] .)
+
+(define char
+  (any-of (preceded-by (is #\\) (any-of escape unicode))
+          (none-of* (satisfies json-char?) item)))
+
+;; raw-string <- #\" char* #\"
+;;            -> handle-string/raw
+
+(define raw-string
+  (memoize
+   (as-string
+    (enclosed-by (is #\")
+                 (zero-or-more char)
+                 (is #\")))))
+
+;; member <- ws raw-string ws name-separator value
+;;        -> handle-member
+
+(define member
+  (recursive-parser
+   (sequence* ((_ ws)
+               (name raw-string)
+               (_ (preceded-by ws name-separator))
+               (value value))
+     (handle-member name value))))
+
+
+;; object <- begin-object ws ( member ( value-separator member )* )? end-object
+;;        -> handle-object
+
+(define object
+  (bind (enclosed-by
+         (preceded-by begin-object ws)
+         (any-of
+          (sequence*
+              ((first-member (member))
+               (more-members (zero-or-more (preceded-by value-separator (member)))))
+            (result (cons first-member more-members)))
+          (result '()))
+         end-object)
+        handle-object))
+
+;; array  <- begin-array ws ( value ( value-separator value )* )? end-array
+;;        -> handle-array
+
+
+(define array
+  (recursive-parser
+   (bind (enclosed-by
+          (preceded-by begin-array ws)
+          (any-of
+           (sequence*
+               ((first-value value)
+                (more-values (zero-or-more (preceded-by value-separator value))))
+             (result (cons first-value more-values)))
+           (result '()))
+          end-array)
+         handle-array)))
+
+;; exp    <- [Ee] ([+-])? ([[:digit:]])+
+;;        -> as-string
+
+(define exp
+  (as-string
+   (sequence* ((e      (in #\E #\e))
+               (sig    (maybe (in #\+ #\-)))
+               (digits (one-or-more (in char-set:digit))))
+     (result (cons* e sig digits)))))
+
+;; frac   <- ,#\. ([[:digit:]])+
+;;        -> as-string
+
+(define frac
+  (as-string
+   (sequence* ((dec (is #\.))
+               (digits (one-or-more (in char-set:digit))))
+     (result (cons dec digits)))))
+
+;; int    <- ,"0" / ( [123456789] ([[:digit:]])* )
+;;        -> as-string
+
+(define int
+  (any-of (char-seq "0")
+          (as-string
+           (sequence*
+               ((n  (in (string->char-set "123456789")))
+                (ns (zero-or-more (in char-set:digit))))
+             (result (cons n ns))))))
+
+;; number <- ,"-"? int frac? exp?
+;;        -> handle-number
+
+(define number
+  (bind (as-string
+         (sequence (maybe (is #\-))
+                   int
+                   (maybe frac)
+                   (maybe exp)))
+        handle-number))
+
+;; string  <- raw-string
+;;         -> handle-string
+
+(define string
+  (bind raw-string handle-string))
+
+;; value  <- ws ( null / false / true / object / array / number / string ) ws
+
+(define value
+  (enclosed-by ws (any-of null false true object (array) number string) ws))
+
+;; document <- ws ( object / array ) ws
+
+(define document
+  (enclosed-by ws (any-of object (array)) ws))
+((synopsis "A JSON parser (and emitter) built with genturfa'i")
+ (author "Moritz Heidkamp")
+ (test-depends test)
+ (depends comparse utf8 vector-lib latch)
+ (category parsing)
+ (license "BSD"))
+(module medea
+
+(read-json
+ json-parsers
+ write-json
+ json-unparsers
+ json->string)
+
+(import chicken (except scheme string member))
+(use srfi-14 utf8 latch srfi-69 lazy-seq
+     (except comparse as-string)
+     (prefix utf8-srfi-14 utf8-)
+     (only srfi-1 cons* find remove)
+     (only data-structures compose constantly identity alist-ref)
+     (only utf8-srfi-13 string-concatenate string-pad)
+     (only vector-lib vector-for-each)
+     (only ports with-output-to-port with-output-to-string))
+
+(define json-parsers
+  (make-parameter `((string . ,identity)
+                    (number . ,string->number)
+                    (member . ,(lambda (name value)
+                                 (cons (string->symbol name) value)))
+                    (object . ,identity)
+                    (array  . ,list->vector))))
+
+(define (parser-ref name)
+  (alist-ref name (json-parsers)))
+
+(define (handle-number x)
+  (result ((parser-ref 'number) x)))
+
+(define (handle-array args)
+  (result ((parser-ref 'array) args)))
+
+(define (handle-member name value)
+  (result ((parser-ref 'member ) name value)))
+
+(define (handle-object args)
+  (result ((parser-ref 'object) args)))
+
+(define (handle-string string)
+  (result ((parser-ref 'string) string)))
+
+(define (handle-unicode u)
+  (result (integer->char (string->number u 16))))
+
+(define (handle-escape e)
+  (result
+   (case e
+     ((#\\) #\\)
+     ((#\/) #\/)
+     ((#\") #\")
+     ((#\b) #\backspace)
+     ((#\f) #\page)
+     ((#\n) #\newline)
+     ((#\r) #\return)
+     ((#\t) #\tab))))
+
+(define handle-true (constantly (result #t)))
+(define handle-false (constantly (result #f)))
+(define handle-null (constantly (result 'null)))
+
+(define (as-string parser)
+  (sequence* ((parts parser))
+    (result (apply conc (remove boolean? parts)))))
+
+(define char-set:json-escaped
+  (string->char-set "\"\\/bfnrt"))
+
+(define char-set:json-unescaped
+  (utf8-char-set-union
+   (utf8-ucs-range->char-set #x20 #x22)
+   (utf8-ucs-range->char-set #x23 #x5C)
+   (utf8-ucs-range->char-set #x5D #x110000)))
+
+(define char-set:json-char
+  (utf8-char-set-union
+   utf8-char-set:iso-control
+   (utf8-char-set #\" #\\)))
+
+(define (json-char? c)
+  (utf8-char-set-contains? char-set:json-char c))
+
+(include "grammar.scm")
+
+(define (read-json #!optional (input (current-input-port)) (memoize? #t))
+  (parse document
+         (cond ((input-port? input)
+                (input-port->lazy-seq input read-char))
+               ((string? input)
+                (string->list input))
+               (else input))
+         memoize: memoize?))
+
+(define (unparse-string s)
+  (let ((chars (string->list s)))
+    (display #\")
+    (for-each
+     (lambda (c)
+       (cond ((utf8-char-set-contains? char-set:json-unescaped c)
+              (display c))
+             ((char-set-contains? char-set:json-escaped c)
+              (display "\\")
+              (display c))
+             (else
+              (display "\\u")
+              (display (string-pad (number->string (char->integer c) 16) 4 #\0)))))
+     chars)
+    (display #\")))
+
+(define (for-each/delimiter proc list delimiter)
+  (let ((size (if (vector? list)
+                  (vector-length list)
+                  (length list)))
+        (count 0))
+    ((if (vector? list) vector-for-each for-each)
+     (lambda (d #!optional (ad d))
+       (proc ad)
+       (set! count (add1 count))
+       (unless (= count size)
+         (display #\,)))
+     list)))
+
+(define json-unparsers
+  (make-parameter
+   (list (cons list?
+               (lambda (object)
+                 (display #\{)
+                 (for-each/delimiter
+                  (lambda (member)
+                    (unparse-string (symbol->string (car member)))
+                    (display #\:)
+                    (write-json (cdr member)))
+                  object
+                  #\,)
+                 (display #\})))
+
+         (cons vector?
+               (lambda (array)
+                 (display #\[)
+                 (for-each/delimiter write-json array #\,)
+                 (display #\])))
+
+         (cons string? unparse-string)
+         (cons number? write)
+         (cons (cut eq? <> #t) (lambda (o) (display "true")))
+         (cons (cut eq? <> #f) (lambda (o) (display "false")))
+         (cons (cut eq? <> 'null) (lambda (o) (display "null")))
+         (cons (constantly #t)
+               (lambda datum
+                 (error 'write-json "don't know how to write datum as JSON" datum))))))
+
+(define (write-json object #!optional (port (current-output-port)))
+  (with-output-to-port port
+    (lambda ()
+      ((cdr (find (lambda (unparser)
+                    ((car unparser) object))
+                  (json-unparsers)))
+       object))))
+
+(define (json->string object)
+  (with-output-to-string (lambda () (write-json object))))
+
+)
+(compile -d0 -O3 -s medea.scm -J)
+(compile -d0 -O3 -s medea.import.scm)
+
+(install-extension
+ 'medea
+ '("medea.so" "medea.import.so")
+ '((version "0.0.4")))

File tests/parsers-unparsers.scm

+(import medea)
+(use test)
+
+(test-group "custom parsers"
+  (parameterize
+      ((json-parsers (append `((object . ,(lambda (object)
+                                            (cons 'object object)))
+                               (member . ,(lambda (name value)
+                                            (list (string->symbol name) value)))
+                               (array . ,(lambda (array)
+                                           (cons 'array array))))
+                             (json-parsers))))
+
+    (test-read "sxml" '(object (foo (array 1 2 3))
+                               (bar (object (baz "hey there"))))
+               "{ \"foo\": [1,2,3], \"bar\" : { \"baz\": \"hey there\" } }"))
+
+  (parameterize ((json-parsers (alist-cons 'string string-upcase (json-parsers))))
+    (test-read "string parser customization"
+               '#("HEY" ((there . "SAY WHAT")))
+               "[\"hey\", { \"there\": \"say WhaT\" }]")))
+
+
+(test-group "custom unparsers"
+  (parameterize
+      ((json-unparsers (alist-cons symbol? 
+                                   (lambda (o) 
+                                     (write-json (format ":~A" (symbol->string o))))
+                                   (json-unparsers))))
+    (test-write "symbols" "[\":foo\",\":bar\"]" '#(foo bar)))
+
+
+  (parameterize
+      ((json-unparsers (append (list (cons (lambda (o)
+                                             (and (pair? o) (eq? 'object (car o))))
+                                           (lambda (o)
+                                             (write-json (map (lambda (m) (apply cons m)) (cdr o)))))
+                                     (cons (lambda (o)
+                                             (and (pair? o) (eq? 'array (car o))))
+                                           (lambda (o)
+                                             (write-json (list->vector (cdr o))))))
+                               (json-unparsers))))
+    (test-write "sxml" "{\"foo\":[123,\"hey\"]}" '(object (foo (array 123 "hey"))))))

File tests/python-test-pass1.scm

+(define json-python-test-pass1
+#<<JSON
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "":  23456789012E66,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[  ],
+        "object":{  },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5        ,          6           ,7        ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+JSON
+)

File tests/reading.scm

+(import medea)
+(use test)
+
+(load-relative "python-test-pass1")
+
+(define-syntax test-read
+  (syntax-rules ()
+    ((_ result json)
+     (test-read json result json))
+    ((_ description result json)
+     (test description result (read-json json)))))
+
+(test-group "objects"
+  (test-read '() "{}")
+  (test-read '((foo . 123) (qux . "hey")) "{ \"foo\" : 123, \"qux\": \"hey\" } "))
+
+(test-group "arrays"
+  (test-read '#() "[]")
+  (test-read '#("foo") "[\"foo\"]")
+  (test-read '#("one" 2 3 "four" 5)
+             "[\"one\", 2,3, \"four\", 5]"))
+
+(test-group "numbers"
+  (test-read '#(0) "[0]")
+  (test-read '#(-10) "[-10]")
+  (test-read '#(222.5) "[222.5]")
+  (test-read '#(10e2) "[10e2]")
+  (test-read '#(2.3e2) "[2.3E+2]"))
+
+(test-group "strings"
+  (test-read '#("") "[\"\"]")
+  (test-read '#("E9") "[\"\\u00459\"]")
+  (test-read '#("Дҫ") "[\"\\u0414\\u04ab\"]")
+  (test-read '#("Дҫ") "[\"Дҫ\"]") ; FIXME genturfahi needs utf8 support for that to work
+  (test-read '#("\f") "[\"\\f\"]")
+  (test-read '#("\"") "[\"\\\"\"]")
+  (test-read #f "[\"\\x\"]"))
+
+(test-group "literals"
+  (test-read '#(#t) "[true]")
+  (test-read '#(#f) "[false]")
+  (test-read '#(null) "[null]"))
+
+(test-group "whitespace"
+  (test-read '() " {  \n  \t}")
+  (test-read '#() " \r  []"))
+
+(test-group "malformed"
+  (test-read #f "{..}"))
+
+(test-group "complex"
+
+(test-read "example 1 from RFC 4627"
+'((Image 
+   (Width . 800)
+   (Height . 600)
+   (Title . "View from 15th Floor")
+   (Thumbnail 
+    (Url . "http://www.example.com/image/481989943")
+    (Height . 125)
+    (Width . "100"))
+   (IDs . #(116 943 234 38793))))
+
+#<<JSON
+{
+    "Image": {
+        "Width":  800,
+        "Height": 600,
+        "Title":  "View from 15th Floor",
+        "Thumbnail": {
+            "Url":    "http://www.example.com/image/481989943",
+            "Height": 125,
+            "Width":  "100"
+        },
+        "IDs": [116, 943, 234, 38793]
+    }
+}
+JSON
+)
+
+(test-read "example 2 from RFC 4627"
+ '#(((precision . "zip")
+     (Latitude . 37.7668)
+     (Longitude . -122.3959)
+     (Address . "")
+     (City . "SAN FRANCISCO")
+     (State . "CA")
+     (Zip . "94107")
+     (Country . "US"))
+    ((precision . "zip")
+     (Latitude . 37.371991)
+     (Longitude . -122.02602)
+     (Address . "")
+     (City . "SUNNYVALE")
+     (State . "CA")
+     (Zip . "94085")
+     (Country . "US")))
+
+#<<JSON
+[
+   {
+      "precision": "zip",
+      "Latitude":  37.7668,
+      "Longitude": -122.3959,
+      "Address":   "",
+      "City":      "SAN FRANCISCO",
+      "State":     "CA",
+      "Zip":       "94107",
+      "Country":   "US"
+   },
+   {
+      "precision": "zip",
+      "Latitude":  37.371991,
+      "Longitude": -122.026020,
+      "Address":   "",
+      "City":      "SUNNYVALE",
+      "State":     "CA",
+      "Zip":       "94085",
+      "Country":   "US"
+   }
+]
+JSON
+)
+
+(test-read
+ "python test_pass1"
+ '#("JSON Test Pattern pass1"
+    ((|object with 1 member| . #("array with 1 element")))
+    ()
+    #()
+    -42
+    #t
+    #f
+    null
+    ((integer . 1234567890)
+     (real . -9876.543210)
+     (e . 0.123456789e-12)
+     (E . 1.234567890E+34)
+     (|| .  23456789012E66)
+     (zero . 0)
+     (one . 1)
+     (space . " ")
+     (quote . "\"")
+     (backslash . "\\")
+     (controls . "\b\f\n\r\t")
+     (slash . "/ & /")
+     (alpha . "abcdefghijklmnopqrstuvwyz")
+     (ALPHA . "ABCDEFGHIJKLMNOPQRSTUVWYZ")
+     (digit . "0123456789")
+     (special . "`1~!@#$%^&*()_+-={':[,]}|;.</>?")
+     (hex . "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A")
+     (true . #t)
+     (false . #f)
+     (null . null)
+     (array . #())
+     (object)
+     (address . "50 St. James Street")
+     (url . "http://www.JSON.org/")
+     (comment . "// /* <!-- --")
+     (|# -- --> */| . " ")
+     (| s p a c e d | . #(1 2 3 4 5 6 7))
+     (compact . #(1 2 3 4 5 6 7))
+     (jsontext . "{\"object with 1 member\":[\"array with 1 element\"]}")
+     (quotes . "&#34; \u0022 %22 0x22 034 &#x22;")
+     (|/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}\|;:',./<>?| . "A key can be any string"))
+    0.5
+    98.6
+    99.44
+    1066
+    "rosebud")
+
+ json-python-test-pass1)
+
+(test "reading from current-input-port by default"
+  '#("foo")
+  (with-input-from-string "[\"foo\"]" read-json))
+
+)

File tests/run.scm

+(load-relative "../medea")
+(import medea)
+(use test)
+
+(define-syntax test-read
+  (syntax-rules ()
+    ((_ result json)
+     (test-read json result json))
+    ((_ description result json)
+     (test description result (read-json json)))))
+
+(define-syntax test-write
+  (syntax-rules ()
+    ((_ result doc)
+     (test-write result result doc))
+    ((_ description result doc)
+     (test description result (with-output-to-string (lambda () (write-json doc)))))))
+
+(load-relative "reading")
+(load-relative "writing")
+(load-relative "parsers-unparsers")
+
+(test-exit)

File tests/writing.scm

+(import medea)
+(use test)
+
+(define-syntax test-read/write
+  (syntax-rules ()
+    ((_ desc doc)
+     (test desc doc (read-json
+                     (with-output-to-string 
+                         (lambda ()
+                           (write-json doc))))))))
+
+(test-group "writing"
+  (test-write "{}" '())
+  (test-write "[]" '#())
+  (test-error (json-write 'a))
+  (test-write "{\"foo\":[123]}" '((foo . #(123))))
+  (test-write "{\"foo\":null,\"bar\":true}" '((foo . null) (bar . #t)))
+  (test-write "[1,2,[3],4,{}]" '#(1 2 #(3) 4 ()))
+  (test-write "[\"\\\\\"]" '#("\\")))
+
+
+(test-group "writing and reading"
+  (test-read/write "simple" '#(1 2 3 "foo" ((bar . null) (baz))))
+  (test-read/write "python pass 1" (read-json json-python-test-pass1)))