Commits

camlspotter  committed 20a852b

trial of lazy thing

  • Participants
  • Parent commits 43f5d9b

Comments (0)

Files changed (9)

File lib/OMakefile

    utf16
    llist
    parserMonad
+   lazyParserMonad
    json
+   lazy_json
 
 LIB = tiny_json
 
 exception JSON_CastErr of string
 exception JSON_UnknownErr of string
 
+(* CR jfuruse: it uses string concat. Very bad efficiency. *)
 let show =
   let rec show_aux depth = function
     | String s -> "str(" ^s^ ")"

File lib/json.mli

 exception JSON_CastErr of string
 exception JSON_UnknownErr of string
 
+(** {6 Printer and formatter} *)
 val show : t -> string
 val format : Format.formatter -> t -> unit
+
+(** {6 Object field access} *)
+
 val getf : string -> t -> t
+(** Get field from an object. Failure raises an exception. *)
+
 val getf_opt : string -> t -> t option
+(** Get field from an object. Failure is reported as [None] *)
 
-val as_bool : t -> bool
+(** {6 Coercions. They may fail and raise JSON_CastErr.} *)
+val as_bool   : t -> bool
 val as_object : t -> obj
-val as_float : t -> float
+val as_float  : t -> float
 val as_string : t -> string
-val as_list : t -> t list
-val as_int : t -> int
+val as_list   : t -> t list
+val as_int    : t -> int
 
+(** {6 Parsers} *)
 val parse_ch : in_channel -> t
 val parse : string -> t
 val parse_function : (string -> int -> int) -> t

File lib/lazyParserMonad.ml

+open Util
+open Llist
+
+type ts = char llist
+type state = int * int * (char list * char * char list)
+type error = state * string
+type 'a t = state -> ts -> ('a * state * ts, error) either Lazy.t
+
+exception ParseError of string
+
+let lt_pos (l1,p1,_ : state) (l2,p2,_ : state) =
+  if l1 < l2 then true
+  else if l1 = l2 then p1 < p2
+  else false
+
+(* get longer survivor *)
+let eplus (st1,msg1 : error) (st2,msg2 : error) =
+  if lt_pos st1 st2 then (st2,msg2) else (st1,msg1)
+
+let showerr ((line,pos,(pre,c,post)),msg : error) =
+  !%"{\n  at (%d, %d): %s\n  %S [%c] %S\n}" 
+    line pos msg 
+    (string_of_chars pre) c (string_of_chars post)
+    
+let return : 'a -> 'a t =
+    fun x ->
+      fun state code -> Lazy.from_val (Inl (x, state, code))
+
+let error msg : 'a t = fun state _code -> Lazy.from_val (Inr (state, msg))
+
+let (>>=) : 'a t -> ('a -> 'b t) -> 'b t =
+    fun p f ->
+      fun state code ->
+	lazy (match p state code with
+	  | lazy (Inl (x, state', ts)) -> Lazy.force (f x state' ts)
+	  | lazy (Inr err) -> Inr err
+        )
+	      
+let (>>) : 'a t -> 'b t -> 'b t =
+    fun p1 p2 ->
+      p1 >>= fun _ -> p2
+
+let (<.<) : 'a t -> 'b t -> 'a t =
+    fun p1 p2 ->
+      p1 >>= fun x -> p2 >> return x
+
+let ( ^? ) : 'a t -> string -> 'a t =
+    fun p msg ->
+      fun state code ->
+        lazy (match p state code with
+	| lazy (Inl l) -> Inl l
+	| lazy (Inr (st,msg0)) -> Inr (st,msg ^": "^msg0)
+        )
+    
+(* (<|>) : 'a m -> 'a m -> 'a m *)
+let (<|>) : 'a t -> 'a t -> 'a t =
+    fun p1 p2 ->
+      fun state code ->
+        lazy (match p1 state code with
+	| lazy (Inl (x1, state', ts)) -> Inl (x1, state', ts)
+	| lazy (Inr err1) ->
+	    begin match p2 state code with
+	    | lazy (Inl (x2, state', ts)) -> Inl (x2,state',ts)
+	    | lazy (Inr err2) -> Inr (eplus err1 err2)
+	    end
+        )
+
+(*
+let (<|?>) p1 p2 = fun state code ->
+  match p1 state code with
+  | Inl (x1, state', ts) -> Inl (x1, state', ts)
+  | Inr err1 ->
+      print_endline err1;
+      begin match p2 state code with
+      | Inl (x2, state', ts) -> Inl (x2,state',ts)
+      | Inr err2 -> Inr (eplus err1 err2)
+      end
+*)	
+
+let rec many : 'a t -> ('a list) t =
+    fun p ->
+      (p >>= fun x -> many p >>= fun xs -> return (x::xs))
+	<|> (return [])
+
+let many1 p =
+  p >>= fun x -> many p >>= fun xs -> return (x::xs)
+
+let sep separator p =
+  (p >>= fun x -> many (separator >> p) >>= fun xs -> return (x::xs))
+    <|> (return [])
+
+
+let opt : 'a t -> ('a option) t =
+    fun p ->
+      (p >>= fun x -> return (Some x)) <|> (return None)
+
+
+let _char1_with_debug state = function
+  | Nil -> Lazy.from_val (Inr (state,"(Nil)"))
+  | Cons (x,xs) ->
+      lazy (
+        let next (pre,x0, _) =
+  	let pre' = if List.length pre < 100 then pre @ [x0]
+  	  else List.tl pre @ [x0]
+  	in
+  	(pre' , x, Llist.take 100 !$xs)
+        in
+        match x, state with
+        | '\n', (line,_pos,cs) ->
+  	  Inl (x,(line+1,-1, next cs), !$xs)
+        | _, (line,pos,cs) ->
+  	  Inl (x,(line, pos+1, next cs),!$xs)
+      )
+
+let char1_without_debug state = function
+  | Nil -> Lazy.from_val (Inr (state,"(Nil)"))
+  | Cons (x,xs) -> Lazy.from_val (Inl (x, state, !$xs))
+
+let char1 : char t = char1_without_debug
+
+let char_when f = char1 >>= fun c ->
+  if f c then return c
+  else error (!%"(char:'%c')" c)
+
+let char c = char_when ((=) c)
+
+let keyword w =
+  let rec iter i =
+    if i < String.length w then
+      char (String.unsafe_get w i) >> iter (i+1)
+    else return w
+  in
+  iter 0
+
+let make_ident f =
+  many1 (char_when f) >>= fun cs ->
+    return (string_of_chars cs)
+
+let int =
+  opt (char '-') >>= fun minus ->
+  make_ident (function '0'..'9' -> true | _ -> false) >>= fun s ->
+  return
+    begin match minus with
+    | None -> int_of_string s
+    | Some _ -> - int_of_string s
+    end
+
+let run (p : 'a t) (state : state) (ts : ts) =
+  lazy (match p state ts with
+    | lazy (Inl (x,_state',_xs)) -> Inl x
+    | lazy (Inr err) -> Inr (showerr err)
+  )
+
+let init_state : state = (1, 0, ([],'_',[]))
+
+let run_ch p ch =
+  run p init_state (Llist.of_stream (Stream.of_channel ch))
+
+let run_stdin p = run_ch p stdin
+
+let run_file p filename =
+  open_in_with filename (fun ch -> run_ch p ch)
+
+let run_string p s =
+  run p init_state (Llist.of_string s)
+
+let run_function p f =
+  run p init_state (Llist.of_function f)
+

File lib/lazyParserMonad.mli

+type ts
+type state
+type error
+type 'a t
+
+val error : string -> 'a t
+val showerr : error -> string
+
+val return : 'a -> 'a t
+val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t
+
+val ( >> ) : 'a t -> 'b t -> 'b t
+val ( <.< ) : 'a t -> 'b t -> 'a t
+val ( ^? ) : 'a t -> string -> 'a t
+val ( <|> ) : 'a t -> 'a t -> 'a t
+
+val many : 'a t -> 'a list t
+val many1 : 'a t -> 'a list t
+val sep : 'a t -> 'b t -> 'b list t
+val opt : 'a t -> 'a option t
+
+val char1 : char t
+val char_when : (char -> bool) -> char t
+val char : char -> char t
+val keyword : string -> string t
+val make_ident : (char -> bool) -> string t
+val int : int t
+
+val init_state : state
+val run_ch : 'a t -> in_channel -> ('a, string) Util.either Lazy.t
+val run_stdin : 'a t -> ('a, string) Util.either Lazy.t
+val run_file : 'a t -> string -> ('a, string) Util.either Lazy.t
+val run_string : 'a t -> string -> ('a, string) Util.either Lazy.t
+val run_function : 'a t -> (string -> int -> int) -> ('a, string) Util.either Lazy.t

File lib/lazy_json.ml

+open Util
+open ParserMonad
+module P = ParserMonad
+
+(* CR jfuruse: We use Llist since it exists in this library,
+   but Llist is not the "lazy list". It's head is always forced...
+*)
+
+type t = desc Lazy.t 
+
+and desc =
+  | String of string Llist.t
+  | Number of string (* float is not appropriate for decoding 64bit int *)
+  | Object of obj
+  | Array of t Llist.t
+  | Bool of bool
+  | Null
+
+and obj = (string * t) Llist.t
+
+exception JSON_NotObject of t
+exception JSON_InvalidField of (string)
+exception JSON_CastErr of string
+exception JSON_UnknownErr of string
+
+let show =
+  let rec show_aux depth x = match Lazy.force x with
+    | String s -> 
+        let s = String.concat "" (Llist.to_list (Lazy.force s)) in
+        "str(" ^s^ ")"
+    | Number x -> !%"num(%s)" x
+    | Object fs ->
+	let indent d = String.make d '\t' in
+	"{\n"
+	^indent depth^ slist (",\n"^ indent depth) (fun (k,v) -> k^":"^ (show_aux (depth+1)) v) (Llist.to_list (Lazy.force fs))
+	^"\n"^indent(depth-1)^"}"
+    | Array xs -> "[" ^slist "," (show_aux depth) (Llist.to_list (Lazy.force xs)) ^ "]"
+    | Bool true -> "TRUE"
+    | Bool false -> "FALSE"
+    | Null -> "NULL"
+  in
+  show_aux 1
+
+  let rec format_list sep f ppf = function
+    | [] -> ()
+    | [x] -> f ppf x
+    | x::xs -> f ppf x; Format.fprintf ppf sep; format_list sep f ppf xs
+
+  (* CR jfuruse: Need test! *)
+  let rec format ppf x = 
+    let open Format in
+      match Lazy.force x with
+      | String ss -> 
+          let s = String.concat "" (Llist.to_list (Lazy.force ss)) in
+          let buf = Buffer.create (String.length s * 2) in
+          Buffer.add_char buf '"';
+          for i = 0 to String.length s - 1 do
+            let c = String.unsafe_get s i in
+            match c with  
+            | '"' -> Buffer.add_string buf "\\\""
+            | '\\' -> Buffer.add_string buf "\\\\"
+            | '\b' -> Buffer.add_string buf "\\b"
+            | '\012' -> Buffer.add_string buf "\\f"
+            | '\n' -> Buffer.add_string buf "\\n"
+            | '\r' -> Buffer.add_string buf "\\r"
+            | '\t' -> Buffer.add_string buf "\\t"
+            | _ when Char.code c <= 32 && c <> ' ' -> 
+                Printf.ksprintf (Buffer.add_string buf) "\\u%04X" (Char.code c)
+            | _ -> Buffer.add_char buf c
+          done;
+          Buffer.add_char buf '"';
+          pp_print_string ppf (Buffer.contents buf)
+      | Number s -> fprintf ppf "%s" s
+      | Object o -> 
+          fprintf ppf "{ @[%a }@]"
+            (format_list ",@ " (fun ppf (s,v) -> fprintf ppf "@[\"%s\": @[<2>%a@]@]" s format v)) (Llist.to_list (Lazy.force o))
+      | Array ts -> 
+          fprintf ppf "[ @[%a ]@]"
+            (format_list ",@ " format) (Llist.to_list (Lazy.force ts))
+      | Bool b -> fprintf ppf "%b" b
+      | Null -> fprintf ppf "null"
+
+let getf field t =
+  match Lazy.force t with
+  | Object o ->
+      begin try List.assoc field (Llist.to_list (Lazy.force o)) with
+      | _ -> raise (JSON_InvalidField (field))
+      end
+  | _ -> raise (JSON_NotObject t)
+
+let getf_opt field t =
+  match Lazy.force t with
+  | Object o ->
+      begin try Some (List.assoc field (Llist.to_list (Lazy.force o))) with
+      | _ -> None
+      end
+  | _ -> None
+      
+let as_bool x = match Lazy.force x with
+  | Bool true -> true
+  | Bool false -> false
+  | v -> raise (JSON_CastErr ("as_bool:" ^ show x))
+
+let as_object x = match Lazy.force x with
+  | Object obj -> obj
+  | v -> raise (JSON_CastErr ("as_object:" ^ show x))
+
+let as_float x = match Lazy.force x with
+  | Number s -> float_of_string s (* may fail, or returns wrong result *)
+  | v -> raise (JSON_CastErr ("as_float:" ^ show x))
+
+let as_string x = match Lazy.force x with
+  | String ss -> String.concat "" (Llist.to_list (Lazy.force ss))
+  | v -> raise (JSON_CastErr ("as_string:" ^ show x))
+
+let as_list x = match Lazy.force x with
+  | Array l -> l
+  | v -> raise (JSON_CastErr ("as_list:" ^ show x))
+
+let as_int x = match Lazy.force x with
+  | Number s -> int_of_string s (* may fail, or returns wrong result *)
+  | v -> raise (JSON_CastErr ("as_int:" ^ show x))
+
+
+(*parser*)
+
+let whitespace = many (char '\n' <|> char ' ' <|> char '\t' <|> char '\r')
+
+let string s =
+  let rec iter i =
+    if i < String.length s then
+      char s.[i] >> iter (i+1)
+    else return s
+  in
+  iter 0
+
+(*
+let alp =
+  char1 >>= fun c -> if c<>' ' && c<>'\n' && c<>'\t' && c<>'\r' then return c else error""
+
+let alps0 = many alp
+let alps = alp >>= fun c -> many alp >>= fun cs -> return (string_of_chars (c::cs))
+*)
+
+type token =
+  | ObjOpen
+  | ObjClose
+  | ListOpen
+  | ListClose
+  | Comma
+  | Colon
+
+  | TTrue
+  | TFalse
+  | TNull
+  | TString of string
+  | TNumber of string (* we keep the string repr. *)
+
+let lit_string =
+  let four_hex_digits =
+    let hex = char1 >>= function
+      | '0'..'9' | 'A'..'F' | 'a'..'f' as c -> return c
+      | _ -> error ""
+    in
+    hex >>= fun d1 -> hex >>= fun d2 -> hex >>= fun d3 -> hex >>= fun d4 ->
+      let s = string_of_chars [d1;d2;d3;d4] in
+      let n = int_of_string ("0x" ^ Utf16.utf16c_to_utf8c s) in
+      let m, n1 = n / (16*16), n mod (16*16) in
+      let n3,n2 = m / (16*16), m mod (16*16) in
+      let cs = List.map char_of_int
+	begin match [n3;n2;n1] with
+	| [0; 0; _] ->         [n1]
+	| [0; _; _] ->     [n2; n1]
+	| _         -> [n3; n2; n1]
+	end
+      in
+      return (string_of_chars cs)
+  in
+  let lit_char =
+    char1 >>= function
+      | '\"' -> error ""
+      | '\\' -> char1 >>=
+	  begin function
+	  | '\"' | '\\' | '/' as c -> return (string1 c)
+	  | 'b' -> return "\b"
+(*	  | 'f' -> return "\f"*)
+	  | 'n' -> return "\n"
+	  | 'r' -> return "\r"
+	  | 't' -> return "\t"
+	  | 'u' -> four_hex_digits
+	  | _ -> error ""
+	  end
+      | c -> return (string1 c)
+  in
+  char '\"' >> many lit_char >>= fun ss -> char '\"' >> return (TString (slist "" id ss))
+
+let digits =
+  let digit =
+    char1 >>= function
+      | '0'..'9' | '-' | '.' | 'e' | 'E' | '+' as c -> return c
+      | _ -> error "digit"
+  in
+  many1 digit >>= (return $ string_of_chars)
+
+let lit_number = (* TODO *)
+  (* We cannot simply use [float_of_string] here, if we want to handle int64.
+     int64 and double are both 64bits, which means double cannot express all the int64!!! 
+  *)
+  digits >>= fun x -> return (TNumber x)
+
+let token1 =
+  let aux =
+  (char '{' >> return ObjOpen)
+    <|>
+  (char '}' >> return ObjClose)
+    <|>
+  (char '[' >> return ListOpen)
+    <|>
+  (char ']' >> return ListClose)
+    <|>
+  (char ',' >> return Comma)
+    <|>
+  (char ':' >> return Colon)
+    <|>
+  (string "true" >> return TTrue)
+    <|>
+  (string "false" >> return TFalse)
+    <|>
+  (string "null" >> return TNull)
+    <|>
+  lit_string
+    <|>
+  lit_number
+  in
+  whitespace >> aux
+
+let token t =
+  token1 >>= fun x -> if t = x then return t else error "token"
+
+let json_string =
+  token1 >>= function TString s -> return s | _ -> error "json_string"
+
+let json_number =
+  token1 >>= function TNumber x -> return x | _ -> error "json_number"
+
+let rec json (): t P.t =
+  begin
+  let field =
+    json_string >>= fun key -> token Colon >> json () >>= fun v -> return (key, v)
+  in
+  (token ObjOpen >> sep (token Comma) field >>= fun fields -> token ObjClose >>
+    return @@ Object fields)
+    <|>
+  (token ListOpen >>= (fun _ -> sep (token Comma) (json()) >>= fun vs -> token ListClose >>
+    return @@ Array vs))
+    <|>
+  (token TTrue >> return (Bool true))
+    <|>
+  (token TFalse >> return (Bool false))
+    <|>
+  (token TNull >> return Null)
+    <|>
+  (json_string >>= fun s -> return @@ String s)
+    <|>
+  (json_number >>= fun x -> return @@ Number x)
+  end 
+  
+
+let parse_ch ch = run_ch (json()) ch
+
+let parse s = run_string (json()) s
+
+let parse_function f = run_function (json()) f

File lib/lazy_json.mli

+type t = desc Lazy.t
+
+and desc =
+  | String of string Llist.t (* string may be chuncked into pieces by lazy load*)
+  | Number of string (* We keep its string repr. *)
+  | Object of obj
+  | Array of t Llist.t
+  | Bool of bool
+  | Null
+and obj = (string * t) Llist.t
+
+exception JSON_NotObject of t
+exception JSON_InvalidField of string
+exception JSON_CastErr of string
+exception JSON_UnknownErr of string
+
+(** {6 Printer and formatter} *)
+val show : t -> string
+val format : Format.formatter -> t -> unit
+
+(** {6 Object field access} *)
+
+val getf : string -> t -> t
+(** Get field from an object. Failure raises an exception. *)
+
+val getf_opt : string -> t -> t option
+(** Get field from an object. Failure is reported as [None] *)
+
+(** {6 Coercions. They may fail and raise JSON_CastErr.} *)
+val as_bool   : t -> bool
+val as_object : t -> obj
+val as_float  : t -> float
+val as_string : t -> string
+val as_list   : t -> t list
+val as_int    : t -> int
+
+(** {6 Parsers} *)
+val parse_ch : in_channel -> t
+val parse : string -> t
+val parse_function : (string -> int -> int) -> t

File lib/llist.ml

 
 type 'a llist = Nil | Cons of 'a * 'a llist Lazy.t
 
+type 'a t = 'a llist Lazy.t
+
+let to_list ll =
+  let rec to_list st = function
+    | Nil -> List.rev st
+    | Cons (x, xs) ->
+        to_list (x :: st) (Lazy.force xs)
+  in
+  to_list [] ll
+
 let hd = function | Nil -> failwith "hd" | Cons (x, _xs) -> x
 let tl = function | Nil -> failwith "tl" | Cons (_x, xs) -> !$xs
 
 let of_string =
   of_stream $ Stream.of_string
 
-let rec of_function f =
+let of_function f =
   let buf = String.create 1024 in
   
   let rec fill () = 

File lib/llist.mli

 type 'a llist = Nil | Cons of 'a * 'a llist Lazy.t
 (** It is a lazy list, but the first element is already forced *)
 
+type 'a t = 'a llist Lazy.t
+
 val hd : 'a llist -> 'a
 val tl : 'a llist -> 'a llist
 val take : int -> 'a llist -> 'a list
 (** [of_function f]: [f buf len] is a filler, a function to fill [buf]
     with at most [len] chars. If it reaches the end of the input it returns [0].
 *)
+
+val to_list : 'a llist -> 'a list