Commits

camlspotter committed e78935a

recovering ocaml parser example...

  • Participants
  • Parent commits 3302cf1

Comments (0)

Files changed (9)

File lib/op_prec.mli

 
   val format_error : Format.formatter -> error -> unit
 
+  (** List parser style *)
   val parse : [ `Op of Operator.t * A.op | `Term of A.t ] list -> A.t
 
+  (** Combinator style *)
+  type t
+
+  val terminal : A.t -> t
+  val infix    : Operator.t -> A.op option -> t -> t -> t
+    (** None for function application *)
+  val prefix   : Operator.t -> A.op -> t -> t
+  val postfix  : Operator.t -> A.op -> t -> t
+  val build : t -> A.t
 end

File ocaml/OMakefile

    ocaml/utils/warnings
    ocaml/utils/terminfo
 
-   ocaml/parsing/linenum
+#   ocaml/parsing/linenum
    ocaml/parsing/location
    ocaml/parsing/syntaxerr
    ocaml/parsing/lexer
    ocaml/utils/warnings
    ocaml/utils/terminfo
 
-   ocaml/parsing/linenum
+#   ocaml/parsing/linenum
    ocaml/parsing/location
    ocaml/parsing/syntaxerr
    ocaml/parsing/lexer

File ocaml/input.ml

 open Planck
 
-module Stream = struct
+module Stream = Schar
+(*
+struct
   module Str = Stream.Make(struct
     type elem = char
     let show_elem = Printf.sprintf "%C"
     let buf st = attr st
   end)
 end
+*)
 
 module Parser = struct
   module Base = Pbase.Make(Stream)

File ocaml/lex.ml

 
 let char_or_underscores : 'a t -> string t = fun f ->
   matched (perform
-             ignore f;
-             ?* (ignore f <|> underscore))
+             void f;
+             ?* (void f <|> underscore))
 
 let decimal_literal = char_or_underscores decimal_char
 
 let bin_literal : string t = perform
   matched (perform
     zero;
-    ignore (one_of ['b'; 'B']);
-    ignore (char_or_underscores bin_char))
+    void (one_of ['b'; 'B']);
+    void (char_or_underscores bin_char))
 
 let oct_literal : string t = perform
   matched (perform
     zero;
-    ignore (one_of ['o'; 'O']);
-    ignore (char_or_underscores oct_char))
+    void (one_of ['o'; 'O']);
+    void (char_or_underscores oct_char))
 
 let hex_literal : string t = perform
   matched (perform
     zero;
-    ignore (one_of ['x'; 'X']);
-    ignore (char_or_underscores hex_char))
+    void (one_of ['x'; 'X']);
+    void (char_or_underscores hex_char))
 
 let int_literal = (hex_literal <!> bin_literal <!> oct_literal) </> decimal_literal
 
 (* CR jfuruse: it is not regexp like. No backtracking... *)
 let float_literal = perform
   matched (perform
-             ignore (char_or_underscores decimal_char);
+             void (char_or_underscores decimal_char);
              option_ (perform
                         token '.';
-                        option_ (ignore (char_or_underscores decimal_char)));
-             option_ (seq_ [ ignore (one_of ['e'; 'E']);
-                             option_ (ignore (one_of ['-'; '+']));
-                             ignore (char_or_underscores decimal_char) ]))
+                        option_ (void (char_or_underscores decimal_char)));
+             option_ (seq_ [ void (one_of ['e'; 'E']);
+                             option_ (void (one_of ['-'; '+']));
+                             void (char_or_underscores decimal_char) ]))
 
 let float = perform
   str <-- float_literal;
     return (NATIVEINT (Nativeint.of_string s))
   with Failure _ -> critical_error pos "nativeint literal overflow"
 
-let newline = string "\r\n" <|> ignore (one_of ['\n'; '\r'])
-let blank = ignore (one_of [' '; '\009'; '\012'])
+let newline = string "\r\n" <|> void (one_of ['\n'; '\r'])
+let blank = void (one_of [' '; '\009'; '\012'])
 let lowercase = tokenp (function
   | 'a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' -> true
   | _ -> false) <?> "lowercase char"
   comment levs
 
   <!> (perform
-         ignore (string "*)");
+         void (string "*)");
          (* CR jfuruse: it's side effective and may not work! *)
          match levs with
          | [] -> assert false (* since it has seen at least one beginning of comment! *)
   <!> (perform
          pos <-- position;
          token '"';
-         ignore (string_internal pos []); (* CR jfuruse; the result of string_internal is useless... 
+         void (string_internal pos []); (* CR jfuruse; the result of string_internal is useless... 
                                              concatenation could be outside of string_internal *)
          (* CR jfuruse: error handling is not done
             (EOS is done but no check of nice string in comment thing)
          comment_internal levs)
   <!> (perform
          token '\'';
-         ignore (tokenp (function
+         void (tokenp (function
            | '\\' | '\'' | '\010' | '\013' -> false
            | _ -> true));
          token '\'';
          comment_internal levs)
   <!> (perform
          token '\'';
-         ignore (one_of ['\\'; '"'; '\''; 'n'; 't'; 'b'; 'r'; ' ']);
+         void (one_of ['\\'; '"'; '\''; 'n'; 't'; 'b'; 'r'; ' ']);
          token '\'';
          comment_internal levs)
   <!> (perform
          token '\'';
-         ignore decimal_char;
-         ignore decimal_char;
-         ignore decimal_char;
+         void decimal_char;
+         void decimal_char;
+         void decimal_char;
          token '\'';
          comment_internal levs)
   <!> (perform
          string "\\x";
-         ignore hex_char;
-         ignore hex_char;
+         void hex_char;
+         void hex_char;
          token '\'';
          comment_internal levs)
 
          newline;
          comment_internal levs)
   <!> (perform
-         ignore take;
+         void take;
          comment_internal levs)
 
 let comment () = comment []
                token '*';
                return_with_pos STAR))
 
-       <!> (matched (token '*' >>= fun _ -> ignore (takeWhile  is_symbolchar)) >>= function
+       <!> (matched (token '*' >>= fun _ -> void (takeWhile  is_symbolchar)) >>= function
               | "*" -> return_with_pos STAR
               | s ->
                   if String.unsafe_get s 1 = '*' then return_with_pos (INFIXOP4 s) (* ** case *)

File ocaml/plparser.ml

 (* header *)
+module X = Location
 open Parsing
 open Token
 open Planck
 open Token.Parser
 open Plphelper
 
+open Spotlib.Spot
 
 open Location
 open Asttypes
 let reloc_pat x = { x with ppat_loc = symbol_rloc () };;
 let reloc_exp x = { x with pexp_loc = symbol_rloc () };;
 
+let putloc txt = {txt; loc=X.none}
 let mkoperator name pos =
-  { pexp_desc = Pexp_ident(Lident name); pexp_loc = rhs_loc pos }
+  { pexp_desc = Pexp_ident(putloc & Lident name); pexp_loc = rhs_loc pos }
 
 (*
   Ghost expressions and patterns:
 
 let mkassert e =
   match e with
-  | {pexp_desc = Pexp_construct (Lident "false", None, false); _ } ->
+  | {pexp_desc = Pexp_construct ({X.txt= Lident "false"; _}, None, false); _ } ->
          mkexp (Pexp_assertfalse)
   | _ -> mkexp (Pexp_assert (e))
 ;;
 
 let rec mktailexp = function
     [] ->
-      ghexp(Pexp_construct(Lident "[]", None, false))
+      ghexp(Pexp_construct(putloc & Lident "[]", None, false))
   | e1 :: el ->
       let exp_el = mktailexp el in
       let l = {loc_start = e1.pexp_loc.loc_start;
                loc_ghost = true}
       in
       let arg = {pexp_desc = Pexp_tuple [e1; exp_el]; pexp_loc = l} in
-      {pexp_desc = Pexp_construct(Lident "::", Some arg, false); pexp_loc = l}
+      {pexp_desc = Pexp_construct(putloc & Lident "::", Some arg, false); pexp_loc = l}
 
 let rec mktailpat = function
     [] ->
-      ghpat(Ppat_construct(Lident "[]", None, false))
+      ghpat(Ppat_construct(putloc & Lident "[]", None, false))
   | p1 :: pl ->
       let pat_pl = mktailpat pl in
       let l = {loc_start = p1.ppat_loc.loc_start;
                loc_ghost = true}
       in
       let arg = {ppat_desc = Ppat_tuple [p1; pat_pl]; ppat_loc = l} in
-      {ppat_desc = Ppat_construct(Lident "::", Some arg, false); ppat_loc = l}
+      {ppat_desc = Ppat_construct(putloc & Lident "::", Some arg, false); ppat_loc = l}
 
 let ghstrexp e =
   { pstr_desc = Pstr_eval e; pstr_loc = {e.pexp_loc with loc_ghost = true} }
   let get = if !Clflags.fast then "unsafe_get" else "get" in
   match bigarray_untuplify arg with
     [c1] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array1" get)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array1" get)),
                        ["", arr; "", c1]))
   | [c1;c2] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array2" get)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array2" get)),
                        ["", arr; "", c1; "", c2]))
   | [c1;c2;c3] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array3" get)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array3" get)),
                        ["", arr; "", c1; "", c2; "", c3]))
   | coords ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Genarray" "get")),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Genarray" "get")),
                        ["", arr; "", ghexp(Pexp_array coords)]))
 
 let bigarray_set arr arg newval =
   let set = if !Clflags.fast then "unsafe_set" else "set" in
   match bigarray_untuplify arg with
     [c1] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array1" set)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array1" set)),
                        ["", arr; "", c1; "", newval]))
   | [c1;c2] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array2" set)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array2" set)),
                        ["", arr; "", c1; "", c2; "", newval]))
   | [c1;c2;c3] ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Array3" set)),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Array3" set)),
                        ["", arr; "", c1; "", c2; "", c3; "", newval]))
   | coords ->
-      mkexp(Pexp_apply(ghexp(Pexp_ident(bigarray_function "Genarray" "set")),
+      mkexp(Pexp_apply(ghexp(Pexp_ident(putloc& bigarray_function "Genarray" "set")),
                        ["", arr;
                         "", ghexp(Pexp_array coords);
                         "", newval]))
   else raise (Syntaxerr.Error(Syntaxerr.Applicative_path (symbol_rloc())))
 
 let exp_of_label lbl =
-  mkexp (Pexp_ident(Lident(Longident.last lbl)))
+  mkexp (Pexp_ident(putloc& Lident(Longident.last lbl)))
 
 let pat_of_label lbl =
-  mkpat (Ppat_var(Longident.last lbl))
+  mkpat (Ppat_var(putloc& Longident.last lbl))
 
 (* ops! *)
 

File ocaml/plphelper.ml

 let level = ref 0
 let do_debug = ref false
 
-let memoize = Token.Stream.memoize 
-
 (* Non memoization: it IS REALLY SLOW *)
 (* let memoize _k f v = f v *)
 
 
 let rule (name : string) (m : unit -> ('a * Position.Region.t) Token.Parser.t) : 'a Token.Parser.t = 
   (* This eta is required to avoid inf loop and segfault by stack overflow *) 
-  fun st -> with_rhs name (memoize name (m ())) st
+  let m' = Token.Stream.Memo.register (m ()) in
+  fun st -> with_rhs name (Token.Stream.Memo.call m') st
 
 let get_poses () = 
   match !rhs_tbl_stack with
          \ begin match st_start with
          | None -> assert false
          | Some st -> 
-             let elts = List.map fst (Token.Stream.between st st') in
+             let elts = Token.Stream.between st st' in
              Format.eprintf "%s<- %s : %s@."
                (String.make (!level*2) ' ') 
                name 
   
 let dummy = error "dummy" (* to have <|> for each real rule *)
 
-open Planck.Op_prec
+open Planck.Op_prec.Operator
 
 let infixop0 = { prec = 0.0; kind = `Infix `Left }
 let infixop1 = { prec = 1.0; kind = `Infix `Right }
 let infixop3 = { prec = 3.0; kind = `Infix `Left }
 let infixop4 = { prec = 4.0; kind = `Infix `Right }
 
-
-let _ = 
-  let old_find = !Planck.Op_prec.find in
-  Planck.Op_prec.find := fun op ->
+let opfind op = 
     match op with
     | ""          -> assert false
-    | "<-"        -> { prec = -8.0; kind = `Noassoc }
+    | "<-"        -> { prec = -8.0; kind = `Infix `Noassoc }
     | ":="        -> { prec = -7.0; kind = `Infix `Right }
-    | "as"        -> { prec = -6.0; kind = `Noassoc }
+    | "as"        -> { prec = -6.0; kind = `Infix `Noassoc }
     | "|"         -> { prec = -5.0; kind = `Infix `Left }
     | ","         -> { prec = -4.0; kind = `Infix `Left }
     | "->"        -> { prec = -3.0; kind = `Infix `Right }
     | "mod" | "land" | "lor" | "lxor" -> infixop3
     | "**" | "lsl" | "lsr" | "asr" -> infixop4
 
-    | "prec_unary_minus" | "prec_unary_plus" -> { prec = 5.0  ; kind = `Noassoc }
-    | "prec_constant_constructor"            -> { prec = 6.0  ; kind = `Noassoc }
-    | "prec_constr_appl"                     -> { prec = 7.0  ; kind = `Noassoc }
-    | "below_SHARP"                          -> { prec = 8.0  ; kind = `Noassoc }
-    | "#"                                    -> { prec = 9.0  ; kind = `Noassoc }
-    | "below_DOT"                            -> { prec = 10.0 ; kind = `Noassoc }
-    | "."                                    -> { prec = 11.0 ; kind = `Noassoc }
-    | "!" | "`"                              -> { prec = 12.0 ; kind = `Noassoc }
+    | "prec_unary_minus" | "prec_unary_plus" -> { prec = 5.0  ; kind = `Infix `Noassoc }
+    | "prec_constant_constructor"            -> { prec = 6.0  ; kind = `Infix `Noassoc }
+    | "prec_constr_appl"                     -> { prec = 7.0  ; kind = `Infix `Noassoc }
+    | "below_SHARP"                          -> { prec = 8.0  ; kind = `Infix `Noassoc }
+    | "#"                                    -> { prec = 9.0  ; kind = `Infix `Noassoc }
+    | "below_DOT"                            -> { prec = 10.0 ; kind = `Infix `Noassoc }
+    | "."                                    -> { prec = 11.0 ; kind = `Infix `Noassoc }
+    | "!" | "`"                              -> { prec = 12.0 ; kind = `Infix `Noassoc }
 (*
 /* Finally, the first tokens of simple_expr are above everything else. */
 %nonassoc BACKQUOTE BANG BEGIN CHAR FALSE FLOAT INT INT32 INT64
         | '@' | '^' -> infixop1
         | '+' | '-' -> infixop2
         | '*' | '/' -> infixop3
-        | _ -> old_find op
+        | _ -> assert false

File ocaml/token.ml

+open Spotlib.Spot
 open Planck
 open Sexplib.Conv
 
     let equal (x : string) y = x = y
   end
 
+(*
   module Memo = Hashtbl.Make(MemoKey)
+*)
 
   module Base = struct
-    type elem = t
-    let show_elem = show
-    let equal_elem = equal
+    module Elem = struct
+      type t = _token
+      let show = show
+      let format ppf = Format.pp_print_string ppf ** show 
+      let equal = equal
+      let compare = compare
+    end
     module Pos = Position.Region
-    type attr = Pos.t option (* last consumed token position *) * Pos.t * (Obj.t, exn) Result.t Memo.t
-    let position_of_attr (_,pos,_) = pos
-    let last_position_of_attr (last_pos,_,_) = last_pos 
-    let default_attr = None, Pos.none, Memo.create 1
-    let memo_of_attr (_,_,memo) = memo
-    let buf_of_attr (_,buf,_) = buf
+    module Attr = struct
+      type t = Pos.t option (* last consumed token position *) * Pos.t * Smemo.memo
+      let position (_,pos,_) = pos
+      let last_position (last_pos,_,_) = last_pos 
+      let default = None, Pos.none, Smemo.create ()
+      let memo (_,_,memo) = memo
+      let buf (_,buf(*,_*)) = buf
+    end
   end
 
   module Str = Stream.Make(Base)
 
   include Smemo.Extend(struct
     include Str
+(*
     module Memo = Memo
-    let memo st = Base.memo_of_attr (attr st)
+*)
+    let memo = Base.Attr.memo
   end)
 
   (* CR jfuruse: generalize it and port back to Planck.Core *)
   let create (m : ('a option * Position.Region.t) Input.Parser.t) = fun st ->
     let rec f last_pos st = lazy begin
       match Input.Parser.run m st with
-      | Result.Ok ((None, pos), _st') -> null_desc (last_pos, pos, Memo.create 107) (* EOS case *)
-      | Result.Ok ((Some v, pos), st') -> cons_desc v (last_pos, pos, Memo.create 107) (f (Some pos) st')
+      | Result.Ok ((None, pos), _st') -> null_desc (last_pos, pos, Smemo.create ()) (* EOS case *)
+      | Result.Ok ((Some v, pos), st') -> cons_desc v (last_pos, pos, Smemo.create ()) (f (Some pos) st')
       | Result.Error (pos, s) -> raise (Input.Parser.Critical_error (pos, s))
     end
     in
     f None st
   ;;
 
-  let last_position st : Position.Region.t option = Base.last_position_of_attr (attr st)
+  let last_position st : Position.Region.t option = Base.Attr.last_position (attr st)
 end
 
 module Parser = struct

File ocaml/tokentest.ml

 open Planck
 
 let rec parse_and_print stream = 
-  match Token.Stream.peek stream with
-  | None -> ()
-  | Some (elem, (_,pos,_), stream') ->
+  match Token.Stream.peek stream, Token.Stream.position stream with
+  | None, _ -> ()
+  | Some (elem, stream'), pos ->
       Format.eprintf "%s @@ %a@."
         (Sexplib.Sexp.to_string_hum (Token.sexp_of_t elem))
         Position.Region.format pos;

File test/expr.ml

 (* parsing rules *)
 let blank = void (one_of [' '; '\t'; '\n'; '\r'])
 
-let rec simple_expr st = begin
+let rec simple_expr st = (fun e -> e st) & 
   
   (* Skip spaces *)
   ?* blank >>= fun () -> 
        ?* blank >>= fun () ->
        token ')' >>= fun () -> 
        return (`Term e))
-end st
 
 and constant st = begin
   (* [0-9]+ *)