Commits

camlspotter committed b7692c9

identifiers

  • Participants
  • Parent commits fa0c66d

Comments (0)

Files changed (4)

-include Planck.String(Sbuffer)
-
 module OCaml = struct
   module Char = Char
 end
 
+module type SPlankSbuffer = module type of Planck.String(Sbuffer)
+
+module P : sig
+  include SPlankSbuffer (* OCaml cannot permit writing module type of Planck.String(Sbuffer) *)
+  type mark
+  val mark : mark t
+  val string_from_mark : mark -> string t
+  val substr : 'a t -> string t
+end = struct
+  include Planck.String(Sbuffer)
+
+  type mark = Sbuffer.t * int (* abs_pos *)
+
+  let mark : mark t = fun st -> 
+    let abs_pos = (Sbuffer.pos st).Sbuffer.Position.byte in
+    Result.Ok ((st, abs_pos), st)
+
+  let string_from_mark : mark -> string t = fun (sbuf, start_pos) st ->
+    let end_pos = (Sbuffer.pos st).Sbuffer.Position.byte in
+    let str = Sbuffer.substr sbuf start_pos (end_pos - start_pos) in
+    Result.Ok (str, st)
+
+  let substr : 'a t -> string t = fun t ->
+    mark >>= fun mark ->
+    t >>= fun _ ->
+    string_from_mark mark
+end
+
+include P
+
 module Literal = struct
 
   module Char = struct
   end
 
   module Num = struct
+
+    type nat = { base : int;
+		 rev_nums : int list }
+
     let code_0 = OCaml.Char.code '0' 
     let code_a = OCaml.Char.code 'a'
     let code_A = OCaml.Char.code 'A'
-    let rec base b st = function
-      | [] -> st
-      | x::xs ->
-	  let code_orig = match x with
-	    | '0'..'9' -> code_0
-	    | 'a'..'f' -> code_a - 10
-	    | 'A'..'F' -> code_A - 10
-	    | _ -> assert false
-	  in
-	  base b (st * b + OCaml.Char.code x - code_orig) xs
+
+    let rev_nums = List.rev_map (function c ->
+      let offset = match c with 
+	| '0'..'9' -> code_0
+	| 'a'..'f' -> code_a - 10
+	| 'A'..'F' -> code_A - 10
+	| _ -> assert false
+      in
+      OCaml.Char.code c - offset)
 
     let digit =
       plus Char.digit_char >>= fun chars ->
-	return (base 10 0 chars)
+	return { base = 10; rev_nums =  rev_nums chars }
     let oct =
       string "0o" >>= fun _ ->
 	plus Char.oct_char >>= fun chars ->
-	  return (base 8 0 chars)
+	  return { base = 8; rev_nums = rev_nums chars }
     let hex =
       string "0x" >>= fun _ ->
 	plus Char.hex_char >>= fun chars ->
-	  return (base 16 0 chars)
+	  return { base = 16; rev_nums = rev_nums chars }
 
     let nat = digit <|> oct <|> hex
-	(* CR jfuruse: we must take the longest match, 
-	   to make this ordering working *)
+    (* CR jfuruse: we must take the longest match, 
+       to make this ordering working *)
 
     (* CR jfuruse: no overflow. No minus. *)
+    (* Order is important *)
     let nat = oct <|> hex <|> digit
-	(* Order is important *)
+
+    let nat_int = nat >>= fun t ->
+      let base = t.base in
+      let rec to_int st mul = function
+	| [] -> st
+	| x::xs -> to_int (st + x * mul) (mul * base) xs
+      in
+      return (to_int 0 1 t.rev_nums)
   end
 
   let char = Char.char
   let string = String.string
   let nat = Num.nat
+  let nat_int = Num.nat_int
 end
 
+module Identifier = struct
+  (* [A-Za-z0-9_'] *)
+  let char_uppercase = tokenp "[A-Z] expected" (function
+    | 'A'..'Z' -> true
+    | _ -> false)
 
+  let char_lowercase = tokenp "[a-z] expected" (function
+    | 'a'..'z' -> true
+    | _ -> false)
+
+  let char_lowercase_symbol = tokenp "[_\'] expected" (function
+    | '_' | '\'' -> true
+    | _ -> false)
+
+  let char_lowercase_head_symbol = tokenp "[_] expected" (function
+    | '_' -> true
+    | _ -> false)
+
+  let lowercase : string t = 
+    substr ((char_lowercase_head_symbol <|> char_lowercase) >>= fun _ -> 
+            star (char_uppercase <|> char_lowercase <|> char_lowercase_symbol <|> Literal.Char.digit_char))
+
+  let uppercase : string t =
+    substr (char_uppercase >>= fun _ -> 
+            star (char_uppercase <|> char_lowercase <|> char_lowercase_symbol <|> Literal.Char.digit_char))
+end
   let substr t start_abs_pos len =
     if t.abs_pos.Position.byte > start_abs_pos then failwith "Sbuffer.substr: start_abs_pos is over";
     let buffer = Buffer.create len in
-    let rec substr buf buf_pos pos len =
+    let rec substr stream stream_pos (* abs position of head of buf *) pos len =
       if len = 0 then ()
-      else match Simple_string.take buf with
+      else match Simple_string.take stream with
       | None -> failwith "Sbuffer.substr: end of stream"
-      | Some (string, buf') ->
+      | Some (string, stream') ->
 	  let len_string = String.length string in
-	  let buf_pos' = buf_pos + len_string in
-	  if buf_pos' <= pos then substr buf' buf_pos' pos len
+	  let stream_pos' = stream_pos + len_string in (* abs position of head of stream' *)
+	  if stream_pos' <= pos then substr stream' stream_pos' pos len (* [string] is before [pos] *)
 	  else 
-	    let start = pos - buf_pos in
-	    let copy_len = min len (len_string - buf_pos) in
-	    if copy_len <= 0 then ()
-	    else begin
-	      Buffer.add_substring buffer string start copy_len;
-	      let len' = len - copy_len in
-	      substr buf' buf_pos' buf_pos' len'
-	    end
+	    let start = pos - stream_pos in
+	    let copy_len = min len (len_string - start) in
+	    Buffer.add_substring buffer string start copy_len;
+	    let len' = len - copy_len in
+	    if len' <= 0 then () else substr stream' stream_pos' stream_pos' len'
     in
-    Buffer.contents buffer
+    substr t.buf 0 start_abs_pos len;
+    let s = Buffer.contents buffer in
+    assert (String.length s = len);
+    s
 
   let pos t = t.abs_pos
 
 (* Stream specialized for String *)
 
 module Position : sig
+  (** Position in stream *)
   type t = {
     byte : int; (* in bytes from 0 *)
     line : int; (* from 1 *)
   include Position.S with type t := t
 
   val add_newlines : t -> int -> t
+    (** increment line and byte, and reset column *)
+
   val add_columns : t -> int -> t
+    (** increment byte and column *)
 end
 
 module type S = sig
   include Stream.S with type elem = char
                    and  type gen  = string Stream.simple_gen
   val pos : t -> Position.t
+    (** get the current position of the stream *)
+
   val substr : t -> int -> int -> string
+    (** [substr t pos len] gets the substring of the stream from the
+	position byte [pos] with length [len] bytes *)
+
   val from_string : string -> t
+    (** build a stream from string *)
 end
 
 include S
   assert (test Literal.char "'\\065'" = Some 'A');
   assert (test Literal.string "\"hello world\"" = Some "hello world");
   assert (test Literal.string "\"hello\\nworld\"" = Some "hello\nworld");
-  assert (test Literal.Num.digit "123" = Some 123);
-  assert (test Literal.Num.oct "0o10" = Some 8);
-  assert (test Literal.Num.hex "0x10" = Some 16);
-  assert (test Literal.nat "0x10" = Some 16);
+  assert (test Literal.nat_int "0123" = Some 123);
+  assert (test Literal.nat_int "0o10" = Some 8);
+  assert (test Literal.nat_int "0x10" = Some 16);
+  assert (test Identifier.lowercase "hello42World_\'*" = Some "hello42World_\'");
+  assert (test Identifier.uppercase "Hello42World_\'*" = Some "Hello42World_\'");
   prerr_endline "test done"