Commits

camlspotter committed f0b11b5 Merge

pbuffer enhancement (ugg, no real improvement, though)

Comments (0)

Files changed (11)

 
 # BYTE_ENABLED= true
 
-OCAMLINCLUDES +=
+OCAMLINCLUDES += ..
 
-OCAMLFLAGS    += -annot -w Ae -I ocaml/parsing -I ocaml/utils -I ..
+OCAMLFLAGS    += -annot -w Ae -I ocaml/parsing -I ocaml/utils
 OCAMLCFLAGS   +=
 OCAMLOPTFLAGS += 
 OCAML_LINK_FLAGS += 
 %.out.ml: %.ml ../../pa_monad_custom/pa_monad.cmo ../pa_bind_inline/pa_bind_inline.cmo
 	camlp4o -printer OCaml ../../pa_monad_custom/pa_monad.cmo ../pa_bind_inline/pa_bind_inline.cmo $< > $@
 
-# OCAMLDEPFLAGS= -syntax camlp4o -package sexplib.syntax,monad -I ..
-# OCAMLPPFLAGS= -syntax camlp4o -package sexplib.syntax,monad
 OCAMLDEPFLAGS= -syntax camlp4o -package sexplib.syntax,monad -ppopt ../pa_bind_inline/pa_bind_inline.cmo
 OCAMLPPFLAGS= -syntax camlp4o -package sexplib.syntax,monad -ppopt ../pa_bind_inline/pa_bind_inline.cmo
+# OCAMLDEPFLAGS= -syntax camlp4o -package sexplib.syntax,monad
+# OCAMLPPFLAGS= -syntax camlp4o -package sexplib.syntax,monad
 
 FILES[] = 
    ../planck
 let uppercase = tokenp (function
   | 'A'..'Z' | '\192'..'\214' | '\216'..'\222' -> true
   | _ -> false) <?> "uppercase char"
-let identchar = tokenp (function
+let is_identchar = function
   | 'A'..'Z' | 'a'..'z' | '_' | '\192'..'\214' | '\216'..'\246' | '\248'..'\255' | '\'' | '0'..'9' -> true
-  | _ -> false) <?> "ident char"
-let symbolchar = one_of ['!'; '$'; '%'; '&'; '*'; '+'; '-'; '.'; '/'; ':'; '<'; '='; '>'; '?'; '@'; '^'; '|'; '~']
+  | _ -> false
+let identchar = tokenp is_identchar <?> "ident char"
+let is_symbolchar = function
+  | '!' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | '<' | '=' | '>' | '?' | '@' | '^' | '|' | '~' -> true
+  | _ -> false
+let symbolchar = tokenp is_symbolchar
 
 let rec string_internal pos buf = 
   (perform
        
          (perform 
             newline;
-            ignore (?* (one_of [' '; '\t']));
+            ??* (function ' ' | '\t' -> true | _ -> false);
             string_internal pos buf)
 
          <|> (perform
 
 let comment () = comment []
 
-let lident = matched (lowercase >>= fun _ -> ?* identchar)
-let uident = matched (uppercase >>= fun _ -> ?* identchar)
+let lident = matched (lowercase >>= fun _ -> ??* is_identchar)
+let uident = matched (uppercase >>= fun _ -> ??* is_identchar)
 
 (* token in OCaml. ``token'' is already used as Planck.token *) 
 let rec ocaml_token () : (Token.t * Str.Pos.t) t = perform
           if Hashtbl.mem keyword_table name then error "keyword as label" (* CR jfuruse: not reported! *)
           else return_with_pos (LABEL name))
           
-       <!> (matched (perform token '~'; ?* symbolchar) >>= function
+       <!> (matched (perform token '~'; ??* is_symbolchar) >>= function
               | "~" -> return_with_pos TILDE
               | s -> return_with_pos (PREFIXOP s)))
 
                token '*';
                return_with_pos STAR))
 
-       <!> (matched (token '*' >>= fun _ -> ?* symbolchar) >>= function
+       <!> (matched (token '*' >>= fun _ -> ignore (takeWhile  is_symbolchar)) >>= function
               | "*" -> return_with_pos STAR
               | s ->
                   if s.[1] = '*' then return_with_pos (INFIXOP4 s) (* ** case *)
   <|> (* case of '#' *)
       ((perform
           token '#';
-          ?* (one_of [' '; '\t']);
+          ??* (function ' ' | '\t' -> true | _ -> false);
           num <-- matched (?+ decimal_char);
-          ?* (one_of [' '; '\t']);
+          ??* (function ' ' | '\t' -> true | _ -> false);
           name_opt <-- option (perform
                                  token '"';
-                                 name <-- matched ( ?* (tokenp (function '\010' | '\013' | '"' -> false
-                                                                       | _ -> true) ) );
+                                 name <-- ??** (function '\010' | '\013' | '"' -> false
+                                                       | _ -> true);
                                  token '"';
                                  return name);
-          ?* (tokenp (function '\010' | '\013' -> false
-                             | _ -> true) );
+          ??* (function '\010' | '\013' -> false
+                      | _ -> true);
           newline;
  
           (* CR jfuruse: not tested well.. *)
               if Hashtbl.mem keyword_table name then error "keyword as label" (* CR jfuruse: not reported! *)
               else return_with_pos (OPTLABEL name))
            (* We need the following backtrack for ?label (without :) *)
-           <!> (matched (?* symbolchar) >>= 
+           <!> (??** is_symbolchar >>= 
                   function
                     | "" -> return_with_pos QUESTION
                     | "?" -> return_with_pos QUESTIONQUESTION
 
   (* CR jfuruse: Should be extremely slow *)      
   <|> (
-       (matched (token '&' >>= fun () -> ?* symbolchar) >>= 
+       (matched (token '&' >>= fun () -> ??* is_symbolchar) >>= 
           function
             | "&&" -> return_with_pos AMPERAMPER
             | "&" -> return_with_pos AMPERSAND
             | s -> return_with_pos (INFIXOP0 s))
 
-       <|> (matched (token '-' >>= fun () -> ?* symbolchar) >>=
+       <|> (matched (token '-' >>= fun () -> ??* is_symbolchar) >>=
               function
                 | "-" ->  return_with_pos MINUS
                 | "-." ->  return_with_pos MINUSDOT
        <|> (token ')'  >>= fun () -> return_with_pos RPAREN)
 
        <|> (string "|]" >>= fun () -> return_with_pos BARRBRACKET)
-       <|> (matched (token '|' >>= fun _ -> ?* symbolchar) >>=
+       <|> (matched (token '|' >>= fun _ -> ??* is_symbolchar) >>=
               function
                 | "|" -> return_with_pos BAR
                 | "||" -> return_with_pos BARBAR
        <|> (string ";;" >>= fun () -> return_with_pos SEMISEMI)
        <|> (token ';'  >>= fun () -> return_with_pos SEMI)
 
-       <|> (matched (token '<' >>= fun _ -> ?* symbolchar) >>=
+       <|> (matched (token '<' >>= fun _ -> ??* is_symbolchar) >>=
               function
                 | "<-" -> return_with_pos LESSMINUS
                 | "<" -> return_with_pos LESS
                 | s -> return_with_pos (INFIXOP0 s))
 
-       <|> (matched (token '=' >>= fun () -> ?* symbolchar) >>=
+       <|> (matched (token '=' >>= fun () -> ??* is_symbolchar) >>=
               function
                 | "=" -> return_with_pos EQUAL
                 | s -> return_with_pos (INFIXOP0 s))
 
        <|> (string ">]" >>= fun () -> return_with_pos GREATERRBRACKET)
        <|> (string ">}" >>= fun () -> return_with_pos GREATERRBRACE)
-       <|> (matched (token '>' >>= fun () -> ?* symbolchar) >>=
+       <|> (matched (token '>' >>= fun () -> ??* is_symbolchar) >>=
               function
                 | ">" -> return_with_pos GREATER
                 | s -> return_with_pos (INFIXOP0 s))
 
        <|> (string "}"  >>= fun () -> return_with_pos RBRACE)
 
-       <|> (matched (token '!' >>= fun () -> ?* symbolchar) >>=
+       <|> (matched (token '!' >>= fun () -> ??* is_symbolchar) >>=
               function
                 | "!" -> return_with_pos BANG
                 | "!=" -> return_with_pos (INFIXOP0 "!=")
                 | s -> return_with_pos (PREFIXOP s))
 
-       <|> (matched (token '+' >>= fun () -> ?* symbolchar) >>=
+       <|> (matched (token '+' >>= fun () -> ??* is_symbolchar) >>=
               function
                 | "+" -> return_with_pos PLUS
                 | "+." -> return_with_pos PLUSDOT
                 | s -> return_with_pos (INFIXOP2 s))
 
-       <|> (matched (token '$' >>= fun () -> ?* symbolchar) 
+       <|> (matched (token '$' >>= fun () -> ??* is_symbolchar) 
             >>= fun s -> return_with_pos (INFIXOP0 s))
-       <|> (matched (one_of ['@'; '^'] >>= fun _ -> ?* symbolchar) 
+       <|> (matched (one_of ['@'; '^'] >>= fun _ -> ??* is_symbolchar) 
             >>= fun s -> return_with_pos (INFIXOP1 s))
-       <|> (matched (one_of ['/'; '%'] >>= fun _ -> ?* symbolchar) 
+       <|> (matched (one_of ['/'; '%'] >>= fun _ -> ??* is_symbolchar) 
             >>= fun s -> return_with_pos (INFIXOP3 s)))
 
   <|> (take >>= fun c -> error (sprintf "ocaml lexer: illegal character %C" c) <?@> start_pos)
 let skip_sharp_bang = 
   (perform
      string "#!";
-     ?* (tokenp ((<>) '\n'));
+     ??* ((<>) '\n');
      token '\n';
-     ?* (tokenp ((<>) '\n'));
+     ??* ((<>) '\n');
      string "\n!#\n";
      return ())
   <!> (perform 
          string "#!";
-         ?* (tokenp ((<>) '\n'));
+         ??* ((<>) '\n');
          token '\n';
          return ())
   <!> return ()

ocaml/plphelper.ml

   | (rhs_counter, rhs_tbl, _) :: _ -> dump_rhs_tbl rhs_counter rhs_tbl
 
 (* CR jfuruse: Eeek! Side effects require eta! *)
-let with_rhs (name : string) (t : ('a * Position.Region.t) Token.Parser.t) : 'a Token.Parser.t = fun st -> (perform
+let with_rhs (name : string) (t : ('a * Position.Region.t) Token.Parser.t) : 'a Token.Parser.t = fun st -> Profile.incr (); (perform
   let cntr = push_rhs_tbl name in
   (* with_region misses the first location, if the first rule consumes nothing *)
   res <-- result t;
 *)
 
 let rule (name : string) (m : unit -> ('a * Position.Region.t) Token.Parser.t) : 'a Token.Parser.t = 
-  fun st -> with_rhs name (memoize name (m ())) st
+  with_rhs name (memoize name (m ()))
 
 let get_poses () = 
   match !rhs_tbl_stack with
       in
       fold [] (!rhs_counter - 1)
 
+(*
 let case (name : string) (t : (unit -> 'a) Token.Parser.t) : ('a * Planck.Position.Region.t) Token.Parser.t = fun st -> 
+  Profile.incr ();
+  if !do_debug then prerr_endline (String.make (!level*2) ' ' ^ "-> " ^ name);
+  let st_start = if !do_debug then Some st else None in (* Make [st] GCed as early as possible *) 
+
+  incr level;
+
+  let t = perform
+    last_pos <-- last_position; (* bad for nonleftrec... *)
+    f <-- t;
+    let poses = get_poses () in
+    (* action require symbol_rloc_ref is set *)
+    let symbol_rloc = calc_symbol_rloc last_pos poses in 
+    \ symbol_rloc_ref := symbol_rloc;
+    return (f (), 
+            calc_rhs_loc last_pos poses)
+  in
+
+  (result t >>= fun res ->
+   
+   decr level;
+
+   match res with
+   | Result.Ok (_, rhs_loc as v) -> 
+       if !do_debug then begin perform
+         st' <-- stream;
+         \ begin match st_start with
+         | None -> assert false
+         | Some st -> 
+             let elts = List.map fst (Token.Stream.between st st') in
+             Format.eprintf "%s<- %s : %s@."
+               (String.make (!level*2) ' ') 
+               name 
+               (Sexplib.Sexp.to_string_hum (Sexplib.Conv.sexp_of_list Token.sexp_of_t elts));
+             Format.eprintf "%s   symbol_rloc: %a@."
+               (String.make (!level*2) ' ') 
+              Planck.Position.Region.format !symbol_rloc_ref;
+             Format.eprintf "%s   rhs_loc:     %a@."
+               (String.make (!level*2) ' ') 
+               Planck.Position.Region.format rhs_loc;
+             match !rhs_tbl_stack with
+             | [] -> assert false
+             | (rhs_counter, rhs_tbl, _) :: _ ->
+                 for i = 1 to !rhs_counter-1 do
+                   Format.eprintf "%s   %d : %a@."
+                     (String.make (!level*2) ' ') 
+                     i
+                     Planck.Position.Region.format 
+                     (try Hashtbl.find rhs_tbl i with Not_found -> Position.Region.none);
+                 done;
+                 Format.eprintf "%s   END@." (String.make (!level*2) ' ') 
+         end;
+         return v
+       end else return v
+
+  | Result.Error err ->
+      if !do_debug then prerr_endline (String.make (!level*2) ' ' ^ "X- " ^ name);
+      throw err
+
+  ) st
+*)
+let case (name : string) (t : (unit -> 'a) Token.Parser.t) : ('a * Planck.Position.Region.t) Token.Parser.t = fun st -> 
+  Profile.incr ();
   if !do_debug then prerr_endline (String.make (!level*2) ' ' ^ "-> " ^ name);
   let st_start = if !do_debug then Some st else None in (* Make [st] GCed as early as possible *) 
 

pa_bind_inline/pa_bind_inline.ml

   module Normal = struct
     let bind _loc t f =
       match f with
+      | <:expr< fun $lid:x$ $lid:st$ -> Profile.incr (); $e$ >> ->
+          (* bind t (fun x -> (fun st' -> Profile.incr (); e[x,st'])) ===
+               fun st ->
+                 match t st  with
+                 | Ok (x, st') -> e[x,st']
+                 | Error s -> Error s
+          *)
+          <:expr< fun st__ -> 
+            Profile.incr ();
+            match $t$ st__ with 
+            | Result.Ok ($lid:x$, $lid:st$) -> $e$
+            | (Result.Error _ as res) -> Obj.magic res >>
       | <:expr< fun $lid:x$ $lid:st$ -> $e$ >> ->
           (* bind t (fun x -> (fun st' -> e[x,st'])) ===
                fun st ->
                  | Error s -> Error s
           *)
           <:expr< fun st__ -> 
+            Profile.incr ();
             match $t$ st__ with 
             | Result.Ok ($lid:x$, $lid:st$) -> $e$
             | (Result.Error _ as res) -> Obj.magic res >>
                  | Error s -> Error s
           *)
           <:expr< fun st__ -> 
+            Profile.incr ();
             match $t$ st__ with 
             | Result.Ok ($lid:x$, st__') -> $e$ st__'
             | (Result.Error _ as res) -> Obj.magic res >>
                  | Error s -> Error s         <--- Use Obj.magic to avoid creating a new block
           *)
           <:expr< fun st__ -> 
+            Profile.incr ();
             match $t$ st__ with 
             | Result.Ok (r__, st__') -> $f$ r__ st__'
             | (Result.Error _ as res) -> Obj.magic res >>
   
     (* return v st = Ok (v, st) *)
-    let return _loc t = <:expr< fun st__ -> Result.Ok ($t$, st__) >>
+    let return _loc t = <:expr< fun st__ -> Profile.incr (); Result.Ok ($t$, st__) >>
     let return_st _loc t st = <:expr< Result.Ok ($t$, $st$) >>
 
 (*
 
   open Open
 
-  let take : Str.elem t = fun s -> match Str.peek s with
+  let take : Str.elem t = fun s -> 
+    Profile.incr ();
+    match Str.peek s with
     | None -> Error (Str.position s, "unexpected end of stream")
     | Some (elem, _pos, s') -> Ok (elem, s')
 
-  let take_ : unit t = fun s -> match Str.peek s with
+  let take_ : unit t = fun s -> 
+    Profile.incr ();
+    match Str.peek s with
     | None -> Error (Str.position s, "unexpected end of stream")
     | Some (_, _, s') -> Ok ((),s')
 
-  let position : Str.Pos.t t = fun st -> Ok (Str.position st, st)
+  let position : Str.Pos.t t = fun st -> Profile.incr (); Ok (Str.position st, st)
 
-  let error s : 'a t = fun st -> Error (Str.position st, s)
-  let throw err = fun _st -> Error err
+  let error s : 'a t = fun st -> Profile.incr (); Error (Str.position st, s)
+  let throw err = fun _st -> Profile.incr (); Error err
 
-  let critical_error pos s = raise (Critical_error (pos, s))
+  let critical_error pos s = Profile.incr (); raise (Critical_error (pos, s))
 
-  let throw e : 'a t = fun _st -> Error e
+  let throw e : 'a t = fun _st -> Profile.incr (); Error e
 
-  let stream : Str.t t = fun st -> Ok (st, st)
+  let stream : Str.t t = fun st -> Profile.incr (); Ok (st, st)
 
-  let eos : unit t = fun s -> match Str.peek s with
+  let eos : unit t = fun s -> 
+    Profile.incr ();
+    match Str.peek s with
     | Some _ -> Error (Str.position s, "end of stream expected")
     | None -> Ok ((), s)
 
   let (<?>) : 'a t -> string -> 'a t = fun c mes st ->
+    Profile.incr ();
     let res = c st in
     match res with
     | Ok _ -> res
     | Error (pos, _) -> Error (pos, "expected " ^ mes)
 
   let (<?!>) : 'a t -> string -> 'a t = fun c mes st ->
+    Profile.incr ();
     let res = c st in
     match res with
     | Ok _ -> res
     | Error (pos, _) -> Error (pos, mes)
 
   let (<?@>) : 'a t -> Str.Pos.t -> 'a t = fun c pos st ->
+    Profile.incr ();
     let res = c st in
     match res with
     | Ok _ -> res
          (String.concat " " (List.map Str.show_elem tkns))
 
   let tokens : Str.elem list -> unit t = fun elems str ->
+    Profile.incr ();
     let rec aux str = function
       | [] -> return () str
       | x::xs -> 
     aux str elems
   
   let option : 'a t -> 'a option t = fun com s ->
+    Profile.incr ();
     match com s with
     | Error _ -> return None s
     | Ok (v, s') -> return (Some v) s'
 
   let option_ : unit t -> unit t = fun com s ->
+    Profile.incr ();
     match com s with
     | Error _ -> return () s
     | Ok ((), s') -> return () s'
 
   let result : 'a t -> ('a, error) Result.t t = fun c st ->
+    Profile.incr ();
     match c st with
     | Ok (res, st) -> Ok (Ok res, st)
     | Error e -> Ok (Error e, st)
 
   let ( ?** ) : 'a t -> 'a list t = fun com ->
     let rec aux st = fun s ->
+      Profile.incr ();
       match com s with
       | Error _ -> return (List.rev st) s
       | Ok (v, s') -> aux (v :: st) s'
   
   let ( ?* ) : 'a t -> unit t = fun com ->
     let rec aux = fun s ->
+      Profile.incr ();
       match com s with
       | Error _ -> return () s
       | Ok (_v, s') -> aux  s'
 
   let surrounded left right content =
     left >>= fun _ ->
-      content >>= fun res ->
-        right >>= fun _ ->
-  	  return res
+    content >>= fun res ->
+    right >>= fun _ ->
+    return res
   
   let critical : 'a t -> 'a t = fun t st ->
+    Profile.incr ();
     match t st with
     | (Ok _ as res) -> res
     | Error (pos, s) -> raise (Critical_error (pos, s))
 
   let (<|>) : 'a t -> 'a t -> 'a t = fun c1 c2 st -> 
+    Profile.incr ();
     let pos0 = Str.position st in
     let res = c1 st in
     match res with
 
   (* CR jfuruse: _ is used in a different meaning than option_ *)
   let try_ : 'a t -> 'a t = fun c st ->
+    Profile.incr ();
     let pos0 = Str.position st in
     let res = c st in
     match res with
     | Error (_, err) -> Error (pos0, err)
 
   let (<!>) : 'a t -> 'a t -> 'a t = fun c1 c2 st -> 
+    Profile.incr ();
     Profile.start ();
     let pos = Str.position st in
     match c1 st with
         c2 st
 
   let (<<>) : 'a t -> 'a t -> 'a t = fun c1 c2 st ->
+    Profile.incr ();
     let pos = Str.position st in
     let res1 = c1 st in
     match res1 with
   (* Used for push back *)          
   let (<&>) : 'a t -> ('a -> 'b t) -> 'b t = fun c1 c2 -> 
     fun st ->
+      Profile.incr ();
       match c1 st with
       | Ok (v, _) -> c2 v st
       | (Error _ as res) -> res
       | None -> return None
       | Some _ -> c2 >>= fun v -> return (Some v)
 
-  let set_stream new_st _st = Ok ((), new_st)
+  let set_stream new_st _st = Profile.incr (); Ok ((), new_st)
 
   let run : 'a t -> Str.t -> ('a, error) Result.t = fun t st ->
+    Profile.incr ();
     match t st with
     | Ok (v, _) -> Ok v
     | Error err -> Error err
   include Stream_intf.S
   with type elem = char
   and  type Pos.t = Position.File.t
-  val substr : t -> int -> int -> string
+  val substr : t -> int -> int -> string * t
+  val takeWhile : (char -> bool) -> t -> string * t
   val bytes : t -> int
 end)(Base : Planck_intf.S
      with type Str.elem = Sbuffer.elem
 
   let bytes = stream >>= fun stream -> return (Sbuffer.bytes stream)
 
+  let prefix n = 
+    stream >>= fun str ->
+    let pos = Sbuffer.bytes str in
+    try
+      let s, str' = Sbuffer.substr str pos n in
+      set_stream str' >>= fun () -> 
+      return s
+    with
+    | _ -> error "unexpected end of stream"
+
+  let takeWhile p =
+    stream >>= fun str ->
+    let s, str' = Sbuffer.takeWhile p str in
+    set_stream str' >>= fun () ->
+    return s
+
+  let ( ??** ) = takeWhile
+  let ( ??* ) p = ignore (takeWhile p)
+
+  let string s =
+    position >>= fun pos0 ->
+    result (prefix (String.length s)) >>= function
+      | Result.Ok s' when s = s' -> return ()
+      | _ -> throw (pos0, Printf.sprintf "expected %S" s)
+
   let matched : unit t -> string t = fun t ->
     stream >>= fun stream ->
     bytes >>= fun pos_start ->
     t >>= fun () -> 
     bytes >>= fun pos_end ->
-    return (Sbuffer.substr stream pos_start (pos_end - pos_start))
+    return (fst (Sbuffer.substr stream pos_start (pos_end - pos_start)))
 
   let with_matched : 'a t -> ('a * string) t = fun t ->
     stream >>= fun stream ->
     bytes >>= fun pos_start ->
     t >>= fun res -> 
     bytes >>= fun pos_end ->
-    return (res, Sbuffer.substr stream pos_start (pos_end - pos_start))
+    return (res, fst (Sbuffer.substr stream pos_start (pos_end - pos_start)))
 
   (** longest match *)
   let (</>) : 'a t -> 'a t -> 'a t = fun t1 t2 ->
   include Stream_intf.S
   with type elem = char
   and  type Pos.t = Position.File.t
-  val substr : t -> int -> int -> string
+  val substr : t -> int -> int -> string * t
+  val takeWhile : (char -> bool) -> t -> string * t
   val bytes : t -> int
 end)(Base : Planck_intf.S 
      with type Str.elem = Sbuffer.elem
 
   open Base
 
-  (* It contains Pstring extensions *)    
+  val prefix : int -> string t
+  (** fast retrieval of the prefix string of the given length *)
+
+  val takeWhile : (char -> bool) -> string t
+  val ( ??** ) : (char -> bool) -> string t
+  val ( ??* ) : (char -> bool) -> unit t
+
   val string : string -> unit t
-  val chars_to_string : char list Base.t -> string t
+  (** Efficient version using Sbuffer. *)
+
+  val chars_to_string : char list t -> string t
 
   val matched : unit t -> string t
   val with_matched : 'a t -> ('a * string) t
 open Planck_intf
 open Result
 
+(* Interface for generic char stream. Not optimized. *)
 module Extend(Str : Stream_intf.S with type elem = char) (Base : Planck_intf.S 
       with type Str.elem = char
       and  type Str.attr = Str.attr
   (** Exception of critical error. Used for non recoverable errors. *)
 
   module Profile : sig
+    val incr : unit -> unit
     val format : Format.t -> unit
     val reset : unit -> unit
     val recover_all : unit -> unit (* format will print the over all result *)
 
   let bytes t = (buf t).buf_pos + (buf t).rel_pos
    
+  let advance_char abs_pos = function
+    | '\n' -> Position.File.add_newlines abs_pos 1
+    | _ -> Position.File.add_columns abs_pos 1
+
+  let advance_string abs_pos s =
+    let slen = String.length s in
+    let rec fold abs_pos = function
+      | n when n = slen -> abs_pos
+      | n -> fold (advance_char abs_pos (String.unsafe_get s n)) (n+1)
+    in
+    fold abs_pos 0
+
   let rec create : buf -> t = fun i ->
     match Sstring.peek i.base with
     | None -> null (create_attr i)
                           buf_pos = i.buf_pos + String.length elem;
                           rel_pos = i.rel_pos - String.length elem }
         end else 
-    	  let char = elem.[i.rel_pos] in
-    	  let abs_pos' = match char with
-    	    | '\n' -> Position.File.add_newlines i.abs_pos 1
-    	    | _ -> Position.File.add_columns i.abs_pos 1
-    	  in 
+    	  let char = String.unsafe_get elem i.rel_pos in
+    	  let abs_pos' = advance_char i.abs_pos char in
           let i' = { i with rel_pos = i.rel_pos + 1; abs_pos = abs_pos' } in
     	  lazy (cons_desc char (create_attr i) (create i'))
 
     if i.abs_pos.Position.File.byte > start_abs_pos then failwith "Sbuffer.substr: start_abs_pos is over";
     let buffer = Buffer.create len in
     let rec substr stream stream_pos (* abs position of head of buf *) pos len =
-      if len = 0 then ()
-      else match Sstring.peek stream with
+      match Sstring.peek stream with
       | None -> failwith "Sbuffer.substr: end of stream"
       | Some (string, _, stream') ->
           (*
           let stream_pos' = stream_pos + len_string in (* abs position of head of stream' *)
           if stream_pos' <= pos then substr stream' stream_pos' pos len (* [string] is before [pos] *)
           else 
-    	  let start = pos - stream_pos in
-    	  let copy_len = min len (len_string - start) in
-    	  Buffer.add_substring buffer string start copy_len;
-    	  let len' = len - copy_len in
-    	  if len' <= 0 then () else substr stream' stream_pos' stream_pos' len'
+            (* [string] contains [pos] *)
+    	    let start = pos - stream_pos in
+    	    let copy_len = min len (len_string - start) in
+    	    Buffer.add_substring buffer string start copy_len;
+    	    let len' = len - copy_len in
+    	    if len' <= 0 then 
+              (* [pos + len] is in [string]. End of substr *)
+              let s = Buffer.contents buffer in
+              s,
+              { base = stream;
+                buf_pos = stream_pos;
+                rel_pos = start + len;
+                abs_pos = advance_string i.abs_pos s }
+            else substr stream' stream_pos' stream_pos' len'
     in
-    substr i.base i.buf_pos start_abs_pos len;
-    let s = Buffer.contents buffer in
+    let s, buf = substr i.base i.buf_pos start_abs_pos len in 
     assert (String.length s = len);
-    s
+    s, create buf
+  
+  let takeWhile p t =
+    let i = buf t in
+    let buffer = Buffer.create 10 in
+    let rec iter stream buf_pos rel_pos =
+      let finish rel_pos =
+        let s = Buffer.contents buffer in
+        s,
+        { base = stream;
+          buf_pos = buf_pos;
+          rel_pos = rel_pos;
+          abs_pos = advance_string i.abs_pos s }
+      in
+      match Sstring.peek stream with
+      | None -> finish rel_pos
+      | Some (string, _, stream') ->
+          let len_string = String.length string in
+          let rec iter_internal = function
+            | n when len_string = n -> 
+                (* try on the next string *)
+                iter stream' (buf_pos + len_string) 0 
+            | n ->
+                let char = String.unsafe_get string n in
+                if p char then begin
+                  Buffer.add_char buffer char;
+                  iter_internal (n+1)
+                end else finish n
+          in
+          iter_internal rel_pos
+    in
+    let s, buf = iter i.base i.buf_pos i.rel_pos in 
+    s, create buf
   
   let from_string ~filename str = 
     create { base = Sstring.from_string str;
   val create : buf -> t
   (** Create a stream from a buffer *)
 
-  val substr : t -> int -> int -> string
+  val substr : t -> int -> int -> string * t
   (** [substr t from len] returns a substring of [len] chars from [t]
-      from its absolute char position [from].
+      from its absolute char position [from], and a stream which starts at the end of the substring.
 
       It may raise an exception if [from] and [len] point outside of the stream *)
 
+  val takeWhile : (char -> bool) -> t -> string * t
+  (** [takeWhile p t] returns the longest prefix of [t] all whose characters suffices the predicate [p],
+      and the stream which starts at the end of the prefix.
+  *)
+
   val bytes : t -> int
   (** [bytes t] returns the head position of the stream as the nubmer of bytes. *)