Source

ocaml-lib / dcg / matcher.ml

Diff from to

File dcg/matcher.ml

  * Matcher: simple lexer pattern.
  * Copyright (C) 2006
  * Dmitri Boulytchev, St.Petersburg State University
+ * (massively) modified by Sebastien Ferre, University of Rennes 1, France
  * 
  * This software is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
 
   end
 
+let string_match regexp s pos =
+  if Str.string_match regexp s pos
+  then Some (Str.matched_string s)
+  else None
+
+class virtual str =
+  object
+    method virtual get : Str.regexp -> int -> string option
+    method virtual look : string -> int -> bool
+    method virtual at_eof : int -> bool
+    method virtual at_init : int -> bool
+    method virtual set_init : int -> unit
+  end
+
+class str_string (s0 : string) =
+  object
+    inherit str
+    val s = s0
+    val len = String.length s0
+    val mutable offset = 0
+
+    method get regexp p = string_match regexp s p
+
+    method look m p =
+      let l = String.length m in
+      p+l <= len && String.sub s p l = m
+
+    method at_eof p = (p >= len)
+
+    method at_init p = (p <= offset)
+
+    method set_init p = offset <- p
+  end
+
+class str_channel (ch : in_channel) =
+  let s0, eof0 = try input_line ch ^ "\n", false with End_of_file -> "", true in
+  object (self)
+    inherit str
+    val mutable offset = 0
+    val mutable s = s0
+    val mutable len = String.length s0
+    val mutable eof = eof0
+
+    method private expand =
+      try
+	s <- s ^ input_line ch ^ "\n";
+	len <- String.length s
+      with End_of_file ->
+	eof <- true
+
+    method get regexp p =
+      let pos = p-offset in
+      assert (pos >= 0 && pos <= len);
+      let res = ref (string_match regexp s pos) in
+      while not eof && !res = None && Str.string_partial_match regexp s pos do
+	self#expand;
+	res := string_match regexp s pos
+      done;
+      !res
+
+    method look m p =
+      let l = String.length m in
+      let pos = p-offset in
+      assert (pos >= 0 && pos <= len);
+      while not eof && pos+l > len do
+	self#expand
+      done;
+      pos+l <= len && String.sub s pos l = m
+
+    method at_eof p =
+      let pos = p-offset in
+      eof && pos >= len
+
+    method at_init p =
+      let pos = p-offset in
+      pos <= 0
+
+    method set_init p =
+      let pos = p-offset in
+      assert (pos >= 0 && pos <= len);
+      prerr_endline ("Matcher: buffer length = " ^ string_of_int len);
+      prerr_endline s;
+      s <- String.sub s pos (len - pos);
+      len <- String.length s;
+      offset <- p
+  end
+
 let shiftPos (line, col) s b n =
   let rec inner i (line, col) =
     if i = n 
   in
   inner b (line, col)
 
-class str skip s p coord =
+class cursor (skip : Str.regexp) (str : str) (p : int) (coord : Msg.Coord.t) =
   object (self)
-    method params = (s, p, coord)
-
-    method skip =
-      if Str.string_match skip s p
-      then
-	let m = Str.matched_string s in
-	let len = String.length m in
-	new str skip s (p+len) (shiftPos coord m 0 len)
-      else
-	new str skip s p coord
-
-    method shift m =
+    method coord = coord
+
+    method get (regexp : Str.regexp) : string option = str#get regexp p
+
+    method look (m : string) : bool = str#look m p
+
+    method at_eof : bool = str#at_eof p
+
+    method at_init : bool = str#at_init p
+
+    method skip : cursor =
+      match str#get skip p with
+      | Some m when m <> "" -> (self#shift m)#skip
+      | _ -> (self :> cursor)
+
+    method shift (m : string) : cursor =
       let len = String.length m in
-      new str skip s (p+len) (shiftPos coord m 0 len)
+      let p' = p + len in
+      new cursor skip str p' (shiftPos coord m 0 len)
+
+    method init : unit =
+(*      print_endline (Token.toString ("init", coord)); *)
+      str#set_init p
   end
 
-let str_of_string skip s = new str skip s 0 (1,1)
+let cursor_of_string skip s = new cursor skip (new str_string s) 0 (1,1)
+let cursor_of_channel skip ch = new cursor skip (new str_channel ch) 0 (1,1)
+
 
-class ['ctx,'res] get (name : string) (regexp : Str.regexp) (f : Token.t -> 'res) (ctx : 'ctx) (str : str) =
+class ['ctx,'res] get (name : string) (regexp : Str.regexp) (f : Token.t -> 'res) (ctx : 'ctx) (cursor : cursor) =
   object (self)
     val mutable state = `Begin
 
       match state with
       | `Begin ->
 	  state <- `End;
-	  let str = str # skip in
-	  let s, p, coord = str # params in
-	  if Str.string_match regexp s p
-	  then
-	    let m = Str.matched_string s in
-	    Parsed (ctx, f (m, coord), str # shift m)
-	  else
-	    Failed [Msg.make (sprintf "%s expected" name) [||] (Msg.Locator.Point coord)]
+	  let cursor = cursor#skip in
+	  ( match cursor#get regexp with
+	  | Some m ->
+	      Parsed (ctx, f (m, cursor#coord), cursor#shift m)
+	  | None ->
+	      Failed [Msg.make (sprintf "%s expected" name) [||] (Msg.Locator.Point cursor#coord)])
       | `End -> Failed []
   end
 
 let get = new get
 
-class ['ctx] look (kw : string) (ctx : 'ctx) (str : str) =
+class ['ctx] look (m : string) (ctx : 'ctx) (cursor : cursor) =
   object (self)
     val mutable state = `Begin
 	
       match state with
       | `Begin ->
 	  state <- `End;
-	  let str = str # skip in
-	  let s, p, coord = str # params in
-	  (try 
-	    let l = String.length kw in
-	    let m = String.sub s p l in
-	    if kw = m 
-	    then Parsed (ctx, (m, coord), str # shift m)
-	    else Failed [Msg.make (sprintf "%s expected" kw) [||] (Msg.Locator.Point coord)]
-	  with Invalid_argument _ -> Failed [Msg.make (sprintf "%s expected" kw) [||] (Msg.Locator.Point coord)])
+	  let cursor = cursor#skip in
+	  if cursor#look m
+	  then Parsed (ctx, (m, cursor#coord), cursor#shift m)
+	  else Failed [Msg.make (sprintf "'%s' expected" m) [||] (Msg.Locator.Point cursor#coord)]
       | `End -> Failed []
   end
 
 let look = new look
 
-class ['ctx] eof (ctx : 'ctx) (str : str) =
+class ['ctx] eof (ctx : 'ctx) (cursor : cursor) =
   object
     val mutable state = `Begin
 
       match state with
       | `Begin ->
 	  state <- `End;
-	  let str = str # skip in
-	  let s, p, coord = str # params in
-	  if p = String.length s
-	  then Parsed (ctx, ("<EOF>", coord), str)
-	  else Failed [Msg.make "<EOF> expected" [||] (Msg.Locator.Point coord)]
+	  let cursor = cursor#skip in
+	  if cursor#at_eof
+	  then Parsed (ctx, ("<EOF>", cursor#coord), cursor)
+	  else Failed [Msg.make "<EOF> expected" [||] (Msg.Locator.Point cursor#coord)]
       | `End -> Failed []
   end
 
 let eof = new eof
 
 
-type ('ctx,'res) parse = ('ctx, 'res, str, Msg.t) Dcg.parse
+type ('ctx,'res) parse = ('ctx, 'res, cursor, Msg.t) Dcg.parse
 
-(*
-class virtual ['a] matcher (make : string -> int -> Msg.Coord.t -> 'a) s p coord = 
-  object (self)
-
-    method virtual skip : int * Msg.Coord.t
-
-    method get (name : string) regexp =
-      LOG (printf "Trying %s at %s\n" name (sub s p (min 10 (length s - p)))); 
-      let p, coord = self#skip in
-      if string_match regexp s p
-      then begin
-	let m = matched_string s in
-	LOG (printf "Ok, repr=%s\n" m);
-	let p = p + length m in	
-	Parsed ((m, coord), make s p (shiftPos coord m 0 (length m)))
-      end
-      else 
-	Failed [Msg.make (sprintf "%s expected" name) [||] (Msg.Locator.Point coord)]
-
-    method look str = 
-      let p, coord = self#skip in
-      try 
-	let l = String.length str in
-	let m = String.sub s p l in
-	if str = m 
-	then Parsed ((m, coord), make s (p+l) (shiftPos coord m 0 (length m)))
-	else Failed [Msg.make (sprintf "%s expected" str) [||] (Msg.Locator.Point coord)]
-      with Invalid_argument _ -> Failed [Msg.make (sprintf "%s expected" str) [||] (Msg.Locator.Point coord)]
-
-    method getEOF = 
-      let p, coord = self#skip in
-      LOG (printf "Trying <EOF> at %s\n" (sub s p (min 10 (length s - p)))); 
-      if p = length s 
-      then Parsed (("<EOF>", coord), make s p coord)
-      else Failed [Msg.make "<EOF> expected" [||] (Msg.Locator.Point coord)]
-
-    method getFIRST   = self#look ""
-    method getLAST    = 
-      (
-       Parsed (("", coord), make s p coord) : 
-	 ((string * Msg.Coord.t) * 'a, Msg.t) Ostap.tag
-      )
-
-  end
-*)