Source

ocaml-lib / dcg / matcher.ml

Full commit
(*
 * Matcher: simple lexer pattern.
 * Copyright (C) 2006
 * Dmitri Boulytchev, St.Petersburg State University
 * (massively) modified by Sebastien Ferre, University of Rennes 1, France
 * 
 * This software is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License version 2, as published by the Free Software Foundation.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * 
 * See the GNU Library General Public License version 2 for more details
 * (enclosed in the file COPYING).
 *)

open Dcg
open Printf

module Token =
  struct

    type t = string * Msg.Coord.t

    let toString (t, c) = sprintf "%s at %s" t (Msg.Coord.toString c)

    let loc (t, c) = Msg.Locator.Interval (c, ((fst c), (snd c)+(String.length t)-1))
    let repr       = fst

  end

let string_match regexp s pos =
  if Str.string_match regexp s pos
  then Some (Str.matched_string s)
  else None

class virtual str =
  object
    method virtual get : Str.regexp -> int -> string option
    method virtual look : string -> int -> bool
    method virtual eof : int -> bool
    method virtual at_init : int -> bool
    method virtual set_init : int -> unit
  end

class str_string (s0 : string) =
  object
    inherit str
    val s = s0
    val len = String.length s0
    val mutable offset = 0

    method get regexp p = string_match regexp s p

    method look m p =
      let l = String.length m in
      p+l <= len && String.sub s p l = m

    method eof p = (p >= len)

    method at_init p = (p <= offset)

    method set_init p = offset <- p
  end

class str_channel (ch : in_channel) =
  let s0, eof0 = try input_line ch ^ "\n", false with End_of_file -> "", true in
  object (self)
    inherit str
    val mutable offset = 0
    val mutable s = s0
    val mutable len = String.length s0
    val mutable eof = eof0

    method private expand =
      try
	s <- s ^ input_line ch ^ "\n";
	len <- String.length s
      with End_of_file ->
	eof <- true

    method get regexp p =
      let pos = p-offset in
      assert (pos >= 0 && pos <= len);
      let res = ref (string_match regexp s pos) in
      while not eof &&
	( match !res with
	| Some w -> pos + String.length w >= len
	| None -> Str.string_partial_match regexp s pos)
      do
	self#expand;
	res := string_match regexp s pos
      done;
      !res

    method look m p =
      let l = String.length m in
      let pos = p-offset in
      assert (pos >= 0 && pos <= len);
      while not eof && pos+l > len do
	self#expand
      done;
      pos+l <= len && String.sub s pos l = m

    method eof p =
      let pos = p-offset in
      assert (pos >= 0 && pos <= len);
      while not eof && pos+1 > len do
	self#expand
      done;
      eof && pos >= len

    method at_init p =
      let pos = p-offset in
      pos <= 0

    method set_init p =
      let pos = p-offset in
      assert (pos >= 0 && pos <= len);
(* debug *)
(*
      prerr_endline ("Matcher: buffer length = " ^ string_of_int len);
      prerr_endline s;
*)
      s <- String.sub s pos (len - pos);
      len <- String.length s;
      offset <- p
  end

let shiftPos (line, col) s b n =
  let rec inner i (line, col) =
    if i = n 
    then (line, col)
    else
      match s.[i] with
      | '\n' -> inner (i+1) (line+1, 1)
      | _    -> inner (i+1) (line, col+1)
  in
  inner b (line, col)

class cursor (*skip : Str.regexp*) (str : str) (p : int) (coord : Msg.Coord.t) =
  object (self)
    method coord = coord

    method get (regexp : Str.regexp) : string option = str#get regexp p

    method look (m : string) : bool = str#look m p

    method eof : bool = str#eof p

    method at_init : bool = str#at_init p

(*
    method skip : cursor = Common.prof "Matcher.cursor#skip" (fun () ->
      match str#get skip p with
      | Some m when m <> "" -> (self#shift m)#skip
      | _ -> (self :> cursor))
*)

    method shift (m : string) : cursor =
      let len = String.length m in
      let p' = p + len in
      let coord' = shiftPos coord m 0 len in
      new cursor (*skip*) str p' coord'

    method init : unit =
(*      print_endline (Token.toString ("init", coord)); *)
(*print_endline (Msg.Coord.toString coord); *)
      str#set_init p

(*
    initializer
      print_endline (Msg.Coord.toString coord);
*)

  end

let cursor_of_string (*skip*) s = new cursor (*skip*) (new str_string s) 0 (1,1)
let cursor_of_channel (*skip*) ch = new cursor (*skip*) (new str_channel ch) 0 (1,1)


class ['ctx,'res] get (name : string) (regexp : Str.regexp) (f : Token.t -> 'res) (ctx : 'ctx) (cursor : cursor) =
  object (self)
    val mutable state = `Begin

    method next =
      match state with
      | `Begin ->
	  state <- `End;
(*	  let cursor = cursor#skip in *)
	  ( match cursor#get regexp with
	  | Some m ->
	      Parsed (ctx, f (m, cursor#coord), cursor#shift m)
	  | None ->
	      Failed [Msg.make (name ^ " expected") [||] (Msg.Locator.Point cursor#coord)])
      | `End -> Failed []
  end

let get = new get

class ['ctx] look (m : string) (ctx : 'ctx) (cursor : cursor) =
  object (self)
    val mutable state = `Begin
	
    method next =
      match state with
      | `Begin ->
	  state <- `End;
(*	  let cursor = cursor#skip in *)
	  if cursor#look m
	  then Parsed (ctx, (m, cursor#coord), cursor#shift m)
	  else Failed [Msg.make ("'" ^ m ^ "' expected") [||] (Msg.Locator.Point cursor#coord)]
      | `End -> Failed []
  end

let look = new look

class ['ctx] eof (ctx : 'ctx) (cursor : cursor) =
  object
    val mutable state = `Begin

    method next =
      match state with
      | `Begin ->
	  state <- `End;
(*	  let cursor = cursor#skip in *)
	  if cursor#eof
	  then Parsed (ctx, ("<EOF>", cursor#coord), cursor)
	  else Failed [Msg.make "<EOF> expected" [||] (Msg.Locator.Point cursor#coord)]
      | `End -> Failed []
  end

let eof = new eof


type ('ctx,'res) parse = ('ctx, 'res, cursor, Msg.t) Dcg.parse