Source

orakuda / regexp.ml

The default branch has multiple heads

open Pcre

type 'a t = {
  string : string;
  pcre : Pcre.regexp;
  (* typ : typ; *)
  binder : left:string -> right:string -> last:string option -> string option array -> 'a;
}

module Internal_use_only = struct

  class virtual group named_groups ~left ~right ~last groups_opt = 
    let named_groups_opt = 
      List.map (fun (n,pos) -> n, Array.unsafe_get groups_opt pos) named_groups
    in
    let def = function
      | Some v -> v 
      | None -> ""
    in
    object
      method _groups = (Array.map def groups_opt : string array)
      method _groups_opt = (groups_opt : string option array)
      method _named_groups = (List.map (fun (x,y) -> x, def y) named_groups_opt : (string * string) list)
      method _named_groups_opt = (named_groups_opt : (string * string option) list)
      method _group n = def groups_opt.(n)
      method _group_opt n = groups_opt.(n)
      method _unsafe_group n = def (Array.unsafe_get groups_opt n)
      method _unsafe_group_opt n = Array.unsafe_get groups_opt n
      method _named_group s = def (List.assoc s named_groups_opt)
      method _named_group_opt s = List.assoc s named_groups_opt
      method _left     : string = left
      method _right    : string = right
      method _last     : string = def last
      method _last_opt : string option = last
  end

  let create string ~flags binder =
    (* let typ = type_regexp (from_string string) in *)
    { string = string;
      pcre = Pcre.regexp ~flags string;
      (* typ = typ; *)
      binder = binder
    }
  ;;    
  
  let make_group_obj rex s substrs =
  
    let subject_start, subject_end = Pcre.get_substring_ofs substrs 0 in (* may raise an exception *)
    let left = String.sub s 0 subject_start in
    let right = 
      String.sub s subject_end (String.length s - subject_end)
    in
    let groups = Pcre.get_opt_substrings substrs ~full_match:true in
    let last =  (* probably wrong *)
      let rec find n = 
        if n = 0 then None
        else if groups.(n) <> None then groups.(n)
        else find (n-1)
      in
      find (Array.length groups - 1)
    in
    rex.binder ~left ~right ~last groups
  ;;

end

open Internal_use_only

let regexp t = t.pcre;;

let exec_exn rex ?iflags ?flags ?pos ?callout s =
  let substrs = Pcre.exec ~rex:rex.pcre ?iflags ?flags ?pos ?callout s in
  make_group_obj rex s substrs
;;

let exec rex ?iflags ?flags ?pos ?callout s =
  try
    Some (exec_exn rex ?iflags ?flags ?pos ?callout s)
  with
  | Not_found -> None

(* replace *)
let replace rex ~templ = 
  Pcre.qreplace ~rex:rex.pcre ~templ ?pat:None

let replace_first rex ~templ = 
  Pcre.qreplace_first ~rex:rex.pcre ~templ ?pat:None 

let substitute_substrings_gen 
      (substf : ?iflags : irflag -> ?flags : rflag list ->
                ?rex : regexp -> ?pat : string -> ?pos : int ->
                ?callout : callout -> subst : (substrings -> string) ->
                string -> string) f rex 
      ?iflags ?flags ?pos ?callout
      s =
  substf ~rex:rex.pcre ?pat:None
    ~subst: (fun substrs -> f (make_group_obj rex s substrs))
    ?iflags ?flags ?pos ?callout s

let substitute_substrings f rex = 
  substitute_substrings_gen Pcre.substitute_substrings f rex

let substitute_substrings_first f rex =
  substitute_substrings_gen Pcre.substitute_substrings_first f rex

(* split *)
let split rex = Pcre.split ~rex:rex.pcre ?pat:None

module Infix = struct
  let (=~) s ?iflags ?flags ?pos ?callout rex = 
    exec rex ?iflags ?flags ?pos ?callout s
  ;;
end

let build_case rex f s =
  match exec rex s with
  | Some v -> Some (f v)
  | None -> None

let build_cases ?(default= fun () -> raise Not_found) rex_fs s = 
  let rec with_aux = function
    | [] -> default ()
    | rex_f::rex_fs ->
	match rex_f s with
	| Some v -> v
	| None -> with_aux rex_fs
  in
  with_aux rex_fs
;;