oni / cf / cf_regx.ml

(*---------------------------------------------------------------------------*
  $Change$
  Copyright (c) 2005-2010, James H. Woodyatt
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
  
    Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    
    Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  OF THE POSSIBILITY OF SUCH DAMAGE. 
 *---------------------------------------------------------------------------*)

module Symbol = struct
    type t = char and 'a map = 'a array
    let map f = Array.init 256 (fun n -> f (char_of_int n))
    let get m c = Array.unsafe_get m (int_of_char c)
end

module DFA = Cf_dfa.Create(Symbol)

exception Error of string
        
module P = Cf_llscan
open P.Op
open DFA.Op

let p_digit_ =
    P.sat (function '0'..'9' -> true | _ -> false) >>= fun c ->
    P.ret (Char.code c - 48)

let p_hexdigit_ =
    let base_ c =
        if c >= 'a' then 87 else if c >= 'A' then 55 else 48
    in
    P.sat begin
        function ('0'..'9' | 'a'..'f' | 'A'..'F') -> true | _ -> false
    end >>= fun c ->
    P.ret (Char.code c - base_ c)

let p_backtick_ = ?.'`'

let p_any_ = P.sat (fun _ -> true)

let esc_ch_list_ =
    let hexcode_ _ =
        p_hexdigit_ >>= fun a ->
        p_hexdigit_ >>= fun b ->
        P.ret (Char.chr (a * 16 + b))
    in
    let deccode_ chA =
        let a = Char.code chA - 48 in
        p_digit_ >>= fun b ->
        p_digit_ >>= fun c ->
        let code = a * 100 + b * 10 + c in
        if code > 255 then P.nil else P.ret (Char.chr code)
    in
    let control_ _ =
        P.sat begin function
            | '@'..'_' | 'a'..'z' -> true
            | _ -> false
        end >>= fun c ->
        let n = Char.code c in
        let n = if n >= 97 then n - 96 else n - 64 in
        P.ret (Char.chr n)
    in
    let newline_ _ = P.ret '\x0A' in
    let tab_ _ = P.ret '\x09' in
    let return_ _ = P.ret '\x0D' in
    [
        'n', newline_;
        't', tab_;
        'r', return_;
        'x', hexcode_;
        'c', control_;
        '0', deccode_;
        '1', deccode_;
        '2', deccode_;
        '`', P.ret;
    ]

let ch_class_ =
    let l_bracket = ?.'[' in
    let r_bracket = ?.']' in
    let hyphen = ?.'-' in
    let eq (c1 : char) (c2 : char) = (c1 = c2) in
    let raw_ch = P.sat (function '-' | ']' -> false | _ -> true) in
    let esc_ch =
        let mapF (ch, f) = ?.ch >>= f in
        let aux = P.alt (List.map mapF ((']', P.ret) :: esc_ch_list_)) in
        p_backtick_ >>= fun _ ->
        aux
    in
    let single_ch = P.alt [ esc_ch; raw_ch ] in
    let range: (char, char -> bool) P.t =
        single_ch >>= fun a ->
        hyphen >>= fun _ ->
        single_ch >>= fun b ->
        P.ret (fun ch -> ch >= a && ch <= b)
    in
    let eqLift p = p >>= fun ch -> P.ret (fun c -> c = ch) in
    let esc_set =
        let alpha = function 'A'..'Z' | 'a'..'z' -> true | _ -> false in
        let digit = function '0'..'9' -> true | _ -> false in
        let alnum ch = alpha ch || digit ch in
        let specifier = P.alt [
            (?.'a' >>= fun _ -> P.ret alpha);
            (?.'d' >>= fun _ -> P.ret digit);
            (?.'i' >>= fun _ -> P.ret alnum);
        ] in
        p_backtick_ >>= fun _ ->
        specifier
    in
    let single = P.alt (List.map eqLift [ esc_ch; raw_ch ]) in
    let hyphen_ch = P.tok (function '-' -> Some (eq '-') | _ -> None) in
    let atom0 = P.alt [ hyphen_ch; esc_set; range; single ] in
    let atomN = P.alt [ esc_set; range; single ] in
    let atomlist =
        let existF ch f = f ch in
        atom0 >>= fun hd ->
        ?*atomN >>= fun tl ->
        P.ret (fun ch -> List.exists (existF ch) (hd :: tl))
    in
    let negate =
        ?/(?.'^') >>= function None -> P.ret false | _ -> P.ret true
    in
    l_bracket >>= fun _ ->
    negate >>= fun _ ->
    atomlist >>= fun f ->
    r_bracket >>= fun _ ->
    P.ret !^f

let esc_expr_list_ =
    let meta_ ch = P.ret !:ch in
    let alpha_ _ =
        P.ret !^(function 'A'..'Z' | 'a'..'z' -> true | _ -> false)
    in
    let alnum_ _ =
        P.ret begin
            !^begin function
                | '0'..'9' | 'A'..'Z' | 'a'..'z' -> true
                | _ -> false
            end
        end
    in
    let digit_ _ = P.ret !^(function '0'..'9' -> true | _ -> false) in
    let sat_white_ x = function '\009'..'\013' | '\032' -> x | _ -> not x in
    let white_ _ = P.ret !^(sat_white_ true) in
    let nonwhite_ _ = P.ret !^(sat_white_ false) in
    [
        'a', alpha_;
        'i', alnum_;
        'd', digit_;
        's', white_;
        'w', nonwhite_;
        '.', meta_;
        '?', meta_;
        '*', meta_;
        '+', meta_;
        '(', meta_;
        ')', meta_;
        '|', meta_;
        '[', meta_;
        ']', meta_;
        '^', meta_;
        '$', meta_;
    ]

let esc_expr_ =        
    let p_escape_ fLst =
        p_backtick_ >>= fun _ ->
        P.alt (List.map (fun (ch, f) -> ?.ch >>= f) fLst)
    in
    let esc_chx_list_ =
        List.rev_map begin fun (c, f) ->
            c, fun x -> f x >>= fun y -> P.ret !:y
        end esc_ch_list_
    in
    p_escape_ (List.rev_append esc_chx_list_ esc_expr_list_)

let expr_llscan =
    let symbol =
        let f = function
            | '\x00'..'\x1f' | '?' | '*' | '+' | '(' | ')' | '|'
            | '\x7f'..'\xff' -> false
            | _ -> true
        in
        P.sat f >>= fun c ->
        P.ret !:c
    in
    let dot = ?.'.' >>= fun _ -> P.ret !^(fun c -> c <> '\n') in
    let star x = ?.'*' >>= fun _ -> P.ret !*x in
    let plus x = ?.'+' >>= fun _ -> P.ret !+x in
    let question x = ?.'?' >>= fun _ -> P.ret !?x in
    let postfix x = P.alt [ star x; plus x; question x; P.ret x ] in
    let rec expr _ =
        term () >>= fun x ->
        ?* (?.'|' >>= fun _ -> term ()) >>= fun y ->
        P.ret (List.fold_left (fun x y -> x $| y) x y)
    and term () =
        ?+(factor () >>= postfix) >>= fun (hd, tl) ->
        P.ret (List.fold_left (fun x y -> x $& y) hd tl)
    and factor () = P.alt [ group (); ch_class_; esc_expr_; dot; symbol ]
    and group () =
        ?.'(' >>= fun _ ->
        expr DFA.nil >>= fun x ->
        ?.')' >>= fun _ ->
        P.ret x
    in
    expr DFA.nil

let expr_of_seq z =
    match expr_llscan z with
    | Some (v, _) -> v
    | _ -> raise (Error (Cf_seq.to_string z))

let expr_of_string s = expr_of_seq (Cf_seq.of_string s)

let quote =
    let esc_ =
        [ '`'; '.'; '?'; '*'; '+'; '('; ')'; '|'; '['; ']'; '^'; '$' ]
    in
    let rec loop c =
        let w = Lazy.lazy_from_val (Cf_flow.Q loop) in
        match c with
        | _ when List.exists (fun c' -> c == c') esc_ ->
            let w = Lazy.lazy_from_val (Cf_flow.P (c, w)) in
            Cf_flow.P ('`', w)
        | _ ->
            Cf_flow.P (c, w)
    in
    Lazy.lazy_from_val (Cf_flow.Q loop)

let unquote =
    let rec loop c =
        let w = Lazy.lazy_from_val (Cf_flow.Q loop) in
        match c with
        | '`' ->
            Cf_flow.Q begin fun c ->
                Cf_flow.P (c, Lazy.lazy_from_val (Cf_flow.Q loop))
            end
        | _ ->
            Cf_flow.P (c, w)
    in
    Lazy.lazy_from_val (Cf_flow.Q loop)

type t = int DFA.t

let of_expression x = DFA.create (x $@ (fun n z -> Some (n, Cf_seq.shift n z)))
let of_seq z = of_expression (expr_of_seq z)
let of_string s = of_expression (expr_of_string s)

let test r s =
    let z = Cf_seq.of_string s in
    match r z with
    | Some (n, _) when n = String.length s -> true
    | _ -> false

let search =
    let rec loop r pos z =
        match r z with
        | Some (n, _) ->
            pos, n
        | None ->
            match p_any_ z with
            | Some (_, z) ->
                loop r (succ pos) z
            | None ->
                raise Not_found
    in
    fun r -> loop r 0

let rec separate r z =
    lazy begin
        try
            let pos, len = search r z in
            let s = Cf_seq.limit pos z in
            let z = Cf_seq.shift (pos + len) z in
            Cf_seq.P (s, separate r z)
        with
        | Not_found ->
            Cf_seq.P (z, Cf_seq.nil)
    end

let split =
    let rec loop r s pos acc z =
        match
            try Some (search r z) with Not_found -> None
        with
        | Some (pos', len') ->
            let x = String.sub s pos pos' in
            let pos = pos + pos' + len' in
            let z = Cf_seq.shift (pos' + len') z in
            loop r s pos (x :: acc) z
        | None ->
            List.rev_append acc [ Cf_seq.to_string z ]
    in
    fun r s ->
        loop r s 0 [] (Cf_seq.of_string s)

let llscan r z =
    match r z with
    | Some (n, tl) -> Some (Cf_seq.to_string (Cf_seq.limit n z), tl)
    | None -> None

(*--- $File$ ---*)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.