Commits

james woodyatt  committed b1e66ab

Rename the old Cf_dfa module as Cf_dfa0. The obsolescent Cf_regex
module still depends on it.

  • Participants
  • Parent commits c318f0f
  • Branches sideline

Comments (0)

Files changed (6)

File cf/OMakefile

         machine
         unicode
         parser
-        dfa
+        dfa0
         regex
         lex
         scan_parser

File cf/cf_dfa.ml

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-module N_set = Cf_rbtree.Set(Cf_ordered.Int_order)
-module N_map = Cf_rbtree.Map(Cf_ordered.Int_order)
-
-let identity_ x = x
-
-module type Symbol_T = sig
-    type t and 'a map
-    val map: (t -> 'a) -> 'a map
-    val get: 'a map -> t -> 'a
-end
-
-module type T = sig
-    
-    module S: Symbol_T
-    
-    type x
-    type 'a r
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    val nil: x
-    
-    module type Expr_Op_T = sig
-        val ( $| ): x -> x -> x
-        val ( $& ): x -> x -> x
-        val ( !* ): x -> x
-        val ( !+ ): x -> x
-        val ( !? ): x -> x
-        val ( !: ): S.t -> x
-        val ( !^ ): (S.t -> bool) -> x
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    module Expr_Op: Expr_Op_T
-    
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        val ( $= ): x -> 'a -> 'a r
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    module Op: Op_T
-    
-    val create: 'a r -> 'a t
-    
-    module X: sig
-        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        module Op: Op_T
-        
-        val create: ('c, 'a) r -> ('c, 'a) t
-    end
-end
-
-module Create(S: Symbol_T) = struct
-    module S = S
-    
-    class virtual ['i, 'o] satisfier state =
-        object(_:'self)
-            constraint 'f = int -> ('i, 'o) Cf_parser.t
-            
-            val state_ = state
-            
-            method state = state_
-            method follow u = {< state_ = N_set.union state_ u >}
-            method virtual edge: S.t -> N_set.t -> N_set.t
-            method accept = (None : 'f option)
-        end
-    
-    let literal_ c =
-        object
-            inherit ['i, 'o] satisfier N_set.nil
-            method edge n u = if n = c then N_set.union state_ u else u
-        end
-        
-    let mapped_ f =
-        object
-            inherit ['i, 'o] satisfier N_set.nil
-            method edge n u = if f n then N_set.union state_ u else u
-        end
-    
-    type 's y = {
-        y_counter_: int;
-        y_first_: N_set.t;
-        y_last_: N_set.t;
-        y_follow_: 's N_map.t -> 's N_map.t;
-    } constraint 's = ('i, 'o) #satisfier
-    
-    type 's w = {
-        w_null_: bool;
-        w_cons_: int -> 's y;
-    }
-    
-    type x = (Obj.t, Obj.t) satisfier w
-    
-    let nil = {
-        w_null_ = true;
-        w_cons_ = fun i -> {
-            y_counter_ = i;
-            y_first_ = N_set.nil;
-            y_last_ = N_set.nil;
-            y_follow_ = identity_;
-        }
-    }
-    
-    let expr_ n = {
-        w_null_ = false;
-        w_cons_ = fun i ->
-            let s = N_set.singleton i in {
-                y_counter_ = succ i;
-                y_first_ = s;
-                y_last_ = s;
-                y_follow_ = fun m -> N_map.replace (i, n) m;
-            }
-    }
-    
-    module type Expr_Op_T = sig
-        val ( $| ): x -> x -> x
-        val ( $& ): x -> x -> x
-        
-        val ( !* ): x -> x
-        val ( !+ ): x -> x
-        val ( !? ): x -> x
-        val ( !: ): S.t -> x
-        val ( !^ ): (S.t -> bool) -> x
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    module Expr_Op = struct
-        let ( $| ) wa wb = {
-            w_null_ = wa.w_null_ || wb.w_null_;
-            w_cons_ = fun i ->
-                let ya = wa.w_cons_ i in
-                let yb = wb.w_cons_ ya.y_counter_ in {
-                    y_counter_ = yb.y_counter_;
-                    y_first_ = N_set.union ya.y_first_ yb.y_first_;
-                    y_last_ = N_set.union ya.y_last_ yb.y_last_;
-                    y_follow_ = fun m -> yb.y_follow_ (ya.y_follow_ m);
-                }
-        }
-        
-        let follow_fold_aux_ a m i =
-            N_map.replace (i, let sat = N_map.search i m in sat#follow a) m
-        
-        let ( $& ) wa wb = {
-            w_null_ = wa.w_null_ && wb.w_null_;
-            w_cons_ = fun i -> 
-                let ya = wa.w_cons_ i in
-                let yb = wb.w_cons_ ya.y_counter_ in
-                let first =
-                    if wa.w_null_ then
-                        N_set.union ya.y_first_ yb.y_first_
-                    else
-                        ya.y_first_
-                and last =
-                    if wb.w_null_ then
-                        N_set.union ya.y_last_ yb.y_last_
-                    else
-                        yb.y_last_
-                in {
-                    y_counter_ = yb.y_counter_;
-                    y_first_ = first;
-                    y_last_ = last;
-                    y_follow_ = fun m ->
-                        let m = yb.y_follow_ (ya.y_follow_ m) in
-                        N_set.fold (follow_fold_aux_ yb.y_first_) m ya.y_last_
-                }
-        }
-        
-        let star_follow_ y m =
-            N_set.fold (follow_fold_aux_ y.y_first_) (y.y_follow_ m) y.y_last_
-        
-        let ( !* ) w = {
-            w_null_ = true;
-            w_cons_ = fun i ->
-                let y = w.w_cons_ i in { y with y_follow_ = star_follow_ y }
-        }
-        
-        let ( !? ) x = x $| nil
-        let ( !+ ) x = x $& (!* x)
-        
-        let ( !: ) i = expr_ (literal_ i)
-        let ( !^ ) f = expr_ (mapped_ f)
-        
-        let rec ( !~ ) s =
-            match Lazy.force s with
-            | Cf_seq.Z -> nil
-            | Cf_seq.P (hd, tl) -> !:hd $& !~tl
-        
-    end
-    
-    let acceptor_ f =
-        object(self:'self)
-            inherit ['i, 'o] satisfier N_set.nil
-            
-            method edge _ u = u
-            method follow _ = (self :> 'self)
-            method accept = Some f
-        end
-    
-    type 'a r = (S.t, 'a) satisfier w
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        val ( $= ): x -> 'a -> 'a r
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    module Op = struct
-        include Expr_Op
-        
-        let ( $= ) x k =
-            let f n z = Some (k, Cf_seq.shift n z) in
-            (Obj.magic x) $& (expr_ (acceptor_ f))
-        
-        let ( $> ) x f =
-            let g n z =
-                let hd = Cf_seq.limit n z and tl = Cf_seq.shift n z in
-                Some (f hd, tl)
-            in
-            (Obj.magic x) $& (expr_ (acceptor_ g))
-        
-        let ( $@ ) x f =
-            (Obj.magic x) $& (expr_ (acceptor_ f))
-        
-        let ( !@ ) =
-            let rec f e = function hd :: tl -> f (hd $| e) tl | [] -> e in
-            fun s -> f nil s
-    end
-    
-    module S_order = struct
-        type t = int array
-        
-        let compare = compare
-            
-        (*
-        let to_string a =
-            let b = Buffer.create 40 in
-            Buffer.add_string b "[|";
-            begin
-                match Array.length a with
-                | 0 -> ()
-                | 1 ->
-                    Buffer.add_string b (Printf.sprintf " %u" a.(0))
-                | n ->
-                    for i = 0 to n - 2 do
-                        Buffer.add_string b (Printf.sprintf " %u;" a.(i))
-                    done;
-                    Buffer.add_string b (Printf.sprintf " %u" a.(n - 1))
-            end;
-            Buffer.add_string b " |]";
-            Buffer.contents b
-        *)
-    end
-    
-    module S_map = Cf_rbtree.Map(S_order)
-    
-    type ('i, 'o) s = {
-        s_id_: S_order.t;
-        s_accept_: (int -> ('i, 'o) Cf_parser.t) option;
-        s_next_: ('i, 'o) s option Lazy.t S.map;
-    }
-    
-    let create_aux_ =
-        let suspend w =
-            let y = w.w_cons_ 0 in
-            let m = y.y_follow_ N_map.nil in
-            let edge n u p = let sat = N_map.search p m in sat#edge n u in
-            let rec accept u ul i =
-                if i < ul then begin
-                    let sat = N_map.search (Array.unsafe_get u i) m in
-                    match sat#accept with
-                    | None -> accept u ul (succ i)
-                    | v -> v
-                end
-                else
-                    None
-            in
-            let sh = ref S_map.nil in
-            let rec state u =
-                let s = {
-                    s_id_ = u;
-                    s_accept_ = accept u (Array.length u) 0;
-                    s_next_ = S.map (follow u);
-                } in
-                sh := S_map.replace (u, s) !sh;
-                s
-            and follow u n =
-                lazy begin
-                    let v = Array.fold_left (edge n) N_set.nil u in
-                    if N_set.empty v then
-                        None
-                    else
-                        let u = Array.of_list (N_set.to_list_incr v) in
-                        Some (try S_map.search u !sh with Not_found -> state u)
-                end
-            in
-            state (Array.of_list (N_set.to_list_incr y.y_first_))
-        in
-        let nil _ _ = None in
-        let rec loop code s f n z0 z =
-            let f = match s.s_accept_ with None -> f | Some f -> f in
-            match Lazy.force z with
-            | Cf_seq.Z ->
-                f n z0
-            | Cf_seq.P (hd, tl) ->
-                match Lazy.force (S.get s.s_next_ (code hd)) with
-                | None -> f n z0
-                | Some s -> loop code s f (succ n) z0 tl
-        in
-        fun code r ->
-            let s = suspend r in
-            fun z ->
-                loop code s nil 0 z z
-    
-    let create r = create_aux_ identity_ r
-    
-    module X = struct
-        type ('c, 'a) r = (S.t * 'c, 'a) satisfier w
-            constraint 'c = S.t #Cf_parser.cursor
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        module Op: Op_T = struct
-            include Expr_Op
-            
-            let ( $> ) x f =
-                let g n z =
-                    let hd = Cf_seq.limit n (Cf_seq.map fst z)
-                    and tl = Cf_seq.shift n z in
-                    Some (f hd, tl)
-                in
-                (Obj.magic x) $& (expr_ (acceptor_ g))
-
-            let ( $= ) = Op.( $= )
-            let ( $@ ) = Op.( $@ )
-            let ( !@ ) = Op.( !@ )
-        end
-        
-        let create r = create_aux_ fst r
-    end
-end
-
-(*--- $File$ ---*)

File cf/cf_dfa.mli

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2004-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-(** Functional composition of lazy deterministic finite automata. *)
-
-(** {6 Overview}
-
-    This module implements operators for functional composition of lazy
-    deterministic finite automata (DFA).  A lazy DFA is more efficient at
-    recognizing regular grammars than a non-deterministic finite automaton,
-    and the lazy evaluation amortizes the cost of compiling the state table
-    so that it compares well to that of the NFA.
-    
-    The interface defined here is used as the underlying algorithm for the
-    {!Cf_lex} module.  It uses a functor that operates on a module defining
-    the type of a symbol, the type of parser input tokens that contain such
-    symbols, and a map of symbols to some polymorphic type.  The result of the
-    functor is a module that contains operator functions for composing
-    expressions and rules for automata that operate on streams of the input
-    symbol type.
-    
-    {b Note}: a DFA can be remarkably inefficient compared to an NFA for
-    certain classes of unusual grammars and unusual input.
-*)
-
-(** {6 Module Types} *)
-
-(** The type of the input module for [Create(S: Symbol_T)] functor defined
-    below.
-*)
-module type Symbol_T = sig
-    (** The symbol type *)
-    type t
-        
-    (** The type of maps from symbols to polymorphic types. *)
-    type 'a map
-        
-    (** The engine uses [map f] to construct a map from symbols to state
-        transitions.
-    *)
-    val map: (t -> 'a) -> 'a map
-    
-    (** The engine uses [get m s] to get the state transition from map [m] for
-        the symbol [s].
-    *)
-    val get: 'a map -> t -> 'a
-end
-
-(** The output of the [Create(S: Symbol_T)] functor, which is a module that
-    can be used to compose deterministic finite automata which operate on
-    symbols of the type specified.
-*)
-module type T = sig
-    
-    (** The module used as the input to the [Create(S: Symbol_T)] functor. *)
-    module S: Symbol_T
-    
-    (** The type of an expression in the regular grammar of an automaton. *)
-    type x
-    
-    (** The type of a rule for recognizing a sequence of symbols according to
-        the regular grammar of an automaton and producing an output token.
-    *)
-    type 'a r
-    
-    (** A parser that works on the symbols used in the automaton. *)
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    (** The expression that matches the empty symbol sequence. *)
-    val nil: x
-    
-    (** The signature of modules containing operators for composing DFA
-        expressions.
-    *)
-    module type Expr_Op_T = sig
-        
-        (** Use [a $| b] to compose an expression that matches either [a] or
-            [b] in the symbol stream.
-        *)
-        val ( $| ): x -> x -> x
-        
-        (** Use [a $& b] to compose an expression that matches [a] followed by
-            [b] in the symbol stream.
-        *)
-        val ( $& ): x -> x -> x
-        
-        (** Use [!*a] to compose an expression that matches zero or more
-            occurances of [a] in the symbol stream.
-        *)
-        val ( !* ): x -> x
-        
-        (** Use [!+a] to compose an expression that matches one or more
-            occurances of [a] in the symbol stream.
-        *)
-        val ( !+ ): x -> x
-        
-        (** Use [!?a] to compose an expression that matches zero or one
-            occurance of [a] in the symbol stream.
-        *)
-        val ( !? ): x -> x
-        
-        (** Use [!:sym] to compose an expression that matches the symbol [sym]
-            in the symbol stream.
-        *)
-        val ( !: ): S.t -> x
-        
-        (** Use [!^f] to compose an expression that matches any symbol in the
-            symbol stream for which applying the function [f] returns [true].
-        *)
-        val ( !^ ): (S.t -> bool) -> x
-        
-        (** Use [!~z] to compose an expression that matches the sequence of
-            symbols [z] in the symbol stream.
-        *)
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    (** The module containing the expression operators. *)
-    module Expr_Op: Expr_Op_T
-    
-    (** The signature of the [Op] module, which contains the composition
-        operators.
-    *)
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        (** Use [e $= x] to compose a rule that produces [x] when the symbols
-            in the symbol stream match the expression [e].
-        *)
-        val ( $= ): x -> 'a -> 'a r
-        
-        (** Use [e $> f] to compose a rule that applies the tokenizer function
-            [f] to the sequence of input symbols in the stream recognized by
-            the expression [e] to produce an output token.
-        *)
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        
-        (** Use [e $@ f] to compose a rule that applies the scanning function
-            [f] to the input stream when it is recognized by the expression
-            [e].  The scanning function is passed the length of the recognized
-            sequence of symbols and receives a parser in return that produces
-            the output of the rule and makes any advanced manipulations of the
-            input stream necessary to continue parsing for the next token.
-            If the parser returned from the scanning function does not
-            recognize the input stream, then the rule is not matched and the
-            next best matching rule is selected.
-        *)
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        
-        (** Use this operator to combine a list of rules into a single rule. *)
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    (** Open this module to bring the composition operators into the current
-        scope.
-    *)
-    module Op: Op_T
-    
-    (** Use [create r] to construct a parser that recognizes the longest
-        sequence that matches the rule [r].
-    *)
-    val create: 'a r -> 'a t
-    
-    (** A module of extensions for working with input sequences that
-        require position information in the parse function.
-    *)
-    module X: sig
-        
-        (** The type of a rule for recognizing a sequence of symbols in a
-            stream woven with a cursor stream and according to the regular
-            grammar of an automaton and producing an output token.
-        *)
-        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
-        
-        (** An extended parser that works on pairs of symbols and cursor
-            objects and used in the automaton.
-        *)
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        (** The signature of the [Op] module, which contains the composition
-            operators.
-        *)
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            (** Use [e $= x] to compose a rule that produces [x] when the
-                symbols in the symbol stream match the expression [e].
-            *)
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            
-            (** Use [e $> f] to compose a rule that applies the tokenizer
-                function [f] to the sequence of input symbols in the
-                symbol/cursor stream recognized by the expression [e] to
-                produce an output token.
-            *)
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            
-            (** Use [e $@ f] to compose a rule that applies the scanning
-                function [f] to the symbol/cursor input stream when the symbol
-                sequence is recognized by the expression [e].  This operator
-                performs the same function as the [( $@ )] operator, but it
-                works on a stream of symbols woven with a corresponding cursor
-                stream.
-            *)
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            
-            (** Use this operator to combine a list of "cursor woven" rules
-                into a single rule.
-            *)
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        (** Open this module to bring the composition operators into the
-            current scope.
-        *)
-        module Op: Op_T
-        
-        (** Use [create r] to construct a parser that recognizes the longest
-            sequence that matches the rule [r].
-        *)
-        val create: ('c, 'a) r -> ('c, 'a) t
-    end
-end
-
-(** The functor that creates a DFA module. *)
-module Create(S: Symbol_T): T with module S = S
-
-(*--- $File$ ---*)

File cf/cf_dfa0.ml

+(*---------------------------------------------------------------------------*
+  $Change$
+  Copyright (c) 2005-2010, James H. Woodyatt
+  All rights reserved.
+  
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+  
+    Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    
+    Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution
+  
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+  OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *---------------------------------------------------------------------------*)
+
+module N_set = Cf_rbtree.Set(Cf_ordered.Int_order)
+module N_map = Cf_rbtree.Map(Cf_ordered.Int_order)
+
+let identity_ x = x
+
+module type Symbol_T = sig
+    type t and 'a map
+    val map: (t -> 'a) -> 'a map
+    val get: 'a map -> t -> 'a
+end
+
+module type T = sig
+    
+    module S: Symbol_T
+    
+    type x
+    type 'a r
+    type 'a t = (S.t, 'a) Cf_parser.t
+    
+    val nil: x
+    
+    module type Expr_Op_T = sig
+        val ( $| ): x -> x -> x
+        val ( $& ): x -> x -> x
+        val ( !* ): x -> x
+        val ( !+ ): x -> x
+        val ( !? ): x -> x
+        val ( !: ): S.t -> x
+        val ( !^ ): (S.t -> bool) -> x
+        val ( !~ ): S.t Cf_seq.t -> x
+    end
+    
+    module Expr_Op: Expr_Op_T
+    
+    module type Op_T = sig
+        include Expr_Op_T
+        
+        val ( $= ): x -> 'a -> 'a r
+        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
+        val ( $@ ): x -> (int -> 'a t) -> 'a r
+        val ( !@ ): 'a r list -> 'a r
+    end
+    
+    module Op: Op_T
+    
+    val create: 'a r -> 'a t
+    
+    module X: sig
+        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
+        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
+            constraint 'c = S.t #Cf_parser.cursor
+        
+        module type Op_T = sig
+            include Expr_Op_T
+            
+            val ( $= ): x -> 'a -> ('c, 'a) r
+            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
+            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
+            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
+        end
+        
+        module Op: Op_T
+        
+        val create: ('c, 'a) r -> ('c, 'a) t
+    end
+end
+
+module Create(S: Symbol_T) = struct
+    module S = S
+    
+    class virtual ['i, 'o] satisfier state =
+        object(_:'self)
+            constraint 'f = int -> ('i, 'o) Cf_parser.t
+            
+            val state_ = state
+            
+            method state = state_
+            method follow u = {< state_ = N_set.union state_ u >}
+            method virtual edge: S.t -> N_set.t -> N_set.t
+            method accept = (None : 'f option)
+        end
+    
+    let literal_ c =
+        object
+            inherit ['i, 'o] satisfier N_set.nil
+            method edge n u = if n = c then N_set.union state_ u else u
+        end
+        
+    let mapped_ f =
+        object
+            inherit ['i, 'o] satisfier N_set.nil
+            method edge n u = if f n then N_set.union state_ u else u
+        end
+    
+    type 's y = {
+        y_counter_: int;
+        y_first_: N_set.t;
+        y_last_: N_set.t;
+        y_follow_: 's N_map.t -> 's N_map.t;
+    } constraint 's = ('i, 'o) #satisfier
+    
+    type 's w = {
+        w_null_: bool;
+        w_cons_: int -> 's y;
+    }
+    
+    type x = (Obj.t, Obj.t) satisfier w
+    
+    let nil = {
+        w_null_ = true;
+        w_cons_ = fun i -> {
+            y_counter_ = i;
+            y_first_ = N_set.nil;
+            y_last_ = N_set.nil;
+            y_follow_ = identity_;
+        }
+    }
+    
+    let expr_ n = {
+        w_null_ = false;
+        w_cons_ = fun i ->
+            let s = N_set.singleton i in {
+                y_counter_ = succ i;
+                y_first_ = s;
+                y_last_ = s;
+                y_follow_ = fun m -> N_map.replace (i, n) m;
+            }
+    }
+    
+    module type Expr_Op_T = sig
+        val ( $| ): x -> x -> x
+        val ( $& ): x -> x -> x
+        
+        val ( !* ): x -> x
+        val ( !+ ): x -> x
+        val ( !? ): x -> x
+        val ( !: ): S.t -> x
+        val ( !^ ): (S.t -> bool) -> x
+        val ( !~ ): S.t Cf_seq.t -> x
+    end
+    
+    module Expr_Op = struct
+        let ( $| ) wa wb = {
+            w_null_ = wa.w_null_ || wb.w_null_;
+            w_cons_ = fun i ->
+                let ya = wa.w_cons_ i in
+                let yb = wb.w_cons_ ya.y_counter_ in {
+                    y_counter_ = yb.y_counter_;
+                    y_first_ = N_set.union ya.y_first_ yb.y_first_;
+                    y_last_ = N_set.union ya.y_last_ yb.y_last_;
+                    y_follow_ = fun m -> yb.y_follow_ (ya.y_follow_ m);
+                }
+        }
+        
+        let follow_fold_aux_ a m i =
+            N_map.replace (i, let sat = N_map.search i m in sat#follow a) m
+        
+        let ( $& ) wa wb = {
+            w_null_ = wa.w_null_ && wb.w_null_;
+            w_cons_ = fun i -> 
+                let ya = wa.w_cons_ i in
+                let yb = wb.w_cons_ ya.y_counter_ in
+                let first =
+                    if wa.w_null_ then
+                        N_set.union ya.y_first_ yb.y_first_
+                    else
+                        ya.y_first_
+                and last =
+                    if wb.w_null_ then
+                        N_set.union ya.y_last_ yb.y_last_
+                    else
+                        yb.y_last_
+                in {
+                    y_counter_ = yb.y_counter_;
+                    y_first_ = first;
+                    y_last_ = last;
+                    y_follow_ = fun m ->
+                        let m = yb.y_follow_ (ya.y_follow_ m) in
+                        N_set.fold (follow_fold_aux_ yb.y_first_) m ya.y_last_
+                }
+        }
+        
+        let star_follow_ y m =
+            N_set.fold (follow_fold_aux_ y.y_first_) (y.y_follow_ m) y.y_last_
+        
+        let ( !* ) w = {
+            w_null_ = true;
+            w_cons_ = fun i ->
+                let y = w.w_cons_ i in { y with y_follow_ = star_follow_ y }
+        }
+        
+        let ( !? ) x = x $| nil
+        let ( !+ ) x = x $& (!* x)
+        
+        let ( !: ) i = expr_ (literal_ i)
+        let ( !^ ) f = expr_ (mapped_ f)
+        
+        let rec ( !~ ) s =
+            match Lazy.force s with
+            | Cf_seq.Z -> nil
+            | Cf_seq.P (hd, tl) -> !:hd $& !~tl
+        
+    end
+    
+    let acceptor_ f =
+        object(self:'self)
+            inherit ['i, 'o] satisfier N_set.nil
+            
+            method edge _ u = u
+            method follow _ = (self :> 'self)
+            method accept = Some f
+        end
+    
+    type 'a r = (S.t, 'a) satisfier w
+    type 'a t = (S.t, 'a) Cf_parser.t
+    
+    module type Op_T = sig
+        include Expr_Op_T
+        
+        val ( $= ): x -> 'a -> 'a r
+        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
+        val ( $@ ): x -> (int -> 'a t) -> 'a r
+        val ( !@ ): 'a r list -> 'a r
+    end
+    
+    module Op = struct
+        include Expr_Op
+        
+        let ( $= ) x k =
+            let f n z = Some (k, Cf_seq.shift n z) in
+            (Obj.magic x) $& (expr_ (acceptor_ f))
+        
+        let ( $> ) x f =
+            let g n z =
+                let hd = Cf_seq.limit n z and tl = Cf_seq.shift n z in
+                Some (f hd, tl)
+            in
+            (Obj.magic x) $& (expr_ (acceptor_ g))
+        
+        let ( $@ ) x f =
+            (Obj.magic x) $& (expr_ (acceptor_ f))
+        
+        let ( !@ ) =
+            let rec f e = function hd :: tl -> f (hd $| e) tl | [] -> e in
+            fun s -> f nil s
+    end
+    
+    module S_order = struct
+        type t = int array
+        
+        let compare = compare
+            
+        (*
+        let to_string a =
+            let b = Buffer.create 40 in
+            Buffer.add_string b "[|";
+            begin
+                match Array.length a with
+                | 0 -> ()
+                | 1 ->
+                    Buffer.add_string b (Printf.sprintf " %u" a.(0))
+                | n ->
+                    for i = 0 to n - 2 do
+                        Buffer.add_string b (Printf.sprintf " %u;" a.(i))
+                    done;
+                    Buffer.add_string b (Printf.sprintf " %u" a.(n - 1))
+            end;
+            Buffer.add_string b " |]";
+            Buffer.contents b
+        *)
+    end
+    
+    module S_map = Cf_rbtree.Map(S_order)
+    
+    type ('i, 'o) s = {
+        s_id_: S_order.t;
+        s_accept_: (int -> ('i, 'o) Cf_parser.t) option;
+        s_next_: ('i, 'o) s option Lazy.t S.map;
+    }
+    
+    let create_aux_ =
+        let suspend w =
+            let y = w.w_cons_ 0 in
+            let m = y.y_follow_ N_map.nil in
+            let edge n u p = let sat = N_map.search p m in sat#edge n u in
+            let rec accept u ul i =
+                if i < ul then begin
+                    let sat = N_map.search (Array.unsafe_get u i) m in
+                    match sat#accept with
+                    | None -> accept u ul (succ i)
+                    | v -> v
+                end
+                else
+                    None
+            in
+            let sh = ref S_map.nil in
+            let rec state u =
+                let s = {
+                    s_id_ = u;
+                    s_accept_ = accept u (Array.length u) 0;
+                    s_next_ = S.map (follow u);
+                } in
+                sh := S_map.replace (u, s) !sh;
+                s
+            and follow u n =
+                lazy begin
+                    let v = Array.fold_left (edge n) N_set.nil u in
+                    if N_set.empty v then
+                        None
+                    else
+                        let u = Array.of_list (N_set.to_list_incr v) in
+                        Some (try S_map.search u !sh with Not_found -> state u)
+                end
+            in
+            state (Array.of_list (N_set.to_list_incr y.y_first_))
+        in
+        let nil _ _ = None in
+        let rec loop code s f n z0 z =
+            let f = match s.s_accept_ with None -> f | Some f -> f in
+            match Lazy.force z with
+            | Cf_seq.Z ->
+                f n z0
+            | Cf_seq.P (hd, tl) ->
+                match Lazy.force (S.get s.s_next_ (code hd)) with
+                | None -> f n z0
+                | Some s -> loop code s f (succ n) z0 tl
+        in
+        fun code r ->
+            let s = suspend r in
+            fun z ->
+                loop code s nil 0 z z
+    
+    let create r = create_aux_ identity_ r
+    
+    module X = struct
+        type ('c, 'a) r = (S.t * 'c, 'a) satisfier w
+            constraint 'c = S.t #Cf_parser.cursor
+        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
+            constraint 'c = S.t #Cf_parser.cursor
+        
+        module type Op_T = sig
+            include Expr_Op_T
+            
+            val ( $= ): x -> 'a -> ('c, 'a) r
+            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
+            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
+            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
+        end
+        
+        module Op: Op_T = struct
+            include Expr_Op
+            
+            let ( $> ) x f =
+                let g n z =
+                    let hd = Cf_seq.limit n (Cf_seq.map fst z)
+                    and tl = Cf_seq.shift n z in
+                    Some (f hd, tl)
+                in
+                (Obj.magic x) $& (expr_ (acceptor_ g))
+
+            let ( $= ) = Op.( $= )
+            let ( $@ ) = Op.( $@ )
+            let ( !@ ) = Op.( !@ )
+        end
+        
+        let create r = create_aux_ fst r
+    end
+end
+
+(*--- $File$ ---*)

File cf/cf_dfa0.mli

+(*---------------------------------------------------------------------------*
+  $Change$
+  Copyright (c) 2004-2010, James H. Woodyatt
+  All rights reserved.
+  
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+  
+    Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    
+    Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution
+  
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+  OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *---------------------------------------------------------------------------*)
+
+(** Functional composition of lazy deterministic finite automata. *)
+
+(** {6 Overview}
+
+    This module implements operators for functional composition of lazy
+    deterministic finite automata (DFA).  A lazy DFA is more efficient at
+    recognizing regular grammars than a non-deterministic finite automaton,
+    and the lazy evaluation amortizes the cost of compiling the state table
+    so that it compares well to that of the NFA.
+    
+    The interface defined here is used as the underlying algorithm for the
+    {!Cf_lex} module.  It uses a functor that operates on a module defining
+    the type of a symbol, the type of parser input tokens that contain such
+    symbols, and a map of symbols to some polymorphic type.  The result of the
+    functor is a module that contains operator functions for composing
+    expressions and rules for automata that operate on streams of the input
+    symbol type.
+    
+    {b Note}: a DFA can be remarkably inefficient compared to an NFA for
+    certain classes of unusual grammars and unusual input.
+*)
+
+(** {6 Module Types} *)
+
+(** The type of the input module for [Create(S: Symbol_T)] functor defined
+    below.
+*)
+module type Symbol_T = sig
+    (** The symbol type *)
+    type t
+        
+    (** The type of maps from symbols to polymorphic types. *)
+    type 'a map
+        
+    (** The engine uses [map f] to construct a map from symbols to state
+        transitions.
+    *)
+    val map: (t -> 'a) -> 'a map
+    
+    (** The engine uses [get m s] to get the state transition from map [m] for
+        the symbol [s].
+    *)
+    val get: 'a map -> t -> 'a
+end
+
+(** The output of the [Create(S: Symbol_T)] functor, which is a module that
+    can be used to compose deterministic finite automata which operate on
+    symbols of the type specified.
+*)
+module type T = sig
+    
+    (** The module used as the input to the [Create(S: Symbol_T)] functor. *)
+    module S: Symbol_T
+    
+    (** The type of an expression in the regular grammar of an automaton. *)
+    type x
+    
+    (** The type of a rule for recognizing a sequence of symbols according to
+        the regular grammar of an automaton and producing an output token.
+    *)
+    type 'a r
+    
+    (** A parser that works on the symbols used in the automaton. *)
+    type 'a t = (S.t, 'a) Cf_parser.t
+    
+    (** The expression that matches the empty symbol sequence. *)
+    val nil: x
+    
+    (** The signature of modules containing operators for composing DFA
+        expressions.
+    *)
+    module type Expr_Op_T = sig
+        
+        (** Use [a $| b] to compose an expression that matches either [a] or
+            [b] in the symbol stream.
+        *)
+        val ( $| ): x -> x -> x
+        
+        (** Use [a $& b] to compose an expression that matches [a] followed by
+            [b] in the symbol stream.
+        *)
+        val ( $& ): x -> x -> x
+        
+        (** Use [!*a] to compose an expression that matches zero or more
+            occurances of [a] in the symbol stream.
+        *)
+        val ( !* ): x -> x
+        
+        (** Use [!+a] to compose an expression that matches one or more
+            occurances of [a] in the symbol stream.
+        *)
+        val ( !+ ): x -> x
+        
+        (** Use [!?a] to compose an expression that matches zero or one
+            occurance of [a] in the symbol stream.
+        *)
+        val ( !? ): x -> x
+        
+        (** Use [!:sym] to compose an expression that matches the symbol [sym]
+            in the symbol stream.
+        *)
+        val ( !: ): S.t -> x
+        
+        (** Use [!^f] to compose an expression that matches any symbol in the
+            symbol stream for which applying the function [f] returns [true].
+        *)
+        val ( !^ ): (S.t -> bool) -> x
+        
+        (** Use [!~z] to compose an expression that matches the sequence of
+            symbols [z] in the symbol stream.
+        *)
+        val ( !~ ): S.t Cf_seq.t -> x
+    end
+    
+    (** The module containing the expression operators. *)
+    module Expr_Op: Expr_Op_T
+    
+    (** The signature of the [Op] module, which contains the composition
+        operators.
+    *)
+    module type Op_T = sig
+        include Expr_Op_T
+        
+        (** Use [e $= x] to compose a rule that produces [x] when the symbols
+            in the symbol stream match the expression [e].
+        *)
+        val ( $= ): x -> 'a -> 'a r
+        
+        (** Use [e $> f] to compose a rule that applies the tokenizer function
+            [f] to the sequence of input symbols in the stream recognized by
+            the expression [e] to produce an output token.
+        *)
+        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
+        
+        (** Use [e $@ f] to compose a rule that applies the scanning function
+            [f] to the input stream when it is recognized by the expression
+            [e].  The scanning function is passed the length of the recognized
+            sequence of symbols and receives a parser in return that produces
+            the output of the rule and makes any advanced manipulations of the
+            input stream necessary to continue parsing for the next token.
+            If the parser returned from the scanning function does not
+            recognize the input stream, then the rule is not matched and the
+            next best matching rule is selected.
+        *)
+        val ( $@ ): x -> (int -> 'a t) -> 'a r
+        
+        (** Use this operator to combine a list of rules into a single rule. *)
+        val ( !@ ): 'a r list -> 'a r
+    end
+    
+    (** Open this module to bring the composition operators into the current
+        scope.
+    *)
+    module Op: Op_T
+    
+    (** Use [create r] to construct a parser that recognizes the longest
+        sequence that matches the rule [r].
+    *)
+    val create: 'a r -> 'a t
+    
+    (** A module of extensions for working with input sequences that
+        require position information in the parse function.
+    *)
+    module X: sig
+        
+        (** The type of a rule for recognizing a sequence of symbols in a
+            stream woven with a cursor stream and according to the regular
+            grammar of an automaton and producing an output token.
+        *)
+        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
+        
+        (** An extended parser that works on pairs of symbols and cursor
+            objects and used in the automaton.
+        *)
+        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
+            constraint 'c = S.t #Cf_parser.cursor
+        
+        (** The signature of the [Op] module, which contains the composition
+            operators.
+        *)
+        module type Op_T = sig
+            include Expr_Op_T
+            
+            (** Use [e $= x] to compose a rule that produces [x] when the
+                symbols in the symbol stream match the expression [e].
+            *)
+            val ( $= ): x -> 'a -> ('c, 'a) r
+            
+            (** Use [e $> f] to compose a rule that applies the tokenizer
+                function [f] to the sequence of input symbols in the
+                symbol/cursor stream recognized by the expression [e] to
+                produce an output token.
+            *)
+            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
+            
+            (** Use [e $@ f] to compose a rule that applies the scanning
+                function [f] to the symbol/cursor input stream when the symbol
+                sequence is recognized by the expression [e].  This operator
+                performs the same function as the [( $@ )] operator, but it
+                works on a stream of symbols woven with a corresponding cursor
+                stream.
+            *)
+            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
+            
+            (** Use this operator to combine a list of "cursor woven" rules
+                into a single rule.
+            *)
+            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
+        end
+        
+        (** Open this module to bring the composition operators into the
+            current scope.
+        *)
+        module Op: Op_T
+        
+        (** Use [create r] to construct a parser that recognizes the longest
+            sequence that matches the rule [r].
+        *)
+        val create: ('c, 'a) r -> ('c, 'a) t
+    end
+end
+
+(** The functor that creates a DFA module. *)
+module Create(S: Symbol_T): T with module S = S
+
+(*--- $File$ ---*)

File cf/cf_regex.ml

     let get m c = Array.unsafe_get m (int_of_char c)
 end
 
-module DFA = Cf_dfa.Create(Symbol)
+module DFA = Cf_dfa0.Create(Symbol)
 
 exception Error of string