Commits

james woodyatt committed 0f747c4

Remove obsolete CF modules: Cf_dfa0, Cf_lex, Cf_regex, Cf_scan_parser.
Rename Xml_parser => Xml_llscan (use Cf_llscan instead of Cf_parser).

Comments (0)

Files changed (18)

-373c9f80e7b53bf2ae38c46d3ccfff8e1e3f5e94 Conjury
+626823d30a4929c64376000f07ee701967599604 Conjury
         llscan
         fmt_llscan
         deque
+        message
         heap
         pqueue
         map
         dfa
         regx
         clex
-        message
         flow
         gadget
         state_gadget
         machine
         unicode
         parser
-        dfa0
-        regex
-        lex
-        scan_parser
+    
+    # OBSOLESCENT:
+        # dfa0
+        # regex
+        # lex
+        # scan_parser
     
     primitives[]=
         common

cf/cf_dfa0.ml

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-module N_set = Cf_rbtree.Set(Cf_ordered.Int_order)
-module N_map = Cf_rbtree.Map(Cf_ordered.Int_order)
-
-let identity_ x = x
-
-module type Symbol_T = sig
-    type t and 'a map
-    val map: (t -> 'a) -> 'a map
-    val get: 'a map -> t -> 'a
-end
-
-module type T = sig
-    
-    module S: Symbol_T
-    
-    type x
-    type 'a r
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    val nil: x
-    
-    module type Expr_Op_T = sig
-        val ( $| ): x -> x -> x
-        val ( $& ): x -> x -> x
-        val ( !* ): x -> x
-        val ( !+ ): x -> x
-        val ( !? ): x -> x
-        val ( !: ): S.t -> x
-        val ( !^ ): (S.t -> bool) -> x
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    module Expr_Op: Expr_Op_T
-    
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        val ( $= ): x -> 'a -> 'a r
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    module Op: Op_T
-    
-    val create: 'a r -> 'a t
-    
-    module X: sig
-        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        module Op: Op_T
-        
-        val create: ('c, 'a) r -> ('c, 'a) t
-    end
-end
-
-module Create(S: Symbol_T) = struct
-    module S = S
-    
-    class virtual ['i, 'o] satisfier state =
-        object(_:'self)
-            constraint 'f = int -> ('i, 'o) Cf_parser.t
-            
-            val state_ = state
-            
-            method state = state_
-            method follow u = {< state_ = N_set.union state_ u >}
-            method virtual edge: S.t -> N_set.t -> N_set.t
-            method accept = (None : 'f option)
-        end
-    
-    let literal_ c =
-        object
-            inherit ['i, 'o] satisfier N_set.nil
-            method edge n u = if n = c then N_set.union state_ u else u
-        end
-        
-    let mapped_ f =
-        object
-            inherit ['i, 'o] satisfier N_set.nil
-            method edge n u = if f n then N_set.union state_ u else u
-        end
-    
-    type 's y = {
-        y_counter_: int;
-        y_first_: N_set.t;
-        y_last_: N_set.t;
-        y_follow_: 's N_map.t -> 's N_map.t;
-    } constraint 's = ('i, 'o) #satisfier
-    
-    type 's w = {
-        w_null_: bool;
-        w_cons_: int -> 's y;
-    }
-    
-    type x = (Obj.t, Obj.t) satisfier w
-    
-    let nil = {
-        w_null_ = true;
-        w_cons_ = fun i -> {
-            y_counter_ = i;
-            y_first_ = N_set.nil;
-            y_last_ = N_set.nil;
-            y_follow_ = identity_;
-        }
-    }
-    
-    let expr_ n = {
-        w_null_ = false;
-        w_cons_ = fun i ->
-            let s = N_set.singleton i in {
-                y_counter_ = succ i;
-                y_first_ = s;
-                y_last_ = s;
-                y_follow_ = fun m -> N_map.replace (i, n) m;
-            }
-    }
-    
-    module type Expr_Op_T = sig
-        val ( $| ): x -> x -> x
-        val ( $& ): x -> x -> x
-        
-        val ( !* ): x -> x
-        val ( !+ ): x -> x
-        val ( !? ): x -> x
-        val ( !: ): S.t -> x
-        val ( !^ ): (S.t -> bool) -> x
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    module Expr_Op = struct
-        let ( $| ) wa wb = {
-            w_null_ = wa.w_null_ || wb.w_null_;
-            w_cons_ = fun i ->
-                let ya = wa.w_cons_ i in
-                let yb = wb.w_cons_ ya.y_counter_ in {
-                    y_counter_ = yb.y_counter_;
-                    y_first_ = N_set.union ya.y_first_ yb.y_first_;
-                    y_last_ = N_set.union ya.y_last_ yb.y_last_;
-                    y_follow_ = fun m -> yb.y_follow_ (ya.y_follow_ m);
-                }
-        }
-        
-        let follow_fold_aux_ a m i =
-            N_map.replace (i, let sat = N_map.search i m in sat#follow a) m
-        
-        let ( $& ) wa wb = {
-            w_null_ = wa.w_null_ && wb.w_null_;
-            w_cons_ = fun i -> 
-                let ya = wa.w_cons_ i in
-                let yb = wb.w_cons_ ya.y_counter_ in
-                let first =
-                    if wa.w_null_ then
-                        N_set.union ya.y_first_ yb.y_first_
-                    else
-                        ya.y_first_
-                and last =
-                    if wb.w_null_ then
-                        N_set.union ya.y_last_ yb.y_last_
-                    else
-                        yb.y_last_
-                in {
-                    y_counter_ = yb.y_counter_;
-                    y_first_ = first;
-                    y_last_ = last;
-                    y_follow_ = fun m ->
-                        let m = yb.y_follow_ (ya.y_follow_ m) in
-                        N_set.fold (follow_fold_aux_ yb.y_first_) m ya.y_last_
-                }
-        }
-        
-        let star_follow_ y m =
-            N_set.fold (follow_fold_aux_ y.y_first_) (y.y_follow_ m) y.y_last_
-        
-        let ( !* ) w = {
-            w_null_ = true;
-            w_cons_ = fun i ->
-                let y = w.w_cons_ i in { y with y_follow_ = star_follow_ y }
-        }
-        
-        let ( !? ) x = x $| nil
-        let ( !+ ) x = x $& (!* x)
-        
-        let ( !: ) i = expr_ (literal_ i)
-        let ( !^ ) f = expr_ (mapped_ f)
-        
-        let rec ( !~ ) s =
-            match Lazy.force s with
-            | Cf_seq.Z -> nil
-            | Cf_seq.P (hd, tl) -> !:hd $& !~tl
-        
-    end
-    
-    let acceptor_ f =
-        object(self:'self)
-            inherit ['i, 'o] satisfier N_set.nil
-            
-            method edge _ u = u
-            method follow _ = (self :> 'self)
-            method accept = Some f
-        end
-    
-    type 'a r = (S.t, 'a) satisfier w
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        val ( $= ): x -> 'a -> 'a r
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    module Op = struct
-        include Expr_Op
-        
-        let ( $= ) x k =
-            let f n z = Some (k, Cf_seq.shift n z) in
-            (Obj.magic x) $& (expr_ (acceptor_ f))
-        
-        let ( $> ) x f =
-            let g n z =
-                let hd = Cf_seq.limit n z and tl = Cf_seq.shift n z in
-                Some (f hd, tl)
-            in
-            (Obj.magic x) $& (expr_ (acceptor_ g))
-        
-        let ( $@ ) x f =
-            (Obj.magic x) $& (expr_ (acceptor_ f))
-        
-        let ( !@ ) =
-            let rec f e = function hd :: tl -> f (hd $| e) tl | [] -> e in
-            fun s -> f nil s
-    end
-    
-    module S_order = struct
-        type t = int array
-        
-        let compare = compare
-            
-        (*
-        let to_string a =
-            let b = Buffer.create 40 in
-            Buffer.add_string b "[|";
-            begin
-                match Array.length a with
-                | 0 -> ()
-                | 1 ->
-                    Buffer.add_string b (Printf.sprintf " %u" a.(0))
-                | n ->
-                    for i = 0 to n - 2 do
-                        Buffer.add_string b (Printf.sprintf " %u;" a.(i))
-                    done;
-                    Buffer.add_string b (Printf.sprintf " %u" a.(n - 1))
-            end;
-            Buffer.add_string b " |]";
-            Buffer.contents b
-        *)
-    end
-    
-    module S_map = Cf_rbtree.Map(S_order)
-    
-    type ('i, 'o) s = {
-        s_id_: S_order.t;
-        s_accept_: (int -> ('i, 'o) Cf_parser.t) option;
-        s_next_: ('i, 'o) s option Lazy.t S.map;
-    }
-    
-    let create_aux_ =
-        let suspend w =
-            let y = w.w_cons_ 0 in
-            let m = y.y_follow_ N_map.nil in
-            let edge n u p = let sat = N_map.search p m in sat#edge n u in
-            let rec accept u ul i =
-                if i < ul then begin
-                    let sat = N_map.search (Array.unsafe_get u i) m in
-                    match sat#accept with
-                    | None -> accept u ul (succ i)
-                    | v -> v
-                end
-                else
-                    None
-            in
-            let sh = ref S_map.nil in
-            let rec state u =
-                let s = {
-                    s_id_ = u;
-                    s_accept_ = accept u (Array.length u) 0;
-                    s_next_ = S.map (follow u);
-                } in
-                sh := S_map.replace (u, s) !sh;
-                s
-            and follow u n =
-                lazy begin
-                    let v = Array.fold_left (edge n) N_set.nil u in
-                    if N_set.empty v then
-                        None
-                    else
-                        let u = Array.of_list (N_set.to_list_incr v) in
-                        Some (try S_map.search u !sh with Not_found -> state u)
-                end
-            in
-            state (Array.of_list (N_set.to_list_incr y.y_first_))
-        in
-        let nil _ _ = None in
-        let rec loop code s f n z0 z =
-            let f = match s.s_accept_ with None -> f | Some f -> f in
-            match Lazy.force z with
-            | Cf_seq.Z ->
-                f n z0
-            | Cf_seq.P (hd, tl) ->
-                match Lazy.force (S.get s.s_next_ (code hd)) with
-                | None -> f n z0
-                | Some s -> loop code s f (succ n) z0 tl
-        in
-        fun code r ->
-            let s = suspend r in
-            fun z ->
-                loop code s nil 0 z z
-    
-    let create r = create_aux_ identity_ r
-    
-    module X = struct
-        type ('c, 'a) r = (S.t * 'c, 'a) satisfier w
-            constraint 'c = S.t #Cf_parser.cursor
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        module Op: Op_T = struct
-            include Expr_Op
-            
-            let ( $> ) x f =
-                let g n z =
-                    let hd = Cf_seq.limit n (Cf_seq.map fst z)
-                    and tl = Cf_seq.shift n z in
-                    Some (f hd, tl)
-                in
-                (Obj.magic x) $& (expr_ (acceptor_ g))
-
-            let ( $= ) = Op.( $= )
-            let ( $@ ) = Op.( $@ )
-            let ( !@ ) = Op.( !@ )
-        end
-        
-        let create r = create_aux_ fst r
-    end
-end
-
-(*--- $File$ ---*)

cf/cf_dfa0.mli

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2004-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-(** Functional composition of lazy deterministic finite automata. *)
-
-(** {6 Overview}
-
-    This module implements operators for functional composition of lazy
-    deterministic finite automata (DFA).  A lazy DFA is more efficient at
-    recognizing regular grammars than a non-deterministic finite automaton,
-    and the lazy evaluation amortizes the cost of compiling the state table
-    so that it compares well to that of the NFA.
-    
-    The interface defined here is used as the underlying algorithm for the
-    {!Cf_lex} module.  It uses a functor that operates on a module defining
-    the type of a symbol, the type of parser input tokens that contain such
-    symbols, and a map of symbols to some polymorphic type.  The result of the
-    functor is a module that contains operator functions for composing
-    expressions and rules for automata that operate on streams of the input
-    symbol type.
-    
-    {b Note}: a DFA can be remarkably inefficient compared to an NFA for
-    certain classes of unusual grammars and unusual input.
-*)
-
-(** {6 Module Types} *)
-
-(** The type of the input module for [Create(S: Symbol_T)] functor defined
-    below.
-*)
-module type Symbol_T = sig
-    (** The symbol type *)
-    type t
-        
-    (** The type of maps from symbols to polymorphic types. *)
-    type 'a map
-        
-    (** The engine uses [map f] to construct a map from symbols to state
-        transitions.
-    *)
-    val map: (t -> 'a) -> 'a map
-    
-    (** The engine uses [get m s] to get the state transition from map [m] for
-        the symbol [s].
-    *)
-    val get: 'a map -> t -> 'a
-end
-
-(** The output of the [Create(S: Symbol_T)] functor, which is a module that
-    can be used to compose deterministic finite automata which operate on
-    symbols of the type specified.
-*)
-module type T = sig
-    
-    (** The module used as the input to the [Create(S: Symbol_T)] functor. *)
-    module S: Symbol_T
-    
-    (** The type of an expression in the regular grammar of an automaton. *)
-    type x
-    
-    (** The type of a rule for recognizing a sequence of symbols according to
-        the regular grammar of an automaton and producing an output token.
-    *)
-    type 'a r
-    
-    (** A parser that works on the symbols used in the automaton. *)
-    type 'a t = (S.t, 'a) Cf_parser.t
-    
-    (** The expression that matches the empty symbol sequence. *)
-    val nil: x
-    
-    (** The signature of modules containing operators for composing DFA
-        expressions.
-    *)
-    module type Expr_Op_T = sig
-        
-        (** Use [a $| b] to compose an expression that matches either [a] or
-            [b] in the symbol stream.
-        *)
-        val ( $| ): x -> x -> x
-        
-        (** Use [a $& b] to compose an expression that matches [a] followed by
-            [b] in the symbol stream.
-        *)
-        val ( $& ): x -> x -> x
-        
-        (** Use [!*a] to compose an expression that matches zero or more
-            occurances of [a] in the symbol stream.
-        *)
-        val ( !* ): x -> x
-        
-        (** Use [!+a] to compose an expression that matches one or more
-            occurances of [a] in the symbol stream.
-        *)
-        val ( !+ ): x -> x
-        
-        (** Use [!?a] to compose an expression that matches zero or one
-            occurance of [a] in the symbol stream.
-        *)
-        val ( !? ): x -> x
-        
-        (** Use [!:sym] to compose an expression that matches the symbol [sym]
-            in the symbol stream.
-        *)
-        val ( !: ): S.t -> x
-        
-        (** Use [!^f] to compose an expression that matches any symbol in the
-            symbol stream for which applying the function [f] returns [true].
-        *)
-        val ( !^ ): (S.t -> bool) -> x
-        
-        (** Use [!~z] to compose an expression that matches the sequence of
-            symbols [z] in the symbol stream.
-        *)
-        val ( !~ ): S.t Cf_seq.t -> x
-    end
-    
-    (** The module containing the expression operators. *)
-    module Expr_Op: Expr_Op_T
-    
-    (** The signature of the [Op] module, which contains the composition
-        operators.
-    *)
-    module type Op_T = sig
-        include Expr_Op_T
-        
-        (** Use [e $= x] to compose a rule that produces [x] when the symbols
-            in the symbol stream match the expression [e].
-        *)
-        val ( $= ): x -> 'a -> 'a r
-        
-        (** Use [e $> f] to compose a rule that applies the tokenizer function
-            [f] to the sequence of input symbols in the stream recognized by
-            the expression [e] to produce an output token.
-        *)
-        val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
-        
-        (** Use [e $@ f] to compose a rule that applies the scanning function
-            [f] to the input stream when it is recognized by the expression
-            [e].  The scanning function is passed the length of the recognized
-            sequence of symbols and receives a parser in return that produces
-            the output of the rule and makes any advanced manipulations of the
-            input stream necessary to continue parsing for the next token.
-            If the parser returned from the scanning function does not
-            recognize the input stream, then the rule is not matched and the
-            next best matching rule is selected.
-        *)
-        val ( $@ ): x -> (int -> 'a t) -> 'a r
-        
-        (** Use this operator to combine a list of rules into a single rule. *)
-        val ( !@ ): 'a r list -> 'a r
-    end
-    
-    (** Open this module to bring the composition operators into the current
-        scope.
-    *)
-    module Op: Op_T
-    
-    (** Use [create r] to construct a parser that recognizes the longest
-        sequence that matches the rule [r].
-    *)
-    val create: 'a r -> 'a t
-    
-    (** A module of extensions for working with input sequences that
-        require position information in the parse function.
-    *)
-    module X: sig
-        
-        (** The type of a rule for recognizing a sequence of symbols in a
-            stream woven with a cursor stream and according to the regular
-            grammar of an automaton and producing an output token.
-        *)
-        type ('c, 'a) r constraint 'c = S.t #Cf_parser.cursor
-        
-        (** An extended parser that works on pairs of symbols and cursor
-            objects and used in the automaton.
-        *)
-        type ('c, 'a) t = ('c, S.t, 'a) Cf_parser.X.t
-            constraint 'c = S.t #Cf_parser.cursor
-        
-        (** The signature of the [Op] module, which contains the composition
-            operators.
-        *)
-        module type Op_T = sig
-            include Expr_Op_T
-            
-            (** Use [e $= x] to compose a rule that produces [x] when the
-                symbols in the symbol stream match the expression [e].
-            *)
-            val ( $= ): x -> 'a -> ('c, 'a) r
-            
-            (** Use [e $> f] to compose a rule that applies the tokenizer
-                function [f] to the sequence of input symbols in the
-                symbol/cursor stream recognized by the expression [e] to
-                produce an output token.
-            *)
-            val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> ('c, 'a) r
-            
-            (** Use [e $@ f] to compose a rule that applies the scanning
-                function [f] to the symbol/cursor input stream when the symbol
-                sequence is recognized by the expression [e].  This operator
-                performs the same function as the [( $@ )] operator, but it
-                works on a stream of symbols woven with a corresponding cursor
-                stream.
-            *)
-            val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-            
-            (** Use this operator to combine a list of "cursor woven" rules
-                into a single rule.
-            *)
-            val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        end
-        
-        (** Open this module to bring the composition operators into the
-            current scope.
-        *)
-        module Op: Op_T
-        
-        (** Use [create r] to construct a parser that recognizes the longest
-            sequence that matches the rule [r].
-        *)
-        val create: ('c, 'a) r -> ('c, 'a) t
-    end
-end
-
-(** The functor that creates a DFA module. *)
-module Create(S: Symbol_T): T with module S = S
-
-(*--- $File$ ---*)

cf/cf_lex.ml

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-module DFA = Cf_regex.DFA
-
-type x = DFA.x
-type 'a r = 'a DFA.r
-type 'a t = 'a DFA.t
-
-let nil = DFA.nil
-let create r = DFA.create r
-
-module type Expr_Op_T = sig
-    val ( $| ): x -> x -> x
-    val ( $& ): x -> x -> x
-    
-    val ( !* ): x -> x
-    val ( !+ ): x -> x
-    val ( !? ): x -> x
-    val ( !: ): char -> x
-    val ( !^ ): (char -> bool) -> x
-    val ( !~ ): char Cf_seq.t -> x
-    val ( !$ ): string -> x
-end
-
-module Expr_Op = struct    
-    let ( !~ ) = Cf_regex.expr_of_seq
-    let ( !$ ) = Cf_regex.expr_of_string
-end
-
-let identity_ x = x
-
-module Op = struct
-    include DFA.Op
-    include Expr_Op
-        
-    let ( $> ) e f = e $> (fun z -> f (Cf_seq.to_string z))
-    let ( ?~ ) e = DFA.create (e $> identity_)
-    let ( ?$ ) s = DFA.create (Cf_regex.expr_of_string s $> identity_)
-end
-
-module X = struct
-    type ('c, 'a) r = ('c, 'a) DFA.X.r
-    type ('c, 'a) t = ('c, 'a) DFA.X.t
-    
-    let create r = DFA.X.create r
-    
-    module Op = struct
-        include DFA.X.Op
-        include Expr_Op
-        
-        let ( $> ) e f = e $> (fun z -> f (Cf_seq.to_string z))
-        let ( ?~ ) e = DFA.X.create (e $> identity_)
-        let ( ?$ ) s = DFA.X.create (Cf_regex.expr_of_string s $> identity_)
-    end
-end
-
-type counter = {
-    c_pos: int;
-    c_row: int;
-    c_col: int;
-}
-
-let counter_zero = {
-    c_pos = 0;
-    c_row = 0;
-    c_col = 0;
-}
-
-class cursor ?(c = counter_zero) newline =
-    let nl0 = Cf_seq.to_list (Cf_seq.of_string newline) in
-    object(self:'self)
-        inherit [char] Cf_parser.cursor c.c_pos
-        
-        val row_: int = c.c_row
-        val col_: int = c.c_col
-        val nlz_: char list = nl0
-        val nl0_: char list = nl0
-        
-        method counter = {
-            c_pos = position_;
-            c_row = row_;
-            c_col = col_;
-        }
-        
-        method row = row_
-        method col = col_
-        
-        method private next ch =
-            match nlz_ with
-            | hd :: [] when ch = hd -> succ row_, 0, nl0_
-            | hd :: tl when ch = hd -> row_, succ col_, tl
-            | _ -> row_, succ col_, nlz_
-        
-        method advance ch =
-            let row, col, nlz = self#next ch in {<
-                position_ = succ position_;
-                row_ = row;
-                col_ = col;
-                nlz_ = nlz;
-            >}
-    end
-
-(*--- $File$ ---*)

cf/cf_lex.mli

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-(** Lexical analysis with functional composition of regular grammars. *)
-
-(** {6 Overview}
-    
-    This module implements functional parsers on octet character sequences
-    using regular expressions and functional composition of lazy deterministic
-    finite automata.
-*)
-
-(** {6 Core Interface} *)
-
-(** The type of regular expressions. *)
-type x = Cf_regex.DFA.x
-
-(** The type of lexical analysis rules. *)
-type 'a r = 'a Cf_regex.DFA.r
-
-(** Character stream parser. *)
-type 'a t = (char, 'a) Cf_parser.t
-
-(** Epsilon, i.e. a subexpression that matches an empty input sequence. *)
-val nil: x
-
-(** Use [create r] to compose a lexical analyzer from the rule [r]. *)
-val create: 'a r -> 'a t
-
-(** The module type containing the subexpression composition operators.  This
-    module type is included in the signatures of the [Op] and [X.Op] modules.
-*)
-module type Expr_Op_T = sig
-    
-    (** Alternating composition.  Use [a $| b] to compose an expression that
-        matches either expression [a] or expression [b].
-    *)
-    val ( $| ): x -> x -> x
-    
-    (** Serial composition.  Use [a $& b] to compose an expression that matches
-        expression [a] followed by expression [b].
-    *)
-    val ( $& ): x -> x -> x
-    
-    (** Star composition.  Use [!*a] to compose an expression that matches zero
-        or any number of instances of [a].
-    *)
-    val ( !* ): x -> x
-    
-    (** Plus composition.  Use [!+a] to compose an expression that matches one
-        or more instances of [a].
-    *)
-    val ( !+ ): x -> x
-    
-    (** Optional composition.  Use [!?a] to compose an expression that matches
-        zero or one instance of [a].
-    *)
-    val ( !? ): x -> x
-    
-    (** Character literal.  Use [!:c] to compose an expression that matches the
-        character [c].
-    *)
-    val ( !: ): char -> x
-    
-    (** Character set.  Use [!^f] to compose an expression that matches any
-        character for which the satisfier function [f] returns [true].
-    *)
-    val ( !^ ): (char -> bool) -> x
-    
-    (** Regular expression sequence.  Use [!~z] to parse the sequence [z]
-        according to the grammar defined in {!Cf_regex} module and compose
-        an expression that matches input accordingly.  Raises {!Cf_regex.Error}
-        if the sequence is not a regular expression.
-    *)
-    val ( !~ ): char Cf_seq.t -> x
-    
-    (** Regular expression string.  Use [!~s] to parse the string [s] according
-        to the grammar defined in {!Cf_regex} module and compose an expression
-        that matches input accordingly.  Raises {!Cf_regex.Error} if the string
-        is not a regular expression.
-    *)
-    val ( !$ ): string -> x
-end
-
-(** Open this module to bring the operator functions for simple parsers into
-    the current scope.
-*)
-module Op: sig
-    
-    (** Include the expression operators common among lexical analyzers. *)
-    include Expr_Op_T
-    
-    (** Literal token rule.  Use [e $= obj] to compose a rule that outputs the
-        literal object [obj] when the expression [e] is recognized.
-    *)
-    val ( $= ): x -> 'a -> 'a r
-    
-    (** String token rule.  Use [e $> f] to compose a rule that applies the
-        string recognized by the expression [e] to the tokenizer function [f]
-        to produce its result.
-    *)
-    val ( $> ): x -> (string -> 'a) -> 'a r
-    
-    (** Advanced token rule.  Use [e $@ f] to compose a rule that applies the
-        length of the character sequence recognized by the expression [e] to
-        the advanced tokenizer function [f] to obtain a parser that produces
-        the output of the rule and makes any other manipulations necessary to
-        continue parsing the input stream.  If the parser returned by [f] does
-        not recognize the input, then no output is produced and no other rules
-        are matched.
-    *)
-    val ( $@ ): x -> (int -> 'a t) -> 'a r
-    
-    (** Rule aggregation.  Use this operator to combine a list of rules into a
-        single rule.
-    *)
-    val ( !@ ): 'a r list -> 'a r
-    
-    (** String parser.  Use [?~x] to create a simple parser that recognizes any
-        string that matches the expression [x].  {b Note:} Care should be taken
-        when composing parsers with this operator to keep the lazy DFA from
-        being recreated in every pass.
-    *)
-    val ( ?~ ): x -> string t
-    
-    (** String parser.  Use [?$s] to create a simple parser that recognizes any
-        string that matches the regular expression specified in the string [s]
-        according to the grammar in the {!Cf_regex} module.  Raises
-        {!Cf_regex.Error} if the string is not a regular expression.  {b Note:}
-        Care should be taken when composing parsers with this operator to keep
-        from parsing the argument string in every pass.
-    *)
-    val ( ?$ ): string -> string t
-end
-
-(** A module of parser extensions for working with input sequences that require
-    position information to woven into the parse function.
-*)
-module X: sig
-    
-    (** The type of lexical analysis rules. *)
-    type ('c, 'a) r constraint 'c = char #Cf_parser.cursor
-    
-    (** Woven character stream parser. *)
-    type ('c, 'a) t = ('c, char, 'a) Cf_parser.X.t
-        constraint 'c = char #Cf_parser.cursor
-    
-    (** Use [create r] to compose a lexical analyzer from the rule [r]. *)
-    val create: ('c, 'a) r -> ('c, 'a) t
-    
-    (** Open this module to bring the operator functions for woven parsers into
-        the current scope.
-    *)
-    module Op: sig
-        
-        (** Include the expression operators common among lexical analyzers. *)
-        include Expr_Op_T
-        
-        (** Literal token rule.  Use [e $= obj] to compose a rule that outputs
-            the literal object [obj] when the expression [e] is recognized.
-        *)
-        val ( $= ): x -> 'a -> ('c, 'a) r
-        
-        (** String token rule.  Use [e $> f] to compose a rule that applies the
-            string recognized by the expression [e] to the tokenizer function
-            [f] to produce its result.
-        *)
-        val ( $> ): x -> (string -> 'a) -> ('c, 'a) r
-        
-        (** Advanced token rule.  Use [e $@ f] to compose a rule that applies
-            the length of the character sequence recognized by the expression
-            [e] to the advanced tokenizer function [f] to obtain a parser that
-            produces the output of the rule and makes any other manipulations
-            necessary to continue parsing the input stream.  If the parser
-            returned by [f] does not recognize the input, then no output is
-            produced and no other rules are matched.
-        *)
-        val ( $@ ): x -> (int -> ('c, 'a) t) -> ('c, 'a) r
-        
-        (** Rule aggregation.  Use this operator to combine a list of rules
-            into a single rule.
-        *)
-        val ( !@ ): ('c, 'a) r list -> ('c, 'a) r
-        
-        (** String parser.  Use [?~x] to create a simple parser that recognizes
-            any string that matches the expression [x].  {b Note:} Care should
-            be taken when composing parsers with this operator to keep the lazy
-            DFA from being recreated in every pass.
-        *)
-        val ( ?~ ): x -> ('c, string) t
-        
-        (** String parser.  Use [?$s] to create a simple parser that recognizes
-            any string that matches the regular expression specified in the
-            string [s] according to the grammar in the {!Cf_regex} module.
-            Raises {!Cf_regex.Error} if the string is not a regular expression.
-            {b Note:} Care should be taken when composing parsers with this
-            operator to keep from parsing the argument string in every pass.
-        *)
-        val ( ?$ ): string -> ('c, string) t
-    end
-end
-
-(** A record used by the [cursor] class defined below that indicates the
-    character index, row and column in the input stream associated with a
-    cursor position.
-*)
-type counter = {
-    c_pos: int;     (** The character index (counts from zero). *)
-    c_row: int;     (** The column number (counts from zero). *)
-    c_col: int;     (** The row number (counts from zero). *)
-}
-
-(** The initial value of a cursor position counter. *)
-val counter_zero: counter
-
-(** A class derived from {!Cf_parser.cursor} that intercepts newline characters
-    to track the row and column of a cursor position.  Use [new cursor ~c s] to
-    construct an initial cursor position, optionally with the counter [c]
-    (default: [counter_zero]), and a string [s] containing the character
-    sequence that is recognized as a newline, e.g. "\013\010" indicates that
-    newline is a CR LF sequence.
-*)
-class cursor:
-    ?c:counter ->
-    string ->
-    object
-        inherit [char] Cf_parser.cursor
-        
-        val row_: int           (** The current row number *)
-        val col_: int           (** The current column number *)
-        val nl0_: char list     (** The newline sequence as a [char list]. *)
-        val nlz_: char list     (** The current tail of the newline. *)
-        
-        (** [self#next c] is called in the [advance] method to return a new
-            values for the [row_], [col_] and [nlz_] members.
-        *)
-        method private next: char -> int * int * char list
-        
-        (** Returns a new counter object containing the row, column and index
-            of the current cursor position.
-        *)
-        method counter: counter
-        
-        (** Returns the [row_] member. *)
-        method row: int
-        
-        (** Returns the [col_] member. *)
-        method col: int
-    end
-
-(*--- $File$ ---*)

cf/cf_regex.ml

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-module Symbol = struct
-    type t = char and 'a map = 'a array
-    let map f = Array.init 256 (fun n -> f (char_of_int n))
-    let get m c = Array.unsafe_get m (int_of_char c)
-end
-
-module DFA = Cf_dfa0.Create(Symbol)
-
-exception Error of string
-        
-module P = Cf_parser
-open P.Op
-open DFA.Op
-
-let p_digit_ =
-    P.sat (function '0'..'9' -> true | _ -> false) >>= fun c ->
-    ~:(Char.code c - 48)
-
-let p_hexdigit_ =
-    let base_ c =
-        if c >= 'a' then 87 else if c >= 'A' then 55 else 48
-    in
-    P.sat begin
-        function ('0'..'9' | 'a'..'f' | 'A'..'F') -> true | _ -> false
-    end >>= fun c ->
-    ~:(Char.code c - base_ c)
-
-let p_backtick_ = ?.'`'
-
-let p_any_ = P.sat (fun _ -> true)
-
-let esc_ch_list_ =
-    let hexcode_ _ =
-        p_hexdigit_ >>= fun a ->
-        p_hexdigit_ >>= fun b ->
-        ~:(Char.chr (a * 16 + b))
-    in
-    let deccode_ chA =
-        let a = Char.code chA - 48 in
-        p_digit_ >>= fun b ->
-        p_digit_ >>= fun c ->
-        let code = a * 100 + b * 10 + c in
-        if code > 255 then P.nil else ~:(Char.chr code)
-    in
-    let control_ _ =
-        P.sat begin function
-            | '@'..'_' | 'a'..'z' -> true
-            | _ -> false
-        end >>= fun c ->
-        let n = Char.code c in
-        let n = if n >= 97 then n - 96 else n - 64 in
-        ~:(Char.chr n)
-    in
-    let newline_ _ = ~:('\x0A') in
-    let tab_ _ = ~:('\x09') in
-    let return_ _ = ~:('\x0D') in
-    [
-        'n', newline_;
-        't', tab_;
-        'r', return_;
-        'x', hexcode_;
-        'c', control_;
-        '0', deccode_;
-        '1', deccode_;
-        '2', deccode_;
-        '`', ( ~: );
-    ]
-
-let ch_class_ =
-    let l_bracket = ?.'[' in
-    let r_bracket = ?.']' in
-    let hyphen = ?.'-' in
-    let eq (c1 : char) (c2 : char) = (c1 = c2) in
-    let raw_ch = P.sat (function '-' | ']' -> false | _ -> true) in
-    let esc_ch =
-        let mapF (ch, f) = ?.ch >>= f in
-        let aux = P.alt (List.map mapF ((']', ( ~: )) :: esc_ch_list_)) in
-        p_backtick_ >>= fun _ ->
-        aux
-    in
-    let single_ch = P.alt [ esc_ch; raw_ch ] in
-    let range: (char, char -> bool) P.t =
-        single_ch >>= fun a ->
-        hyphen >>= fun _ ->
-        single_ch >>= fun b ->
-        ~:(fun ch -> ch >= a && ch <= b)
-    in
-    let eqLift p = p >>= fun ch -> ~:(fun c -> c = ch) in
-    let esc_set =
-        let alpha = function 'A'..'Z' | 'a'..'z' -> true | _ -> false in
-        let digit = function '0'..'9' -> true | _ -> false in
-        let alnum ch = alpha ch || digit ch in
-        let specifier = P.alt [
-            (?.'a' >>= fun _ -> ~:alpha);
-            (?.'d' >>= fun _ -> ~:digit);
-            (?.'i' >>= fun _ -> ~:alnum);
-        ] in
-        p_backtick_ >>= fun _ ->
-        specifier
-    in
-    let single = P.alt (List.map eqLift [ esc_ch; raw_ch ]) in
-    let hyphen_ch = P.tok (function '-' -> Some (eq '-') | _ -> None) in
-    let atom0 = P.alt [ hyphen_ch; esc_set; range; single ] in
-    let atomN = P.alt [ esc_set; range; single ] in
-    let atomlist =
-        let existF ch f = f ch in
-        atom0 >>= fun hd ->
-        ?*atomN >>= fun tl ->
-        ~:(fun ch -> List.exists (existF ch) (hd :: tl))
-    in
-    let negate = ?/(?.'^') >>= function None -> ~:false | _ -> ~:true in
-    l_bracket >>= fun _ ->
-    negate >>= fun _ ->
-    atomlist >>= fun f ->
-    r_bracket >>= fun _ ->
-    ~:(!^f)
-
-let esc_expr_list_ =
-    let meta_ ch = ~:(!:ch) in
-    let alpha_ _ =
-        ~:(!^(function 'A'..'Z' | 'a'..'z' -> true | _ -> false))
-    in
-    let alnum_ _ =
-        ~:begin
-            !^begin function
-                | '0'..'9' | 'A'..'Z' | 'a'..'z' -> true
-                | _ -> false
-            end
-        end
-    in
-    let digit_ _ = ~:(!^(function '0'..'9' -> true | _ -> false)) in
-    let sat_white_ x = function '\009'..'\013' | '\032' -> x | _ -> not x in
-    let white_ _ = ~:(!^(sat_white_ true)) in
-    let nonwhite_ _ = ~:(!^(sat_white_ false)) in
-    [
-        'a', alpha_;
-        'i', alnum_;
-        'd', digit_;
-        's', white_;
-        'w', nonwhite_;
-        '.', meta_;
-        '?', meta_;
-        '*', meta_;
-        '+', meta_;
-        '(', meta_;
-        ')', meta_;
-        '|', meta_;
-        '[', meta_;
-        ']', meta_;
-        '^', meta_;
-        '$', meta_;
-    ]
-
-let esc_expr_ =        
-    let p_escape_ fLst =
-        p_backtick_ >>= fun _ ->
-        P.alt (List.map (fun (ch, f) -> ?.ch >>= f) fLst)
-    in
-    let esc_chx_list_ =
-        List.rev_map begin fun (c, f) ->
-            c, fun x -> f x >>= fun y -> ~:(!:y)
-        end esc_ch_list_
-    in
-    p_escape_ (List.rev_append esc_chx_list_ esc_expr_list_)
-
-let expr_parse =
-    let symbol =
-        let f = function
-            | '\x00'..'\x1f' | '?' | '*' | '+' | '(' | ')' | '|'
-            | '\x7f'..'\xff' -> false
-            | _ -> true
-        in
-        P.sat f >>= fun c ->
-        ~:(!:c)
-    in
-    let dot = ?.'.' >>= fun _ -> ~:(!^(fun c -> c <> '\n')) in
-    let star x = ?.'*' >>= fun _ -> ~:(!*x) in
-    let plus x = ?.'+' >>= fun _ -> ~:(!+x) in
-    let question x = ?.'?' >>= fun _ -> ~:(!?x) in
-    let postfix x = P.alt [ star x; plus x; question x; ~:x ] in
-    let rec expr _ =
-        term () >>= fun x ->
-        ?* (?.'|' >>= fun _ -> term ()) >>= fun y ->
-        ~:(List.fold_left (fun x y -> x $| y) x y)
-    and term () =
-        ?+(factor () >>= postfix) >>= fun (hd, tl) ->
-        ~:(List.fold_left (fun x y -> x $& y) hd tl)
-    and factor () = P.alt [ group (); ch_class_; esc_expr_; dot; symbol ]
-    and group () =
-        ?.'(' >>= fun _ ->
-        expr DFA.nil >>= fun x ->
-        ?.')' >>= fun _ ->
-        ~:x
-    in
-    expr DFA.nil
-
-let expr_of_seq z =
-    match expr_parse z with
-    | Some (v, _) -> v
-    | _ -> raise (Error (Cf_seq.to_string z))
-
-let expr_of_string s = expr_of_seq (Cf_seq.of_string s)
-
-let quote =
-    let esc_ =
-        [ '`'; '.'; '?'; '*'; '+'; '('; ')'; '|'; '['; ']'; '^'; '$' ]
-    in
-    let rec loop c =
-        let w = Lazy.lazy_from_val (Cf_flow.Q loop) in
-        match c with
-        | _ when List.exists (fun c' -> c == c') esc_ ->
-            let w = Lazy.lazy_from_val (Cf_flow.P (c, w)) in
-            Cf_flow.P ('`', w)
-        | _ ->
-            Cf_flow.P (c, w)
-    in
-    Lazy.lazy_from_val (Cf_flow.Q loop)
-
-let unquote =
-    let rec loop c =
-        let w = Lazy.lazy_from_val (Cf_flow.Q loop) in
-        match c with
-        | '`' ->
-            Cf_flow.Q begin fun c ->
-                Cf_flow.P (c, Lazy.lazy_from_val (Cf_flow.Q loop))
-            end
-        | _ ->
-            Cf_flow.P (c, w)
-    in
-    Lazy.lazy_from_val (Cf_flow.Q loop)
-
-type t = int DFA.t
-
-let of_expression x = DFA.create (x $@ (fun n z -> Some (n, Cf_seq.shift n z)))
-let of_seq z = of_expression (expr_of_seq z)
-let of_string s = of_expression (expr_of_string s)
-
-let test r s =
-    let z = Cf_seq.of_string s in
-    match r z with
-    | Some (n, _) when n = String.length s -> true
-    | _ -> false
-
-let search =
-    let rec loop r pos z =
-        match r z with
-        | Some (n, _) ->
-            pos, n
-        | None ->
-            match p_any_ z with
-            | Some (_, z) ->
-                loop r (succ pos) z
-            | None ->
-                raise Not_found
-    in
-    fun r -> loop r 0
-
-let rec separate r z =
-    lazy begin
-        try
-            let pos, len = search r z in
-            let s = Cf_seq.limit pos z in
-            let z = Cf_seq.shift (pos + len) z in
-            Cf_seq.P (s, separate r z)
-        with
-        | Not_found ->
-            Cf_seq.P (z, Cf_seq.nil)
-    end
-
-let split =
-    let rec loop r s pos acc z =
-        match
-            try Some (search r z) with Not_found -> None
-        with
-        | Some (pos', len') ->
-            let x = String.sub s pos pos' in
-            let pos = pos + pos' + len' in
-            let z = Cf_seq.shift (pos' + len') z in
-            loop r s pos (x :: acc) z
-        | None ->
-            List.rev_append acc [ Cf_seq.to_string z ]
-    in
-    fun r s ->
-        loop r s 0 [] (Cf_seq.of_string s)
-
-let parse r z =
-    match r z with
-    | Some (n, tl) -> Some (Cf_seq.to_string (Cf_seq.limit n z), tl)
-    | None -> None
-
-let parsex r = P.to_extended (parse r)
-
-(*--- $File$ ---*)

cf/cf_regex.mli

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-(** Regular expression parsing, search and matching. *)
-
-(** {6 Overview}
-
-    This module implements regular expression parsing, search and matching in
-    pure Objective Caml.  The grammar for regular expressions is a little
-    unconventional.  Instead of using a backslash as the escape character, the
-    backtick character is used instead.  This makes it easy to write regular
-    expressions in string literals.
-    
-    Use any of the following constructions in regular expressions:
-    - [`n     ] Matches LF ("newline") character.
-    - [`t     ] Matches TAB character.
-    - [`r     ] Matches RETURN character.
-    - [`a     ] Matches an alphabetical character.
-    - [`d     ] Matches a decimal digit character.
-    - [`i     ] Matches an alphanumerical character.
-    - [`s     ] Matches a TAB, LF, VT, FF, CR or SPACE (whitespace) character.
-    - [`w     ] Matches a character other than a whitespace character.
-    - [`xNN   ] Matches the character with hexadecimal code [NN].
-    - [`DDD   ] Matches the character with decimal code [DDD], where DDD is a
-        three digit number between [000] and [255].
-    - [`c_    ] Matches the control character corresponding to the subsequent
-        printable character, e.g. [`cA] is CONTROL-A, and [`c\[] is ESCAPE.
-    - [.      ] Matches any character except newline.
-    - [*      ] (postfix) Matches the preceding expression, zero, one or
-        several times in sequence.
-    - [+      ] (postfix) Matches the preceding expression, one or several
-        times in sequence.
-    - [?      ] (postfix) Matches the preceding expression once or not at all.
-    - [[..]   ] Character set.  Ranges are denoted with ['-'], as in [[a-z]].
-        An initial ['^'], as in [[^0-9]], complements the set.  Special
-        characters in the character set syntax may be included in the set by
-        escaping them with a backtick, e.g. [[`^```\]]] is a set containing
-        three characters: the carat, the backtick and the right bracket
-        characters.
-    - [(..|..)] Alternatives.  Matches one of the expressions between the
-        parentheses, which are separated by vertical bar characters.
-    - [`_     ] Escaped special character.  The special characters are ['`'],
-        ['.'], ['*'], ['+'], ['?'], ['('], ['|'], [')'], ['\['].
-*)
-
-(** {6 Modules} *)
-
-(** The deterministic finite automata on octet character symbols. *)
-module DFA: Cf_dfa0.T with type S.t = char
-
-(** {6 Exceptions} *)
-
-(** An error parsing the specified string as a regular expression. *)
-exception Error of string
-
-(** {6 Types} *)
-
-(** An abstract type representing a regular expression. *)
-type t
-
-(** {6 Functions} *)
-
-(** A parser combinator on character streams that recognizes a regular
-    expression and produces a DFA expression for it.
-*)
-val expr_parse: (char, DFA.x) Cf_parser.t
-
-(** Use [expr_of_seq z] to evaluate the character sequence [z] as a regular
-    expression and produce the corresponding DFA expression.  Raises [Error] if
-    the sequence is not a valid expression.
-*)
-val expr_of_seq: char Cf_seq.t -> DFA.x
-
-(** Use [expr_of_string s] to evaluate the string [s] as a regular expression
-    and produce the corresponding DFA expression.  Raises [Error] if the string
-    is not a valid expression.
-*)
-val expr_of_string: string -> DFA.x
-
-(** A character flow that quotes all the special characters in the input so
-    that the output may be used in a regular expression to match the input
-    exactly.
-*)
-val quote: (char, char) Cf_flow.t
-
-(** A character flow that unquotes all the quoted special characters in the
-    input so that the output may by used in a regular expression to match the
-    specified pattern.
-*)
-val unquote: (char, char) Cf_flow.t
-
-(** Use [of_expression x] to produce a regular expression from the DFA
-    expression [x].
-*)
-val of_expression: DFA.x -> t
-
-(** Use [of_seq z] to ingest the whole character sequence [z], parse it and
-    produce a regular expression.  Raises [Error s] if the sequence is
-    not a valid regular expression, with [s] containing the string composed of
-    the characters in the sequence.
-*)
-val of_seq: char Cf_seq.t -> t
-
-(** Use [of_string s] to produce a regular expression from the string [s].
-    Raises [Error s] if the string is not a valid regular expression.
-*)
-val of_string: string -> t
-
-(** Use [test r s] to test whether the string [s] matches the regular
-    expression [r].
-*)
-val test: t -> string -> bool
-
-(** Use [search r z] to search the character sequence [z] for a pattern that
-    matches the regular expression [r].  Returns [(pos, len)], where [pos] is
-    the number of characters into the sequence where the matching sequence
-    begins, and [len] is the number matching characters.
-*)
-val search: t -> char Cf_seq.t -> int * int
-
-(** Use [separate r z] to map the character sequence [z] into the sequence of
-    sequences found between matches for the regular expression [r].
-*)
-val separate: t -> char Cf_seq.t -> char Cf_seq.t Cf_seq.t
-
-(** Use [split r s] to produce a list of strings by searching [s] left to right
-    for blocks of characters between patterns that match the regular expression
-    [r].
-*)
-val split: t -> string -> string list
-
-(** Use [parse r] to produce a parser that matches the input stream to the
-    regular expression [r] and returns the corresponding string value.
-*)
-val parse: t -> (char, string) Cf_parser.t
-
-(** Use [parse r] to produce a parser that matches the input stream to the
-    regular expression [r] and returns the corresponding string value.
-*)
-val parsex: t -> ('c, char, string) Cf_parser.X.t
-
-(*--- $File$ ---*)

cf/cf_scan_parser.ml

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2004-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-exception No_match
-
-class virtual ['i] scanner z =
-    object(self)
-        val mutable next_: 'i Cf_seq.t = z
-        
-        method private virtual get: char
-        
-        method init = Scanf.Scanning.from_function (fun () -> self#get)
-        method fini = next_
-    end
-
-let cscanf cf ef fmt rf z =
-    let s = cf z in
-    let ef0 _ x = ef s x in
-    try
-        let v = Scanf.kscanf s#init ef0 fmt rf in
-        let z = s#fini in
-        Some (v, z)
-    with
-    | No_match ->
-        None
-
-class ['cursor] lex_scanner z =
-    object
-        inherit [char] scanner z
-                
-        method private get =
-            match Lazy.force next_ with
-            | Cf_seq.Z ->
-                raise End_of_file
-            | Cf_seq.P (hd, tl) ->
-                next_ <- tl;
-                hd
-    end
-
-class ['cursor] lex_scanner_x z =
-    object
-        constraint 'cursor = char #Cf_parser.cursor
-        inherit [char * 'cursor] scanner z
-        
-        method private get =
-            match Lazy.force next_ with
-            | Cf_seq.Z ->
-                raise End_of_file
-            | Cf_seq.P ((ch, _), tl) ->
-                next_ <- tl;
-                ch
-    end
-
-let scanf fmt rf z =
-    let ef _ = raise No_match in
-    cscanf (new lex_scanner) ef fmt rf z
-
-let scanfx fmt rf z =
-    let ef _ = raise No_match in
-    cscanf (new lex_scanner_x) ef fmt rf z
-
-(*--- $File$ ---*)

cf/cf_scan_parser.mli

-(*---------------------------------------------------------------------------*
-  $Change$
-  Copyright (c) 2004-2010, James H. Woodyatt
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-  
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-  OF THE POSSIBILITY OF SUCH DAMAGE. 
- *---------------------------------------------------------------------------*)
-
-(** Lexical analysis with functional composition using [Scanf] scanners. *)
-
-(** {6 Overview}
-
-    This module implements and extension to the {!Cf_parser} module for mixing
-    calls to the standard library [Scanf] functions with functional parsers.
-*)
-
-(** {6 Classes and Types} *)
-
-(** An exception provided so that the [cscanf] function (below) can be signaled
-    to transform its answer with the effect that the parser stack is unwound
-    until an alternative production can be matched.
-*)
-exception No_match
-
-(** A virtual base class used in the [cscanf] function (below) for constructing
-    a scanning buffer from an input sequence.
-*)
-class virtual ['i] scanner:
-    'i Cf_seq.t ->      (** The input sequence *)
-    object
-        val mutable next_: 'i Cf_seq.t  (** The next unmatched input symbol *)
-        
-        (** Get the next character for the scanning buffer *)
-        method private virtual get: char
-        
-        (** Initialize the scanning buffer *)
-        method init: Scanf.Scanning.scanbuf
-        
-        (** Finalize the scanning buffer and return the next unmatched input
-            symbol.
-        *)
-        method fini: 'i Cf_seq.t
-    end
-
-(** {6 Functions} *)
-
-(** This is the primitive function in the module.  Use [cscanf cf ef fmt rf] to
-    construct a parser that applies [cf] to the input sequence to acquire a
-    scanner object [s], invokes the [s#init] method to obtain a scanning buffer
-    with which to apply [Scanf.kscanf], using the exception function [ef], the
-    scanning format [fmt] and the return continuation [rf].  If the exception
-    function raises [No_match] then the resulting parser unwinds to the next
-    production alternative, otherwise the parser answers with the result of the
-    return continuation.
-*)
-val cscanf:
-    ('i Cf_seq.t -> ('i #scanner as 's)) -> ('s -> exn -> 'o) ->
-    ('f, Scanf.Scanning.scanbuf, 'u, 'f -> 'o, 'f -> 'o, 'o) format6 -> 'f ->
-    ('i, 'o) Cf_parser.t
-
-(** Use [scanf fmt rf] to construct a lexical parser that scans the input text
-    according to the scanning format [fmt] and produces the value returned by
-    the return continuation.  If the scanner raises an exception, then the
-    parser unwinds to the next production alternative.
-*)
-val scanf:
-    ('f, Scanf.Scanning.scanbuf, 'u, 'f -> 'o, 'f -> 'o, 'o) format6 -> 'f ->
-    (char, 'o) Cf_parser.t
-
-(** Use [scanfx] in place of [scanf] to construct a parser with a cursor weaved
-    into the input stream.
-*)
-val scanfx:
-    ('f, Scanf.Scanning.scanbuf, 'u, 'f -> 'o, 'f -> 'o, 'o) format6 -> 'f ->
-    (char #Cf_parser.cursor, char, 'o) Cf_parser.X.t
-
-(*--- $File$ ---*)
 Open issues in development:
 
++ OBSOLESCENT: Cf_flow
+
 + (Cf_dyn): Exploring this as a refactoring of the Cf_flow module, with all new
     functions and maybe some different operators.  The serial loop composers
     are not included because their utility is unclear.  The transcoder section
     is not included because the LL(x) scanner should be used instead.
 
++ (Cf_gadget): Still uses `Cf_flow`.
+
++ (Cf_unicode): Still uses `Cf_flow` and relies on its transcoder section.
+
 + (Cf_seq): The tentative weave functions are probably a bad idea.  It probably
     makes more sense to unfold/weave the new Cf_llscan.t instead.
 
 (*---------------------------------------------------------------------------*
   $Change$
-  Copyright (c) 2005-2010, James H. Woodyatt
+  Copyright (c) 2005-2013, James H. Woodyatt
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without
             "Z",    0;
         ]
     in
-    let number = eat_cfws_ (Cf_scan_parser.scanf "%u" identity_) in
+    let number = eat_cfws_ (Cf_fmt_llscan.scanf "%u" identity_) in
     let weekday =
         scan_atom >>= fun wd ->
         if Mime_atom.Map.member wd weekday_names then
         end
     and numbered_zone =
         sign >>= fun d ->
-        Cf_scan_parser.scanf "%4u" identity_ >>= fun n ->
+        Cf_fmt_llscan.scanf "%4u" identity_ >>= fun n ->
         let hr = n / 100 and min = n mod 100 in
         if hr > 99 || min > 59 then
             Cf_llscan.nil
 ##############################################################################
 # 
 # $Change$
-# Copyright (C) 2010, james woodyatt
+# Copyright (C) 2010-2013, james woodyatt
 # All rights reserved.
 # 
 # Redistribution and use in source and binary forms, with or without
 
 ### Compose the xml library
 section
-    modules= expat event parser
+    modules= expat event llscan # parser
     primitives= expat
     
     ### Create source description
 (*---------------------------------------------------------------------------*
   $Change$
-  Copyright (c) 2003-2010, James H. Woodyatt
+  Copyright (c) 2003-2013, James H. Woodyatt
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without
     let foo_element =
         let tag = "foo" in
         let content v =
-            Xml_parser.accumulated_character_data >>= fun msg ->
+            Xml_llscan.accumulated_character_data >>= fun msg ->
             let data = Cf_message.contents msg in
             Cf_llscan.ret { v with v_content_ = data }
         in
                         Cf_llscan.ret { v with v_att_const_ = Some n }
                     end
                 | "array" -> begin fun v ->
-                        Xml_parser.split_attribute_value >>= fun arr ->
+                        Xml_llscan.split_attribute_value >>= fun arr ->
                         Cf_llscan.fin >>= fun () ->
                         Cf_llscan.ret { v with v_att_array_ = Some arr }
                     end
             match p acc (Cf_seq.of_string attval) with
             | None ->
                 let event = Xml_event.T_element_start ("foo", [pair]), pos in
-                raise (Xml_parser.Invalid event)
+                raise (Xml_llscan.Invalid event)
             | Some (v, _) ->
                 v
         in
             v_att_array_ = None;
             v_content_ = "";
         } in
-        Xml_parser.validated_element ~tag ~attr ~content v >>= fun v ->
-        Xml_parser.optional_whitespace >>= fun () ->
-        Xml_parser.end_of_document >>= fun () ->
+        Xml_llscan.validated_element ~tag ~attr ~content v >>= fun v ->
+        Xml_llscan.optional_whitespace >>= fun () ->
+        Xml_llscan.end_of_document >>= fun () ->
         Cf_llscan.fin >>= fun () ->
         match v.v_att_const_, v.v_att_array_ with
         | Some const, Some array ->
     
     let test () =
         try test_inner_ () with
-        | Xml_parser.Invalid (token, position) ->
+        | Xml_llscan.Invalid (token, position) ->
             failwith (X.sprintf "Parse error @ %s line %nu, col %nu"
                 (Xml_event.token_to_string token)
                 position.Xml_event.pos_line_number

xml/xml_llscan.ml

+(*---------------------------------------------------------------------------*
+  $Change$
+  Copyright (c) 2003-2013, James H. Woodyatt
+  All rights reserved.
+  
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+  
+    Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    
+    Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution
+  
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+  OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *---------------------------------------------------------------------------*)
+
+type 'a t = (Xml_event.t, 'a) Cf_llscan.t
+
+open Cf_llscan.Op
+open Cf_clex.Op
+
+let character_data =
+    Cf_llscan.tok begin function
+        | Xml_event.T_character_data data, _ -> Some data
+        | _ -> None
+    end
+
+let element_start =
+    Cf_llscan.tok begin function
+        | Xml_event.T_element_start (tag, attrs), _ -> Some (tag, attrs)
+        | _ -> None
+    end
+
+let element_end =
+    Cf_llscan.tok begin function
+        | Xml_event.T_element_end tag, _ -> Some tag
+        | _ -> None
+    end
+
+let processing_instruction =
+    Cf_llscan.tok begin function
+        | Xml_event.T_processing_instruction pi, _ -> Some pi
+        | _ -> None
+    end
+
+let cdata_start =
+    let sat =
+        Cf_llscan.sat begin function
+            | Xml_event.T_cdata_start, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> Cf_llscan.ret ()
+
+let cdata_end =
+    let sat =
+        Cf_llscan.sat begin function
+            | Xml_event.T_cdata_end, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> Cf_llscan.ret ()
+
+let comment =
+    Cf_llscan.tok begin function
+        | Xml_event.T_comment text, _ -> Some text
+        | _ -> None
+    end
+
+let default_text =
+    Cf_llscan.tok begin function
+        | Xml_event.T_default_text text, _ -> Some text
+        | _ -> None
+    end
+
+let xml_decl =
+    Cf_llscan.tok begin function
+        | Xml_event.T_xml_decl decl, _ -> Some decl
+        | _ -> None
+    end
+
+let end_of_document =
+    let sat =
+        Cf_llscan.sat begin function
+            | Xml_event.T_end_of_document, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> Cf_llscan.ret ()
+
+let accumulated_character_data =
+    let rec inner_loop q s =
+        match character_data s with
+        | None ->
+            if Cf_deque.empty q then None else Some (q, s)
+        | Some (data, s) ->
+            inner_loop (Cf_deque.A.push (data, 0, String.length data) q) s
+    in
+    let cdata_section () =
+        cdata_start >>= fun _ ->
+        inner_loop Cf_deque.nil >>= fun q ->
+        cdata_end >>= fun _ ->
+        Cf_llscan.ret q
+    in
+    let outer_pass =
+        Cf_llscan.alt [
+            inner_loop Cf_deque.nil;
+            cdata_section ();
+        ]
+    in
+    let rec outer_loop q s =
+        match outer_pass s with
+        | None ->
+            Some (q, s)
+        | Some (q', s) ->
+            outer_loop (Cf_deque.catenate q q') s
+    in
+    outer_loop Cf_deque.nil >>= fun q ->
+    let msg = Cf_seq.reverse (Cf_deque.B.to_seq q) in
+    Cf_llscan.ret msg
+
+let optional_whitespace =
+    let rec get seq =
+        match character_data seq with
+        | Some (s, seq) when loop s (String.length s) 0 -> get seq
+        | Some (_, _) -> Some ((), seq)
+        | _ ->
+            match default_text seq with
+            | Some (s, seq) when loop s (String.length s) 0 -> get seq
+            | _ -> Some ((), seq)
+    and loop s n i =
+        if i >= n then
+            true
+        else
+            match String.unsafe_get s i with
+            | ' ' | '\t' | '\r' | '\n' -> loop s n (succ i)
+            | _ -> false
+    in
+    fun seq -> get seq
+
+exception Invalid of Xml_event.t
+
+let invalid seq =
+    match Lazy.force seq with
+    | Cf_seq.Z -> None
+    | Cf_seq.P (event, _) -> raise (Invalid event)
+
+type space_handling = S_default | S_preserve
+
+(*
+val validated_element:
+    tag:string -> attr:(Xml_event.position -> 'a -> string * string -> 'a) ->
+    content:('a -> 'a t) -> ?space:space_handling -> 'a -> 'a t
+*)
+
+let validated_element ~tag ~attr ~content ?(space = S_default) =
+    let rec attributes ~pos ~acc = function
+        | [] -> acc
+        | hd :: tl -> attributes ~pos ~acc:(attr pos acc hd) tl
+    and e_start acc seq =
+        match Lazy.force seq with
+        | Cf_seq.P ((Xml_event.T_element_start (tag', attlist), pos), tl) ->
+            if tag = tag' then
+                Some (attributes ~pos ~acc attlist, tl)
+            else
+                None
+        | _ ->
+            None
+    and e_end seq =
+        match Lazy.force seq with
+        | Cf_seq.P ((Xml_event.T_element_end tag', _ as event), tl) ->
+            if tag = tag' then Some ((), tl) else raise (Invalid event)
+        | _ ->
+            None
+    in
+    let content acc =
+        match space with
+        | S_preserve ->
+            content acc
+        | S_default ->
+            optional_whitespace >>= fun () ->
+            content acc >>= fun x ->
+            optional_whitespace >>= fun () ->
+            Cf_llscan.ret x
+    in
+    fun acc ->
+        e_start acc >>= fun acc ->
+        content acc >>= fun acc ->
+        e_end >>= fun () ->
+        Cf_llscan.ret acc
+
+let split_attribute_value =
+    let is_white x = function
+        | '\009' | '\010' | '\013' | '\032' -> x
+        | _ -> not x
+    in
+    let x_white = !+(!^(is_white true)) in
+    let x_token = !+(!^(is_white false)) in
+    let lexer = Cf_clex.create !@[
+        x_white $= None;
+        x_token $> (fun x -> Some x);
+    ] in
+    fun s ->
+        let rec loop acc sx =
+            match lexer sx with
+            | None -> Some (List.rev acc, sx)
+            | Some (None, sx) -> loop acc sx
+            | Some (Some tok, sx) -> loop (tok :: acc) sx
+        in
+        loop [] s
+
+let standalone_document content =
+    xml_decl >>= fun decl ->
+    match decl with
+    | {
+        Xml_event.xml_version = Some "1.0";
+        Xml_event.xml_standalone = Xml_expat.SA_yes;
+      } ->
+        begin
+            optional_whitespace >>= fun () ->
+            content >>= fun x ->
+            optional_whitespace >>= fun () ->
+            end_of_document >>= fun () ->
+            Cf_llscan.ret x
+        end
+    | _ ->
+        Cf_llscan.nil
+
+(*--- $File$ ---*)

xml/xml_llscan.mli

+(*---------------------------------------------------------------------------*
+  $Change$
+  Copyright (c) 2003-2013, James H. Woodyatt