Commits

james woodyatt committed 6eb3a7f

Compose the documentation for the new Cf_llscan and Cf_dfa modules.

Comments (0)

Files changed (2)

   OF THE POSSIBILITY OF SUCH DAMAGE. 
  *---------------------------------------------------------------------------*)
 
+(** Functional composition of lazy deterministic finite automata. *)
+
+(** {6 Overview}
+
+    This module implements operators for functional composition of lazy
+    deterministic finite automata (DFA).  A lazy DFA is more efficient at
+    recognizing regular grammars than a non-deterministic finite automaton,
+    and the lazy evaluation amortizes the cost of compiling the state table
+    so that it compares well to that of the NFA.
+    
+    The interface defined here is used as the underlying algorithm for the
+    {!Cf_clex} module.  It uses a functor that operates on a module defining
+    the type of a symbol, the type of parser input tokens that contain such
+    symbols, and a map of symbols to some polymorphic type.  The result of the
+    functor is a module that contains operator functions for composing
+    expressions and rules for automata that operate on streams of the input
+    symbol type.
+    
+    {b Note}: a DFA can be remarkably inefficient compared to an NFA for
+    certain classes of unusual grammars and unusual input.
+*)
+
+(** {6 Module Types} *)
+
+(** The type of the input module for [Create(S: Symbol)] functor defined
+    below.
+*)
 module type Symbol = sig
-    type t and 'a map
+    (** The symbol type *)
+    type t
+        
+    (** The type of maps from symbols to polymorphic types. *)
+    type 'a map
+        
+    (** The engine uses [map f] to construct a map from symbols to state
+        transitions.
+    *)
     val map: (t -> 'a) -> 'a map
+    
+    (** The engine uses [get m s] to get the state transition from map [m] for
+        the symbol [s].
+    *)
     val get: 'a map -> t -> 'a
 end
 
+(** The output of the [Create(S: Symbol)] functor, which is a module that
+    can be used to compose deterministic finite automata which operate on
+    symbols of the type specified.
+*)
 module type T = sig
+    
+    (** The module used as the input to the [Create(S: Symbol)] functor. *)
     module S: Symbol
     
+    (** The type of an expression in the regular grammar of an automaton. *)
     type x
     
+    (** The expression that matches the empty symbol sequence. *)
     val nil: x
     
+    (** The type of a rule for recognizing a sequence of symbols according to
+        the regular grammar of an automaton and producing an output token.
+    *)
     type 'a r
-    type 'a t = (S.t, 'a) Cf_llscan.t
     
+    (** A parser that works on the symbols used in the automaton. *)
+    type 'a t = (S.t, 'a) Cf_parser.t
+    
+    (** Open this module to bring the composition operators into the current
+        scope.
+    *)
     module Op: sig
+        
+        (** Use [a $| b] to compose an expression that matches either [a] or
+            [b] in the symbol stream.
+        *)
         val ( $| ): x -> x -> x
+        
+        (** Use [a $& b] to compose an expression that matches [a] followed by
+            [b] in the symbol stream.
+        *)
         val ( $& ): x -> x -> x
         
+        (** Use [!*a] to compose an expression that matches zero or more
+            occurances of [a] in the symbol stream.
+        *)
         val ( !* ): x -> x
+        
+        (** Use [!+a] to compose an expression that matches one or more
+            occurances of [a] in the symbol stream.
+        *)
         val ( !+ ): x -> x
+        
+        (** Use [!?a] to compose an expression that matches zero or one
+            occurance of [a] in the symbol stream.
+        *)
         val ( !? ): x -> x
+        
+        (** Use [!:sym] to compose an expression that matches the symbol [sym]
+            in the symbol stream.
+        *)
         val ( !: ): S.t -> x
+        
+        (** Use [!^f] to compose an expression that matches any symbol in the
+            symbol stream for which applying the function [f] returns [true].
+        *)
         val ( !^ ): (S.t -> bool) -> x
+        
+        (** Use [!~z] to compose an expression that matches the sequence of
+            symbols [z] in the symbol stream.
+        *)
         val ( !~ ): S.t Cf_seq.t -> x
         
+        (** Use [e $= x] to compose a rule that produces [x] when the symbols
+            in the symbol stream match the expression [e].
+        *)
         val ( $= ): x -> 'a -> 'a r
+        
+        (** Use [e $> f] to compose a rule that applies the tokenizer function
+            [f] to the sequence of input symbols in the stream recognized by
+            the expression [e] to produce an output token.
+        *)
         val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
+        
+        (** Use [e $@ f] to compose a rule that applies the scanning function
+            [f] to the input stream when it is recognized by the expression
+            [e].  The scanning function is passed the length of the recognized
+            sequence of symbols and receives a parser in return that produces
+            the output of the rule and makes any advanced manipulations of the
+            input stream necessary to continue parsing for the next token.
+            If the parser returned from the scanning function does not
+            recognize the input stream, then the rule is not matched and the
+            next best matching rule is selected.
+        *)
         val ( $@ ): x -> (int -> 'a t) -> 'a r
+        
+        (** Use this operator to combine a list of rules into a single rule. *)
         val ( !@ ): 'a r list -> 'a r
     end
     
+    (** Use [create r] to construct a parser that recognizes the longest
+        sequence that matches the rule [r].
+    *)
     val create: 'a r -> 'a t
 end
 
+(** The functor that creates a DFA module. *)
 module Create(S: Symbol): T with module S = S
 
 (*--- $File$ ---*)
   OF THE POSSIBILITY OF SUCH DAMAGE. 
  *---------------------------------------------------------------------------*)
 
+(** Functional LL(x) parsing with monadic combinators. *)
+
+(** This module implements function left-shift/left-reduce parser combinators
+    using a state-exception monad over the input stream.  To evaluate a scanner
+    monad is to parse an input stream.  The state monad is lifted into the
+    exception monad to facilitate backtracking.  Scanners should signal errors
+    in the input stream with ordinary Objective Caml exceptions.
+*)
+
+(** The scanner monad.  A function that parses a sequence of input tokens.
+    Returns [None] if the parser does not recognize any symbols.  Otherwise
+    returns the reduced output and the remainder of the input tokens.
+*)
 type ('s, 'r) t = 's Cf_seq.t -> ('r * 's Cf_seq.t) option
 
+(** A scanner that never recognizes input, i.e. it always returns [None]. *)
 val nil: ('s, 'r) t
+
+(** The return scanner.  Use [ret x] to produce the result [x] without
+    processing any more input.
+*)
 val ret: 'r -> ('s, 'r) t
+
+(** The binding function.  Use [bind p f] to compose a scanner that passes the
+    output of parser [p] to the bound function [f] which returns the scanner
+    for the next symbol in a parsing rule.  This is normal function version of
+    the [Op.( >>= )] infix operator.
+*)
 val bind: ('s, 'a) t -> ('a -> ('s, 'b) t) -> ('s, 'b) t
 
+(** A scanner that produces the unit value when it recognizes the end of the
+    input token sequence.
+*)
 val fin: ('s, unit) t
+
+(** A scanner that recognizes, shifts and produces any input token. *)
 val any: ('s, 's) t
+
+(** Use [eq x] to compose a scanner that recognizes [x] in the input stream,
+    shifts and returns it. *)
 val eq: 's -> ('s, 's) t
+
+(** Use [sat f] to create a scanner that recognizes, shifts and reduces input
+    tokens for which the satisfier function [f] returns [true].
+*)
 val sat: ('s -> bool) -> ('s, 's) t
+
+(** Use [tok f] to recognize and shift input tokens for which the tokenizer
+    function [f] reduces an output value.
+*)
 val tok: ('s -> 'r option) -> ('s, 'r) t
+
+(** Use [lit s obj] to obtain a parser on character input sequences that
+    produces the output [obj] when it recognizes the literal [s] in the input.
+*)
 val lit: string -> 'r -> (char, 'r) t
+
+(** The optional scanner composer.  Use [opt p] to create a scanner that
+    recognizes an optional symbol in the input stream with the scanner [p].  If
+    the symbol is recognized, its tokens are shifted and reduced as [Some obj],
+    otherwise no tokens are shifted and the reduced value is [None].
+    Scanner functions created with this operator {i always} return [Some r],
+    where [r] is the reduced value, i.e. either [Some obj] or [None].  This is
+    the normal function version of the [Op.( ?/ )] prefix operator.
+*)
 val opt: ('s, 'r) t -> ('s, 'r option) t
+
+(** The list scanner composer.  Use [seq p] to create a scanner that
+    recognizes zero or more symbols in the input stream with the scanner [p].
+    The tokens of all the symbols recognized are shifted and reduced as a list
+    of objects in the order of their appearance in the input stream.  Scanner
+    functions created with this operator {i always} return [Some r], where [r]
+    is the reduced list of symbols, which may be the empty list if there are no
+    symbols recognized.  This is the normal function version of the [Op.( ?* )]
+    prefix operator.
+*)
 val seq: ('s, 'r) t -> ('s, 'r list) t
+
+(** The non-empty list scanner composer.  Use [seq1 p] to create a scanner that
+    recognizes one or more symbols in the input stream with the scanner [p].
+    If the symbols are recognized in the input stream, then their tokens are
+    shifted and reduced into a list of objects in the order of their
+    appearance in the input stream.  Otherwise, no tokens are shifted and
+    no output is reduced.  This is the normal function version of the
+    [Op.( ?+ )] prefix operator.
+*)
 val seq1: ('s, 'r) t -> ('s, 'r * 'r list) t
+
+(** Use [alt ps] to create a scanner that produces the output from the first
+    scanner in the list [ps] that recognizes a pattern in the input.  If no
+    scanner in the list recognizes a pattern, then the scanner constructed by
+    this function returns [None].  This is the normal function version of the
+    [Op.( ?^ )] prefix operator.
+*)
 val alt: ('s, 'r) t list -> ('s, 'r) t
+
+(** Use [altz ps] to create a scanner that produces the output from the first
+    scanner in the lazy sequence [ps] that recognizes a pattern in the input.
+    If no scanner in the sequence recognizes a pattern, then the scanner
+    constructed by this function returns [None]. This is the normal function
+    version of the [Op.( ?^~ )] prefix operator.
+*)
 val altz: ('s, 'r) t Cf_seq.t -> ('s, 'r) t
 
+(** Generic parser error with no parameters. *)
 exception Error
+
+(** Use [err ?f ()] to compose scanner that applies the input token stream to
+    the optional function [f] to obtain an Objective Caml exception, then
+    raises the exception.  The default function simply raises [Error].
+*)
 val err: ?f:('s Cf_seq.t -> exn) -> unit -> ('s, 'r) t
+
+(** Use [req f p] to create a scanner that requires the input stream to match
+    the scanner [p] or it will be passed to the parser [err f] instead.
+*)
 val req: ?f:('s Cf_seq.t -> exn) -> ('s, 'r) t -> ('s, 'r) t
 
+(** Use [unfold p i] to create a sequence of output values recognized by
+    applying the input token sequence [i] to the parser [p] until no more
+    input is recognized.
+*)
 val unfold: ('s, 'r) t -> 's Cf_seq.t -> 'r Cf_seq.t
 
+(** Open this module to take the parser operators into the current scope. *)
 module Op: sig
+    (** The infix operator version of the [bind] composer. *)
     val ( >>= ): ('s, 'a) t -> ('a -> ('s, 'b) t) -> ('s, 'b) t
+    
+    (** The prefix operator version of the [eq] composer. *)
     val ( ?. ): 's -> ('s, 's) t
+    
+    (** The prefix operator version of the [opt] composer. *)
     val ( ?/ ): ('s, 'r) t -> ('s, 'r option) t
+    
+    (** The prefix operator version of the [seq1] composer. *)
     val ( ?+ ): ('s, 'r) t -> ('s, 'r * 'r list) t
+    
+    (** The prefix operator version of the [seq1] composer. *)
     val ( ?* ): ('s, 'r) t -> ('s, 'r list) t
+    
+    (** The prefix operator version of the [alt] composer. *)
     val ( ?^ ): ('s, 'r) t list -> ('s, 'r) t
+    
+    (** The prefix operator version of the [altz] composer. *)
     val ( ?^~ ): ('s, 'r) t Cf_seq.t -> ('s, 'r) t
 end