# HG changeset patch
# User james woodyatt
# Date 1323653857 28800
# Branch sideline
# Node ID 6eb3a7fcd5a193095fdc30c35e7ea6a913449054
# Parent d88bacaf2a84313e98eb366c60f4d3d4e69173ad
Compose the documentation for the new Cf_llscan and Cf_dfa modules.
diff --git a/cf/cf_dfa.mli b/cf/cf_dfa.mli
--- a/cf/cf_dfa.mli
+++ b/cf/cf_dfa.mli
@@ -29,42 +29,153 @@
OF THE POSSIBILITY OF SUCH DAMAGE.
*---------------------------------------------------------------------------*)
+(** Functional composition of lazy deterministic finite automata. *)
+
+(** {6 Overview}
+
+ This module implements operators for functional composition of lazy
+ deterministic finite automata (DFA). A lazy DFA is more efficient at
+ recognizing regular grammars than a non-deterministic finite automaton,
+ and the lazy evaluation amortizes the cost of compiling the state table
+ so that it compares well to that of the NFA.
+
+ The interface defined here is used as the underlying algorithm for the
+ {!Cf_clex} module. It uses a functor that operates on a module defining
+ the type of a symbol, the type of parser input tokens that contain such
+ symbols, and a map of symbols to some polymorphic type. The result of the
+ functor is a module that contains operator functions for composing
+ expressions and rules for automata that operate on streams of the input
+ symbol type.
+
+ {b Note}: a DFA can be remarkably inefficient compared to an NFA for
+ certain classes of unusual grammars and unusual input.
+*)
+
+(** {6 Module Types} *)
+
+(** The type of the input module for the [Create(S: Symbol)] functor defined
+    below.
+*)
module type Symbol = sig
- type t and 'a map
+ (** The symbol type *)
+ type t
+
+ (** The type of maps from symbols to polymorphic types. *)
+ type 'a map
+
+ (** The engine uses [map f] to construct a map from symbols to state
+ transitions.
+ *)
val map: (t -> 'a) -> 'a map
+
+ (** The engine uses [get m s] to get the state transition from map [m] for
+ the symbol [s].
+ *)
val get: 'a map -> t -> 'a
end
+(** The output of the [Create(S: Symbol)] functor, which is a module that
+ can be used to compose deterministic finite automata which operate on
+ symbols of the type specified.
+*)
module type T = sig
+
+ (** The module used as the input to the [Create(S: Symbol)] functor. *)
module S: Symbol
+ (** The type of an expression in the regular grammar of an automaton. *)
type x
+ (** The expression that matches the empty symbol sequence. *)
val nil: x
+ (** The type of a rule for recognizing a sequence of symbols according to
+ the regular grammar of an automaton and producing an output token.
+ *)
type 'a r
- type 'a t = (S.t, 'a) Cf_llscan.t
+ (** A parser that works on the symbols used in the automaton. *)
+ type 'a t = (S.t, 'a) Cf_parser.t
+
+ (** Open this module to bring the composition operators into the current
+ scope.
+ *)
module Op: sig
+
+ (** Use [a $| b] to compose an expression that matches either [a] or
+ [b] in the symbol stream.
+ *)
val ( $| ): x -> x -> x
+
+ (** Use [a $& b] to compose an expression that matches [a] followed by
+ [b] in the symbol stream.
+ *)
val ( $& ): x -> x -> x
+ (** Use [!*a] to compose an expression that matches zero or more
+        occurrences of [a] in the symbol stream.
+ *)
val ( !* ): x -> x
+
+ (** Use [!+a] to compose an expression that matches one or more
+        occurrences of [a] in the symbol stream.
+ *)
val ( !+ ): x -> x
+
+ (** Use [!?a] to compose an expression that matches zero or one
+        occurrence of [a] in the symbol stream.
+ *)
val ( !? ): x -> x
+
+ (** Use [!:sym] to compose an expression that matches the symbol [sym]
+ in the symbol stream.
+ *)
val ( !: ): S.t -> x
+
+ (** Use [!^f] to compose an expression that matches any symbol in the
+ symbol stream for which applying the function [f] returns [true].
+ *)
val ( !^ ): (S.t -> bool) -> x
+
+ (** Use [!~z] to compose an expression that matches the sequence of
+ symbols [z] in the symbol stream.
+ *)
val ( !~ ): S.t Cf_seq.t -> x
+ (** Use [e $= x] to compose a rule that produces [x] when the symbols
+ in the symbol stream match the expression [e].
+ *)
val ( $= ): x -> 'a -> 'a r
+
+ (** Use [e $> f] to compose a rule that applies the tokenizer function
+ [f] to the sequence of input symbols in the stream recognized by
+ the expression [e] to produce an output token.
+ *)
val ( $> ): x -> (S.t Cf_seq.t -> 'a) -> 'a r
+
+ (** Use [e $@ f] to compose a rule that applies the scanning function
+ [f] to the input stream when it is recognized by the expression
+ [e]. The scanning function is passed the length of the recognized
+ sequence of symbols and receives a parser in return that produces
+ the output of the rule and makes any advanced manipulations of the
+ input stream necessary to continue parsing for the next token.
+ If the parser returned from the scanning function does not
+ recognize the input stream, then the rule is not matched and the
+ next best matching rule is selected.
+ *)
val ( $@ ): x -> (int -> 'a t) -> 'a r
+
+ (** Use this operator to combine a list of rules into a single rule. *)
val ( !@ ): 'a r list -> 'a r
end
+ (** Use [create r] to construct a parser that recognizes the longest
+ sequence that matches the rule [r].
+ *)
val create: 'a r -> 'a t
end
+(** The functor that creates a DFA module. *)
module Create(S: Symbol): T with module S = S
(*--- $File$ ---*)
diff --git a/cf/cf_llscan.mli b/cf/cf_llscan.mli
--- a/cf/cf_llscan.mli
+++ b/cf/cf_llscan.mli
@@ -29,37 +29,151 @@
OF THE POSSIBILITY OF SUCH DAMAGE.
*---------------------------------------------------------------------------*)
+(** Functional LL(x) parsing with monadic combinators. *)
+
+(** This module implements functional left-shift/left-reduce parser combinators
+ using a state-exception monad over the input stream. To evaluate a scanner
+ monad is to parse an input stream. The state monad is lifted into the
+ exception monad to facilitate backtracking. Scanners should signal errors
+ in the input stream with ordinary Objective Caml exceptions.
+*)
+
+(** The scanner monad. A function that parses a sequence of input tokens.
+ Returns [None] if the parser does not recognize any symbols. Otherwise
+ returns the reduced output and the remainder of the input tokens.
+*)
type ('s, 'r) t = 's Cf_seq.t -> ('r * 's Cf_seq.t) option
+(** A scanner that never recognizes input, i.e. it always returns [None]. *)
val nil: ('s, 'r) t
+
+(** The return scanner. Use [ret x] to produce the result [x] without
+ processing any more input.
+*)
val ret: 'r -> ('s, 'r) t
+
+(** The binding function. Use [bind p f] to compose a scanner that passes the
+ output of parser [p] to the bound function [f] which returns the scanner
+ for the next symbol in a parsing rule. This is normal function version of
+ the [Op.( >>= )] infix operator.
+*)
val bind: ('s, 'a) t -> ('a -> ('s, 'b) t) -> ('s, 'b) t
+(** A scanner that produces the unit value when it recognizes the end of the
+ input token sequence.
+*)
val fin: ('s, unit) t
+
+(** A scanner that recognizes, shifts and produces any input token. *)
val any: ('s, 's) t
+
+(** Use [eq x] to compose a scanner that recognizes [x] in the input stream,
+ shifts and returns it. *)
val eq: 's -> ('s, 's) t
+
+(** Use [sat f] to create a scanner that recognizes, shifts and reduces input
+ tokens for which the satisfier function [f] returns [true].
+*)
val sat: ('s -> bool) -> ('s, 's) t
+
+(** Use [tok f] to recognize and shift input tokens for which the tokenizer
+ function [f] reduces an output value.
+*)
val tok: ('s -> 'r option) -> ('s, 'r) t
+
+(** Use [lit s obj] to obtain a parser on character input sequences that
+ produces the output [obj] when it recognizes the literal [s] in the input.
+*)
val lit: string -> 'r -> (char, 'r) t
+
+(** The optional scanner composer. Use [opt p] to create a scanner that
+ recognizes an optional symbol in the input stream with the scanner [p]. If
+ the symbol is recognized, its tokens are shifted and reduced as [Some obj],
+ otherwise no tokens are shifted and the reduced value is [None].
+ Scanner functions created with this operator {i always} return [Some r],
+ where [r] is the reduced value, i.e. either [Some obj] or [None]. This is
+ the normal function version of the [Op.( ?/ )] prefix operator.
+*)
val opt: ('s, 'r) t -> ('s, 'r option) t
+
+(** The list scanner composer. Use [seq p] to create a scanner that
+ recognizes zero or more symbols in the input stream with the scanner [p].
+ The tokens of all the symbols recognized are shifted and reduced as a list
+ of objects in the order of their appearance in the input stream. Scanner
+ functions created with this operator {i always} return [Some r], where [r]
+ is the reduced list of symbols, which may be the empty list if there are no
+ symbols recognized. This is the normal function version of the [Op.( ?* )]
+ prefix operator.
+*)
val seq: ('s, 'r) t -> ('s, 'r list) t
+
+(** The non-empty list scanner composer. Use [seq1 p] to create a scanner that
+ recognizes one or more symbols in the input stream with the scanner [p].
+ If the symbols are recognized in the input stream, then their tokens are
+ shifted and reduced into a list of objects in the order of their
+ appearance in the input stream. Otherwise, no tokens are shifted and
+ no output is reduced. This is the normal function version of the
+ [Op.( ?+ )] prefix operator.
+*)
val seq1: ('s, 'r) t -> ('s, 'r * 'r list) t
+
+(** Use [alt ps] to create a scanner that produces the output from the first
+ scanner in the list [ps] that recognizes a pattern in the input. If no
+ scanner in the list recognizes a pattern, then the scanner constructed by
+ this function returns [None]. This is the normal function version of the
+ [Op.( ?^ )] prefix operator.
+*)
val alt: ('s, 'r) t list -> ('s, 'r) t
+
+(** Use [altz ps] to create a scanner that produces the output from the first
+ scanner in the lazy sequence [ps] that recognizes a pattern in the input.
+ If no scanner in the sequence recognizes a pattern, then the scanner
+ constructed by this function returns [None]. This is the normal function
+ version of the [Op.( ?^~ )] prefix operator.
+*)
val altz: ('s, 'r) t Cf_seq.t -> ('s, 'r) t
+(** Generic parser error with no parameters. *)
exception Error
+
+(** Use [err ?f ()] to compose a scanner that applies the input token stream to
+ the optional function [f] to obtain an Objective Caml exception, then
+ raises the exception. The default function simply raises [Error].
+*)
val err: ?f:('s Cf_seq.t -> exn) -> unit -> ('s, 'r) t
+
+(** Use [req ?f p] to create a scanner that requires the input stream to match
+    the scanner [p] or it will be passed to the parser [err ~f ()] instead.
+*)
val req: ?f:('s Cf_seq.t -> exn) -> ('s, 'r) t -> ('s, 'r) t
+(** Use [unfold p i] to create a sequence of output values recognized by
+ applying the input token sequence [i] to the parser [p] until no more
+ input is recognized.
+*)
val unfold: ('s, 'r) t -> 's Cf_seq.t -> 'r Cf_seq.t
+(** Open this module to take the parser operators into the current scope. *)
module Op: sig
+ (** The infix operator version of the [bind] composer. *)
val ( >>= ): ('s, 'a) t -> ('a -> ('s, 'b) t) -> ('s, 'b) t
+
+ (** The prefix operator version of the [eq] composer. *)
val ( ?. ): 's -> ('s, 's) t
+
+ (** The prefix operator version of the [opt] composer. *)
val ( ?/ ): ('s, 'r) t -> ('s, 'r option) t
+
+ (** The prefix operator version of the [seq1] composer. *)
val ( ?+ ): ('s, 'r) t -> ('s, 'r * 'r list) t
+
+    (** The prefix operator version of the [seq] composer. *)
val ( ?* ): ('s, 'r) t -> ('s, 'r list) t
+
+ (** The prefix operator version of the [alt] composer. *)
val ( ?^ ): ('s, 'r) t list -> ('s, 'r) t
+
+ (** The prefix operator version of the [altz] composer. *)
val ( ?^~ ): ('s, 'r) t Cf_seq.t -> ('s, 'r) t
end