Kirill Simonov avatar Kirill Simonov committed 3bf832d

Refactoring functions.

Documented `htsql.tr.signature`.
Refactored `CAST`: moved admission checking, cast elimination
and specialization to `htsql.tr.encode`.

Comments (0)

Files changed (17)

doc/api/htsql.tr.fn.rst

 
 .. automodule:: htsql.tr.fn
    :members:
-.. automodule:: htsql.tr.fn.function
-   :members:
 

doc/api/htsql.tr.rst

    :members:
 .. automodule:: htsql.tr.scan
    :members:
+.. automodule:: htsql.tr.signature
+   :members:
 .. automodule:: htsql.tr.syntax
    :members:
 .. automodule:: htsql.tr.term

src/htsql/adapter.py

     def dominates(component, other):
         # A component implementing an adapter interface dominates
         # over another component implementing the same interface
-        # if one of the two conditions hold:
+        # if one of the following two conditions holds:
         
         # (1) The component is a subclass of the other component.
         if issubclass(component, other):

src/htsql/tr/__init__.py

 """
 
 
+# Make sure all submodules in the `tr` package are imported
+# so that any adapter components defined there are registered.
+# Since `fn` is not explicitly imported anywhere, we force
+# its import here.
 from . import fn
 
 

src/htsql/tr/assemble.py

 
 
 class EvaluateFormula(Evaluate):
+    """
+    Evaluates a formula node.
+
+    The evaluation could be specific to the formula signature and is
+    implemented by the :class:`EvaluateBySignature` adapter.
+    """
 
     adapts(FormulaCode)
 
     def __call__(self):
+        # Delegate the evaluation to `EvaluateBySignature`.
         evaluate = EvaluateBySignature(self.code, self.state)
         return evaluate()
 
 
 class EvaluateBySignature(Adapter):
+    """
+    Evaluates a formula node.
+
+    This is an auxiliary adapter used to evaluate
+    :class:`htsql.tr.code.FormulaCode` nodes.  The adapter is polymorphic
+    on the formula signature.
+
+    Unless overridden, the adapter evaluates the arguments of the formula
+    and generates a new formula phrase with the same signature.
+
+    `code` (:class:`htsql.tr.code.FormulaCode`)
+        The formula node to evaluate.
+
+    `state` (:class:`AssemblingState`)
+        The current state of the assembling process.
+
+    Aliases:
+
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
+
+    `domain` (:class:`htsql.domain.Domain`)
+        The co-domain of the formula.
+
+    `arguments` (:class:`htsql.tr.signature.Bag`)
+        The arguments of the formula.
+    """
 
     adapts(Signature)
 
     @classmethod
     def dispatch(interface, code, *args, **kwds):
+        # Override the default dispatch since the adapter is polymorphic
+        # on the type of the formula signature, not on the formula itself.
         assert isinstance(code, FormulaCode)
         return (type(code.signature),)
 
         assert isinstance(state, AssemblingState)
         self.code = code
         self.state = state
+        # Extract commonly used attributes of the node.
         self.signature = code.signature
         self.domain = code.domain
         self.arguments = code.arguments
 
     def __call__(self):
+        # Evaluate the arguments of the formula.
         arguments = self.arguments.map(self.state.evaluate)
+        # By default, assume that the formula is null-regular.  The adapter
+        # should be overridden for nodes where it is not the case.
         is_nullable = any(cell.is_nullable for cell in arguments.cells())
+        # Generate a new formula node.
         return FormulaPhrase(self.signature,
                              self.domain,
                              is_nullable,
 
 
 class EvaluateIsTotallyEqual(EvaluateBySignature):
+    """
+    Evaluates the total equality (``==``) operator.
+    """
 
     adapts(IsTotallyEqualSig)
 
     def __call__(self):
+        # Override the default implementation since the total equality
+        # operator is not null-regular, and, in fact, always not nullable.
         arguments = self.arguments.map(self.state.evaluate)
-        return FormulaPhrase(self.signature, self.domain, False, self.code,
-                             **arguments)
+        return FormulaPhrase(self.signature, self.domain,
+                             False, self.code, **arguments)
 
 
 class EvaluateIsNull(EvaluateBySignature):
+    """
+    Evaluates the ``is_null()`` operator.
+    """
 
     adapts(IsNullSig)
 
     def __call__(self):
+        # Override the default implementation since the `is_null()`
+        # operator is not null-regular, and, in fact, always not nullable.
         arguments = self.arguments.map(self.state.evaluate)
-        return FormulaPhrase(self.signature, self.domain, False, self.code,
-                             **arguments)
+        return FormulaPhrase(self.signature, self.domain,
+                             False, self.code, **arguments)
 
 
 class EvaluateNullIf(EvaluateBySignature):
+    """
+    Evaluates the ``null_if()`` operator.
+    """
 
     adapts(NullIfSig)
 
     def __call__(self):
+        # Override the default implementation since the `null_if()`
+        # operator is not null-regular, and, in fact, is always nullable.
         arguments = self.arguments.map(self.state.evaluate)
-        return FormulaPhrase(self.signature, self.domain, True, self.code,
-                             **arguments)
+        return FormulaPhrase(self.signature, self.domain,
+                             True, self.code, **arguments)
 
 
 class EvaluateIfNull(EvaluateBySignature):
+    """
+    Evaluates the ``if_null()`` operator.
+    """
 
     adapts(IfNullSig)
 
     def __call__(self):
+        # Override the default implementation since the `if_null()`
+        # operator is not null-regular.  It is nullable only if all of
+        # its arguments are nullable.
         arguments = self.arguments.map(self.state.evaluate)
         is_nullable = all(cell.is_nullable for cell in arguments.cells())
-        return FormulaPhrase(self.signature, self.domain, is_nullable,
-                             self.code, **arguments)
+        return FormulaPhrase(self.signature, self.domain,
+                             is_nullable, self.code, **arguments)
 
 
 class EvaluateUnit(Evaluate):

src/htsql/tr/bind.py

             If set, the lookup context is set to `base` when
             binding the syntax node.
         """
+        # If passed, set the new lookup context.
         if base is not None:
             self.push_base(base)
+        # Realize and apply `BindByName` protocol.
         bind = BindByName(syntax, self)
         bindings = list(bind())
+        # Restore the old lookup context.
         if base is not None:
             self.pop_base()
+        # Return the generated binding nodes.
         return bindings
 
 
 
         # If the syntax node has the form:
         #   /{selector}
-        # we take the current lookup context as the segment base.
+        # we use the current lookup context as the segment base.
         base = self.state.base
         # Otherwise, for queries `/base{selector}?filter` and `/base{selector}`
         # we bind the nodes `(base?filter)` and `base` respectively
             bare_elements = self.state.bind_all(self.syntax.selector, base)
         else:
             # No selector means that the segment has the form:
-            #   / base   or   / base ?filter
+            #   /base   or   /base?filter
             # This is a special case: depending on whether the base is
             # enumerable, it is interpreted either as
-            #   / base {*}
+            #   /base{*}
             # or as
-            #   / {base}
+            #   /{base}
             bare_elements = itemize(base, base.syntax)
             if bare_elements is None:
                 bare_elements = [base]
 
 
 class BindByName(Protocol):
+    """
+    Binds a call node.
+
+    This is an abstract protocol interface that provides a mechanism
+    for name-based dispatch of call syntax nodes.
+
+    The :class:`BindByName` interface has the following signature::
+
+        BindByName: (CallSyntax, BindingState) -> listof(Binding)
+
+    The protocol is polymorphic on `name` and `len(arguments)`, where
+    `name` and `arguments` are attributes of the call node.
+
+    To add an implementation of the interface, define a subclass
+    of :class:`BindByName` and specify its name and expected number
+    of arguments using function :func:`named`.
+
+    For more implementations of the interface, see :mod:`htsql.tr.fn.bind`.
+
+    Class attributes:
+
+    `names` (a list of names or pairs `(name, length)`)
+        List of names the component matches.
+
+        Here `name` is a non-empty string, `length` is an integer or
+        ``None``.
+    """
 
     names = []
 
     @classmethod
-    def dispatch(interface, syntax, *args, **kwds):
-        assert isinstance(syntax, CallSyntax)
-        return (syntax.name, len(syntax.arguments))
+    def dominates(component, other):
+        # Determine if the component dominates another component
+        # assuming that they match the same dispatch key.
 
-    @classmethod
-    def matches(component, dispatch_key):
-        assert isinstance(dispatch_key, tupleof(str, int))
-        key_name, key_arity = dispatch_key
-        if key_name.isalnum():
-            key_name = normalize(key_name)
+        # A component implementing a protocol interface dominates
+        # another component if one of the following two conditions
+        # holds:
+
+        # (1) The component is a subclass of the other component.
+        if issubclass(component, other):
+            return True
+
+        # (2) The component and the other component match the
+        # same name, but the former requires a fixed number of
+        # arguments while the latter accepts a node with any
+        # number of arguments.
         for name in component.names:
             arity = None
             if isinstance(name, tuple):
                 name, arity = name
-            if name.isalnum():
-                name = normalize(name)
-            if name == key_name:
-                if arity is None or arity == key_arity:
-                    return True
-        return False
-
-    @classmethod
-    def dominates(component, other):
-        if issubclass(component, other):
-            return True
-        for name in component.names:
-            arity = None
-            if isinstance(name, tuple):
-                name, arity = name
-            if name.isalnum():
-                name = normalize(name)
+            name = name.lower()
             for other_name in other.names:
                 other_arity = None
                 if isinstance(other_name, tuple):
                     other_name, other_arity = other_name
-                if other_name.isalnum():
-                    other_name = normalize(other_name)
+                other_name = other_name.lower()
                 if name == other_name:
                     if arity is not None and other_arity is None:
                         return True
+
         return False
 
+    @classmethod
+    def matches(component, dispatch_key):
+        # Check if the component matches the given function name
+        # and the number of arguments.
+        assert isinstance(dispatch_key, tupleof(str, int))
+
+        # The name and the number of arguments of the call node.
+        key_name, key_arity = dispatch_key
+        # We want to compare names case-insensitively.  Unfortunately,
+        # we cannot use `normalize` from `htsql.tr.lookup` since it
+        # mangles symbols.
+        key_name = key_name.lower()
+
+        # Check if any of the component names matches the given name.
+        for name in component.names:
+            # `name` could be either a string or a pair of a string
+            # and an integer.  The former assumes that the component
+            # accepts call nodes with any number of arguments.
+            arity = None
+            if isinstance(name, tuple):
+                name, arity = name
+            name = name.lower()
+            # Check if the component name matches the node name.
+            if name == key_name:
+                if arity is None or arity == key_arity:
+                    return True
+
+        # None of the names matched the dispatch key.
+        return False
+
+    @classmethod
+    def dispatch(interface, syntax, *args, **kwds):
+        assert isinstance(syntax, CallSyntax)
+        # We override `dispatch` since, as opposed to regular protocol
+        # interfaces, we want to take into account not only the
+        # function name, but also the number of arguments.
+        return (syntax.name, len(syntax.arguments))
+
     def __init__(self, syntax, state):
         assert isinstance(syntax, CallSyntax)
         assert isinstance(state, BindingState)
         self.syntax = syntax
+        self.state = state
+        # Extract commonly accessed attributes of the call node.
         self.name = syntax.name
         self.arguments = syntax.arguments
-        self.state = state
 
     def __call__(self):
-        raise BindError("unknown function %s" % self.name, self.syntax.mark)
+        # The default implementation; override in subclasses.
+        raise BindError("unknown function %s" % self.name,
+                        self.syntax.mark)
 
 
 class BindGroup(Bind):

src/htsql/tr/binding.py

 from ..entity import TableEntity, ColumnEntity, Join
 from ..domain import Domain, VoidDomain, BooleanDomain, TupleDomain
 from .syntax import Syntax
-from .coerce import coerce
 from .signature import Signature, Bag, Formula
 
 
     """
 
     def __init__(self, value, domain, syntax):
-        # FIXME: It appears `domain` is always `UntypedDomain()`.
-        # Hard-code the domain value then?
         super(LiteralBinding, self).__init__(domain, syntax)
         self.value = value
 
 
-class EqualityBindingBase(Binding):
-    """
-    Represents an equality operator.
-
-    This is an abstract class for the ``=`` and ``==`` operators.
-
-    `lop` (:class:`Binding`)
-        The left operand.
-
-    `rop` (:class:`Binding`)
-        The right operand.
-    """
-
-    def __init__(self, lop, rop, syntax):
-        assert isinstance(lop, Binding)
-        assert isinstance(rop, Binding)
-        # We want to use an engine-specific Boolean type, which, we assume,
-        # must always exist.
-        domain = coerce(BooleanDomain())
-        assert domain is not None
-        super(EqualityBindingBase, self).__init__(domain, syntax)
-        self.lop = lop
-        self.rop = rop
-
-
 class CastBinding(Binding):
     """
     Represents a type conversion operator.
 
 class FormulaBinding(Formula, Binding):
     """
-    Represents a function or an operator binding.
+    Represents a formula binding.
 
-    This is an abstract class; see subclasses for concrete functions and
-    operators.
+    A formula binding represents a function or an operator call
+    as a binding node.
+
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
 
     `domain` (:class:`Domain`)
-        The type of the result.
+        The co-domain of the formula.
 
     `arguments` (a dictionary)
-        A mapping from argument names to values.
+        The arguments of the formula.
+
+        Note that all the arguments become attributes of the node object.
     """
 
     def __init__(self, signature, domain, syntax, **arguments):
         assert isinstance(signature, Signature)
+        # Check that the arguments match the formula signature.
         arguments = Bag(**arguments)
         assert arguments.admits(Binding, signature)
+        # This will impress the arguments to the node.
         super(FormulaBinding, self).__init__(signature, arguments,
                                              domain, syntax)
 
     """
 
     def __init__(self, base, direction, syntax):
-        assert direction in [-1, +1]
+        assert direction in [+1, -1]
         super(DirectionBinding, self).__init__(base, syntax)
         self.direction = direction
 
         The formatting hint.
     """
 
+    # FIXME: currently unused.
+
     def __init__(self, base, format, syntax):
         assert isinstance(format, str)
         super(FormatBinding, self).__init__(base, syntax)

src/htsql/tr/code.py

         Provides ordering of the space.
 
         The function returns a list of pairs `(code, direction)`, where
-        `code` is a :class:`Code` instance and `direction` is a number ``+1``
-        or ``-1``.  The `code` objects specify expressions by which the
-        rows are sorted, `direction` indicates the respective order (``+1``
-        for ascending, ``-1`` for descending).
+        `code` is a :class:`Code` instance and `direction` is a number
+        ``+1`` or ``-1``.  The `code` objects specify expressions by which
+        the rows are sorted, `direction` indicates the respective order
+        (``+1`` for ascending, ``-1`` for descending).
 
         `with_strong` (Boolean)
             If set, include strong (explicit) ordering.
 
 class FormulaCode(Formula, Code):
     """
-    Represents a function or an operator expression.
+    Represents a formula code.
 
-    This is an abstract class; see subclasses for concrete function types.
+    A formula code represents a function or an operator call as a code node.
 
-    `domain` (:class:`htsql.domain.Domain`)
-        The function co-domain.
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
+
+    `domain` (:class:`Domain`)
+        The co-domain of the formula.
 
     `arguments` (a dictionary)
-        A mapping from argument names to argument values.  Among values,
-        we expect other :class:`Code` objects or lists of :class:`Code`
-        objects.
+        The arguments of the formula.
+
+        Note that all the arguments become attributes of the node object.
     """
 
     def __init__(self, signature, domain, binding, **arguments):
         assert isinstance(signature, Signature)
+        # Check that the arguments match the formula signature.
         arguments = Bag(**arguments)
         assert arguments.admits(Code, signature)
+        # Extract unit nodes from the arguments.
         units = []
         for cell in arguments.cells():
             units.extend(cell.units)
         equality_vector = (signature, domain, arguments.freeze())
+        # The first two arguments are processed by the `Formula`
+        # constructor, the rest of them go to the `Code` constructor.
         super(FormulaCode, self).__init__(
                     signature, arguments,
                     domain=domain,
     """
     Represents a primitive unit.
 
-    A primitive unit is a intrinsic function on a space.
+    A primitive unit is an intrinsic function on a space.
 
     This is an abstract class; for the (only) concrete subclass, see
     :class:`ColumnUnit`.

src/htsql/tr/encode.py

 """
 
 
-from ..adapter import Adapter, adapts
-from ..domain import Domain, UntypedDomain, TupleDomain, BooleanDomain
+from ..adapter import Adapter, adapts, adapts_many
+from ..domain import (Domain, UntypedDomain, TupleDomain, BooleanDomain,
+                      NumberDomain, IntegerDomain, DecimalDomain, FloatDomain,
+                      StringDomain, EnumDomain, DateDomain, OpaqueDomain)
 from .error import EncodeError
 from .coerce import coerce
 from .binding import (Binding, RootBinding, QueryBinding, SegmentBinding,
                    FilteredSpace, OrderedSpace,
                    QueryExpr, SegmentExpr, LiteralCode, FormulaCode,
                    CastCode, ColumnUnit, ScalarUnit)
-from .signature import Signature, IsNullSig
+from .signature import Signature, IsNullSig, NullIfSig
+import decimal
 
 
 class EncodingState(object):
                            self.binding)
 
 
+class EncodeCast(Encode):
+    """
+    Encodes a cast binding.
+
+    The actual encoding is performed by the :class:`Convert` adapter.
+    """
+
+    adapts(CastBinding)
+
+    def __call__(self):
+        # Delegate it to the `Convert` adapter.
+        convert = Convert(self.binding, self.state)
+        return convert()
+
+
+class DirectCast(Direct):
+    """
+    Extracts a direction modifier from a cast binding.
+    """
+
+    adapts(CastBinding)
+
+    def __call__(self):
+        # The adapter is delegated to the binding base; we have to do it
+        # because many expressions (including segment elements) are wrapped
+        # with implicit cast nodes, which otherwise would mask any decorators.
+        return self.state.direct(self.binding.base)
+
+
 class Convert(Adapter):
     """
     Encodes a cast binding to a code node.
     :class:`htsql.tr.binding.CastBinding` nodes.  The adapter is polymorphic
     by the origin and the target domains.
 
-    The purpose of the adapter is to handle conversions from special types:
-    :class:`htsql.domain.UntypedDomain` and :class:`htsql.domain.TupleDomain`.
-    Conversions from regular types are passed as is without any extra checks.
+    The purpose of the adapter is multifold.  The :class:`Convert` adapter:
+
+    - verifies that the conversion from the source to the target
+      domain is admissible;
+    - eliminates redundant conversions;
+    - handles conversion from the special types:
+      :class:`htsql.domain.UntypedDomain` and :class:`htsql.domain.TupleDomain`;
+    - when possible, expresses the cast in terms of other operations; otherwise,
+      generates a new :class:`htsql.tr.code.CastCode` node.
 
     `binding` (:class:`htsql.tr.binding.CastBinding`)
         The binding node to encode.
 
+        Note that the adapter is dispatched on the pair
+        `(binding.base.domain, binding.domain)`.
+
     `state` (:class:`EncodingState`)
         The current state of the encoding process.
 
-    The adapter is dispatched on the pair:
-    `(binding.base.domain, binding.domain)`.
-
     Aliases:
 
     `base` (:class:`htsql.tr.binding.Binding`)
         self.state = state
 
     def __call__(self):
-        # The default implementation encodes an operand and
-        # returns a cast code node.
-        # Note: this also handles the case when the origin domain is
-        # `TupleDomain` and the target domain is *not* `BooleanDomain`.
-        # In this case, encoding of the base binding will raise an error.
-        base = self.state.encode(self.base)
-        # A minor optimization: when the origin and the target domains
-        # coincide, the cast is no-op.  More elaborate optimizations
-        # are performed further on the stack.
-        if base.domain == self.domain:
-            return base
-        return CastCode(base, self.domain, self.binding)
+        # A final check to eliminate conversion when the origin and
+        # the target domains are the same.  It is likely a no-op since
+        # this case should be already handled.
+        if self.base.domain == self.domain:
+            return self.state.encode(self.base)
+        # The default implementation complains that the conversion is
+        # not admissible.
+        raise EncodeError("inadmissible conversion", self.binding.mark)
 
 
 class ConvertUntyped(Convert):
         return LiteralCode(value, self.domain, self.binding)
 
 
+class ConvertToItself(Convert):
+    """
+    Eliminates redundant conversions.
+    """
+    adapts_many((BooleanDomain, BooleanDomain),
+                (IntegerDomain, IntegerDomain),
+                (FloatDomain, FloatDomain),
+                (DecimalDomain, DecimalDomain),
+                (StringDomain, StringDomain),
+                (DateDomain, DateDomain))
+    # FIXME: do we need `EnumDomain` here?
+
+    def __call__(self):
+        # Encode and return the operand of the cast.
+        return self.state.encode(self.binding.base)
+
+
 class ConvertTupleToBoolean(Convert):
     """
     Converts a tuple expression to a conditional expression.
                            self.binding, op=unit)
 
 
-class EncodeCast(Encode):
+class ConvertStringToBoolean(Convert):
     """
-    Encodes a cast binding.
-
-    The actual encoding is performed by the :class:`Convert` adapter.
+    Converts a string expression to a conditional expression.
     """
 
-    adapts(CastBinding)
+    adapts(StringDomain, BooleanDomain)
 
     def __call__(self):
-        # Delegate it to the `Convert` adapter.
-        convert = Convert(self.binding, self.state)
-        return convert()
+        # A `NULL` value and an empty string are converted to `FALSE`,
+        # any other string value is converted to `TRUE`.
 
+        # Encode the operand of the cast.
+        code = self.state.encode(self.base)
+        # An empty string.
+        empty_literal = LiteralCode('', self.base.domain, self.binding)
+        # Construct: `null_if(base,'')`.
+        code = FormulaCode(NullIfSig(), self.base.domain, self.binding,
+                           lop=code, rop=empty_literal)
+        # Construct: `!is_null(null_if(base,''))`.
+        code = FormulaCode(IsNullSig(-1), self.domain, self.binding,
+                           op=code)
+        # Return `!is_null(null_if(base,''))`.
+        return code
 
-class DirectCast(Direct):
+
+class ConvertToBoolean(Convert):
     """
-    Extracts a direction modifier from a cast binding.
+    Converts an expression of any type to a conditional expression.
     """
 
-    adapts(CastBinding)
+    adapts_many((NumberDomain, BooleanDomain),
+                (EnumDomain, BooleanDomain),
+                (DateDomain, BooleanDomain),
+                (OpaqueDomain, BooleanDomain))
+    # Note: we include the opaque domain here to ensure that any
+    # data type could be converted to Boolean.  However this may
+    # lead to unintuitive results.
 
     def __call__(self):
-        # The adapter is delegated to the binding base; we have to do it
-        # because many expressions (including segment elements) are wrapped
-        # with implicit cast nodes, which otherwise would mask any decorators.
-        return self.state.direct(self.binding.base)
+        # A `NULL` value is converted to `FALSE`; any other value is
+        # converted to `TRUE`.
+
+        # Construct and return `!is_null(base)`.
+        return FormulaCode(IsNullSig(-1), self.domain, self.binding,
+                           op=self.state.encode(self.base))
+
+
+class ConvertToString(Convert):
+    """
+    Convert an expression to a string.
+    """
+
+    adapts_many((NumberDomain, StringDomain),
+                (EnumDomain, StringDomain),
+                (DateDomain, StringDomain),
+                (OpaqueDomain, StringDomain))
+    # Note: we assume we could convert any opaque data type to string;
+    # it is risky but convenient.
+
+    def __call__(self):
+        # We generate a cast code node leaving it to the serializer
+        # to specialize on the origin data type.
+        return CastCode(self.state.encode(self.base), self.domain,
+                        self.binding)
+
+
+class ConvertToInteger(Convert):
+    """
+    Convert an expression to an integer value.
+    """
+
+    adapts_many((DecimalDomain, IntegerDomain),
+                (FloatDomain, IntegerDomain),
+                (StringDomain, IntegerDomain))
+
+    def __call__(self):
+        # We leave conversion from literal values to the database
+        # engine even though we could handle it here because the
+        # conversion may be engine-specific.
+        return CastCode(self.state.encode(self.base), self.domain,
+                        self.binding)
+
+
+class ConvertToDecimal(Convert):
+    """
+    Convert an expression to a decimal value.
+    """
+
+    adapts_many((IntegerDomain, DecimalDomain),
+                (FloatDomain, DecimalDomain),
+                (StringDomain, DecimalDomain))
+
+    def __call__(self):
+        # Encode the operand of the cast.
+        code = self.state.encode(self.base)
+        # Handle conversion from an integer literal manually.
+        # We do not handle conversion from other literal types
+        # because it may be engine-specific.
+        if isinstance(code, LiteralCode):
+            if isinstance(code.domain, IntegerDomain):
+                if code.value is None:
+                    return code.clone(domain=self.domain)
+                else:
+                    # Make sure that the string representation
+                    # of the decimal value has the decimal point.
+                    value = decimal.Decimal("%s.0" % code.value)
+                    return code.clone(value=value, domain=self.domain)
+        # For the regular case, generate an appropriate cast node.
+        return CastCode(code, self.domain, self.binding)
+
+
+class ConvertToFloat(Convert):
+    """
+    Convert an expression to a float value.
+    """
+
+    adapts_many((IntegerDomain, FloatDomain),
+                (DecimalDomain, FloatDomain),
+                (StringDomain, FloatDomain))
+
+    def __call__(self):
+        # Encode the operand of the cast.
+        code = self.state.encode(self.base)
+        # Handle conversion from integer and decimal literals manually.
+        # We do not handle conversion from other literal types because it
+        # may be engine-specific.
+        if isinstance(code, LiteralCode):
+            if isinstance(code.domain, (IntegerDomain, DecimalDomain)):
+                if code.value is None:
+                    return code.clone(domain=self.domain)
+                else:
+                    value = float(code.value)
+                    return code.clone(value=value, domain=self.domain)
+        # For the regular case, generate an appropriate cast node.
+        return CastCode(code, self.domain, self.binding)
+
+
+class ConvertToDate(Convert):
+    """
+    Convert an expression to a date value.
+    """
+
+    adapts(StringDomain, DateDomain)
+
+    def __call__(self):
+        # We leave conversion from literal values to the database
+        # engine even though we could handle it here because the
+        # conversion may be engine-specific.
+        return CastCode(self.state.encode(self.base), self.domain,
+                        self.binding)
+
+
+class EncodeFormula(Encode):
+    """
+    Translates a formula binding to a code node.
+
+    The translation is specific to the formula signature and is implemented
+    by the :class:`EncodeBySignature` adapter.
+    """
+
+    adapts(FormulaBinding)
+
+    def __call__(self):
+        # Delegate the translation to the `EncodeBySignature` adapter.
+        encode = EncodeBySignature(self.binding, self.state)
+        return encode()
+
+
+class RelateFormula(Relate):
+    """
+    Translates a formula binding to a space node.
+
+    The translation is specific to the formula signature and is implemented
+    by the :class:`RelateBySignature` adapter.
+    """
+
+    adapts(FormulaBinding)
+
+    def __call__(self):
+        # Delegate the translation to the `RelateBySignature` adapter.
+        relate = RelateBySignature(self.binding, self.state)
+        return relate()
+
+
+class DirectFormula(Direct):
+    """
+    Extracts a direction modifier from a formula binding.
+
+    The extraction is specific to the formula signature and is implemented
+    by the :class:`DirectBySignature` adapter.
+    """
+
+    adapts(FormulaBinding)
+
+    def __call__(self):
+        # Delegate the extraction to the `DirectBySignature` adapter.
+        direct = DirectBySignature(self.binding, self.state)
+        return direct()
 
 
 class EncodeBySignatureBase(Adapter):
+    """
+    Translates a formula node.
+
+    This is a base class for three encoding adapters:
+    :class:`EncodeBySignature`, :class:`RelateBySignature` and
+    :class:`DirectBySignature`; it encapsulates methods and attributes
+    shared between these adapters.
+
+    The adapter accepts a binding formula node and is polymorphic
+    on the formula signature.
+
+    `binding` (:class:`htsql.tr.binding.FormulaBinding`)
+        The formula node to encode.
+
+    `state` (:class:`EncodingState`)
+        The current state of the encoding process.
+
+    Aliases:
+
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
+
+    `domain` (:class:`htsql.domain.Domain`)
+        The co-domain of the formula.
+
+    `arguments` (:class:`htsql.tr.signature.Bag`)
+        The arguments of the formula.
+    """
 
     adapts(Signature)
 
     @classmethod
     def dispatch(interface, binding, *args, **kwds):
+        # We need to override `dispatch` since the adapter is polymorphic
+        # not on the type of the node itself, but on the type of the
+        # node signature.
         assert isinstance(binding, FormulaBinding)
         return (type(binding.signature),)
 
         assert isinstance(state, EncodingState)
         self.binding = binding
         self.state = state
+        # Extract commonly used attributes of the node.
         self.signature = binding.signature
         self.domain = binding.domain
         self.arguments = binding.arguments
 
 
 class EncodeBySignature(EncodeBySignatureBase):
+    """
+    Translates a formula binding to a code node.
+
+    This is an auxiliary adapter used to encode
+    :class:`htsql.tr.binding.FormulaBinding` nodes.  The adapter is
+    polymorphic on the formula signature.
+
+    Unless overridden, the adapter encodes the arguments of the formula
+    and generates a new formula code with the same signature.
+    """
 
     def __call__(self):
+        # Encode the arguments of the formula.
         arguments = self.arguments.map(self.state.encode)
+        # Produce a formula code with the same signature.
         return FormulaCode(self.signature,
                            self.domain,
                            self.binding,
 
 
 class RelateBySignature(EncodeBySignatureBase):
+    """
+    Translates a formula binding to a space node.
+
+    This is an auxiliary adapter used to relate
+    :class:`htsql.tr.binding.FormulaBinding` nodes.  The adapter is
+    polymorphic on the formula signature.
+
+    Unless overridden, the adapter generates an error.
+    """
 
     def __call__(self):
+        # Override in subclasses for formulas that generate space nodes.
         raise EncodeError("expected a valid space expression",
                           self.binding.mark)
 
 
 class DirectBySignature(EncodeBySignatureBase):
+    """
+    Extracts a direction modifier from a formula node.
+
+    This is an auxiliary adapter used to extract direction modifiers
+    from :class:`htsql.tr.binding.FormulaBinding` nodes.  The adapter is
+    polymorphic on the formula signature.
+
+    Unless overridden, the adapter returns no direction modifier.
+    """
 
     def __call__(self):
+        # Override in subclasses for formulas which may export non-trivial
+        # direction modifier.
         return None
 
 
-class EncodeFormula(Encode):
-
-    adapts(FormulaBinding)
-
-    def __call__(self):
-        encode = EncodeBySignature(self.binding, self.state)
-        return encode()
-
-
-class RelateFormula(Relate):
-
-    adapts(FormulaBinding)
-
-    def __call__(self):
-        relate = RelateBySignature(self.binding, self.state)
-        return relate()
-
-
-class DirectFormula(Direct):
-
-    adapts(FormulaBinding)
-
-    def __call__(self):
-        direct = DirectBySignature(self.binding, self.state)
-        return direct()
-
-
 class EncodeWrapper(Encode):
     """
     Translates a wrapper binding to a code node.

src/htsql/tr/error.py

 :mod:`htsql.tr.error`
 =====================
 
-This module implements HTSQL translation errors.
+This module declares exceptions that can be raised by the HTSQL-to-SQL
+translator.
 """
 
 
 
 class CompileError(TranslateError):
     """
-    Represents an compiler error.
+    Represents a compiler error.
 
     This error is raised when the compiler is unable to generate a term node.
     """
     kind = "compile error"
 
 
+class AssembleError(TranslateError):
+    """
+    Represents an assembler error.
+
+    This error is raised when the assembler is unable to generate a frame
+    or a phrase node.
+    """
+
+    # A short label for this category of errors (presumably used when
+    # the error is reported -- confirm against `TranslateError`).
+    kind = "assemble error"
+
+
 class DumpError(TranslateError):
     """
     Represents a serializer error.

src/htsql/tr/frame.py

 
 
 class LeadingAnchor(Anchor):
+    """
+    Represents the leading frame in the ``FROM`` list.
+
+    `frame` (:class:`Frame`)
+        The leading frame.
+
+    `condition` (``None``)
+        The join condition.
+
+    `is_left` (``False``)
+        Indicates that the join is ``LEFT OUTER``.
+
+    `is_right` (``False``)
+        Indicates that the join is ``RIGHT OUTER``.
+
+    """
 
     def __init__(self, frame, condition=None, is_left=False, is_right=False):
-        assert condition is None and is_left is False and is_right is False
-        super(LeadingAnchor, self).__init__(frame, condition, is_left, is_right)
+        # We retain the constructor arguments to facilitate `clone()`, but
+        # we ensure that their values are always fixed.
+        assert condition is None
+        assert is_left is False and is_right is False
+        super(LeadingAnchor, self).__init__(frame, condition,
+                                            is_left, is_right)
 
 
 class QueryFrame(Clause):
 
 class FormulaPhrase(Formula, Phrase):
     """
-    Represents a function or an operator expression.
+    Represents a formula phrase.
 
-    This is an abstract class; see subclasses for concrete functions and
-    operators.
+    A formula phrase represents a function or an operator call as
+    a phrase node.
 
-    `domain` (:class:`htsql.domain.Domain`)
-        The function co-domain.
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
+
+    `domain` (:class:`Domain`)
+        The co-domain of the formula.
 
     `arguments` (a dictionary)
-        A mapping from argument names to argument values.  Among values,
-        we expect :class:`Phrase` objects or lists of :class:`Phrase` objects.
+        The arguments of the formula.
+
+        Note that all the arguments become attributes of the node object.
     """
 
     def __init__(self, signature, domain, is_nullable, expression, **arguments):
         assert isinstance(signature, Signature)
+        # Check that the arguments match the formula signature.
         arguments = Bag(**arguments)
         assert arguments.admits(Phrase, signature)
         equality_vector = (signature, domain, arguments.freeze())
+        # The first two arguments are processed by the `Formula`
+        # constructor; the rest of them go to the `Phrase` constructor.
         super(FormulaPhrase, self).__init__(signature, arguments,
                                             domain, is_nullable, expression,
                                             equality_vector)

src/htsql/tr/parse.py

         The input HTSQL expression.
     """
 
+    # FIXME: get rid of the metaclass and `<<`.  Implement `Parser`
+    # as an adapter with matching by rule of an LL(n) grammar.
+
     class __metaclass__(type):
         # Implements a shortcut:
         #   Parser << tokens
 
 class IdentifierParser(Parser):
     """
-    Parser an `identifier` production.
+    Parses an `identifier` production.
     """
 
     @classmethod

src/htsql/tr/reduce.py

 """
 
 
-from ..adapter import Adapter, adapts, adapts_many
-from ..domain import (Domain, BooleanDomain, IntegerDomain, FloatDomain,
-                      DecimalDomain, StringDomain, EnumDomain, DateDomain)
+from ..adapter import Adapter, adapts
+from ..domain import BooleanDomain, StringDomain
 from .coerce import coerce
 from .frame import (Clause, Frame, ScalarFrame, TableFrame, BranchFrame,
-                    NestedFrame, QueryFrame, Phrase, LiteralPhrase,
-                    NullPhrase, TruePhrase, FalsePhrase,
-                    CastPhrase, FormulaPhrase,
+                    NestedFrame, QueryFrame, Phrase, LiteralPhrase, NullPhrase,
+                    TruePhrase, FalsePhrase, CastPhrase, FormulaPhrase,
                     ExportPhrase, ReferencePhrase, Anchor, LeadingAnchor)
 from .signature import (Signature, isformula, IsEqualSig, IsTotallyEqualSig,
-                        IsInSig, IsNullSig, IfNullSig, NullIfSig, AndSig,
-                        OrSig, NotSig)
+                        IsInSig, IsNullSig, IfNullSig, NullIfSig,
+                        AndSig, OrSig, NotSig)
 
 
 class ReducingState(object):
             # Indicate that the first anchor in the tail is now
             # a leading anchor.
             if tail:
-                tail[0] = LeadingAnchor(tail[0].frame)
+                tail[0] = tail[0].clone_to(LeadingAnchor)
             # Make a new frame with a reduced `FROM` clause.
             frame = self.frame.clone(include=tail)
             # Try to further collapse the frame.
         # Merge the `FROM` clause of the head with the rest of the `FROM`
         # clause of the frame.
         if not head.include and tail:
-            tail[0] = LeadingAnchor(tail[0].frame)
+            tail[0] = tail[0].clone_to(LeadingAnchor)
         include = head.include+tail
 
         # Merge the embedded subframes.
     adapts(CastPhrase)
 
     def __call__(self):
-        # We use an auxiliary adapter `Convert` to dispatch `reduce()`
-        # basing on the origin and the target domains of the cast.
-        convert = Convert(self.phrase, self.state)
-        return convert()
-
-
-class Convert(Adapter):
-    """
-    Reduces a ``CAST`` operator.
-
-    This is an auxiliary adapter used to reduce
-    :class:`htsql.tr.frame.CastPhrase` nodes.  The adapter is polymorphic
-    on the origin and the target domains.
-
-    When possible, the adapter expresses the cast in terms of other
-    operators or eliminates the cast completely.  Otherwise, the adapter
-    just reduces the operand of the cast.
-
-    `phrase` (:class:`htsql.tr.frame.CastPhrase`)
-        The cast phrase to reduce.
-
-    `state` (:class:`ReducingState`)
-        The current state of the reducing process.
-    """
-
-    adapts(Domain, Domain)
-
-    @classmethod
-    def dispatch(interface, phrase, *args, **kwds):
-        # Override the standard producer of a dispatch key;
-        # instead dispatch by the origin and the target domains.
-        assert isinstance(phrase, CastPhrase)
-        return (type(phrase.base.domain), type(phrase.domain))
-
-    def __init__(self, phrase, state):
-        assert isinstance(phrase, CastPhrase)
-        assert isinstance(state, ReducingState)
-        self.phrase = phrase
-        self.base = phrase.base
-        self.domain = phrase.domain
-        self.state = state
-
-    def __call__(self):
-        # The default implementation simply reduces the operand.
-        base = self.state.reduce(self.base)
-        return self.phrase.clone(base=base)
-
-
-class ConvertToBoolean(Convert):
-    """
-    Reduces a cast to Boolean.
-    """
-
-    adapts(Domain, BooleanDomain)
-
-    def __call__(self):
-        # In general,
-        #   boolean(base) => !is_null(base)
-        # There could be different implementations for specific
-        # origin domains.
-        phrase = FormulaPhrase(IsNullSig(-1), self.domain, False,
-                               self.phrase.expression, op=self.base)
-        # We still need to reduce the phrase.
-        return self.state.reduce(phrase)
-
-
-class ConvertStringToBoolean(Convert):
-    """
-    Reduces a cast from a string to Boolean.
-    """
-
-    adapts(StringDomain, BooleanDomain)
-
-    def __call__(self):
-        # An empty and a `NULL` strings are considered `FALSE`, all the
-        # other strings are converted to `TRUE`.
-
-        # Handle the case when the operand is a literal:
-        #   boolean(string(null())) => false()
-        #   boolean(string('')) => false()
-        #   boolean(string('...')) => true()
-        # We assume that an empty string in SQL always corresponds to
-        # an empty string in Python.
-        if isinstance(self.base, LiteralPhrase):
-            if self.base.value is None or self.base.value == '':
-                return FalsePhrase(self.phrase.expression)
-            else:
-                return TruePhrase(self.phrase.expression)
-        # If the operand is nullable, then:
-        #   boolean(base) => !is_null(null_if(base, ''))
-        # Otherwise:
-        #   boolean(base) => (base!='')
-        empty = LiteralPhrase('', coerce(StringDomain()),
-                              self.phrase.expression)
-        if not self.base.is_nullable:
-            phrase = FormulaPhrase(IsEqualSig(-1), self.domain,
-                                   False, self.phrase.expression,
-                                   lop=self.base, rop=empty)
-        else:
-            phrase = FormulaPhrase(NullIfSig(), self.base.domain,
-                                   True, self.phrase.expression,
-                                   lop=self.base, rop=empty)
-            phrase = FormulaPhrase(IsNullSig(-1), self.domain,
-                                   False, self.phrase.expression, op=phrase)
-
-        # We still need to reduce the expression.
-        return self.state.reduce(phrase)
-
-
-class ConvertDomainToItself(Convert):
-    """
-    Reduces a cast when the origin and the target domains coincide.
-    """
-
-    adapts_many((BooleanDomain, BooleanDomain),
-                (IntegerDomain, IntegerDomain),
-                (FloatDomain, FloatDomain),
-                (DecimalDomain, DecimalDomain),
-                (StringDomain, StringDomain),
-                (DateDomain, DateDomain))
-    # FIXME: not sure if adding `EnumDomain` to this list is
-    # safe and/or necessary.
-
-    # Note: adding a new domain likely requires a similar implementation
-    # of the `Convert` adapter.
-
-    def __call__(self):
-        # Eliminate the cast operator, return a (reduced) operand.
-        return self.state.reduce(self.base)
+        # Reduce the operand of the cast.  We do not specialize
+        # on the domains here because we assume that any domain
+        # specific conversion is already done by the encoder.
+        return self.phrase.clone(base=self.state.reduce(self.phrase.base))
 
 
 class ReduceFormula(Reduce):
+    """
+    Reduces a formula node.
+
+    Reducing a formula is specific to the formula signature and is
+    implemented by the :class:`ReduceBySignature` adapter.
+    """
 
     adapts(FormulaPhrase)
 
     def __call__(self):
+        # Delegate the reduction to the `ReduceBySignature` adapter.
         reduce = ReduceBySignature(self.phrase, self.state)
         return reduce()
 
 
 class ReduceBySignature(Adapter):
+    """
+    Reduces a formula node.
+
+    This is an auxiliary adapter used to reduce
+    :class:`htsql.tr.frame.FormulaPhrase` nodes.  The adapter is polymorphic
+    on the formula signature.
+
+    Unless overridden, the adapter reduces the arguments of the formula
+    and generates a new formula with the same signature.
+
+    `phrase` (:class:`htsql.tr.frame.FormulaPhrase`)
+        The formula node to reduce.
+
+    `state` (:class:`ReducingState`)
+        The current state of the reducing process.
+
+    Aliases:
+
+    `signature` (:class:`htsql.tr.signature.Signature`)
+        The signature of the formula.
+
+    `domain` (:class:`htsql.domain.Domain`)
+        The co-domain of the formula.
+
+    `arguments` (:class:`htsql.tr.signature.Bag`)
+        The arguments of the formula.
+
+    `is_nullable` (Boolean)
+        Indicates that the formula may produce a ``NULL`` value.
+    """
 
     adapts(Signature)
 
     @classmethod
     def dispatch(interface, phrase, *args, **kwds):
+        # Override the default dispatch since the adapter is polymorphic
+        # not on the type of the formula, but on the type of the formula
+        # signature.
         assert isinstance(phrase, FormulaPhrase)
         return (type(phrase.signature),)
 
         self.is_nullable = phrase.is_nullable
 
     def __call__(self):
+        # By default, just reduce the arguments of the formula.
         arguments = self.arguments.map(self.state.reduce)
         return FormulaPhrase(self.signature,
                              self.domain,
                 return FalsePhrase(self.phrase.expression)
 
         # None of specific optimizations were applied, just return
-        # the same operator with reduced operands.
-        return self.phrase.clone(lop=lop, rop=rop)
+        # the same operator with reduced operands.  Update the `is_nullable`
+        # status since it may change after reducing the arguments.
+        is_nullable = (lop.is_nullable or rop.is_nullable)
+        return self.phrase.clone(is_nullable=is_nullable, lop=lop, rop=rop)
 
 
 class ReduceIsTotallyEqual(ReduceBySignature):
         return self.phrase.clone(lop=lop, rop=rop)
 
 
+class ReduceIsIn(ReduceBySignature):
+    """
+    Reduces the ``IN`` and ``NOT IN`` clauses.
+    """
+
+    adapts(IsInSig)
+
+    def __call__(self):
+        # Reduce the left operand.
+        lop = self.state.reduce(self.phrase.lop)
+        # Reduce the right operands, eliminating duplicates.
+        rops = []
+        duplicates = set()
+        for rop in self.phrase.rops:
+            rop = self.state.reduce(rop)
+            if rop in duplicates:
+                continue
+            rops.append(rop)
+            duplicates.add(rop)
+
+        # Reduce:
+        #   null()={...} => null()
+        # We could do this substitution safely only when all operands
+        # on the right are literals.
+        if isinstance(lop, NullPhrase):
+            if all(isinstance(rop, LiteralPhrase) for rop in rops):
+                return NullPhrase(self.domain, self.phrase.expression)
+        # Similarly, reduce:
+        #   x={null(),null(),...}
+        if all(isinstance(rop, NullPhrase) for rop in rops):
+            if isinstance(lop, LiteralPhrase):
+                return NullPhrase(self.domain, self.phrase.expression)
+
+        # Reduce:
+        #   x={y} => x=y
+        if len(rops) == 1:
+            rop = [rops]
+            signature = IsEqualSig(self.signature.polarity)
+            is_nullable = (lop.is_nullable or rop.is_nullable)
+            return FormulaPhrase(signature, self.domain, is_nullable,
+                                 self.phrase.expression, lop=lop, rop=rop)
+
+        # None of specific optimizations were applied, just return
+        # the same operator with reduced operands.  Update the `is_nullable`
+        # status since it may change after reducing the arguments.
+        is_nullable = (lop.is_nullable or any(rop.is_nullable for rop in rops))
+        return self.phrase.clone(is_nullable=is_nullable, lop=lop, rops=rops)
+
+
+class ReduceIsNull(ReduceBySignature):
+    """
+    Reduces the ``IS NULL`` and ``IS NOT NULL`` clauses.
+    """
+
+    adapts(IsNullSig)
+
+    def __call__(self):
+        # Start with reducing the operand.
+        op = self.state.reduce(self.phrase.op)
+
+        # Reduce:
+        #   is_null(null()) => true()
+        #   !is_null(null()) => false()
+        if isinstance(op, NullPhrase):
+            if self.signature.polarity > 0:
+                return TruePhrase(self.phrase.expression)
+            else:
+                return FalsePhrase(self.phrase.expression)
+        # If the operand is not nullable, we could reduce the operator
+        # to a `TRUE` or a `FALSE` clause.  However it is only safe
+        # to do for a literal operand.
+        if isinstance(op, LiteralPhrase):
+            if self.signature.polarity > 0:
+                return FalsePhrase(self.phrase.expression)
+            else:
+                return TruePhrase(self.phrase.expression)
+
+        # Return the same operator with a reduced operand.
+        return self.phrase.clone(op=op)
+
+
+class ReduceIfNull(ReduceBySignature):
+    """
+    Reduces the ``IFNULL`` clause.
+    """
+
+    adapts(IfNullSig)
+
+    def __call__(self):
+        # Reduce the operands.
+        lop = self.state.reduce(self.phrase.lop)
+        rop = self.state.reduce(self.phrase.rop)
+
+        # If the first operand is not nullable, then the operation is no-op,
+        # and we could just return the first operand discarding the second
+        # one.  However discarding a clause is not safe in general, so we
+        # only do that when the second operand is a literal.
+        if not lop.is_nullable and isinstance(rop, LiteralPhrase):
+            return lop
+        # Reduce:
+        #   if_null(lop,null()) => lop
+        if isinstance(rop, NullPhrase):
+            return lop
+        # Reduce:
+        #   if_null(null(),rop) => rop
+        if isinstance(lop, NullPhrase):
+            return rop
+
+        # Return the same operator with reduced operands.
+        is_nullable = (lop.is_nullable and rop.is_nullable)
+        return self.phrase.clone(is_nullable=is_nullable, lop=lop, rop=rop)
+
+
+class ReduceNullIf(ReduceBySignature):
+    """
+    Reduces the ``NULLIF`` clause.
+    """
+
+    adapts(NullIfSig)
+
+    def __call__(self):
+        # Reduce the operands.
+        lop = self.state.reduce(self.phrase.lop)
+        rop = self.state.reduce(self.phrase.rop)
+        # Reduce (when it is safe, i.e., when `rop` is a literal):
+        #   null_if(null(),rop) => null()
+        if isinstance(lop, NullPhrase):
+            if isinstance(rop, LiteralPhrase):
+                return lop
+        # Reduce:
+        #   null_if(lop,null()) => lop
+        if isinstance(rop, NullPhrase):
+            return lop
+        # When both operands are literals, we could determine the result
+        # immediately.  We should be careful though since we cannot precisely
+        # mimic the equality operator of the database.
+        if isinstance(lop, LiteralPhrase) and isinstance(rop, LiteralPhrase):
+            # Assume that if the literals are equal in Python, they would
+            # be equal for the database too.  The reverse is not valid in
+            # general, but still valid for some literals.
+            if lop.value == rop.value:
+                return NullPhrase(self.phrase.domain, self.phrase.expression)
+            # We could safely rely on comparison for Boolean values.
+            elif isinstance(self.phrase.domain, BooleanDomain):
+                return lop
+            # In general, we can't rely on comparison for string values,
+            # but we could assume that an empty string is only equal to itself.
+            elif isinstance(self.phrase.domain, StringDomain):
+                if len(lop.value) > 0 and len(rop.value) == 0:
+                    return lop
+
+        # Return the same operator with reduced operands.
+        return self.phrase.clone(lop=lop, rop=rop)
+
+
 class ReduceAnd(ReduceBySignature):
     """
     Reduces "AND" (``&``) operator.
             if all(isinstance(op, LiteralPhrase) for op in ops):
                 return FalsePhrase(self.phrase.expression)
 
-        # Return the same operator with reduced operands.
-        return self.phrase.clone(ops=ops)
+        # Return the same operator with reduced operands.  Update
+        # the `is_nullable` status since it could change after reducing
+        # the arguments.
+        is_nullable = any(op.is_nullable for op in ops)
+        if any(isinstance(op, FalsePhrase) for op in ops):
+            is_nullable = False
+        return self.phrase.clone(is_nullable=is_nullable, ops=ops)
 
 
 class ReduceOr(ReduceBySignature):
             if all(isinstance(op, LiteralPhrase) for op in ops):
                 return TruePhrase(self.phrase.expression)
 
-        # Return the same operator with reduced operands.
         return self.phrase.clone(ops=ops)
+        # Return the same operator with reduced operands.  Update
+        # the `is_nullable` status since it could change after reducing
+        # the arguments.
+        is_nullable = any(op.is_nullable for op in ops)
+        if any(isinstance(op, TruePhrase) for op in ops):
+            is_nullable = False
+        return self.phrase.clone(is_nullable=is_nullable, ops=ops)
 
 
 class ReduceNot(ReduceBySignature):
         # Reverse polarity of equality operators:
         #   !(lop=rop) => lop!=rop
         #   ...
-        if isformula(op, (IsEqualSig, IsTotallyEqualSig, IsNullSig)):
+        if isformula(op, (IsEqualSig, IsTotallyEqualSig, IsInSig, IsNullSig)):
             return op.clone(signature=op.signature.reverse())
 
         # Return the same operator with a reduced operand.
-        return self.phrase.clone(op=op)
-
-
-class ReduceIsNull(ReduceBySignature):
-    """
-    Reduces ``IS NULL`` and ``IS NOT NULL`` clauses.
-    """
-
-    adapts(IsNullSig)
-
-    def __call__(self):
-        # Start with reducing the operand.
-        op = self.state.reduce(self.phrase.op)
-
-        # Reduce:
-        #   is_null(null()) => true()
-        #   !is_null(null()) => false()
-        if isinstance(op, NullPhrase):
-            if self.signature.polarity > 0:
-                return TruePhrase(self.phrase.expression)
-            else:
-                return FalsePhrase(self.phrase.expression)
-        # If the operand is not nullable, we could reduce the operator
-        # to a `TRUE` or a `FALSE` clause.  However it is only safe
-        # to do for a literal operand.
-        if isinstance(op, LiteralPhrase):
-            if self.signature.polarity > 0:
-                return FalsePhrase(self.phrase.expression)
-            else:
-                return TruePhrase(self.phrase.expression)
-
-        # Return the same operator with a reduced operand.
-        return self.phrase.clone(op=op)
-
-
-class ReduceIfNull(ReduceBySignature):
-    """
-    Reduces an ``IFNULL`` clause.
-    """
-
-    adapts(IfNullSig)
-
-    def __call__(self):
-        # Reduce the operands.
-        lop = self.state.reduce(self.phrase.lop)
-        rop = self.state.reduce(self.phrase.rop)
-
-        # If the first operand is not nullable, then the operation is no-op,
-        # and we could just return the first operand discarding the second
-        # one.  However discarding a clause is not safe in general, so we
-        # only do that when the second operand is a literal.
-        if not lop.is_nullable and isinstance(rop, LiteralPhrase):
-            return lop
-        # Reduce:
-        #   if_null(lop,null()) => lop
-        if isinstance(rop, NullPhrase):
-            return lop
-        # Reduce:
-        #   if_null(null(),rop) => rop
-        if isinstance(lop, NullPhrase):
-            return rop
-
-        # Return the same operator with reduced operands.
-        return self.phrase.clone(lop=lop, rop=rop)
-
-
-class ReduceNullIf(ReduceBySignature):
-    """
-    Reduces a ``NULLIF`` clause.
-    """
-
-    adapts(NullIfSig)
-
-    def __call__(self):
-        # Reduce the operands.
-        lop = self.state.reduce(self.phrase.lop)
-        rop = self.state.reduce(self.phrase.rop)
-        # Reduce (when it is safe, i.e., when `rop` is a literal):
-        #   null_if(null(),rop) => null()
-        if isinstance(lop, NullPhrase):
-            if isinstance(rop, LiteralPhrase):
-                return lop
-        # Reduce:
-        #   null_if(lop,null()) => lop
-        if isinstance(rop, NullPhrase):
-            return lop
-        # When both operands are literals, we could determine the result
-        # immediately.  We should be careful though since we cannot precisely
-        # mimic the equality operator of the database.
-        if isinstance(lop, LiteralPhrase) and isinstance(rop, LiteralPhrase):
-            # Assume that if the literals are equal in Python, they would
-            # be equal for the database too.  The reverse is not valid in
-            # general, but still valid for boolean literals.
-            if lop.value == rop.value:
-                return NullPhrase(self.phrase.domain, self.phrase.expression)
-            elif isinstance(self.phrase.domain, BooleanDomain):
-                return lop
-
-        # Return the same operator with reduced operands.
-        return self.phrase.clone(lop=lop, rop=rop)
+        return self.phrase.clone(is_nullable=op.is_nullable, op=op)
 
 
 class ReduceExport(Reduce):
 
 
 class ReduceReference(Reduce):
+    """
+    Reduces a reference phrase.
+    """
 
     adapts(ReferencePhrase)
 

src/htsql/tr/signature.py

 """
 :mod:`htsql.tr.signature`
 =========================
+
+This module defines formula nodes and formula signatures.
 """
 
 
-from ..util import maybe, listof, Comparable, Clonable
+from ..util import maybe, listof, Comparable, Clonable, Printable
 
 
 class Slot(object):
+    """
+    Represents a formula slot.
+
+    A slot is a parameter of a formula.  A slot is to be filled
+    with an argument value when a formula node is instantiated.
+
+    `name` (a string)
+        The name of the argument.
+
+    `is_mandatory` (Boolean)
+        Indicates that the slot requires at least one value.
+
+    `is_singular` (Boolean)
+        Indicates that the slot accepts no more than one value.
+    """
 
     def __init__(self, name, is_mandatory=True, is_singular=True):
+        # Sanity check on the arguments.
         assert isinstance(name, str) and len(name) > 0
         assert isinstance(is_mandatory, bool)
         assert isinstance(is_singular, bool)
+
         self.name = name
         self.is_mandatory = is_mandatory
         self.is_singular = is_singular
 
 
-class Signature(Comparable, Clonable):
+class Signature(Comparable, Clonable, Printable):
+    """
+    Represents a formula signature.
 
+    A signature identifies the type of a formula.  In particular,
+    a signature describes all slots of the formula.
+
+    Class attributes:
+
+    `slots` (a list of :class:`Slot`)
+        The formula slots.
+
+    Constructor arguments:
+
+    `equality_vector` (an immutable tuple)
+        Encapsulates all essential attributes of a signature.
+
+        Two signatures are considered equal if they are of the same type
+        and their equality vectors coincide.
+    """
+
+    # Override in subclasses.
     slots = []
 
     def __init__(self, equality_vector=()):
         super(Signature, self).__init__(equality_vector=equality_vector)
 
-    def __iter__(self):
-        return iter(self.slots)
+    def __str__(self):
+        return self.__class__.__name__
 
 
 class Bag(dict):
+    """
+    Encapsulates formula arguments.
 
-    def __init__(self, **keywords):
-        self.update(keywords)
+    `arguments` (a dictionary)
+        Maps slot names to argument values.
+
+        Depending on the slot type, a value could be one of:
+        - a node or ``None`` for singular slots;
+        - a list of nodes for plural slots.
+
+        A missing argument is indicated by ``None`` for a singular
+        slot or by an empty list for a plural slot.  Missing
+        arguments are not allowed for mandatory slots.
+
+    :class:`Bag` provides a mapping interface to `arguments`.
+    """
+
+    # FIXME: respect the order of slots in the signature.
+
+    def __init__(self, **arguments):
+        # Initialize the underlying dictionary.
+        self.update(arguments)
 
     def admits(self, kind, signature):
+        """
+        Verifies that the arguments match the given signature.
+
+        Returns ``True`` if the arguments match the given signature,
+        ``False`` otherwise.
+
+        `kind` (a type)
+            The expected type of value nodes.
+
+        `signature` (:class:`Signature`)
+            The expected signature of the arguments.
+        """
+        # Sanity check on the arguments.
         assert isinstance(kind, type)
         assert (isinstance(signature, Signature) or
                 issubclass(signature, Signature))
+
+        # Verify that the arguments match the slot names.
         if set(self.keys()) != set(slot.name for slot in signature.slots):
             return False
+
+        # Check every slot.
         for slot in signature.slots:
+            # The argument.
             value = self[slot.name]
+
+            # A value of a singular slot must be a node of the given
+            # type or ``None``; a value equal to ``None`` is allowed
+            # only for optional slots.
             if slot.is_singular:
                 if not isinstance(value, maybe(kind)):
                     return False
                 if slot.is_mandatory:
                     if value is None:
                         return False
+            # A value of a plural slot must be a list of nodes of the
+            # given type and, unless the slot is optional, must contain
+            # at least one node.
             else:
                 if not isinstance(value, listof(kind)):
                     return False
                 if slot.is_mandatory:
                     if not value:
                         return False
+
+        # All checks passed.
         return True
 
     def cells(self):
+        """
+        Returns a list of all subnodes.
+
+        This function extracts all (singular) nodes from the arguments.
+        """
+        # A list of nodes.
         cells = []
+        # Iterate over all the arguments.
         for key in sorted(self.keys()):
+            # A value: could be ``None``, a node or a list of nodes.
             value = self[key]
             if value is not None:
                 if isinstance(value, list):
         return cells
 
     def impress(self, owner):
+        """
+        Adds the arguments as attributes to the given object.
+
+        `owner` (a node object)
+            An object to update.
+        """
+        # Iterate through all the arguments.
         for key in sorted(self.keys()):
+            # Make sure we do not override an existing attribute.
             assert not hasattr(owner, key)
+            # Impress the argument to the object.
             setattr(owner, key, self[key])
 
     def map(self, method):
-        keywords = {}
+        """
+        Applies the given function to all subnodes.
+
+        Returns a new :class:`Bag` instance of the same shape
+        composed from the results of the `method` application
+        to every value node.
+
+        `method` (a callable)
+            A function to apply.
+        """
+        # The result of the `method` applications.
+        arguments = {}
+        # Iterate over all the arguments.
         for key in sorted(self.keys()):
+            # An argument value: `None`, a node, or a list of nodes.
             value = self[key]
+            # Apply `method` to `value`.
             if value is not None:
                 if isinstance(value, list):
                     value = [method(item) for item in value]
                 else:
                     value = method(value)
-            keywords[key] = value
-        return self.__class__(**keywords)
+            arguments[key] = value
+        # Produce a new `Bag` instance with updated arguments.
+        return self.__class__(**arguments)
 
     def freeze(self):
+        """
+        Returns an immutable container with all the argument values.
+
+        This function is useful for constructing an equality vector
+        of a formula node.
+        """
+        # An ordered list of the (frozen) argument values.
         values = []
+        # Iterate over the arguments; freeze mutable objects
+        # (i.e., convert a list to a tuple).
         for key in sorted(self.keys()):
             value = self[key]
             if isinstance(value, list):
                 value = tuple(value)
             values.append(value)
+        # Finally freeze and return the list itself.
         return tuple(values)
 
 
-class Formula(object):
+class Formula(Printable):
+    """
+    Represents a formula node.
+
+    This is a mixin class; it is mixed with :class:`htsql.tr.binding.Binding`,
+    :class:`htsql.tr.code.Code` and :class:`htsql.tr.frame.Phrase` to produce
+    respective formula node types.
+
+    `signature` (:class:`Signature`)
+        The formula signature.
+
+    `arguments` (:class:`Bag`)
+        The formula arguments; must be compatible with the signature.
+
+    The rest of the arguments are passed to the next base class constructor
+    unchanged.
+    """
 
     def __init__(self, signature, arguments, *args, **kwds):
         assert isinstance(signature, Signature)
+        # The caller is responsible for checking that the arguments
+        # are compatible with the signature.
         assert isinstance(arguments, Bag)
         super(Formula, self).__init__(*args, **kwds)
         self.signature = signature
         self.arguments = arguments
+        # Add an attribute for each argument.
         arguments.impress(self)
 
+    def __str__(self):
+        # Display:
+        #   Signature: ...
+        return "%s: %s" % (self.signature, super(Formula, self).__str__())
+
 
 def isformula(formula, signatures):
+    """
+    Checks if a node is a formula with the given signature.
+
+    The function returns ``True`` if the given node is a formula
+    and its signature is an instance of one of the given signature
+    classes (or of their subclasses); ``False`` otherwise.
+
+    `formula` (a node, possibly a :class:`Formula` node)
+        A node to check.
+
+    `signatures` (a subclass or a tuple of subclasses of :class:`Signature`)
+        The expected formula signature(s).
+    """
+    # Normalize the signatures.
     if not isinstance(signatures, tuple):
         signatures = (signatures,)
+    # Check that the given node is, indeed, a formula, and that
+    # its signature is among the given signature classes.
     return (isinstance(formula, Formula) and
             any(isinstance(formula.signature, signature)
                 for signature in signatures))
 
 
 class NullarySig(Signature):
+    """
+    Represents a signature with no slots.
+    """
 
     slots = []
 
 
 class UnarySig(Signature):
+    """
+    Represents a signature with one singular slot.
+    """
 
     slots = [
             Slot('op'),
 
 
 class BinarySig(Signature):
+    """
+    Represents a signature with two singular slots.
+    """
 
     slots = [
             Slot('lop'),
 
 
 class NArySig(Signature):
+    """
+    Represents a signature with one singular slot and one plural slot.
+    """
 
     slots = [
             Slot('lop'),
 
 
 class ConnectiveSig(Signature):
+    """
+    Represents a signature with one plural slot.
+    """
 
     slots = [
             Slot('ops', is_singular=False),
 
 
 class PolarSig(Signature):
+    """
+    Denotes a formula with two forms: positive and negative.
+
+    `polarity` (``+1`` or ``-1``)
+        Indicates the form of the formula: ``+1`` for positive,
+        ``-1`` for negative.
+    """
 
     def __init__(self, polarity):
         assert polarity in [+1, -1]
         self.polarity = polarity
 
     def reverse(self):
+        """
+        Returns the signature with the opposite polarity.
+        """
         return self.clone(polarity=-self.polarity)
 
+    def __str__(self):
+        # Display:
+        #   Signature(+/-)
+        return "%s(%s)" % (self.__class__.__name__,
+                           '+' if self.polarity > 0 else '-')
+
 
 class IsEqualSig(BinarySig, PolarSig):
-    pass
+    """
+    Denotes an equality (``=``) or an inequality (``!=``) operator.
+    """
 
 
 class IsTotallyEqualSig(BinarySig, PolarSig):
-    pass
+    """
+    Denotes a total equality (``==`` and ``!==``) operator.
+    """
 
 
 class IsInSig(NArySig, PolarSig):
-    pass
+    """
+    Denotes an N-ary equality (``={}`` and ``!={}``) operator.
+    """
 
 
 class IsNullSig(UnarySig, PolarSig):
-    pass
+    """
+    Denotes an ``is_null()`` operator.
+    """
 
 
 class IfNullSig(BinarySig):
-    pass
+    """
+    Denotes an ``if_null()`` operator.
+    """
 
 
 class NullIfSig(BinarySig):
-    pass
+    """
+    Denotes a ``null_if()`` operator.
+    """
 
 
 class CompareSig(BinarySig):
+    """
+    Denotes a comparison operator.
+
+    `relation` (one of: ``'<'``, ``'<='``, ``'>'``, ``'>='``)
+        Indicates the comparison relation.
+    """
 
     def __init__(self, relation):
         assert relation in ['<', '<=', '>', '>=']
 
 
 class AndSig(ConnectiveSig):
-    pass
+    """
+    Denotes a Boolean "AND" (``&``) operator.
+    """
 
 
 class OrSig(ConnectiveSig):
-    pass