Commits

Matt Chaput committed 7219be7

Cleanups and additions to query inspection.
Added debugging back to query parser.
Fixed lack of startchar/endchar on some syntax nodes.

  • Parent commits 650f540
  • Branches dawg

Files changed (9)

File src/whoosh/qparser/common.py

 parser modules.
 """
 
+from __future__ import print_function
 import re
 
 from whoosh.compat import string_type
         return t
 
 
-def wsyntax(qnode, stxnode):
-    qnode.syntax = stxnode
+def xfer(qnode, stxnode):
+    qnode.startchar = stxnode.startchar
+    qnode.endchar = stxnode.endchar
     return qnode
 
 
+def print_debug(level, msg):
+    if level:
+        print("  " * (level - 1), msg)
+
+
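
For reference, a minimal sketch of how the new print_debug helper behaves:
a falsy level prints nothing, and higher levels indent one step per level.

    from whoosh.qparser.common import print_debug

    print_debug(0, "hidden")    # falsy level: prints nothing
    print_debug(1, "step")      # top level
    print_debug(2, "sub-step")  # indented one step deeper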

File src/whoosh/qparser/dateparse.py

         try:
             dt = self.dateparser.date_from(text, self.basedate)
             if dt is None:
-                node = self.errorize(text, node)
+                return self.errorize(text, node)
             else:
-                node = DateTimeNode(node.fieldname, dt, node.boost)
+                n = DateTimeNode(node.fieldname, dt, node.boost)
         except DateParseError:
             e = sys.exc_info()[1]
-            node = self.errorize(e, node)
-        
-        return node
+            n = self.errorize(e, node)
+        n.startchar = node.startchar
+        n.endchar = node.endchar
+        return n
     
     def range_to_dt(self, node):
         start = end = None
             end = end.disambiguated(self.basedate)
             if isinstance(end, timespan):
                 end = end.end
-        return DateRangeNode(node.fieldname, start, end, boost=node.boost)
+        drn = DateRangeNode(node.fieldname, start, end, boost=node.boost)
+        drn.startchar = node.startchar
+        drn.endchar = node.endchar
+        return drn
     
     def do_dates(self, parser, group):
         schema = parser.schema
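
With these changes the date nodes built here keep the character span of the
original text, so position-aware tools (for example, highlighting the part of
a query that failed to parse as a date) can locate them. A minimal sketch,
assuming a schema with a DATETIME field and the DateParserPlugin from this
package:

    from whoosh import fields, qparser
    from whoosh.compat import u
    from whoosh.qparser.dateparse import DateParserPlugin

    schema = fields.Schema(date=fields.DATETIME)
    qp = qparser.QueryParser("date", schema)
    qp.add_plugin(DateParserPlugin())

    q = qp.parse(u("date:'next tuesday'"))
    print(q.startchar, q.endchar)  # span of the date expression in the input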

File src/whoosh/qparser/default.py

 # policies, either expressed or implied, of Matt Chaput.
 
 from whoosh import query
-# QueryParser has a plugins argument that will shadow the "plugins" module, so
-# rename the module when we import it
 from whoosh.qparser import syntax
+from whoosh.qparser.common import print_debug
 
 
 # Query parser object
         
         return self._priorized("filters")
     
-    def tag(self, text, pos=0):
+    def tag(self, text, pos=0, debug=False):
         """Returns a group of syntax nodes corresponding to the given text,
         created by matching the Taggers provided by the parser's plugins.
         
         prev = pos
         # Priorized list of taggers provided by the parser's plugins
         taggers = self.taggers()
+        print_debug(debug, "Taggers: %r" % taggers)
         
         # Define a function that will make a WordNode from the "interstitial"
         # text between matches
                     if node.endchar <= pos:
                         raise Exception("Token %r did not move cursor forward. (%r, %s)" % (tagger, text, pos))
                     if prev < pos:
-                        stack.append(inter(prev, pos))
-                    
+                        tween = inter(prev, pos)
+                        print_debug(debug, "Tween: %r" % tween)
+                        stack.append(tween)
+                        
+                    print_debug(debug, "Tagger: %r at %s: %r" % (tagger, pos, node))
                     stack.append(node)
                     prev = pos = node.endchar
                     break
             stack.append(inter(prev, len(text)))
         
         # Wrap the list of nodes in a group node
-        return self.group(stack)
+        group = self.group(stack)
+        print_debug(debug, "Tagged group: %r" % group)
+        return group
     
-    def filterize(self, nodes):
+    def filterize(self, nodes, debug=False):
         """Takes a group of nodes and runs the filters provided by the parser's
         plugins.
         """
         
         # Call each filter in the priorized list of plugin filters
+        print_debug(debug, "Pre-filtered group: %r" % nodes)
         for f in self.filters():
+            print_debug(debug, "..Applying: %r" % f)
             nodes = f(self, nodes)
+            print_debug(debug, "..Result: %r" % nodes)
             if nodes is None:
                 raise Exception("Filter %r did not return anything" % f)
         return nodes
 
-    def process(self, text, pos=0):
+    def process(self, text, pos=0, debug=False):
         """Returns a group of syntax nodes corresponding to the given text,
         tagged by the plugin Taggers and filtered by the plugin filters.
         
         :param pos: the position in the text to start tagging at.
         """
         
-        nodes = self.tag(text, pos=pos)
-        nodes = self.filterize(nodes)
+        nodes = self.tag(text, pos=pos, debug=debug)
+        nodes = self.filterize(nodes, debug=debug)
         return nodes
 
-    def parse(self, text, normalize=True):
+    def parse(self, text, normalize=True, debug=False):
         """Parses the input string and returns a :class:`whoosh.query.Query`
         object/tree. 
         
         :rtype: :class:`whoosh.query.Query`
         """
         
-        tree = self.process(text)
-        q = tree.query(self)
+        nodes = self.process(text, debug=debug)
+        print_debug(debug, "Syntax tree: %r" % nodes)
+        q = nodes.query(self)
+        print_debug(debug, "Pre-normalized query: %r" % q)
         if normalize:
             q = q.normalize()
+            print_debug(debug, "Normalized query: %r" % q)
         return q
+    
+    def parse_(self, text, normalize=True):
+        pass
 
 
 # Premade parser configurations
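
As the commit message says, this restores debug tracing to the parser:
passing debug=True to parse() threads down through process(), tag(), and
filterize(), printing each tagger match, each filter application, and the
query before and after normalization. A usage sketch:

    from whoosh import qparser
    from whoosh.compat import u

    qp = qparser.QueryParser("text", None)
    q = qp.parse(u("alfa OR bravo"), debug=True)
    # Prints, roughly: the priorized tagger list, each tagged node and
    # interstitial word node, the group before and after each filter, and
    # the pre- and post-normalization queries.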

File src/whoosh/qparser/plugins.py

 from whoosh import query
 from whoosh.compat import iteritems, u
 from whoosh.qparser import syntax
-from whoosh.qparser.common import rcompile, wsyntax
+from whoosh.qparser.common import rcompile, xfer
 from whoosh.qparser.taggers import RegexTagger, FnTagger
 
 
     >>> q = qp.parse("hello there^2")    
     """
     
-    expr = "\\^(?P<boost>[0-9]*(\\.[0-9]+)?)($|(?=[ \t\r\n]))"
+    expr = "\\^(?P<boost>[0-9]*(\\.[0-9]+)?)($|(?=[ \t\r\n)]))"
     
     class BoostNode(syntax.SyntaxNode):
         def __init__(self, original, boost):
             
             qclass = parser.phraseclass
             q = qclass(fieldname, words, slop=self.slop, boost=self.boost)
-            return wsyntax(q, self)
+            return xfer(q, self)
     
     class PhraseTagger(RegexTagger):
         def create(self, parser, matcher):
                     # If previous was a fieldname and next node has text
                     if isinstance(prevnode, fname) and nextnode.has_text:
                         # Make the next node into a range based on the symbol
-                        newgroup.append(self.make_range(nextnode.text, node.rel))
+                        newgroup.append(self.make_range(nextnode, node.rel))
                         # Skip the next node
                         i += 1
             else:
         
         return newgroup
             
-    def make_range(self, text, rel):
+    def make_range(self, node, rel):
+        text = node.text
         if rel == "<":
-            return syntax.RangeNode(None, text, False, True)
+            n = syntax.RangeNode(None, text, False, True)
         elif rel == ">":
-            return syntax.RangeNode(text, None, True, False)
+            n = syntax.RangeNode(text, None, True, False)
         elif rel == "<=" or rel == "=<":
-            return syntax.RangeNode(None, text, False, False)
+            n = syntax.RangeNode(None, text, False, False)
         elif rel == ">=" or rel == "=>":
-            return syntax.RangeNode(text, None, False, False)
+            n = syntax.RangeNode(text, None, False, False)
+        n.startchar = node.startchar
+        n.endchar = node.endchar
+        return n
 
 
 class MultifieldPlugin(Plugin):
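
Two behavioral changes in this file: the boost pattern now also stops at ')',
so a boost can appear just before a closing parenthesis (exercised by
test_boosts below), and make_range() now receives the whole node, so the
resulting RangeNode keeps the source span of the word it was built from.
A sketch, assuming the GtLtPlugin defined in this module:

    from whoosh import qparser
    from whoosh.compat import u

    qp = qparser.QueryParser("t", None)
    qp.add_plugin(qparser.GtLtPlugin())

    q = qp.parse(u("(bravo^2) charlie"))  # '^2' is now recognized before ')'
    q = qp.parse(u("t:>=100"))
    print(q.startchar, q.endchar)         # span carried over from the word node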

File src/whoosh/qparser/syntax.py

 # policies, either expressed or implied, of Matt Chaput.
 
 from whoosh import query
-from whoosh.qparser.common import (get_single_text, QueryParserError,
-                                   wsyntax)
+from whoosh.qparser.common import get_single_text, QueryParserError, xfer
 
 
 class SyntaxNode(object):
     
     @property
     def startchar(self):
+        if not self.nodes:
+            return None
         return self.nodes[0].startchar
     
     @property
     def endchar(self):
+        if not self.nodes:
+            return None
         return self.nodes[-1].endchar
     
     def apply(self, fn):
     def query(self, parser):
         q = self.qclass([node.query(parser) for node in self.nodes],
                         boost=self.boost, **self.kwargs)
-        return wsyntax(q, self)
+        return xfer(q, self)
 
     def empty_copy(self):
         """Returns an empty copy of this group.
         q = self.qclass(self.nodes[0].query(parser),
                         self.nodes[1].query(parser),
                                    boost=self.boost)
-        return wsyntax(q, self)
+        return xfer(q, self)
 
 
 class Wrapper(GroupNode):
     merging = False
     
     def query(self, parser):
-        return wsyntax(self.qclass(self.nodes[0].query(parser)), self)
+        return xfer(self.qclass(self.nodes[0].query(parser)), self)
 
 
 class ErrorNode(SyntaxNode):
     def r(self):
         return "ERR %r %r" % (self.node, self.message)
     
+    @property
+    def startchar(self):
+        if self.node is None:
+            return None
+        return self.node.startchar
+    
+    @property
+    def endchar(self):
+        if self.node is None:
+            return None
+        return self.node.endchar
+    
     def query(self, parser):
         if self.node:
-            return wsyntax(self.node.query(parser), self.node)
+            return xfer(self.node.query(parser), self.node)
         else:
             return query.NullQuery
 
                                           self.startexcl, self.endexcl,
                                           boost=self.boost)
                     if q is not None:
-                        return wsyntax(q, self)
+                        return xfer(q, self)
                 except QueryParserError:
                     return query.NullQuery
             
         
         q = query.TermRange(fieldname, start, end, self.startexcl,
                             self.endexcl, boost=self.boost)
-        return wsyntax(q, self)
+        return xfer(q, self)
 
 
 class TextNode(SyntaxNode):
         q = parser.term_query(fieldname, self.text, termclass,
                               boost=self.boost, tokenize=self.tokenize,
                               removestops=self.removestops)
-        return wsyntax(q, self)
+        return xfer(q, self)
 
 
 class WordNode(TextNode):
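
Because xfer() copies startchar/endchar onto the generated query instead of
attaching the whole syntax node (as the old wsyntax() did), any parsed query
can report the span of input text it came from. A small sketch:

    from whoosh import qparser
    from whoosh.compat import u

    qp = qparser.QueryParser("content", None)
    q = qp.parse(u("hello world"))
    for leaf in q.leaves():  # leaves() is added in query.py below
        print(leaf, leaf.startchar, leaf.endchar)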

File src/whoosh/query.py

     pass
 
 
+# Functions
+
+def query_lists(q):
+    """Returns the leaves of the query tree, with the query hierarchy
+    represented as nested lists.
+    """
+    
+    if q.is_leaf():
+        return q
+    else:
+        return [query_lists(qq) for qq in q.children()]
+
+
+def term_lists(q, phrases=True):
+    """Returns the terms in the query tree, with the query hierarchy
+    represented as nested lists.
+    """
+    
+    if q.is_leaf():
+        if phrases or not isinstance(q, Phrase):
+            return list(q.terms())
+        return []
+    else:
+        ls = []
+        for qq in q.children():
+            t = term_lists(qq, phrases=phrases)
+            if len(t) == 1:
+                t = t[0]
+            if t:
+                ls.append(t)
+        return ls
+
+
 # Utility classes
 
 class Lowest(object):
         And([Term("content", u"a"), Not(Term("content", u"b"))])
     """
 
-    syntax = None
+    # For queries produced by the query parser, record where in the user
+    # query this object originated
+    startchar = endchar = None
 
     def __or__(self, query):
         """Allows you to use | between query objects to wrap them in an Or
         
         return True
 
+    def children(self):
+        """Returns an iterator of the subqueries of this object.
+        """
+        
+        return iter([])
+
+    def is_range(self):
+        """Returns True if this object searches for values within a range.
+        """
+        
+        return False
+
+    def has_terms(self):
+        """Returns True if this specific object represents a search for a
+        specific term or terms (as opposed to a pattern, as in Wildcard and
+        Prefix), i.e. whether the ``terms()`` method yields anything and the
+        ``replace()`` method does something meaningful on this instance.
+        """
+        
+        return False
+
     def apply(self, fn):
         """If this query has children, calls the given function on each child
         and returns a new copy of this node with the new children returned by
         *does not* modify the original query "in place".
         """
         
+        # The default implementation uses the apply method to "pass down" the
+        # replace() method call
         if self.is_leaf():
             return copy(self)
         else:
             return self.apply(methodcaller("replace", oldtext, newtext))
 
-    def all_terms(self, termset=None, phrases=True):
-        """Returns a set of all terms in this query tree.
-        
-        This method simply operates on the query itself, without reference to
-        an index (unlike existing_terms()), so it will *not* add terms that
-        require an index to compute, such as Prefix and Wildcard.
-        
-        >>> q = And([Term("content", u"render"), Term("path", u"/a/b")])
-        >>> q.all_terms()
-        set([("content", u"render"), ("path", u"/a/b")])
-        
-        :param phrases: Whether to add words found in Phrase queries.
-        :rtype: set
-        """
-
-        if termset is None:
-            termset = set()
-        self._all_terms(termset, phrases=phrases)
-        return termset
-
     def copy(self):
         """Deprecated, just use ``copy.deepcopy``.
         """
         
         return copy.deepcopy(self)
 
-    def _all_terms(self, *args, **kwargs):
-        # To be implemented in sub-classes
-        return
-
+    def all_terms(self, termset=None, phrases=True):
+        """Returns a set of all terms in this query tree.
+        
+        This method exists for backwards compatibility. For more flexibility
+        use the :meth:`Query.iter_all_terms` method instead, which simply yields
+        the terms in the query.
+        
+        :param phrases: Whether to add words found in Phrase queries.
+        :rtype: set
+        """
+        
+        if termset is None:
+            termset = set()
+        for q in self.leaves():
+            if q.has_terms():
+                if phrases or not isinstance(q, Phrase):
+                    termset.update(q.terms())
+        return termset
+        
     def existing_terms(self, ixreader, termset=None, reverse=False,
                        phrases=True):
         """Returns a set of all terms in this query tree that exist in the
-        index represented by the given ixreaderder.
+        given ixreader.
         
-        This method references the IndexReader to expand Prefix and Wildcard
-        queries, and only adds terms that actually exist in the index (unless
-        reverse=True).
-        
-        >>> ixreader = my_index.reader()
-        >>> q = And([Or([Term("content", u"render"),
-        ...             Term("content", u"rendering")]),
-        ...             Prefix("path", u"/a/")])
-        >>> q.existing_terms(ixreader, termset)
-        set([("content", u"render"), ("path", u"/a/b"), ("path", u"/a/c")])
+        This method exists for backwards compatibility. For more flexibility
+        use the :meth:`Query.iter_all_terms` method instead, which simply yields
+        the terms in the query.
         
         :param ixreader: A :class:`whoosh.reading.IndexReader` object.
         :param reverse: If True, this method adds *missing* terms rather than
 
         if termset is None:
             termset = set()
-        self._existing_terms(ixreader, termset, reverse=reverse,
-                             phrases=phrases)
+        if reverse:
+            test = lambda t: t not in ixreader
+        else:
+            test = lambda t: t in ixreader
+        
+        termset.update(t for t in self.all_terms(phrases=phrases) if test(t))
         return termset
 
+    def leaves(self):
+        """Returns an iterator of all the leaf queries in this query tree as a
+        flat series.
+        """
+        
+        if self.is_leaf():
+            yield self
+        else:
+            for q in self.children():
+                for qq in q.leaves():
+                    yield qq
+
+    def iter_all_terms(self, phrases=True):
+        """Returns an iterator of all terms in this query tree.
+        
+        >>> qp = qparser.QueryParser("text", myindex.schema)
+        >>> q = myparser.parse("alfa bravo title:charlie")
+        >>> # List the terms in a query
+        >>> list(q.iter_all_terms())
+        [("text", "alfa"), ("text", "bravo"), ("title", "charlie")]
+        >>> # Get a set of all terms in the query that don't exist in the index
+        >>> reader = myindex.reader()
+        >>> missing = set(t for t in q.iter_all_terms() if t not in reader)
+        >>> missing
+        set([("text", "alfa"), ("title", "charlie")])
+        >>> # All terms in the query that occur in fewer than 5 documents in
+        >>> # the index
+        >>> [t for t in q.iter_all_terms() if reader.doc_frequency(t[0], t[1]) < 5]
+        [("title", "charlie")]
+        
+        :param phrases: Whether to add words found in Phrase queries.
+        """
+        
+        for q in self.leaves():
+            if q.has_terms():
+                if phrases or not isinstance(q, Phrase):
+                    for t in q.terms():
+                        yield t
+
+    def terms(self):
+        """Yields one or more terms searched for by this specific query object.
+        You can check whether a query object targets specific terms before you
+        call this method using :meth:`Query.has_terms`.
+        
+        To get all terms in the tree, use :meth:`Query.iter_all_terms`.
+        """
+        
+        return iter([])
+        
+
     def requires(self):
         """Returns a set of queries that are *known* to be required to match
         for the entire query to match. Note that other queries might also turn
     def is_leaf(self):
         return False
     
+    def children(self):
+        yield self.child
+    
     def apply(self, fn):
         return self.__class__(fn(self.child))
     
-    def all_terms(self, termset=None, phrases=True):
-        return self.child.all_terms(termset=termset, phrases=phrases)
-    
-    def existing_terms(self, ixreader, termset=None, reverse=False,
-                       phrases=True):
-        return self.child.existing_terms(ixreader, termset=termset,
-                                         reverse=reverse, phrases=phrases)
-    
     def requires(self):
         return self.child.requires()
     
     def is_leaf(self):
         return False
 
+    def children(self):
+        return iter(self.subqueries)
+
     def apply(self, fn):
         return self.__class__([fn(q) for q in self.subqueries],
                               boost=self.boost)
             subs_min = max(0, subs_min - nots_sum)
         return subs_min
 
-    def _all_terms(self, termset, phrases=True):
-        for q in self.subqueries:
-            q.all_terms(termset, phrases=phrases)
-
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        for q in self.subqueries:
-            q.existing_terms(ixreader, termset, reverse=reverse,
-                             phrases=phrases)
-
     def normalize(self):
         # Normalize subqueries and merge nested instances of this class
         subqueries = []
         else:
             return NullQuery
 
-    def _all_terms(self, termset, phrases=True):
-        pass
-
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        fieldname = self.fieldname
-        for word in self._words(ixreader):
-            t = (fieldname, word)
-            contains = t in ixreader
-            if reverse:
-                contains = not contains
-            if contains:
-                termset.add(t)
-
     def estimate_size(self, ixreader):
         return sum(ixreader.doc_frequency(self.fieldname, text)
                    for text in self._words(ixreader))
     def __hash__(self):
         return hash(self.fieldname) ^ hash(self.text) ^ hash(self.boost)
 
-    def _all_terms(self, termset, phrases=True):
-        termset.add((self.fieldname, self.text))
+    def has_terms(self):
+        return True
 
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        fieldname, text = self.fieldname, self.text
-        contains = (fieldname, text) in ixreader
-        if reverse:
-            contains = not contains
-        if contains:
-            termset.add((fieldname, text))
+    def terms(self):
+        yield (self.fieldname, self.text)
 
     def replace(self, oldtext, newtext):
         q = copy.copy(self)
     def is_leaf(self):
         return False
 
+    def children(self):
+        yield self.query
+
     def apply(self, fn):
         return self.__class__(fn(self.query))
 
         else:
             return self.__class__(query, boost=self.boost)
 
-    def _all_terms(self, termset, phrases=True):
-        self.query.all_terms(termset, phrases=phrases)
-
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        self.query.existing_terms(ixreader, termset, reverse=reverse,
-                                  phrases=phrases)
-
     def field(self):
         return None
 
             return self
 
 
-class FuzzyTerm(MultiTerm):
+class ExpandingTerm(MultiTerm):
+    """Middleware class for queries such as FuzzyTerm and Variations that
+    expand into multiple queries, but come from a single term.
+    """
+    
+    def has_terms(self):
+        return True
+    
+    def terms(self):
+        yield (self.fieldname, self.text)
+
+
+class FuzzyTerm(ExpandingTerm):
     """Matches documents containing words similar to the given term.
     """
 
                 ^ hash(self.maxdist) ^ hash(self.prefixlength)
                 ^ hash(self.constantscore))
 
-    def _all_terms(self, termset, phrases=True):
-        termset.add((self.fieldname, self.text))
-
     def _words(self, ixreader):
         return ixreader.terms_within(self.fieldname, self.text, self.maxdist,
                                      prefix=self.prefixlength)
 
 
+class Variations(ExpandingTerm):
+    """Query that automatically searches for morphological variations of the
+    given word in the same field.
+    """
+
+    def __init__(self, fieldname, text, boost=1.0):
+        self.fieldname = fieldname
+        self.text = text
+        self.boost = boost
+
+    def __repr__(self):
+        r = "%s(%r, %r" % (self.__class__.__name__, self.fieldname, self.text)
+        if self.boost != 1:
+            r += ", boost=%s" % self.boost
+        r += ")"
+        return r
+
+    def __eq__(self, other):
+        return (other and self.__class__ is other.__class__
+                and self.fieldname == other.fieldname
+                and self.text == other.text and self.boost == other.boost)
+
+    def __hash__(self):
+        return hash(self.fieldname) ^ hash(self.text) ^ hash(self.boost)
+
+    def _words(self, ixreader):
+        fieldname = self.fieldname
+        return [word for word in variations(self.text)
+                if (fieldname, word) in ixreader]
+
+    def __unicode__(self):
+        return u("%s:<%s>") % (self.fieldname, self.text)
+
+    __str__ = __unicode__
+
+    def replace(self, oldtext, newtext):
+        q = copy.copy(self)
+        if q.text == oldtext:
+            q.text = newtext
+        return q
+
+
 class RangeMixin(object):
     # Contains methods shared by TermRange and NumericRange
     
         return (hash(self.fieldname) ^ hash(self.start) ^ hash(self.startexcl)
                 ^ hash(self.end) ^ hash(self.endexcl) ^ hash(self.boost))
     
+    def is_range(self):
+        return True
+    
     def _comparable_start(self):
         if self.start is None:
             return (Lowest, 0)
                                            self.boost)
     
 
-class Variations(MultiTerm):
-    """Query that automatically searches for morphological variations of the
-    given word in the same field.
-    """
-
-    def __init__(self, fieldname, text, boost=1.0):
-        self.fieldname = fieldname
-        self.text = text
-        self.boost = boost
-
-    def __repr__(self):
-        r = "%s(%r, %r" % (self.__class__.__name__, self.fieldname, self.text)
-        if self.boost != 1:
-            r += ", boost=%s" % self.boost
-        r += ")"
-        return r
-
-    def __eq__(self, other):
-        return (other and self.__class__ is other.__class__
-                and self.fieldname == other.fieldname
-                and self.text == other.text and self.boost == other.boost)
-
-    def __hash__(self):
-        return hash(self.fieldname) ^ hash(self.text) ^ hash(self.boost)
-
-    def _all_terms(self, termset, phrases=True):
-        termset.add(self.text)
-
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        for word in variations(self.text):
-            t = (self.fieldname, word)
-            contains = t in ixreader
-            if reverse:
-                contains = not contains
-            if contains:
-                termset.add(t)
-
-    def _words(self, ixreader):
-        fieldname = self.fieldname
-        return [word for word in variations(self.text)
-                if (fieldname, word) in ixreader]
-
-    def __unicode__(self):
-        return u("%s:<%s>") % (self.fieldname, self.text)
-
-    __str__ = __unicode__
-
-    def replace(self, oldtext, newtext):
-        q = copy.copy(self)
-        if q.text == oldtext:
-            q.text = newtext
-        return q
-
 
 class Phrase(Query):
     """Matches documents containing a given phrase."""
             h ^= hash(w)
         return h
 
-    def _all_terms(self, termset, phrases=True):
-        if phrases:
-            fieldname = self.fieldname
-            for word in self.words:
-                termset.add((fieldname, word))
+    def has_terms(self):
+        return True
 
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        if phrases:
-            fieldname = self.fieldname
-            for word in self.words:
-                contains = (fieldname, word) in ixreader
-                if reverse:
-                    contains = not contains
-                if contains:
-                    termset.add((fieldname, word))
+    def terms(self):
+        return ((self.fieldname, word) for word in self.words)
 
     def normalize(self):
         if not self.words:
 
         return self.__class__(a, b, boost=self.boost)
 
-    def _all_terms(self, termset, phrases=True):
-        self.a.all_terms(termset, phrases=phrases)
-
-    def _existing_terms(self, ixreader, termset, reverse=False, phrases=True):
-        self.a.existing_terms(ixreader, termset, reverse=reverse,
-                              phrases=phrases)
-        
     def requires(self):
         return self.a.requires()
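
Together, children(), leaves(), has_terms(), and terms() replace the old
_all_terms()/_existing_terms() plumbing with a uniform way to walk a query
tree. A small sketch of the new inspection API:

    from whoosh import query
    from whoosh.compat import u

    q = query.And([query.Term("a", u("alfa")),
                   query.Or([query.Term("b", u("bravo")),
                             query.Term("c", u("charlie"))]),
                   query.Wildcard("a", u("d*"))])

    list(q.children())        # the three direct subqueries
    list(q.leaves())          # the four leaf queries
    list(q.iter_all_terms())  # [("a", "alfa"), ("b", "bravo"), ("c", "charlie")]
    query.term_lists(q)       # [("a", "alfa"),
                              #  [("b", "bravo"), ("c", "charlie")]]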
 

File tests/test_parsing.py

     assert_equal(q[0].boost, 1.0)
     assert_equal(q[1].boost, 2.5)
     assert_equal(q[2].text, "^3")
-    
+
+def test_boosts():
+    qp = default.QueryParser("t", None)
+    q = qp.parse("alfa ((bravo^2)^3)^4 charlie")
+    assert_equal(q.__unicode__(), "(t:alfa AND t:bravo^24.0 AND t:charlie)")
+
 def test_wildcard1():
     qp = default.QueryParser("content", None)
     q = qp.parse(u("hello *the?e* ?star*s? test"))

File tests/test_queries.py

-import copy
+from nose.tools import assert_equal, assert_not_equal  #@UnresolvedImport
 
-from nose.tools import assert_equal, assert_not_equal
-
-from whoosh import fields, scoring
+from whoosh import fields
 from whoosh.compat import u
 from whoosh.filedb.filestore import RamStorage
 from whoosh.qparser import QueryParser
 
 def test_all_terms():
     q = QueryParser("a", None).parse(u('hello b:there c:"my friend"'))
-    ts = set()
-    q.all_terms(ts, phrases=False)
+    ts = q.all_terms(phrases=False)
     assert_equal(sorted(ts), [("a", "hello"), ("b", "there")])
-    ts = set()
-    q.all_terms(ts, phrases=True)
+    ts = q.all_terms(phrases=True)
     assert_equal(sorted(ts), [("a", "hello"), ("b", "there"), ("c", "friend"), ("c", "my")])
 
 def test_existing_terms():
     q = QueryParser("value", None).parse(u('alfa hotel tango "sierra bravo"'))
     
     ts = q.existing_terms(r, phrases=False)
+    print "ts=", sorted(ts)
     assert_equal(sorted(ts), [("value", "alfa"), ("value", "hotel")])
     
     ts = q.existing_terms(r)

File tests/test_spelling.py

 from __future__ import with_statement
 import gzip
 
-from nose.tools import assert_equal, assert_not_equal
+from nose.tools import assert_equal, assert_not_equal  #@UnresolvedImport
 
 import whoosh.support.dawg as dawg
-from whoosh import fields, spelling
+from whoosh import fields, query, spelling
 from whoosh.compat import u, text_type
 from whoosh.filedb.filestore import RamStorage
 from whoosh.support.testing import TempStorage
-from whoosh.util import now
 
 
 def test_graph_corrector():
     assert_equal(cor.suggest("specail", maxdist=1), ["special"])
     gf.close()
 
+def test_query_terms():
+    from whoosh.qparser import QueryParser
     
+    qp = QueryParser("a", None)
+    text = "alfa b:(bravo OR c:charlie) delta"
+    q = qp.parse(text)
+    assert_equal(sorted(q.iter_all_terms()), [("a", "alfa"), ("a", "delta"),
+                                              ("b", "bravo"), ("c", "charlie")])
+    assert_equal(query.term_lists(q), [("a", "alfa"),
+                                       [("b", "bravo"), ("c", "charlie")],
+                                       ("a", "delta")])
+    
+    text = "alfa brav*"
+    q = qp.parse(text)
+    assert_equal(sorted(q.iter_all_terms()), [("a", "alfa")])
+    assert_equal(query.term_lists(q), [("a", "alfa")])
+    
+    text = 'alfa "bravo charlie" delta'
+    q = qp.parse(text)
+    assert_equal(query.term_lists(q), [("a", "alfa"),
+                                       [("a", "bravo"), ("a", "charlie")],
+                                       ("a", "delta")])
+    
+    text = 'alfa (b:"bravo charlie" c:del* echo) d:foxtrot'
+    q = qp.parse(text)
+    print([list(qq.terms()) for qq in q.leaves() if qq.has_terms()])
+    assert False
 
 