Commits

Matt Chaput committed e8b6950

More work-in-progress on rewriting query parser.

  • Participants
  • Parent commits 9799989
  • Branches dawg

Comments (0)

Files changed (11)

File src/whoosh/filedb/filetables.py

         return dbfile.get_float(weightspos + _FLOAT_SIZE)
     
 
-# Utility functions
 
-#def dump_hash(hashreader):
-#    dbfile = hashreader.dbfile
-#    read = hashreader.read
-#    eod = hashreader._start_of_hashes
-#
-#    print "HEADER_SIZE=", hashreader.header_size, "eod=", eod
-#
-#    # Dump hashtables
-#    for bucketnum in xrange(256):
-#        pos, numslots = unpack_header_entry(read(bucketnum * header_entry_size, header_entry_size))
-#        if numslots:
-#            print "Bucket %d: %d slots" % (bucketnum, numslots)
-#
-#            dbfile.seek(pos)
-#            for _ in xrange(0, numslots):
-#                print "  %X : %d" % hashreader.unpack_pointer(read(pos, pointer_size))
-#                pos += pointer_size
-#        else:
-#            print "Bucket %d empty: %s, %s" % (bucketnum, pos, numslots)
-#
-#    # Dump keys and values
-#    print "-----"
-#    pos = hashreader.header_size
-#    dbfile.seek(pos)
-#    while pos < eod:
-#        keylen, datalen = unpack_lengths(read(pos, lengths_size))
-#        keypos = pos + lengths_size
-#        datapos = pos + lengths_size + keylen
-#        key = read(keypos, keylen)
-#        data = read(datapos, datalen)
-#        print "%d +%d,%d:%r->%r" % (pos, keylen, datalen, key, data)
-#        pos = datapos + datalen
 
-

File src/whoosh/filedb/filewriting.py

         self._added = True
         self.storedfields.append(storedvalues)
         self.docnum += 1
-        #print "%f" % (now() - t)
     
     #def update_document(self, **fields):
     

File src/whoosh/filedb/pools.py

         termswriter.add_iter(self.postbuf, lengths.get)
 
 
-#class UnixSortPool(PoolBase):
-#    def __init__(self, schema, dir=None, basename='', limitmb=32, **kwargs):
-#        super(UnixSortPool, self).__init__(schema, dir=dir, basename=basename)
-#        self._make_dir()
-#        fd, self.filename = tempfile.mkstemp(".run", dir=self.dir)
-#        self.sortfile = os.fdopen(fd, "wb")
-#        self.linebuffer = []
-#        self.bufferlimit = 100
-#        
-#    def add_posting(self, *args):
-#        self.sortfile.write(b64encode(dumps(args)) + "\n")
-#        
-#    def finish(self, termswriter, doccount, lengthfile):
-#        self.sortfile.close()
-#        from whoosh.util import now
-#        print "Sorting file...", self.filename
-#        t = now()
-#        outpath = os.path.join(os.path.dirname(self.filename), "sorted.txt")
-#        os.system("sort %s >%s" % (self.filename, outpath))
-#        print "...took", now() - t
 
     
     

File src/whoosh/qparser/dateparse2.py

+# Copyright 2010 Matt Chaput. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#    1. Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#
+#    2. Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and documentation are
+# those of the authors and should not be interpreted as representing official
+# policies, either expressed or implied, of Matt Chaput.
+
+import re
+import sys
+from datetime import datetime, timedelta
+
+from whoosh.compat import string_type, iteritems
+from whoosh.qparser import plugins2 as plugins
+from whoosh.qparser import syntax2 as syntax
+from whoosh.qparser.common import rcompile
+from whoosh.qparser.taggers import Tagger
+from whoosh.support.relativedelta import relativedelta
+from whoosh.support.times import (adatetime, timespan, fill_in, is_void,
+                                  TimeError, relative_days)
+
+
class DateParseError(Exception):
    """Raised when date text cannot be parsed into a date or date range."""
+
+
+# Utility functions
+
def print_debug(level, msg, *args):
    """Print ``msg % args`` indented by ``level - 1`` two-space steps.

    A non-positive *level* suppresses the output entirely; parser elements
    pass ``debug + 1`` down the call chain so nested elements indent deeper.
    """
    if level <= 0:
        return
    indent = "  " * (level - 1)
    print(indent + (msg % args))
+
+
+# Parser element objects
+
class Props(object):
    """A simple namespace object that copies keyword arguments into its
    instance attributes, so values can be read with dot syntax instead of
    square-bracket string lookup. Used by :class:`Regex`.
    """

    def __init__(self, **kwargs):
        # The instance dict *is* the keyword dict, so every keyword becomes
        # an attribute
        self.__dict__ = kwargs

    def __repr__(self):
        return repr(vars(self))

    def get(self, key, default=None):
        """Dict-style lookup of an attribute with a fallback default."""
        return vars(self).get(key, default)
+
+
class ParserBase(object):
    """Base class for date parser elements."""

    def to_parser(self, e):
        """Coerce *e* into a parser element: bare strings become
        :class:`Regex` objects, anything else passes through unchanged.
        """
        return Regex(e) if isinstance(e, string_type) else e

    def parse(self, text, dt, pos=0, debug=-9999):
        """Subclasses implement the actual matching and return a
        ``(date, newpos)`` tuple.
        """
        raise NotImplementedError

    def date_from(self, text, dt=None, pos=0, debug=-9999):
        """Parse *text* and return only the date part of the result,
        defaulting the base datetime to the current local time.
        """
        if dt is None:
            dt = datetime.now()
        parsed, _endpos = self.parse(text, dt, pos, debug + 1)
        return parsed
+
+
class MultiBase(ParserBase):
    """Base class for date parser elements such as Sequence and Bag that
    contain sub-elements.
    """

    def __init__(self, elements, name=None):
        """
        :param elements: the sub-elements to match.
        :param name: a name for this element (for debugging purposes only).
        """
        converted = []
        for e in elements:
            converted.append(self.to_parser(e))
        self.elements = converted
        self.name = name

    def __repr__(self):
        clsname = self.__class__.__name__
        return "%s<%s>%r" % (clsname, self.name or '', self.elements)
+
+
class Sequence(MultiBase):
    """Merges the dates parsed by a sequence of sub-elements.
    """
    
    def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", name=None,
                 progressive=False):
        """
        :param elements: the sequence of sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param name: a name for this element (for debugging purposes only).
        :param progressive: if True, elements after the first do not need to
            match. That is, for elements (a, b, c) and progressive=True, the
            sequence matches like ``a[b[c]]``.
        """
        
        super(Sequence, self).__init__(elements, name)
        self.sep_pattern = sep
        if sep:
            self.sep_expr = rcompile(sep)
        else:
            self.sep_expr = None
        self.progressive = progressive
    
    def parse(self, text, dt, pos=0, debug=-9999):
        # Partial date accumulated from the sub-elements so far
        d = adatetime()
        first = True
        foundall = False
        failed = False
        
        print_debug(debug, "Seq %s sep=%r text=%r", self.name, self.sep_pattern, text[pos:])
        for e in self.elements:
            print_debug(debug, "Seq %s text=%r", self.name, text[pos:])
            if self.sep_expr and not first:
                # Require the separator between elements after the first
                print_debug(debug, "Seq %s looking for sep", self.name)
                m = self.sep_expr.match(text, pos)
                if m:
                    pos = m.end()
                else:
                    print_debug(debug, "Seq %s didn't find sep", self.name)
                    break
            
            print_debug(debug, "Seq %s trying=%r at=%s", self.name, e, pos)
            
            try:
                at, newpos = e.parse(text, dt, pos=pos, debug=debug + 1)
            except TimeError:
                failed = True
                break
            
            print_debug(debug, "Seq %s result=%r", self.name, at)
            if not at:
                break
            pos = newpos
            
            print_debug(debug, "Seq %s adding=%r to=%r", self.name, at, d)
            try:
                # Merge this element's fields into the accumulated date;
                # fill_in raises TimeError on conflicting fields
                d = fill_in(d, at)
            except TimeError:
                print_debug(debug, "Seq %s Error in fill_in", self.name)
                failed = True
                break
            print_debug(debug, "Seq %s filled date=%r", self.name, d)
            
            first = False
        else:
            # for/else: only reached when every element matched (no break)
            foundall = True
        
        # Succeed when all elements matched, or when at least one matched
        # and this sequence is progressive
        if not failed and (foundall or (not first and self.progressive)):
            print_debug(debug, "Seq %s final=%r", self.name, d)
            return (d, pos)
        else:
            print_debug(debug, "Seq %s failed", self.name)
            return (None, None)
+
+
class Combo(Sequence):
    """Parses a sequence of elements in order and combines the dates parsed
    by the sub-elements somehow. The default behavior is to accept two dates
    from the sub-elements and turn them into a range. 
    """
    
    def __init__(self, elements, fn=None, sep="(\\s+|\\s*,\\s*)", min=2, max=2,
                 name=None):
        """
        :param elements: the sequence of sub-elements to parse.
        :param fn: a function to run on all dates found. It should return a
            datetime, adatetime, or timespan object. If this argument is None,
            the default behavior accepts two dates and returns a timespan.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param min: the minimum number of dates required from the sub-elements.
        :param max: the maximum number of dates allowed from the sub-elements.
        :param name: a name for this element (for debugging purposes only).
        """
        
        super(Combo, self).__init__(elements, sep=sep, name=name)
        self.fn = fn
        self.min = min
        self.max = max
    
    def parse(self, text, dt, pos=0, debug=-9999):
        # Dates collected from the sub-elements (void results are skipped)
        dates = []
        first = True
        
        print_debug(debug, "Combo %s sep=%r text=%r", self.name, self.sep_pattern, text[pos:])
        for e in self.elements:
            if self.sep_expr and not first:
                print_debug(debug, "Combo %s looking for sep at %r", self.name, text[pos:])
                m = self.sep_expr.match(text, pos)
                if m:
                    pos = m.end()
                else:
                    print_debug(debug, "Combo %s didn't find sep", self.name)
                    return (None, None)
            
            print_debug(debug, "Combo %s trying=%r", self.name, e)
            try:
                at, pos = e.parse(text, dt, pos, debug + 1)
            except TimeError:
                at, pos = None, None
            
            print_debug(debug, "Combo %s result=%r", self.name, at)
            if at is None:
                # Unlike Sequence, every element must match here
                return (None, None)
            
            first = False
            if is_void(at):
                # Element matched text but produced no date fields
                # (e.g. a literal separator word such as "to")
                continue
            if len(dates) == self.max:
                print_debug(debug, "Combo %s length > %s", self.name, self.max)
                return (None, None)
            dates.append(at)
        
        print_debug(debug, "Combo %s dates=%r", self.name, dates)
        if len(dates) < self.min:
            print_debug(debug, "Combo %s length < %s", self.name, self.min)
            return (None, None)
        
        return (self.dates_to_timespan(dates), pos)
    
    def dates_to_timespan(self, dates):
        """Combine the collected dates using the custom ``fn`` if one was
        given; otherwise turn exactly two dates into a timespan.
        """
        if self.fn:
            return self.fn(dates)
        elif len(dates) == 2:
            return timespan(dates[0], dates[1])
        else:
            raise DateParseError("Don't know what to do with %r" % (dates, ))
+
+
class Choice(MultiBase):
    """Returns the date from the first of its sub-elements that matches.
    """

    def parse(self, text, dt, pos=0, debug=-9999):
        print_debug(debug, "Choice %s text=%r", self.name, text[pos:])
        for element in self.elements:
            print_debug(debug, "Choice %s trying=%r", self.name, element)
            try:
                parsed, endpos = element.parse(text, dt, pos, debug + 1)
            except TimeError:
                # A time error from a sub-element just means "no match"
                parsed = endpos = None
            if parsed:
                print_debug(debug, "Choice %s matched", self.name)
                return (parsed, endpos)
        print_debug(debug, "Choice %s no match", self.name)
        return (None, None)
+
+
class Bag(MultiBase):
    """Parses its sub-elements in any order and merges the dates.
    """
    
    def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", onceper=True,
                 requireall=False, allof=None, anyof=None, name=None):
        """
        :param elements: the sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param onceper: only allow each element to match once.
        :param requireall: if True, the sub-elements can match in any order,
            but they must all match.
        :param allof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if all the
            indicated sub-elements match.
        :param anyof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if any of the
            indicated sub-elements match.
        :param name: a name for this element (for debugging purposes only).
        """
        
        super(Bag, self).__init__(elements, name)
        self.sep_expr = rcompile(sep)
        self.onceper = onceper
        self.requireall = requireall
        self.allof = allof
        self.anyof = anyof
    
    def parse(self, text, dt, pos=0, debug=-9999):
        first = True
        d = adatetime()
        # seen[i] is True once element i has matched
        seen = [False] * len(self.elements)
        
        while True:
            newpos = pos
            print_debug(debug, "Bag %s text=%r", self.name, text[pos:])
            if not first:
                # A separator is required between matches after the first
                print_debug(debug, "Bag %s looking for sep", self.name)
                m = self.sep_expr.match(text, pos)
                if m:
                    newpos = m.end()
                else:
                    print_debug(debug, "Bag %s didn't find sep", self.name)
                    break
            
            # Try each sub-element at the current position; take the first
            # one that matches
            for i, e in enumerate(self.elements):
                print_debug(debug, "Bag %s trying=%r", self.name, e)
                
                try:
                    at, xpos = e.parse(text, dt, newpos, debug + 1)
                except TimeError:
                    at, xpos = None, None
                    
                print_debug(debug, "Bag %s result=%r", self.name, at)
                if at:
                    if self.onceper and seen[i]:
                        # Element already matched once; the whole bag fails
                        return (None, None)
                    
                    d = fill_in(d, at)
                    newpos = xpos
                    seen[i] = True
                    break
            else:
                # while/for/else: no sub-element matched here; stop scanning
                break
            
            pos = newpos
            if self.onceper and all(seen):
                break
            
            first = False
        
        # Check the allof/anyof/requireall constraints against what matched
        if (not any(seen)
            or (self.allof and not all(seen[pos] for pos in self.allof))
            or (self.anyof and not any(seen[pos] for pos in self.anyof))
            or (self.requireall and not all(seen))):
            return (None, None)
        
        print_debug(debug, "Bag %s final=%r", self.name, d)
        return (d, pos)
+    
+
class Optional(ParserBase):
    """Wraps a sub-element to indicate that the sub-element is optional.
    """

    def __init__(self, element):
        self.element = self.to_parser(element)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.element)

    def parse(self, text, dt, pos=0, debug=-9999):
        try:
            parsed, pos = self.element.parse(text, dt, pos, debug + 1)
        except TimeError:
            parsed, pos = None, None

        if not parsed:
            # No match: succeed anyway with an empty date. NOTE: pos is
            # None at this point when the sub-element failed, matching the
            # original behavior.
            return (adatetime(), pos)
        return (parsed, pos)
+
+
class ToEnd(ParserBase):
    """Wraps a sub-element and requires that the end of the sub-element's
    match be the end of the text.
    """

    def __init__(self, element):
        # NOTE: deliberately does not run the element through to_parser
        self.element = element

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.element)

    def parse(self, text, dt, pos=0, debug=-9999):
        try:
            parsed, endpos = self.element.parse(text, dt, pos, debug + 1)
        except TimeError:
            parsed, endpos = None, None

        # Only accept a match that consumed the text all the way to the end
        if parsed and endpos == len(text):
            return (parsed, endpos)
        return (None, None)
+
+
class Regex(ParserBase):
    """Matches a regular expression and maps named groups in the pattern to
    datetime attributes using a function or overridden method.
    
    There are two points at which you can customize the behavior of this class,
    either by supplying functions to the initializer or overriding methods.
    
    * The ``modify`` function or ``modify_props`` method takes a ``Props``
      object containing the named groups and modifies its values (in place).
    * The ``fn`` function or ``props_to_date`` method takes a ``Props`` object
      and the base datetime and returns an adatetime/datetime.
    """
    
    # Class-level defaults so subclasses that don't call this __init__
    # (e.g. Month, PlusMinus) still have the attributes
    fn = None
    modify = None
    
    def __init__(self, pattern, fn=None, modify=None):
        self.pattern = pattern
        self.expr = rcompile(pattern)
        self.fn = fn
        self.modify = modify
    
    def __repr__(self):
        return "<%r>" % (self.pattern, )
    
    def parse(self, text, dt, pos=0, debug=-9999):
        m = self.expr.match(text, pos)
        if not m:
            return (None, None)
        
        props = self.extract(m)
        self.modify_props(props)
        
        try:
            d = self.props_to_date(props, dt)
        except TimeError:
            d = None
        
        if d:
            return (d, m.end())
        else:
            return (None, None)
    
    def extract(self, match):
        """Return a Props object built from the match's named groups, with
        values converted to ints where possible (unmatched groups stay None).
        """
        d = match.groupdict()
        for key, value in iteritems(d):
            try:
                value = int(value)
                d[key] = value
            except (ValueError, TypeError):
                pass
        return Props(**d)
    
    def modify_props(self, props):
        # Hook: mutate the props in place before they become a date
        if self.modify:
            self.modify(props)
            
    def props_to_date(self, props, dt):
        """Convert the props to a date object, either via the custom ``fn``
        or by copying the adatetime unit attributes directly.
        """
        if self.fn:
            return self.fn(props, dt)
        else:
            args = {}
            for key in adatetime.units:
                args[key] = props.get(key)
            return adatetime(**args)
+
+    
class Month(Regex):
    """Matches one of a set of month-name patterns and converts the matched
    name into a 1-based month number.
    """
    
    def __init__(self, *patterns):
        # NOTE: deliberately skips Regex.__init__; fn/modify fall back to
        # the class-level None defaults
        self.patterns = patterns
        self.exprs = [rcompile(pat) for pat in self.patterns]
        
        self.pattern = ("(?P<month>"
                        + "|".join("(%s)" % pat for pat in self.patterns)
                        + ")")
        self.expr = rcompile(self.pattern)
        
    def modify_props(self, p):
        # Replace the matched month name with its 1-based position in the
        # pattern list
        text = p.month
        for i, expr in enumerate(self.exprs):
            m = expr.match(text)
            if m:
                p.month = i + 1
                break
+            
+
class PlusMinus(Regex):
    """Matches relative-offset expressions such as ``+2 weeks 3 days`` and
    applies the offset to the base datetime.
    """
    
    def __init__(self, years, months, weeks, days, hours, minutes, seconds):
        # Each unit is an optional "<number> <unit-word>" group; the unit
        # word alternatives are supplied per-locale by the caller
        rel_years = "((?P<years>[0-9]+) *(%s))?" % years
        rel_months = "((?P<months>[0-9]+) *(%s))?" % months
        rel_weeks = "((?P<weeks>[0-9]+) *(%s))?" % weeks
        rel_days = "((?P<days>[0-9]+) *(%s))?" % days
        rel_hours = "((?P<hours>[0-9]+) *(%s))?" % hours
        rel_mins = "((?P<mins>[0-9]+) *(%s))?" % minutes
        rel_secs = "((?P<secs>[0-9]+) *(%s))?" % seconds
        
        # NOTE: skips Regex.__init__; fn/modify use the class defaults
        self.pattern = ("(?P<dir>[+-]) *%s *%s *%s *%s *%s *%s *%s(?=(\\W|$))"
                        % (rel_years, rel_months, rel_weeks, rel_days,
                           rel_hours, rel_mins, rel_secs))
        self.expr = rcompile(self.pattern)
        
    def props_to_date(self, p, dt):
        # A leading "-" means the offset goes into the past
        if p.dir == "-":
            dir = -1
        else:
            dir = 1
    
        delta = relativedelta(years=(p.get("years") or 0) * dir,
                              months=(p.get("months") or 0) * dir,
                              weeks=(p.get("weeks") or 0) * dir,
                              days=(p.get("days") or 0) * dir,
                              hours=(p.get("hours") or 0) * dir,
                              minutes=(p.get("mins") or 0) * dir,
                              seconds=(p.get("secs") or 0) * dir)
        return dt + delta
+
+
class Daynames(Regex):
    """Matches "next/last <dayname>" expressions and resolves them to a
    concrete calendar date relative to the base datetime.
    """
    
    def __init__(self, next, last, daynames):
        self.next_pattern = next
        self.last_pattern = last
        self._dayname_exprs = tuple(rcompile(pat) for pat in daynames)
        dn_pattern = "|".join(daynames)
        # NOTE: skips Regex.__init__; fn/modify use the class defaults
        self.pattern = "(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))" % (next, last, dn_pattern)
        self.expr = rcompile(self.pattern)
    
    def props_to_date(self, p, dt):
        # NOTE(review): the arguments look reversed -- re.match(pattern,
        # string) is called with the matched text as the pattern and
        # last_pattern as the string. This happens to work when the matched
        # direction word is itself a literal alternative within
        # last_pattern; confirm before using non-literal patterns here.
        if re.match(p.dir, self.last_pattern):
            dir = -1
        else:
            dir = 1
        
        # Find which dayname pattern matched; daynum is its 0-based index
        for daynum, expr in enumerate(self._dayname_exprs):
            m = expr.match(p.day)
            if m:
                break
        current_daynum = dt.weekday()
        days_delta = relative_days(current_daynum, daynum, dir)
        
        d = dt.date() + timedelta(days=days_delta)
        return adatetime(year=d.year, month=d.month, day=d.day)
+
+
class Time12(Regex):
    """Matches 12-hour clock times such as ``1:30pm`` and converts them to
    24-hour adatetime values.
    """
    
    def __init__(self):
        # NOTE: skips Regex.__init__; fn/modify use the class defaults
        self.pattern = "(?P<hour>[1-9]|10|11|12)(:(?P<mins>[0-5][0-9])(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?\\s*(?P<ampm>am|pm)(?=(\\W|$))"
        self.expr = rcompile(self.pattern)

    def props_to_date(self, p, dt):
        isam = p.ampm.lower().startswith("a")
        
        # 12am is midnight (hour 0) and 12pm is noon (hour 12); otherwise
        # pm adds 12 to the hour
        if p.hour == 12:
            if isam:
                hr = 0
            else:
                hr = 12
        else:
            hr = p.hour
            if not isam:
                hr += 12
        
        return adatetime(hour=hr, minute=p.mins, second=p.secs, microsecond=p.usecs)
+
+
+# Top-level parser classes
+
class DateParser(object):
    """Base class for locale-specific parser classes.
    """
    
    # Common locale-independent elements; subclasses may override these
    # (see English.day).
    # NOTE(review): the trailing (?!=:) lookahead matches the literal
    # two characters "=:"; possibly (?!:) was intended -- confirm.
    day = Regex("(?P<day>([123][0-9])|[1-9])(?=(\\W|$))(?!=:)",
                lambda p, dt: adatetime(day=p.day))
    year = Regex("(?P<year>[0-9]{4})(?=(\\W|$))",
                 lambda p, dt: adatetime(year=p.year))
    time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?(?=(\\W|$))",
                   lambda p, dt: adatetime(hour=p.hour, minute=p.mins, second=p.secs, microsecond=p.usecs))
    time12 = Time12()
    
    def __init__(self):
        # "simple" matches compact numeric dates such as "2010-02-03" or
        # "20100203", progressively: year[month[day[hour[min[sec[usec]]]]]]
        simple_year = "(?P<year>[0-9]{4})"
        simple_month = "(?P<month>[0-1][0-9])"
        simple_day = "(?P<day>[0-3][0-9])"
        simple_hour = "(?P<hour>([0-1][0-9])|(2[0-3]))"
        simple_minute = "(?P<minute>[0-5][0-9])"
        simple_second = "(?P<second>[0-5][0-9])"
        simple_usec = "(?P<microsecond>[0-9]{6})"
        
        simple_seq = Sequence((simple_year, simple_month, simple_day, simple_hour,
                               simple_minute, simple_second, simple_usec),
                               sep="[- .:/]*", name="simple", progressive=True)
        # Require whitespace or end-of-text after the compact date
        self.simple = Sequence((simple_seq, "(?=(\\s|$))"), sep='')
        
        self.setup()
    
    def setup(self):
        """Subclasses override this to build their locale-specific elements."""
        raise NotImplementedError
    
    #
    
    def get_parser(self):
        """Return the top-level parser element (built by setup())."""
        return self.all
    
    def parse(self, text, dt, pos=0, debug=-9999):
        """Parse a date/time starting at *pos* in *text*, returning
        ``(date, newpos)`` with partial dates disambiguated against *dt*.
        """
        parser = self.get_parser()
        
        d, newpos = parser.parse(text, dt, pos=pos, debug=debug)
        if isinstance(d, (adatetime, timespan)):
            d = d.disambiguated(dt)
        
        return (d, newpos)
    
    def date_from(self, text, basedate=None, pos=0, debug=-9999, toend=True):
        """Parse *text* and return only the resulting date object.
        
        :param toend: if True, the match must consume the text to its end.
        """
        if basedate is None:
            basedate = datetime.utcnow()
        
        parser = self.get_parser()
        if toend:
            parser = ToEnd(parser)
        
        d = parser.date_from(text, basedate, pos=pos, debug=debug)
        if isinstance(d, (adatetime, timespan)):
            d = d.disambiguated(basedate)
        return d
+    
+    
class English(DateParser):
    """Date parser for English-language date expressions.
    """
    
    # Override: also accept ordinal suffixes ("1st", "22nd", "3rd", "4th")
    day = Regex("(?P<day>([123][0-9])|[1-9])(st|nd|rd|th)?(?=(\\W|$))",
                lambda p, dt: adatetime(day=p.day))
    
    def setup(self):
        # Relative offsets such as "+2 weeks 3 days"
        self.plusdate = PlusMinus("years|year|yrs|yr|ys|y",
                                  "months|month|mons|mon|mos|mo",
                                  "weeks|week|wks|wk|ws|w",
                                  "days|day|dys|dy|ds|d",
                                  "hours|hour|hrs|hr|hs|h",
                                  "minutes|minute|mins|min|ms|m",
                                  "seconds|second|secs|sec|s")
        
        # "next tuesday" / "last fri" style expressions
        self.dayname = Daynames("next", "last",
                                ("monday|mon|mo", "tuesday|tues|tue|tu",
                                 "wednesday|wed|we", "thursday|thur|thu|th",
                                 "friday|fri|fr", "saturday|sat|sa",
                                 "sunday|sun|su"))
        
        # Named times and the 12/24-hour clock forms
        midnight = Regex("midnight", lambda p, dt: adatetime(hour=0, minute=0, second=0, microsecond=0))
        noon = Regex("noon", lambda p, dt: adatetime(hour=12, minute=0, second=0, microsecond=0))
        now = Regex("now", lambda p, dt: dt)
        self.time = Choice((self.time12, self.time24, midnight, noon, now), name="time")
        
        def tomorrow_to_date(p, dt):
            d = dt.date() + timedelta(days=+1)
            return adatetime(year=d.year, month=d.month, day=d.day)
        tomorrow = Regex("tomorrow", tomorrow_to_date)
        
        def yesterday_to_date(p, dt):
            d = dt.date() + timedelta(days=-1)
            return adatetime(year=d.year, month=d.month, day=d.day)
        yesterday = Regex("yesterday", yesterday_to_date)
        
        thisyear = Regex("this year", lambda p, dt: adatetime(year=dt.year))
        thismonth = Regex("this month", lambda p, dt: adatetime(year=dt.year, month=dt.month))
        today = Regex("today", lambda p, dt: adatetime(year=dt.year, month=dt.month, day=dt.day))
        
        self.month = Month("january|jan", "february|febuary|feb", "march|mar",
                           "april|apr", "may", "june|jun", "july|jul", "august|aug",
                           "september|sept|sep", "october|oct", "november|nov",
                           "december|dec")
        
        # If you specify a day number you must also specify a month... this
        # Choice captures that constraint
        
        self.dmy = Choice((Sequence((self.day, self.month, self.year), name="dmy"),
                           Sequence((self.month, self.day, self.year), name="mdy"),
                           Sequence((self.year, self.month, self.day), name="ymd"),
                           Sequence((self.year, self.day, self.month), name="ydm"),
                           Sequence((self.day, self.month), name="dm"),
                           Sequence((self.month, self.day), name="md"),
                           Sequence((self.month, self.year), name="my"),
                           self.month, self.year, self.dayname, tomorrow,
                           yesterday, thisyear, thismonth, today, now,
                           ), name="date")
        
        # A full expression: a time and/or a date in either order, a relative
        # offset, or a compact numeric date; optionally a "X to Y" range
        self.datetime = Bag((self.time, self.dmy), name="datetime")
        self.bundle = Choice((self.plusdate, self.datetime, self.simple), name="bundle")
        self.torange = Combo((self.bundle, "to", self.bundle), name="torange")
        
        self.all = Choice((self.torange, self.bundle), name="all")
+        
+
+# QueryParser plugin
+
class DateParserPlugin(plugins.Plugin):
    """Adds more powerful parsing of DATETIME fields.
    
    >>> parser.add_plugin(DateParserPlugin())
    >>> parser.parse(u"date:'last tuesday'")
    """
    
    def __init__(self, basedate=None, dateparser=None, callback=None,
                 free=False):
        """
        :param basedate: a datetime object representing the current time
            against which to measure relative dates. If you do not supply this
            argument, the plugin uses ``datetime.utcnow()``.
        :param dateparser: an instance of
            :class:`whoosh.qparser.dateparse.DateParser`. If you do not supply
            this argument, the plugin automatically uses
            :class:`whoosh.qparser.dateparse.English`.
        :param callback: a callback function for parsing errors. This allows
            you to provide feedback to the user about problems parsing dates.
        :param free: if True, this plugin will install a filter early in the
            parsing process and try to find undelimited dates such as
            ``date:last tuesday``. Note that allowing this could result in
            normal query words accidentally being parsed as dates sometimes.
        """
        
        self.basedate = basedate
        if dateparser is None:
            dateparser = English()
        self.dateparser = dateparser
        self.callback = callback
        self.free = free
    
    def taggers(self, parser):
        """Return the taggers this plugin contributes to the query parser."""
        if self.free:
            # If we're tokenizing, we have to go before the FieldsPlugin
            return [(DateTagger, -1)]
        else:
            return ()
    
    def filters(self, parser):
        """Return the filters this plugin contributes to the query parser."""
        # Run the filter after the FieldsPlugin assigns field names
        return [(self.do_dates, 110)]
    
    def errorize(self, message, node):
        """Report a parse problem via the callback (if any) and return an
        ErrorNode wrapping the offending node.
        """
        if self.callback:
            self.callback(message)
        return syntax.ErrorNode(message, node)
    
    def text_to_dt(self, node):
        """Replace a text node in a date field with a DateTimeNode, or with
        an error node when the text doesn't parse as a date.
        """
        text = node.text
        try:
            dt = self.dateparser.date_from(text, self.basedate)
            if dt is None:
                node = self.errorize(text, node)
            else:
                node = DateTimeNode(node.fieldname, dt, node.boost)
        except DateParseError:
            # sys.exc_info keeps this compatible with both Python 2 and 3
            # except-clause syntax
            e = sys.exc_info()[1]
            node = self.errorize(e, node)
        
        return node
    
    def range_to_dt(self, node):
        """Replace a range node in a date field with a DateRangeNode,
        parsing and disambiguating both endpoints.
        """
        start = end = None
        dp = self.dateparser.get_parser()
        
        if node.start:
            start = dp.date_from(node.start, self.basedate)
            if start is None:
                return self.errorize(node.start, node)
        if node.end:
            end = dp.date_from(node.end, self.basedate)
            if end is None:
                return self.errorize(node.end, node)
        
        if start and end:
            # Disambiguate both endpoints together so they stay consistent
            ts = timespan(start, end).disambiguated(self.basedate)
            start, end = ts.start, ts.end
        elif start:
            start = start.disambiguated(self.basedate)
            if isinstance(start, timespan):
                start = start.start
        elif end:
            end = end.disambiguated(self.basedate)
            if isinstance(end, timespan):
                end = end.end
        return DateRangeNode(node.fieldname, start, end, boost=node.boost)
    
    def do_dates(self, parser, group):
        """Filter that rewrites text/range nodes on DATETIME fields into
        date nodes, recursing into sub-groups.
        """
        schema = parser.schema
        if not schema:
            return group
        
        from whoosh.fields import DATETIME
        datefields = frozenset(fieldname for fieldname, field
                               in parser.schema.items()
                               if isinstance(field, DATETIME))
        
        for i, node in enumerate(group):
            if node.has_fieldname:
                fname = node.fieldname or parser.fieldname
            else:
                fname = None
            
            if isinstance(node, syntax.GroupNode):
                group[i] = self.do_dates(parser, node)
            elif fname in datefields:
                if node.has_text:
                    group[i] = self.text_to_dt(node)
                elif isinstance(node, syntax.RangeNode):
                    group[i] = self.range_to_dt(node)
        return group
+
+
class DateTimeNode(syntax.SyntaxNode):
    """Syntax node holding a parsed ``datetime`` or ``timespan`` value for
    a date field.
    """
    
    has_fieldname = True
    has_boost = True
    
    def __init__(self, fieldname, dt, boost=1.0):
        self.fieldname = fieldname
        self.dt = dt
        # BUG FIX: was hard-coded to 1.0, silently ignoring the boost
        # argument passed by callers such as text_to_dt()
        self.boost = boost
    
    def r(self):
        return repr(self.dt)
    
    def query(self, parser):
        from whoosh import query
        
        fieldname = self.fieldname or parser.fieldname
        field = parser.schema[fieldname]
        dt = self.dt
        # BUG FIX: previously tested self.timeobj, an attribute that is
        # never assigned anywhere; the parsed value is stored in self.dt
        if isinstance(dt, datetime):
            # An exact point in time becomes a simple term query
            return query.Term(fieldname, field.to_text(dt), boost=self.boost)
        elif isinstance(dt, timespan):
            # An ambiguous date (e.g. "2005") becomes a range over its span
            return query.DateRange(fieldname, dt.start, dt.end,
                                   boost=self.boost)
        else:
            raise Exception("Unknown time object: %r" % dt)
+
+
class DateRangeNode(syntax.SyntaxNode):
    """Syntax node holding parsed start and end datetimes for a date-range
    query. Either endpoint may be None for an open-ended range.
    """
    
    has_fieldname = True
    has_boost = True
    
    def __init__(self, fieldname, start, end, boost=1.0):
        self.fieldname = fieldname
        self.start = start
        self.end = end
        # BUG FIX: was hard-coded to 1.0, silently ignoring the boost
        # argument passed by callers such as range_to_dt()
        self.boost = boost
    
    def r(self):
        return "%r-%r" % (self.start, self.end)
    
    def query(self, parser):
        from whoosh import query
        
        fieldname = self.fieldname or parser.fieldname
        return query.DateRange(fieldname, self.start, self.end,
                               boost=self.boost)
+
+
class DateTagger(Tagger):
    """Tagger that recognizes undelimited dates following a field prefix
    (e.g. ``created:last tuesday``) when the date plugin's ``free`` option
    is on.
    """
    
    def __init__(self, plugin, expr="([A-Za-z][A-Za-z_0-9]*):([^^]+)"):
        self.plugin = plugin
        self.expr = rcompile(expr)
    
    def match(self, parser, text, pos):
        from whoosh.fields import DATETIME
        
        match = self.expr.match(text, pos)
        if not match:
            return None
        
        fieldname = match.group(1)
        schema = parser.schema
        # Only tag if the prefix names a DATETIME field in the schema
        if not (schema and fieldname in schema):
            return None
        if not isinstance(schema[fieldname], DATETIME):
            return None
        
        plugin = self.plugin
        dtext = match.group(2)
        d, newpos = plugin.dateparser.parse(dtext, plugin.basedate)
        if d:
            node = DateTimeNode(fieldname, d)
            node.startchar = match.start()
            # newpos is an offset into the date text; shift it to an
            # absolute position in the query string
            node.endchar = newpos + match.start(2)
            return node
+            
+        
+
+
+    
+
+
+    
+    
+    
+    
+    
+
+
+

File src/whoosh/qparser/default2.py

-# Copyright 2010 Matt Chaput. All rights reserved.
+# Copyright 2011 Matt Chaput. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 # policies, either expressed or implied, of Matt Chaput.
 
 from whoosh import query
+from whoosh.qparser import plugins2 as plugins
 from whoosh.qparser import syntax2 as syntax
 from whoosh.qparser.common import rcompile, QueryParserError
 
 
-# Tokenizer objects
-
-class Token(object):
-    def match(self, parser, text, pos):
-        raise NotImplementedError
-    
-
-class RegexToken(Token):
-    def __init__(self, expr):
-        self.expr = rcompile(expr)
-        
-    def match(self, parser, text, pos):
-        match = self.expr.match(text, pos)
-        if match:
-            node = self.create(parser, match)
-            node.startchar = match.start()
-            node.endchar = match.end()
-            return node
-        
-    def create(self, parser, match):
-        raise NotImplementedError
-
-
-class FnToken(RegexToken):
-    def __init__(self, expr, fn):
-        RegexToken.__init__(self, expr)
-        self.fn = fn
-    
-    def create(self, parser, match):
-        return self.fn(**match.groupdict())
-
-
 # Query parser object
 
 class QueryParser(object):
                  phraseclass=query.Phrase, group=syntax.AndGroup):
         self.fieldname = fieldname
         self.schema = schema
-        self.plugins = plugins
         self.termclass = termclass
         self.phraseclass = phraseclass
         self.group = group
+        
+        self.plugins = []
+        if not plugins:
+            plugins = self.default_set()
+        self.add_plugins(plugins)
+
+    def default_set(self):
+        return [plugins.WhitespacePlugin,
+                plugins.SingleQuotePlugin,
+                plugins.FieldsPlugin,
+                plugins.WildcardPlugin,
+                plugins.PhrasePlugin,
+                plugins.RangePlugin,
+                plugins.GroupPlugin,
+                plugins.OperatorsPlugin,
+                plugins.BoostPlugin,
+                ]
+
+    def add_plugins(self, pilist):
+        """Adds the given list of plugins to the list of plugins in this
+        parser.
+        """
+        
+        for pi in pilist:
+            self.add_plugin(pi)
+    
+    def add_plugin(self, pi):
+        """Adds the given plugin to the list of plugins in this parser.
+        """
+        
+        if isinstance(pi, type):
+            pi = pi()
+        self.plugins.append(pi)
+    
+    def remove_plugin(self, pi):
+        """Removes the given plugin object from the list of plugins in this
+        parser.
+        """
+        
+        self.plugins.remove(pi)
+    
+    def remove_plugin_class(self, cls):
+        """Removes any plugins of the given class from this parser.
+        """
+        
+        self.plugins = [pi for pi in self.plugins if not isinstance(pi, cls)]
+    
+    def replace_plugin(self, plugin):
+        """Removes any plugins of the class of the given plugin and then adds
+        it. This is a convenience method to keep from having to call
+        ``remove_plugin_class`` followed by ``add_plugin`` each time you want
+        to reconfigure a default plugin.
+        
+        >>> qp = qparser.QueryParser("content", schema)
+        >>> qp.replace_plugin(qparser.NotPlugin("(^| )-"))
+        """
+        
+        self.remove_plugin_class(plugin.__class__)
+        self.add_plugin(plugin)
 
     def _priorized(self, methodname):
         items_and_priorities = []
         
         return termclass(fieldname, text, boost=boost)
 
-    def tokens(self):
-        return self._priorized("tokens")
+    def taggers(self):
+        return self._priorized("taggers")
     
     def filters(self):
         return self._priorized("filters")
     
-    def tokenize(self, text, i=0):
+    def tag(self, text, i=0):
         stack = []
         prev = i
-        tokens = self.tokens()
+        taggers = self.taggers()
         
         def inter(startchar, endchar):
             n = syntax.WordNode(text[startchar:endchar])
         
         while i < len(text):
             node = None
-            for token in tokens:
-                node = token.match(self, text, i)
+            for tagger in taggers:
+                node = tagger.match(self, text, i)
                 if node:
                     if node.endchar <= i:
-                        raise Exception("Token %r did not move cursor forward. (%r, %s)" % (token, text, i))
+                        raise Exception("Token %r did not move cursor forward. (%r, %s)" % (tagger, text, i))
                     if prev < i:
                         stack.append(inter(prev, i))
                     
         return nodes
 
     def process(self, text, i=0):
-        return self.filterize(self.tokenize(text, i=i))
+        nodes = self.tag(text, i=i)
+        nodes = self.filterize(nodes)
+        return nodes
 
+    def parse(self, text, normalize=True):
+        tree = self.process(text)
+        q = tree.query(self)
+        if normalize:
+            q = q.normalize()
+        return q
 
 
class ParserState(object):
    """Bundles a parser's configuration together with the text being
    parsed, so the values can be passed around as a single object.
    """
    
    def __init__(self, parser, text):
        self.parser = parser
        self.text = text
        # Mirror the commonly used parser attributes for direct access
        self.fieldname = parser.fieldname
        self.schema = parser.schema
        self.termclass = parser.termclass
        self.phraseclass = parser.phraseclass
        self.group = parser.group
 
 
+# Premade parser configurations
 
def MultifieldParser(fieldnames, schema, fieldboosts=None, **kwargs):
    """Returns a QueryParser configured to search in multiple fields.
    
    Instead of assigning unfielded clauses to a default field, this parser
    transforms them into an OR clause that searches a list of fields. For
    example, if the list of multi-fields is "f1", "f2" and the query string is
    "hello there", the class will parse "(f1:hello OR f2:hello) (f1:there OR
    f2:there)". This is very useful when you have two textual fields (e.g.
    "title" and "content") you want to search by default.
    
    :param fieldnames: a list of field names to search.
    :param fieldboosts: an optional dictionary mapping field names to boosts.
    """
    
    # No default field: the MultifieldPlugin fields unfielded terms itself
    parser = QueryParser(None, schema, **kwargs)
    parser.add_plugin(plugins.MultifieldPlugin(fieldnames,
                                               fieldboosts=fieldboosts))
    return parser
 
 
def SimpleParser(fieldname, schema, **kwargs):
    """Returns a QueryParser configured to support only +, -, and phrase
    syntax.
    """
    
    plugin_set = [plugins.WhitespacePlugin,
                  plugins.PlusMinusPlugin,
                  plugins.PhrasePlugin]
    return QueryParser(fieldname, schema, plugins=plugin_set, **kwargs)
 
 
def DisMaxParser(fieldboosts, schema, tiebreak=0.0, **kwargs):
    """Returns a QueryParser configured to support only +, -, and phrase
    syntax, and which converts individual terms into DisjunctionMax queries
    across a set of fields.
    
    :param fieldboosts: a dictionary mapping field names to boosts.
    """
    
    # NOTE(review): the ``tiebreak`` parameter is accepted but never used --
    # it is not passed through to the DisMaxGroup. TODO: wire it up (or
    # remove the parameter) once DisMaxGroup's interface is settled.
    mfp = plugins.MultifieldPlugin(list(fieldboosts.keys()),
                                   fieldboosts=fieldboosts,
                                   group=syntax.DisMaxGroup)
    pis = [plugins.WhitespacePlugin,
           plugins.PlusMinusPlugin,
           plugins.PhrasePlugin,
           mfp]
    return QueryParser(None, schema, plugins=pis, **kwargs)
 
+
+
+
+
+

File src/whoosh/qparser/plugins2.py

-# Copyright 2010 Matt Chaput. All rights reserved.
+# Copyright 2011 Matt Chaput. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 # those of the authors and should not be interpreted as representing official
 # policies, either expressed or implied, of Matt Chaput.
 
-import re
+import copy
 
 from whoosh import query
 from whoosh.compat import iteritems, u
-from whoosh.qparser import default2 as default
 from whoosh.qparser import syntax2 as syntax
-from whoosh.qparser.common import get_single_text, rcompile, QueryParserError
+from whoosh.qparser.common import rcompile
+from whoosh.qparser.taggers import RegexTagger, FnTagger
 
 
 class Plugin(object):
-    def tokens(self, parser):
+    def taggers(self, parser):
         return ()
     
     def filters(self, parser):
         return ()
 
 
-class TokenizingPlugin(default.RegexToken):
+class TaggingPlugin(RegexTagger):
+    """A plugin that also acts as a tagger, to avoid having an extra tagger
+    class for simple cases.
+    
+    A TaggingPlugin object should have a ``priority`` attribute and either a
+    ``nodetype`` attribute or a ``create()`` method. If the subclass doesn't
+    override ``create()``, the base class will call ``self.nodetype`` with the
+    Match object's named groups as keyword arguments.
+    """
+    
     priority = 0
     
     def __init__(self, expr=None):
         self.expr = rcompile(expr or self.expr)
         
-    def tokens(self, parser):
+    def taggers(self, parser):
         return [(self, self.priority)]
     
     def filters(self, parser):
         return self.nodetype(**match.groupdict())
 
 
-
-class WhitespacePlugin(TokenizingPlugin):
+class WhitespacePlugin(TaggingPlugin):
     expr=r"\s+"
     priority = 100
     nodetype = syntax.Whitespace
         return [(self.remove_whitespace, 500)]
     
     def remove_whitespace(self, parser, group):
-        newgroup = group.empty()
+        newgroup = group.empty_copy()
         for node in group:
             if isinstance(node, syntax.GroupNode):
                 newgroup.append(self.remove_whitespace(parser, node))
         return newgroup
 
 
-class SingleQuotePlugin(TokenizingPlugin):
+class SingleQuotePlugin(TaggingPlugin):
     expr=r"(^|(?<=\W))'(?P<text>.*?)'(?=\s|\]|[)}]|$)"
     nodetype = syntax.WordNode
     
 
-class PrefixPlugin(TokenizingPlugin):
+class PrefixPlugin(TaggingPlugin):
     class PrefixNode(syntax.TextNode):
         qclass = query.Prefix
         
     nodetype = PrefixNode
     
 
-class WildcardPlugin(TokenizingPlugin):
+class WildcardPlugin(TaggingPlugin):
     class WildcardNode(syntax.TextNode):
         qclass = query.Wildcard
         
     nodetype = WildcardNode
            
 
-class BoostPlugin(TokenizingPlugin):
+class BoostPlugin(TaggingPlugin):
     class BoostNode(syntax.SyntaxNode):
         def __init__(self, original, boost):
             self.original = original
         return self.BoostNode(text, boost)
     
     def filters(self, parser):
-        return [(self.do_boost, 700)]
+        return [(self.clean_boost, 0), (self.do_boost, 700)]
+    
+    def clean_boost(self, parser, group):
+        bnode = self.BoostNode
+        for i, node in enumerate(group):
+            if isinstance(node, bnode):
+                if (not i or group[i - 1].is_ws()
+                    or isinstance(group[i - 1], bnode)):
+                    group[i] = syntax.WordNode(node.original)
+        return group
     
     def do_boost(self, parser, group):
-        newgroup = group.empty()
+        newgroup = group.empty_copy()
         for node in group:
-            if isinstance(node, self.BoostNode):
+            if isinstance(node, syntax.GroupNode):
+                node = self.do_boost(parser, node)
+            elif isinstance(node, self.BoostNode):
                 if (newgroup
                     and not (newgroup[-1].is_ws()
                              or isinstance(newgroup[-1], self.BoostNode))):
                     continue
                 else:
                     node = syntax.WordNode(node.original)
-            else:
-                if isinstance(node, syntax.GroupNode):
-                    node = self.do_boost(parser, node)
             
             newgroup.append(node)
         return newgroup
         self.openexpr = openexpr
         self.closeexpr = closeexpr
     
-    def tokens(self, parser):
-        return [(default.FnToken(self.openexpr, self.openbracket), 0),
-                (default.FnToken(self.closeexpr, self.closebracket), 0)]
+    def taggers(self, parser):
+        return [(FnTagger(self.openexpr, self.openbracket), 0),
+                (FnTagger(self.closeexpr, self.closebracket), 0)]
         
     def filters(self, parser):
         return [(self.do_groups, 0)]
         return top
 
 
-class FieldsPlugin(TokenizingPlugin):
+class FieldsPlugin(TaggingPlugin):
     def __init__(self, expr=r"(?P<text>\w+):", remove_unknown=True):
         self.expr = expr
         self.removeunknown = remove_unknown
     
-    def tokens(self, parser):
-        return [(self.FieldnameToken(self.expr), 0)]
+    def taggers(self, parser):
+        return [(self.FieldnameTagger(self.expr), 0)]
     
     def filters(self, parser):
         return [(self.do_fieldnames, 100)]
     
     def do_fieldnames(self, parser, group):
-        fnclass = self.FieldnameNode
+        fnclass = syntax.FieldnameNode
         
         if self.removeunknown and parser.schema:
-            # Look for field tokens that aren't in the schema and convert them
+            # Look for field nodes that aren't in the schema and convert them
             # to text
             schema = parser.schema
-            newgroup = group.empty()
+            newgroup = group.empty_copy()
             text = None
             for node in group:
-                if isinstance(node, fnclass) and node.text not in schema:
+                if isinstance(node, fnclass) and node.fieldname not in schema:
                     text = node.original
                     continue
                 elif text:
                         node.text = text + node.text
                     else:
                         newgroup.append(syntax.WordNode(text))
+                    text = None
                 
                 newgroup.append(node)
+            if text:
+                newgroup.append(syntax.WordNode(text))
             group = newgroup
         
-        newgroup = group.empty()
+        newgroup = group.empty_copy()
         # Iterate backwards through the stream, looking for field-able objects
-        # with field tokens in front of them
+        # with field nodes in front of them
         i = len(group)
         while i > 0:
             i -= 1
                 node = self.do_fieldnames(parser, node)
             
             if i > 0 and not node.is_ws() and isinstance(group[i - 1], fnclass):
-                node.set_fieldname(group[i - 1].text, override=False)
+                node.set_fieldname(group[i - 1].fieldname, override=False)
                 i -= 1
             
             newgroup.append(node)
         newgroup.reverse()
         return newgroup
     
-    class FieldnameToken(default.RegexToken):
+    class FieldnameTagger(RegexTagger):
         def create(self, parser, match):
-            return FieldsPlugin.FieldnameNode(match.group("text"),
-                                              match.group(0))
+            return syntax.FieldnameNode(match.group("text"), match.group(0))
     
-    class FieldnameNode(syntax.SyntaxNode):
-        def __init__(self, text, original):
-            self.text = text
-            self.original = original
-            self.startchar = None
-            self.endchar = None
-            
-        def r(self):
-            return "<%s:>" % self.text
-
 
 class PhrasePlugin(Plugin):
-    # Didn't use TokenizingPlugin because I need to add slop parsing at some
+    # Didn't use TaggingPlugin because I need to add slop parsing at some
     # point
     
     def __init__(self, expr='"(?P<text>.*?)"'):
         self.expr = expr
     
-    def tokens(self, parser):
-        return [(self.PhraseToken(self.expr), 0)]
+    def taggers(self, parser):
+        return [(self.PhraseTagger(self.expr), 0)]
     
-    class PhraseToken(default.RegexToken):
+    class PhraseTagger(RegexTagger):
         def create(self, parser, match):
             return PhrasePlugin.PhraseNode(match.group("text"))
     
     
 
 class RangePlugin(Plugin):
-    class BracketToken(default.RegexToken):
+    class BracketTagger(RegexTagger):
         def __init__(self, expr, btype):
-            default.RegexToken.__init__(self, expr)
+            RegexTagger.__init__(self, expr)
             self.btype = btype
         
         def create(self, parser, match):
     def __init__(self):
         pass
     
-    def tokens(self, parser):
-        return [(self.BracketToken(r"\[|\{", self.rangeopen), 1),
-                (self.BracketToken(r"\]|\}", self.rangeclose), 1)]
+    def taggers(self, parser):
+        return [(self.BracketTagger(r"\[|\{", self.rangeopen), 1),
+                (self.BracketTagger(r"\]|\}", self.rangeclose), 1)]
     
     def filters(self, parser):
         return [(self.do_ranges, 10)]
     
-    def is_before(self, node):
-        return not (self.is_to(node) or isinstance(node, self.rangeclose))
-    
-    def is_to(self, node):
-        return node.has_text and node.text.lower() == "to"
-    
-    def is_after(self, node):
-        return not isinstance(node, self.rangeclose)
+    def is_to(self, text):
+        return text.lower() == "to"
     
     @classmethod
     def is_exclusive(cls, brackettext):
     def take_range(self, group, i):
         assert isinstance(group[i], self.rangeopen)
         open = group[i]
-        i += 1
         
-        before = []
-        while i < len(group) and self.is_before(group[i]):
-            before.append(group[i])
-            i += 1
-            
-        if i == len(group) or not self.is_to(group[i]):
-            return
-        i += 1
-        
-        after = []
-        while i < len(group) and self.is_after(group[i]):
-            after.append(group[i])
-            i += 1
-            
-        if i == len(group):
+        texts = []
+        j = i + 1
+        while j < len(group):
+            node = group[j]
+            if isinstance(node, self.rangeclose):
+                break
+            if node.has_text and not node.is_ws():
+                texts.append(node.text)
+            j += 1
+        else:
             return
         
-        assert isinstance(group[i], self.rangeclose)
-        close = group[i]
-        return (before, after, open, close, i + 1)
-    
-    def fix_nodes(self, nodelist):
-        while nodelist and nodelist[0].is_ws():
-            del nodelist[0]
-        while nodelist and nodelist[-1].is_ws():
-            del nodelist[-1]
+        close = group[j]
+        k = j + 1
+        if len(texts) == 1 and self.is_to(texts[0]):
+            return (open, None, None, close, k)
+        elif len(texts) == 2 and self.is_to(texts[0]):
+            return (open, None, texts[1], close, k)
+        elif len(texts) == 2 and self.is_to(texts[1]):
+            return (open, texts[0], None, close, k)
+        elif len(texts) == 3 and self.is_to(texts[1]):
+            return (open, texts[0], texts[2], close, k)
         
-        if not nodelist:
-            return None
-        else:
-            return self.to_placeholder(nodelist)
-    
+        return
+        
     def to_placeholder(self, nodelist):
         return syntax.Placeholder.from_nodes(nodelist)
     
     def do_ranges(self, parser, group):
         i = 0
         ropen, rclose = self.rangeopen, self.rangeclose
-        newgroup = group.empty()
+        newgroup = group.empty_copy()
         while i < len(group):
             node = group[i]
             if isinstance(node, ropen):
                 rnodes = self.take_range(group, i)
                 if rnodes:
-                    before, after, open, close, newi = rnodes
-                    before = self.fix_nodes(before)
-                    after = self.fix_nodes(after)
-                    
-                    if before or after:
-                        range = syntax.RangeNode(before, after, open.excl,
-                                                 close.excl)
-                        range.startchar = open.startchar
-                        range.endchar = close.endchar
-                        newgroup.append(range)
-                        i = newi
-                        continue
+                    open, start, end, close, newi = rnodes
+                    range = syntax.RangeNode(start, end, open.excl, close.excl)
+                    range.startchar = open.startchar
+                    range.endchar = close.endchar
+                    newgroup.append(range)
+                    i = newi
+                    continue
             
             if node.__class__ not in (ropen, rclose):
                 newgroup.append(node)
 
 
 class OperatorsPlugin(Plugin):
-    class OpToken(default.RegexToken):
+    class OpTagger(RegexTagger):
         def __init__(self, expr, grouptype, optype=syntax.InfixOperator,
                      leftassoc=True):
-            default.RegexToken.__init__(self, expr)
+            RegexTagger.__init__(self, expr)
             self.grouptype = grouptype
             self.optype = optype
             self.leftassoc = leftassoc
             ops = []
         
         if not clean:
-            otoken = self.OpToken
+            otagger = self.OpTagger
             if Not:
-                ops.append((otoken(Not, syntax.NotGroup, syntax.PrefixOperator), 0))
+                ops.append((otagger(Not, syntax.NotGroup, syntax.PrefixOperator), 0))
             if And:
-                ops.append((otoken(And, syntax.AndGroup), 0))
+                ops.append((otagger(And, syntax.AndGroup), 0))
+            if Or:
+                ops.append((otagger(Or, syntax.OrGroup), 0))
             if AndNot:
-                ops.append((otoken(AndNot, syntax.AndNotGroup), -5))
+                ops.append((otagger(AndNot, syntax.AndNotGroup), -5))
             if AndMaybe:
-                ops.append((otoken(AndMaybe, syntax.AndMaybeGroup), -5))
-            if Or:
-                ops.append((otoken(Or, syntax.OrGroup), 0))
+                ops.append((otagger(AndMaybe, syntax.AndMaybeGroup), -5))
             if Require:
-                ops.append((otoken(Require, syntax.RequireGroup), 0))
+                ops.append((otagger(Require, syntax.RequireGroup), 0))
         
         self.ops = ops
     
-    def tokens(self, parser):
+    def taggers(self, parser):
         return self.ops
     
     def filters(self, parser):
         return [(self.do_operators, 600)]
     
     def do_operators(self, parser, group):
-        # Do left associative operators forward
-        i = 0
-        while i < len(group):
-            node = group[i]
-            if isinstance(node, syntax.Operator) and node.leftassoc:
-                i = node.replace_self(parser, group, i)
+        for tagger, _ in self.ops:
+            optype = tagger.optype
+            gtype = tagger.grouptype
+            if tagger.leftassoc:
+                i = 0
+                while i < len(group):
+                    t = group[i]
+                    if isinstance(t, optype) and t.grouptype is gtype:
+                        i = t.replace_self(parser, group, i)
+                    else:
+                        i += 1
             else:
-                i += 1
+                i = len(group) - 1
+                while i >= 0:
+                    t = group[i]
+                    if isinstance(t, optype):
+                        i = t.replace_self(parser, group, i)
+                    i -= 1
         
-        # Do right associative operators in reverse
-        i = len(group) - 1
-        while i >= 0:
-            node = group[i]
-            if isinstance(node, syntax.Operator) and not node.leftassoc:
-                i = node.replace_self(parser, group, i)
-            i -= 1
-        
-        for i, node in enumerate(group):
-            if isinstance(node, syntax.GroupNode):
-                group[i] = self.do_operators(parser, node)
+        for i, t in enumerate(group):
+            if isinstance(t, syntax.GroupNode):
+                group[i] = self.do_operators(parser, t)
         
         return group
     
 
+#
 
class PlusMinusPlugin(Plugin):
    """Adds the ability to use ``+`` (require) and ``-`` (prohibit) markers
    in an otherwise flat OR query.
    """
    
    # Marker nodes inserted into the stream by the taggers
    class plus(syntax.SyntaxNode): pass
    class minus(syntax.SyntaxNode): pass
    
    def __init__(self, plusexpr="\\+", minusexpr="-"):
        self.plusexpr = plusexpr
        self.minusexpr = minusexpr
    
    def taggers(self, parser):
        return [(FnTagger(self.plusexpr, self.plus), 0),
                (FnTagger(self.minusexpr, self.minus), 0)]
    
    def filters(self, parser):
        return [(self.do_plusminus, 510)]
    
    def do_plusminus(self, parser, group):
        """Sorts nodes into required (+), banned (-) and optional buckets,
        then combines them into an AndMaybe/AndNot structure.
        """
        
        required = syntax.AndGroup()
        optional = syntax.OrGroup()
        banned = syntax.OrGroup()
        
        # A marker only applies to the single node that follows it
        target = optional
        for node in group:
            if isinstance(node, self.plus):
                target = required
            elif isinstance(node, self.minus):
                target = banned
            else:
                target.append(node)
                target = optional
        
        result = optional
        if required:
            result = syntax.AndMaybeGroup([required, result])
        if banned:
            result = syntax.AndNotGroup([result, banned])
        return result
 
 
class GtLtPlugin(TaggingPlugin):
    """Allows prefixing a term with a relational operator (``<``, ``>``,
    ``<=``, ``>=``, ``=<``, ``=>``) after a field name to create an
    open-ended range, e.g. ``count:>100``.
    """
    
    class GtLtNode(syntax.SyntaxNode):
        def __init__(self, rel):
            # The relational operator text, e.g. "<="
            self.rel = rel
        
        def __repr__(self):
            return "(%s)" % self.rel
        
    expr = r"(?P<rel>(<=|>=|<|>|=<|=>))"
    nodetype = GtLtNode
    
    def filters(self, parser):
        # Run just before the FieldsPlugin (priority 100) assigns fields
        return [(self.do_gtlt, 99)]
    
    def do_gtlt(self, parser, group):
        """Replaces (FieldnameNode, GtLtNode, text) sequences with the
        field name followed by an open-ended RangeNode; any other GtLtNode
        is simply dropped from the stream.
        """
        
        gtltnode = self.GtLtNode
        newgroup = group.empty_copy()
        prev = None
        for node in group:
            if isinstance(node, gtltnode):
                # Only keep the operator if it directly follows a field name
                if isinstance(prev, syntax.FieldnameNode):
                    prev = node
                else:
                    prev = None
                continue
            elif node.has_text and isinstance(prev, gtltnode):
                # Convert the text following the operator into a range
                node = self.make_range(node.text, prev.rel)
            newgroup.append(node)
            # BUG FIX: track the previous node on every iteration. Before,
            # prev was only ever assigned inside the GtLtNode branch, so the
            # FieldnameNode check above could never succeed and the filter
            # never produced a range.
            prev = node
        return newgroup
            
    def make_range(self, text, rel):
        """Returns a RangeNode open on one end according to the operator."""
        
        if rel == "<":
            return syntax.RangeNode(None, text, False, True)
        elif rel == ">":
            return syntax.RangeNode(text, None, True, False)
        elif rel == "<=" or rel == "=<":
            return syntax.RangeNode(None, text, False, False)
        elif rel == ">=" or rel == "=>":
            return syntax.RangeNode(text, None, False, False)
 
 
class MultifieldPlugin(Plugin):
    """Converts unfielded terms into a group (OR by default) of copies of
    the term, one per configured field name.
    """
    
    def __init__(self, fieldnames, fieldboosts=None, group=syntax.OrGroup):
        """
        :param fieldnames: the list of field names to search.
        :param fieldboosts: an optional dictionary mapping field names to
            boosts.
        :param group: the group class used to wrap the per-field copies.
        """
        
        self.fieldnames = fieldnames
        self.boosts = fieldboosts or {}
        self.group = group
    
    def filters(self, parser):
        return [(self.do_multifield, 110)]
    
    def do_multifield(self, parser, group):
        boosts = self.boosts
        for i, node in enumerate(group):
            if isinstance(node, syntax.GroupNode):
                # Recurse into sub-groups
                group[i] = self.do_multifield(parser, node)
            elif node.has_fieldname and node.fieldname is None:
                # Replace the unfielded node with a group of copies, one
                # per configured field, applying any per-field boost
                copies = []
                for fname in self.fieldnames:
                    fnode = copy.copy(node)
                    fnode.set_fieldname(fname)
                    fnode.set_boost(boosts.get(fname, 1.0))
                    copies.append(fnode)
                group[i] = self.group(copies)
        return group
 
 
class FieldAliasPlugin(Plugin):
    """Rewrites alternative ("alias") field names to canonical field names.

    Constructed with a mapping of ``{canonical_name: [alias, ...]}``; any
    node tagged with an alias has its field name overridden with the
    canonical name.
    """

    def __init__(self, fieldmap):
        self.fieldmap = fieldmap
        # Invert the mapping into alias -> canonical for O(1) lookup.
        self.reverse = dict((alias, key)
                            for key, aliases in iteritems(fieldmap)
                            for alias in aliases)

    def filters(self, parser):
        # Priority 90 in this file's filter chain.
        return [(self.do_aliases, 90)]

    def do_aliases(self, parser, group):
        for pos, node in enumerate(group):
            if isinstance(node, syntax.GroupNode):
                # Recurse into sub-groups in place.
                group[pos] = self.do_aliases(parser, node)
            elif node.has_fieldname and node.fieldname is not None:
                canonical = self.reverse.get(node.fieldname)
                if canonical is not None:
                    node.set_fieldname(canonical, override=True)
        return group
 
 
class CopyFieldPlugin(Plugin):
    """Duplicates fielded nodes into additional fields.

    For every node whose (explicit or default) field appears as a key in
    the map, a copy of the node targeted at the mapped field is inserted
    alongside the original.
    """

    def __init__(self, map, mirror=False):
        # map: {source_field: destination_field}
        self.map = map
        if mirror:
            # Add in reversed mappings so copying works both ways.
            # NOTE(review): this mutates the dict object passed by the
            # caller — confirm that is intended.
            map.update(dict((v, k) for k, v in iteritems(map)))

    def filters(self, parser):
        # Priority 109 in this file's filter chain.
        return [(self.do_copyfield, 109)]

    def do_copyfield(self, parser, group):
        fieldmap = self.map
        newgroup = group.empty_copy()
        for node in group:
            if isinstance(node, syntax.GroupNode):
                # Recurse into sub-groups.
                node = self.do_copyfield(parser, node)
            elif node.has_fieldname:
                fieldname = node.fieldname or parser.fieldname
                if fieldname in fieldmap:
                    # Insert a copy aimed at the mapped field...
                    dup = copy.copy(node)
                    dup.set_fieldname(fieldmap[fieldname], override=True)
                    newgroup.append(dup)
            # ...and always keep the original node as well.
            newgroup.append(node)
        return newgroup
 
+
+
+
+
+
+
+
+

File src/whoosh/qparser/syntax2.py

-# Copyright 2010 Matt Chaput. All rights reserved.
+# Copyright 2011 Matt Chaput. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
     def __repr__(self):
         r = "<"
         if self.has_fieldname:
-            r += "%s:" % self.fieldname
+            r += "%r:" % self.fieldname
         r += self.r()
         if self.has_boost and self.boost != 1.0:
             r += " ^%s" % self.boost
         return fn_wrapper(self)
     
     def query(self, parser):
-        raise NotImplementedError
+        raise NotImplementedError(self.__class__.__name__)
     
     def is_ws(self):
         return False
         return True
 
 
class FieldnameNode(SyntaxNode):
    """Syntax node representing a field-name prefix in the query string."""

    has_fieldname = True

    def __init__(self, fieldname, original):
        # fieldname: the parsed field name.
        # original: the original text matched in the query string.
        self.fieldname = fieldname
        self.original = original
        # Character positions are filled in later by the tagger.
        self.startchar = None
        self.endchar = None

    def r(self):
        return "(%r:)" % self.fieldname
+
+
 class GroupNode(SyntaxNode):
     has_boost = True
     merging = True
                               boost=self.boost, **self.kwargs)
     
     def query(self, parser):
-        return self.qclass([t.query(parser) for t in self.tokens],
+        return self.qclass([node.query(parser) for node in self.nodes],
                            boost=self.boost, **self.kwargs)
 
-    def empty(self):
+    def empty_copy(self):
         c = self.__class__(**self.kwargs)
         if self.has_boost:
             c.boost = self.boost
     def __setitem__(self, n, v):
         self.nodes.__setitem__(n, v)
     
+    def __delitem__(self, n):
+        self.nodes.__delitem__(n)
+    
     def insert(self, n, v):
         self.nodes.insert(n, v)
     
     merging = False
     
     def query(self, parser):
-        assert len(self.nodes) == 1
         return self.qclass(self.nodes[0].query(parser))
 
 
class ErrorNode(SyntaxNode):
    """Syntax node representing a parse error.

    Carries a descriptive message and, optionally, the node that caused
    the error.
    """

    def __init__(self, message, node=None):
        # BUG FIX: the original called Wrapper.__init__ even though this
        # class extends SyntaxNode, and query() then read self.nodes[0]
        # while r() and the guard read self.node. Store the wrapped node
        # under a single consistent attribute.
        self.message = message
        self.node = node

    def r(self):
        return "ERR %r %r" % (self.node, self.message)

    def query(self, parser):
        # Delegate to the wrapped node if there is one; otherwise the
        # error contributes nothing to the final query.
        if self.node:
            return self.node.query(parser)
        else:
            return query.NullQuery
+
+
 class AndGroup(GroupNode):
     qclass = query.And
 
 class RangeNode(SyntaxNode):
     has_fieldname = True
     
-    def __init__(self, startnode, endnode, startexcl, endexcl):
-        self.startnode = startnode
-        self.endnode = endnode
-        self.nodes = [startnode, endnode]
+    def __init__(self, start, end, startexcl, endexcl):
+        self.start = Placeholder(start) if start is not None else None
+        self.end = Placeholder(end) if end is not None else None
+        self.nodes = [self.start, self.end]
         self.startexcl = startexcl
         self.endexcl = endexcl
         self.boost = 1.0
     def r(self):
         b1 = "{" if self.startexcl else "["
         b2 = "}" if self.startexcl else "]"
-        return "%s%r %r%s" % (b1, self.startnode, self.endnode, b2)
+        return "%s%r %r%s" % (b1, self.start, self.end, b2)
     
     def apply(self, fn):
         return self.__class__(fn(self.startnode), fn(self.endnode),
         
     def query(self, parser):
         fieldname = self.fieldname or parser.fieldname
-        startnode, endnode = self.startnode, self.endnode
-        if not (startnode.has_text and endnode.has_text):
-            raise SyntaxError("Not all nodes in range %r have text" % self)
-        start, end = startnode.text, endnode.text
+        start = None if self.start is None else self.start.text
+        end = None if self.end is None else self.end.text
         
         if parser.schema and fieldname in parser.schema:
             field = parser.schema[fieldname]
                     if q is not None:
                         return q
                 except QueryParserError:
-                    pass
+                    return query.NullQuery
             
             if start:
-                start = get_single_text(fieldname, start, tokenize=False,
+                start = get_single_text(field, start, tokenize=False,
                                         removestops=False)
             if end:
-                end = get_single_text(fieldname, end, tokenize=False,
+                end = get_single_text(field, end, tokenize=False,
                                       removestops=False)
         
         return query.TermRange(fieldname, start, end, self.startexcl,

File src/whoosh/qparser/taggers.py

+# Copyright 2011 Matt Chaput. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#    1. Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#
+#    2. Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT