Commits

Matt Chaput  committed 58cf479

More PEP 8 line length fixes.

  • Participants
  • Parent commits e949504

Comments (0)

Files changed (25)

File src/whoosh/filedb/fieldcache.py

     def is_loaded(self, key):
         """Returns True if an object exists in memory for the given key. This
         might be useful for scenarios where code can use a field cache if it's
-        already loaded, but is not important enough to load it for its own sake.
+        already loaded, but is not important enough to load it for its own
+        sake.
         """
 
         raise NotImplementedError
         filename = self._filename(key)
         gzfilename = filename + ".gz"
         gzipped = False
-        if storage.file_exists(gzfilename) and not storage.file_exists(filename):
+        if (storage.file_exists(gzfilename)
+            and not storage.file_exists(filename)):
             filename = gzfilename
             gzipped = True
 

File src/whoosh/filedb/fileindex.py

 from whoosh import __version__
 from whoosh.compat import pickle, integer_types, string_type, iteritems
 from whoosh.fields import ensure_schema
-from whoosh.index import Index, EmptyIndexError, IndexVersionError, _DEF_INDEX_NAME
+from whoosh.index import (Index, EmptyIndexError, IndexVersionError,
+                          _DEF_INDEX_NAME)
 from whoosh.reading import EmptyReader, MultiReader
 from whoosh.store import Storage, LockError
 from whoosh.system import _INT_SIZE, _FLOAT_SIZE, _LONG_SIZE
 def _read_toc(storage, schema, indexname):
     gen = _latest_generation(storage, indexname)
     if gen < 0:
-        raise EmptyIndexError("Index %r does not exist in %r" % (indexname, storage))
+        raise EmptyIndexError("Index %r does not exist in %r"
+                              % (indexname, storage))
 
     # Read the content of this index from the .toc file.
     tocfilename = _toc_filename(indexname, gen)
         sz = stream.read_varint()
         if sz != target:
             raise IndexError("Index was created on different architecture:"
-                             " saved %s = %s, this computer = %s" % (name, sz, target))
+                             " saved %s = %s, this computer = %s"
+                             % (name, sz, target))
 
     check_size("int", _INT_SIZE)
     check_size("long", _LONG_SIZE)
     version = stream.read_int()
     if version != _INDEX_VERSION:
         raise IndexVersionError("Can't read format %s" % version, version)
-    release = (stream.read_varint(), stream.read_varint(), stream.read_varint())
+    release = (stream.read_varint(), stream.read_varint(),
+               stream.read_varint())
 
     # If the user supplied a schema object with the constructor, don't load
     # the pickled schema from the saved index.
 
         assert isinstance(name, string_type)
         assert isinstance(doccount, integer_types)
-        assert fieldlength_totals is None or isinstance(fieldlength_totals, dict), "fl_totals=%r" % fieldlength_totals
-        assert fieldlength_maxes is None or isinstance(fieldlength_mins, dict), "fl_mins=%r" % fieldlength_maxes
-        assert fieldlength_maxes is None or isinstance(fieldlength_maxes, dict), "fl_maxes=%r" % fieldlength_maxes
+        assert (fieldlength_totals is None
+                or isinstance(fieldlength_totals, dict))
+        assert fieldlength_maxes is None or isinstance(fieldlength_mins, dict)
+        assert fieldlength_maxes is None or isinstance(fieldlength_maxes, dict)
 
         self.name = name
         self.generation = generation

File src/whoosh/filedb/filepostings.py

             if block:
                 self._write_block()
 
-            # Seek back to the start of this list of posting blocks and write the
-            # number of blocks
+            # Seek back to the start of this list of posting blocks and write
+            # the number of blocks
             pf = self.postfile
             pf.flush()
             offset = pf.tell()
     def __init__(self, postfile, offset, format, scorer=None, term=None,
                  stringids=False):
 
-        assert isinstance(offset, integer_types), "offset is %r/%s" % (offset, type(offset))
-        assert isinstance(format, Format), "format is %r/%s" % (format, type(format))
+        assert isinstance(offset, integer_types)
+        assert isinstance(format, Format)
 
         self.postfile = postfile
         self.startoffset = offset
         elif self.supports_poses:
             return [Span(pos) for pos in self.value_as("positions")]
         else:
-            raise Exception("Field does not support positions (%r)" % self._term)
+            raise Exception("Field does not support positions (%r)"
+                            % self._term)
 
     def weight(self):
         weights = self.block.weights

File src/whoosh/filedb/filereading.py

         postings = terminfo.postings
         if isinstance(postings, integer_types):
             postreader = FilePostingReader(self.postfile, postings, format,
-                                           scorer=scorer, term=(fieldname, text))
+                                           scorer=scorer,
+                                           term=(fieldname, text))
         else:
             docids, weights, values = postings
             postreader = ListMatcher(docids, weights, values, format,
             raise Exception("No vector found for document"
                             " %s field %r" % (docnum, fieldname))
 
-        return FilePostingReader(self.vpostfile, offset, vformat, stringids=True)
+        return FilePostingReader(self.vpostfile, offset, vformat,
+                                 stringids=True)
 
     # DAWG methods
 
             # Use the default caching policy but turn off saving caches to disk
             reader.set_caching_policy(save=False)
             
-            # Use the default caching policy but save caches to a custom storage
+            # Use the default caching policy but save caches to a custom
+            # storage
             from whoosh.filedb.filestore import FileStorage
             mystorage = FileStorage("path/to/cachedir")
             reader.set_caching_policy(storage=mystorage)

File src/whoosh/filedb/filestore.py

 
     def open_file(self, name, *args, **kwargs):
         try:
-            f = StructFile(open(self._fpath(name), "rb"), name=name, *args, **kwargs)
+            f = StructFile(open(self._fpath(name), "rb"), name=name, *args,
+                           **kwargs)
         except IOError:
             #print("Tried to open %r, files=%r" % (name, self.list()))
             raise
     def open_file(self, name, *args, **kwargs):
         if name not in self.files:
             raise NameError("No such file %r" % name)
-        return StructFile(BytesIO(self.files[name]), name=name, *args, **kwargs)
+        return StructFile(BytesIO(self.files[name]), name=name, *args,
+                          **kwargs)
 
     def lock(self, name):
         if name not in self.locks:

File src/whoosh/filedb/filetables.py

         for docnum, fieldname, byte in items:
             if byte:
                 if fieldname not in lengths:
-                    lengths[fieldname] = array("B", (0 for _ in xrange(self.doccount)))
+                    zeros = (0 for _ in xrange(self.doccount))
+                    lengths[fieldname] = array("B", zeros)
                 lengths[fieldname][docnum] = byte
 
     def add(self, docnum, fieldname, byte):
         lengths = self.lengths
         if byte:
             if fieldname not in lengths:
-                lengths[fieldname] = array("B", (0 for _ in xrange(self.doccount)))
+                zeros = (0 for _ in xrange(self.doccount))
+                lengths[fieldname] = array("B", zeros)
             lengths[fieldname][docnum] = byte
 
     def reader(self):
 
     def __getitem__(self, num):
         if num > self.length - 1:
-            raise IndexError("Tried to get document %s, file has %s" % (num, self.length))
+            raise IndexError("Tried to get document %s, file has %s"
+                             % (num, self.length))
 
         dbfile = self.dbfile
         start = self.directory_offset + num * stored_pointer_size
         dbfile.seek(start)
         ptr = dbfile.read(stored_pointer_size)
         if len(ptr) != stored_pointer_size:
-            raise Exception("Error reading %r @%s %s < %s" % (dbfile, start, len(ptr), stored_pointer_size))
+            raise Exception("Error reading %r @%s %s < %s"
+                            % (dbfile, start, len(ptr), stored_pointer_size))
         position, length = unpack_stored_pointer(ptr)
         vlist = loads(dbfile.map[position:position + length] + b("."))
 
 
 
 class FileTermInfo(TermInfo):
-    # Freq, Doc freq, min length, max length, max weight, max WOL, min ID, max ID
+    # Freq, Doc freq, min len, max length, max weight, max WOL, min ID, max ID
     struct = Struct("!fIBBffII")
 
     def __init__(self, weight=0.0, docfreq=0, minlength=None, maxlength=0,
     def from_string(cls, s):
         hbyte = ord(s[0:1])
         if hbyte < 2:
-            # Freq, Doc freq, min length, max length, max weight, max WOL, min ID, max ID
-            f, df, ml, xl, xw, xwol, mid, xid = cls.struct.unpack(s[1:cls.struct.size + 1])
+            st = cls.struct
+            # Freq, Doc freq, min len, max len, max w, max WOL, min ID, max ID
+            f, df, ml, xl, xw, xwol, mid, xid = st.unpack(s[1:st.size + 1])
             mid = None if mid == NO_ID else mid
             xid = None if xid == NO_ID else xid
             # Postings
-            pstr = s[cls.struct.size + 1:]
+            pstr = s[st.size + 1:]
             if hbyte == 0:
                 p = unpack_long(pstr)[0]
             else:

File src/whoosh/filedb/filewriting.py

         self.writelock = None
         if _lk:
             self.writelock = ix.lock("WRITELOCK")
-            if not try_for(self.writelock.acquire, timeout=timeout, delay=delay):
+            if not try_for(self.writelock.acquire, timeout=timeout,
+                           delay=delay):
                 raise LockError
 
         info = ix._read_toc()
         self.schema = info.schema
         self.segments = info.segments
-        self.storage = ix.storage
+        self.storage = storage = ix.storage
         self.indexname = ix.indexname
         self.is_closed = False
 
             self._doc_offsets.append(base)
             base += s.doc_count_all()
 
-        self.name = name or Segment.basename(self.indexname, self.segment_number)
+        self.name = name or Segment.basename(self.indexname,
+                                             self.segment_number)
         self.docnum = 0
         self.fieldlength_totals = defaultdict(int)
         self._added = False
         self.wordsets = {}
         self.dawg = None
         if any(field.spelling for field in self.schema):
-            self.dawgfile = self.storage.create_file(segment.dawg_filename)
+            self.dawgfile = storage.create_file(segment.dawg_filename)
             self.dawg = DawgBuilder(field_root=True)
 
         # Terms index
-        tf = self.storage.create_file(segment.termsindex_filename)
+        tf = storage.create_file(segment.termsindex_filename)
         ti = TermIndexWriter(tf)
         # Term postings file
-        pf = self.storage.create_file(segment.termposts_filename)
+        pf = storage.create_file(segment.termposts_filename)
         pw = FilePostingWriter(pf, blocklimit=blocklimit)
         # Terms writer
         self.termswriter = TermsWriter(self.schema, ti, pw, self.dawg)
 
         if self.schema.has_vectored_fields():
             # Vector index
-            vf = self.storage.create_file(segment.vectorindex_filename)
+            vf = storage.create_file(segment.vectorindex_filename)
             self.vectorindex = TermVectorWriter(vf)
 
             # Vector posting file
-            vpf = self.storage.create_file(segment.vectorposts_filename)
+            vpf = storage.create_file(segment.vectorposts_filename)
             self.vpostwriter = FilePostingWriter(vpf, stringids=True)
         else:
             self.vectorindex = None
             self.vpostwriter = None
 
         # Stored fields file
-        sf = self.storage.create_file(segment.storedfields_filename)
+        sf = storage.create_file(segment.storedfields_filename)
         self.storedfields = StoredFieldWriter(sf, self.schema.stored_names())
 
         # Field lengths file
-        self.lengthfile = self.storage.create_file(segment.fieldlengths_filename)
+        self.lengthfile = storage.create_file(segment.fieldlengths_filename)
 
         # Create the pool
         if poolclass is None:
                 for fieldname in reader.schema.scorable_names():
                     length = reader.doc_field_length(docnum, fieldname)
                     if length and fieldname in fieldnames:
-                        self.pool.add_field_length(self.docnum, fieldname, length)
+                        self.pool.add_field_length(self.docnum, fieldname,
+                                                   length)
 
                 for fieldname in reader.schema.vector_names():
                     if (fieldname in fieldnames
                         and reader.has_vector(docnum, fieldname)):
                         vpostreader = reader.vector(docnum, fieldname)
-                        self._add_vector_reader(self.docnum, fieldname, vpostreader)
+                        self._add_vector_reader(self.docnum, fieldname,
+                                                vpostreader)
 
                 self.docnum += 1
 
         # Check if the caller gave us a bogus field
         for name in fieldnames:
             if name not in schema:
-                raise UnknownFieldError("No field named %r in %s" % (name, schema))
+                raise UnknownFieldError("No field named %r in %s"
+                                        % (name, schema))
 
         storedvalues = {}
 
         offset = vpostwriter.start(self.schema[fieldname].vector)
         while vreader.is_active():
             # text, weight, valuestring, fieldlen
-            vpostwriter.write(vreader.id(), vreader.weight(), vreader.value(), 0)
+            vpostwriter.write(vreader.id(), vreader.weight(), vreader.value(),
+                              0)
             vreader.next()
         vpostwriter.finish()
 
                 # Tell the pool we're finished adding information, it should
                 # add its accumulated data to the lengths, terms index, and
                 # posting files.
-                self.pool.finish(self.termswriter, self.docnum, self.lengthfile)
+                self.pool.finish(self.termswriter, self.docnum,
+                                 self.lengthfile)
 
                 # Write out spelling files
                 if self.dawg:

File src/whoosh/filedb/gae.py

         self.value = self.getvalue()
         if oldvalue != self.value:
             self.put()
-            memcache.set(self.key().id_or_name(), self.value, namespace="DatastoreFile")
+            memcache.set(self.key().id_or_name(), self.value,
+                         namespace="DatastoreFile")
 
     def tell(self):
         return self.data.tell()

File src/whoosh/filedb/multiproc.py

     def run(self):
         jobqueue = self.jobqueue
         rqueue = self.resultqueue
-        subpool = self.subpool = TempfilePool(self.schema, limitmb=self.limitmb,
+        subpool = self.subpool = TempfilePool(self.schema,
+                                              limitmb=self.limitmb,
                                               dir=self.dir)
 
         if self.firstjob:
 
     def _new_task(self, firstjob):
         task = PoolWritingTask(self.schema, self.dir, self.jobqueue,
-                               self.resultqueue, self.limitmb, firstjob=firstjob)
+                               self.resultqueue, self.limitmb,
+                               firstjob=firstjob)
         self.tasks.append(task)
         task.start()
         return task
         runs = []
         lenfilenames = []
         for task in self.tasks:
-            taskruns, flentotals, flenmins, flenmaxes, lenfilename = rqueue.get()
-            runs.extend(taskruns)
+            truns, flentotals, flenmins, flenmaxes, lenfilename = rqueue.get()
+            runs.extend(truns)
             lenfilenames.append(lenfilename)
             for fieldname, total in iteritems(flentotals):
                 _fieldlength_totals[fieldname] += total
 
         lw = LengthWriter(lengthfile, doccount)
         for lenfilename in lenfilenames:
-            sublengths = LengthReader(StructFile(open(lenfilename, "rb")), doccount)
+            sublengths = LengthReader(StructFile(open(lenfilename, "rb")),
+                                      doccount)
             lw.add_all(sublengths)
             os.remove(lenfilename)
         lw.close()
 #                runs = pool.map(merge_runs, runs2)
 #            pool.close()
 
-        iterator = imerge([read_run(runname, count) for runname, count in runs])
+        iterator = imerge([read_run(rname, count) for rname, count in runs])
         total = sum(count for runname, count in runs)
         termswriter.add_iter(iterator, lengths.get)
         for runname, count in runs:

File src/whoosh/filedb/pools.py

         """Merge-sorts items from a list of iterators.
         """
 
-        _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration
+        _heappop, _heapreplace, _StopIteration = (heappop, heapreplace,
+                                                  StopIteration)
 
         h = []
         h_append = h.append
         while 1:
             try:
                 while 1:
-                    v, itnum, next = s = h[0]   # raises IndexError when h is empty
+                    v, itnum, next = s = h[0]
                     yield v
-                    s[0] = next()               # raises StopIteration when exhausted
-                    _heapreplace(h, s)          # restore heap condition
+                    s[0] = next()
+                    _heapreplace(h, s)
             except _StopIteration:
-                _heappop(h)                     # remove empty iterator
+                _heappop(h)
             except IndexError:
                 return
 
         # putting the tuple in the postings list
         #self.size += 48 + sum(getsizeof(o) for o in tup) + 4
         valsize = len(valuestring) if valuestring else 0
-        self.size += 48 + len(fieldname) + 22 + len(text) + 26 + 16 + 16 + valsize + 22 + 4
+        self.size += (48 + len(fieldname) + 22 + len(text)
+                      + 26 + 16 + 16 + valsize + 22 + 4)
         self.postings.append(tup)
         self.count += 1
 
         con.execute("PRAGMA synchronous=OFF")
         if name not in self.fieldnames:
             self.fieldnames.add(name)
-            con.execute("create table postings (token text, docnum int, weight float, value blob)")
+            con.execute("create table postings "
+                        "(token text, docnum int, weight float, value blob)")
             #con.execute("create index postix on postings (token, docnum)")
         return con
 
 
         for name in sorted(self.fieldnames):
             con = self._con(name)
-            for text, docnum, weight, valuestring in con.execute("select * from postings order by token, docnum"):
+            cmd = "select * from postings order by token, docnum"
+            for text, docnum, weight, valuestring in con.execute(cmd):
                 yield (name, text, docnum, weight, valuestring)
             con.close()
             os.remove(self._field_filename(name))

File src/whoosh/filedb/postblocks.py

         flags = 1 if compression else 0
         blocksize = sum((self._struct.size, len(maxid_string), len(ids_string),
                          len(weights_string), len(values_string)))
-        header = self._struct.pack(blocksize, flags, postcount, typecode.encode('latin-1'),
-                                   0, len(ids_string), len(weights_string),
+        header = self._struct.pack(blocksize, flags, postcount,
+                                   typecode.encode('latin-1'), 0,
+                                   len(ids_string), len(weights_string),
                                    self.max_weight(), self.max_wol(), 0, 0,
                                    self._maxlength, self._minlength or 0)
 

File src/whoosh/filedb/structfile.py

 from whoosh.compat import integer_types, b
 from whoosh.system import (_INT_SIZE, _SHORT_SIZE, _FLOAT_SIZE, _LONG_SIZE,
                            pack_byte, pack_sbyte, pack_ushort, pack_int,
-                           pack_uint, pack_long, pack_float,
-                           unpack_byte, unpack_sbyte, unpack_ushort, unpack_int,
+                           pack_uint, pack_long, pack_float, unpack_byte,
+                           unpack_sbyte, unpack_ushort, unpack_int,
                            unpack_uint, unpack_long, unpack_float, IS_LITTLE)
 from whoosh.util import (varint, read_varint, signed_varint,
                          decode_signed_varint, float_to_byte, byte_to_float)
                 import mmap
 
                 try:
-                    self.map = mmap.mmap(fd, self.size, access=mmap.ACCESS_READ)
+                    self.map = mmap.mmap(fd, self.size,
+                                         access=mmap.ACCESS_READ)
                 except OSError:
                     self._setup_fake_map()
         else:
         self.file.write(signed_varint(i))
 
     def read_varint(self):
-        """Reads a variable-length encoded unsigned integer from the wrapped file.
+        """Reads a variable-length encoded unsigned integer from the wrapped
+        file.
         """
         return read_varint(self.file.read)
 
     def read_svarint(self):
-        """Reads a variable-length encoded signed integer from the wrapped file.
+        """Reads a variable-length encoded signed integer from the wrapped
+        file.
         """
         return decode_signed_varint(read_varint(self.file.read))
 

File src/whoosh/qparser/dateparse.py

         self.name = name
 
     def __repr__(self):
-        return "%s<%s>%r" % (self.__class__.__name__, self.name or '', self.elements)
+        return "%s<%s>%r" % (self.__class__.__name__, self.name or '',
+                             self.elements)
 
 
 class Sequence(MultiBase):
         foundall = False
         failed = False
 
-        print_debug(debug, "Seq %s sep=%r text=%r", self.name, self.sep_pattern, text[pos:])
+        print_debug(debug, "Seq %s sep=%r text=%r", self.name,
+                    self.sep_pattern, text[pos:])
         for e in self.elements:
             print_debug(debug, "Seq %s text=%r", self.name, text[pos:])
             if self.sep_expr and not first:
         dates = []
         first = True
 
-        print_debug(debug, "Combo %s sep=%r text=%r", self.name, self.sep_pattern, text[pos:])
+        print_debug(debug, "Combo %s sep=%r text=%r", self.name,
+                    self.sep_pattern, text[pos:])
         for e in self.elements:
             if self.sep_expr and not first:
-                print_debug(debug, "Combo %s looking for sep at %r", self.name, text[pos:])
+                print_debug(debug, "Combo %s looking for sep at %r",
+                            self.name, text[pos:])
                 m = self.sep_expr.match(text, pos)
                 if m:
                     pos = m.end()
     def __init__(self, next, last, daynames):
         self.next_pattern = next
         self.last_pattern = last
-        self._dayname_exprs = tuple(rcompile(pat, re.IGNORECASE) for pat in daynames)
+        self._dayname_exprs = tuple(rcompile(pat, re.IGNORECASE)
+                                    for pat in daynames)
         dn_pattern = "|".join(daynames)
-        self.pattern = "(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))" % (next, last, dn_pattern)
+        self.pattern = ("(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
+                        % (next, last, dn_pattern))
         self.expr = rcompile(self.pattern, re.IGNORECASE)
 
     def props_to_date(self, p, dt):
 
 class Time12(Regex):
     def __init__(self):
-        self.pattern = "(?P<hour>[1-9]|10|11|12)(:(?P<mins>[0-5][0-9])(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?\\s*(?P<ampm>am|pm)(?=(\\W|$))"
+        self.pattern = ("(?P<hour>[1-9]|10|11|12)(:(?P<mins>[0-5][0-9])"
+                        "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?"
+                        "\\s*(?P<ampm>am|pm)(?=(\\W|$))")
         self.expr = rcompile(self.pattern, re.IGNORECASE)
 
     def props_to_date(self, p, dt):
                 lambda p, dt: adatetime(day=p.day))
     year = Regex("(?P<year>[0-9]{4})(?=(\\W|$))",
                  lambda p, dt: adatetime(year=p.year))
-    time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?(?=(\\W|$))",
-                   lambda p, dt: adatetime(hour=p.hour, minute=p.mins, second=p.secs, microsecond=p.usecs))
+    time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])"
+                   "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?"
+                   "(?=(\\W|$))",
+                   lambda p, dt: adatetime(hour=p.hour, minute=p.mins,
+                                           second=p.secs, microsecond=p.usecs))
     time12 = Time12()
 
     def __init__(self):
         simple_second = "(?P<second>[0-5][0-9])"
         simple_usec = "(?P<microsecond>[0-9]{6})"
 
-        simple_seq = Sequence((simple_year, simple_month, simple_day, simple_hour,
-                               simple_minute, simple_second, simple_usec),
-                               sep="[- .:/]*", name="simple", progressive=True)
+        tup = (simple_year, simple_month, simple_day, simple_hour,
+               simple_minute, simple_second, simple_usec)
+        simple_seq = Sequence(tup, sep="[- .:/]*", name="simple",
+                              progressive=True)
         self.simple = Sequence((simple_seq, "(?=(\\s|$))"), sep='')
 
         self.setup()
                                  "friday|fri|fr", "saturday|sat|sa",
                                  "sunday|sun|su"))
 
-        midnight = Regex("midnight", lambda p, dt: adatetime(hour=0, minute=0, second=0, microsecond=0))
-        noon = Regex("noon", lambda p, dt: adatetime(hour=12, minute=0, second=0, microsecond=0))
+        midnight_l = lambda p, dt: adatetime(hour=0, minute=0, second=0,
+                                             microsecond=0)
+        midnight = Regex("midnight", midnight_l)
+
+        noon_l = lambda p, dt: adatetime(hour=12, minute=0, second=0,
+                                         microsecond=0)
+        noon = Regex("noon", noon_l)
+
         now = Regex("now", lambda p, dt: dt)
-        self.time = Choice((self.time12, self.time24, midnight, noon, now), name="time")
+
+        self.time = Choice((self.time12, self.time24, midnight, noon, now),
+                           name="time")
 
         def tomorrow_to_date(p, dt):
             d = dt.date() + timedelta(days= +1)
         yesterday = Regex("yesterday", yesterday_to_date)
 
         thisyear = Regex("this year", lambda p, dt: adatetime(year=dt.year))
-        thismonth = Regex("this month", lambda p, dt: adatetime(year=dt.year, month=dt.month))
-        today = Regex("today", lambda p, dt: adatetime(year=dt.year, month=dt.month, day=dt.day))
+        thismonth = Regex("this month",
+                          lambda p, dt: adatetime(year=dt.year,
+                                                  month=dt.month))
+        today = Regex("today",
+                      lambda p, dt: adatetime(year=dt.year, month=dt.month,
+                                              day=dt.day))
 
         self.month = Month("january|jan", "february|febuary|feb", "march|mar",
-                           "april|apr", "may", "june|jun", "july|jul", "august|aug",
-                           "september|sept|sep", "october|oct", "november|nov",
-                           "december|dec")
+                           "april|apr", "may", "june|jun", "july|jul",
+                           "august|aug", "september|sept|sep", "october|oct",
+                           "november|nov", "december|dec")
 
         # If you specify a day number you must also specify a month... this
         # Choice captures that constraint
 
-        self.dmy = Choice((Sequence((self.day, self.month, self.year), name="dmy"),
-                           Sequence((self.month, self.day, self.year), name="mdy"),
-                           Sequence((self.year, self.month, self.day), name="ymd"),
-                           Sequence((self.year, self.day, self.month), name="ydm"),
+        self.dmy = Choice((Sequence((self.day, self.month, self.year),
+                                    name="dmy"),
+                           Sequence((self.month, self.day, self.year),
+                                    name="mdy"),
+                           Sequence((self.year, self.month, self.day),
+                                    name="ymd"),
+                           Sequence((self.year, self.day, self.month),
+                                    name="ydm"),
                            Sequence((self.day, self.month), name="dm"),
                            Sequence((self.month, self.day), name="md"),
                            Sequence((self.month, self.year), name="my"),
                            ), name="date")
 
         self.datetime = Bag((self.time, self.dmy), name="datetime")
-        self.bundle = Choice((self.plusdate, self.datetime, self.simple), name="bundle")
+        self.bundle = Choice((self.plusdate, self.datetime, self.simple),
+                             name="bundle")
         self.torange = Combo((self.bundle, "to", self.bundle), name="torange")
 
         self.all = Choice((self.torange, self.bundle), name="all")
         from whoosh import query
 
         fieldname = self.fieldname or parser.fieldname
-        return query.DateRange(fieldname, self.start, self.end, boost=self.boost)
+        return query.DateRange(fieldname, self.start, self.end,
+                               boost=self.boost)
 
 
 class DateTagger(Tagger):

File src/whoosh/qparser/default.py

             elif spec == "or":
                 qclass = query.Or
             else:
-                raise QueryParserError("Unknown multitoken_query value %r" % spec)
+                raise QueryParserError("Unknown multitoken_query value %r"
+                                       % spec)
             return qclass([termclass(fieldname, t, boost=boost)
                            for t in texts])
 
                 node = tagger.match(self, text, pos)
                 if node:
                     if node.endchar <= pos:
-                        raise Exception("Token %r did not move cursor forward. (%r, %s)" % (tagger, text, pos))
+                        raise Exception("Token %r did not move cursor forward."
+                                        " (%r, %s)" % (tagger, text, pos))
                     if prev < pos:
                         tween = inter(prev, pos)
                         print_debug(debug, "Tween: %r" % tween)
                         stack.append(tween)
 
-                    print_debug(debug, "Tagger: %r at %s: %r" % (tagger, pos, node))
+                    print_debug(debug, "Tagger: %r at %s: %r"
+                                % (tagger, pos, node))
                     stack.append(node)
                     prev = pos = node.endchar
                     break

File src/whoosh/qparser/plugins.py

     # \u055E = Armenian question mark
     # \u061F = Arabic question mark
     # \u1367 = Ethiopic question mark
-    expr = u("(?P<text>\\w*[*?\u055E\u061F\u1367](\\w|[*?\u055E\u061F\u1367])*)")
+    qms = u("\u055E\u061F\u1367")
+    expr = u("(?P<text>\\w*[*?%s](\\w|[*?%s])*)") % (qms, qms)
     nodetype = WildcardNode
 
 
             elif isinstance(node, syntax.GroupNode):
                 node = self.do_fieldnames(parser, node)
 
-            if i > 0 and not node.is_ws() and isinstance(group[i - 1], fnclass):
+            if i > 0 and not node.is_ws() and isinstance(group[i - 1],
+                                                         fnclass):
                 node.set_fieldname(group[i - 1].fieldname, override=False)
                 i -= 1
 
     a compiled expression, or None to remove the operator::
     
         qp = qparser.QueryParser("content", schema)
-        cp = qparser.OperatorsPlugin(And="&", Or="\\|", AndNot="&!", AndMaybe="&~", Not=None)
+        cp = qparser.OperatorsPlugin(And="&", Or="\\|", AndNot="&!",
+                                     AndMaybe="&~", Not=None)
         qp.replace_plugin(cp)
     
     You can also specify a list of ``(OpTagger, priority)`` pairs as the first
         if not clean:
             ot = self.OpTagger
             if Not:
-                ops.append((ot(Not, syntax.NotGroup, syntax.PrefixOperator), 0))
+                ops.append((ot(Not, syntax.NotGroup, syntax.PrefixOperator),
+                            0))
             if And:
                 ops.append((ot(And, syntax.AndGroup), 0))
             if Or:

File src/whoosh/qparser/taggers.py

         if match:
             node = self.create(parser, match)
             if node is None:
-                raise Exception("%s.match() did not return a node" % (self.__class__.__name__))
+                raise Exception("%s.match() did not return a node"
+                                % (self.__class__.__name__))
             return node.set_range(match.start(), match.end())
 
     def create(self, parser, match):

File src/whoosh/support/bench.py

 class WhooshModule(Module):
     def indexer(self, create=True):
         schema = self.bench.spec.whoosh_schema()
-        path = os.path.join(self.options.dir, "%s_whoosh" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_whoosh"
+                            % self.options.indexname)
 
         if not os.path.exists(path):
             os.mkdir(path)
         self.writer.commit(merge=merge, optimize=optimize)
 
     def searcher(self):
-        path = os.path.join(self.options.dir, "%s_whoosh" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_whoosh"
+                            % self.options.indexname)
         ix = index.open_dir(path)
         self.srch = ix.searcher()
-        self.parser = qparser.QueryParser(self.bench.spec.main_field, schema=ix.schema)
+        self.parser = qparser.QueryParser(self.bench.spec.main_field,
+                                          schema=ix.schema)
 
     def query(self):
         qstring = " ".join(self.args).decode("utf8")
 
 class XappyModule(Module):
     def indexer(self, **kwargs):
-        path = os.path.join(self.options.dir, "%s_xappy" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_xappy"
+                            % self.options.indexname)
         conn = self.bench.spec.xappy_connection(path)
         return conn
 
         conn.flush()
 
     def searcher(self):
-        path = os.path.join(self.options.dir, "%s_xappy" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_xappy"
+                            % self.options.indexname)
         return xappy.SearchConnection(path)
 
     def query(self, conn):
 
 class XapianModule(Module):
     def indexer(self, **kwargs):
-        path = os.path.join(self.options.dir, "%s_xapian" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_xapian"
+                            % self.options.indexname)
         self.database = xapian.WritableDatabase(path, xapian.DB_CREATE_OR_OPEN)
         self.ixer = xapian.TermGenerator()
 
         self.database.flush()
 
     def searcher(self):
-        path = os.path.join(self.options.dir, "%s_xappy" % self.options.indexname)
+        path = os.path.join(self.options.dir, "%s_xappy"
+                            % self.options.indexname)
         self.db = xapian.Database(path)
         self.enq = xapian.Enquire(self.db)
         self.qp = xapian.QueryParser()
         from zcatalog import indexes  #@UnresolvedImport
         import transaction  #@UnresolvedImport
 
-        dir = os.path.join(self.options.dir, "%s_zcatalog" % self.options.indexname)
+        dir = os.path.join(self.options.dir, "%s_zcatalog"
+                           % self.options.indexname)
         if os.path.exists(dir):
             rmtree(dir)
         os.mkdir(dir)
         from zcatalog import indexes  #@UnresolvedImport
         import transaction  #@UnresolvedImport
 
-        path = os.path.join(self.options.dir, "%s_zcatalog" % self.options.indexname, "index")
+        path = os.path.join(self.options.dir, "%s_zcatalog"
+                            % self.options.indexname, "index")
         storage = FileStorage(path)
         db = DB(storage)
         conn = db.open()
         import shutil
         from nucular import Nucular
 
-        dir = os.path.join(self.options.dir, "%s_nucular" % self.options.indexname)
+        dir = os.path.join(self.options.dir, "%s_nucular"
+                           % self.options.indexname)
         if create:
             if os.path.exists(dir):
                 shutil.rmtree(dir)
     def searcher(self):
         from nucular import Nucular
 
-        dir = os.path.join(self.options.dir, "%s_nucular" % self.options.indexname)
+        dir = os.path.join(self.options.dir, "%s_nucular"
+                           % self.options.indexname)
         self.archive = Nucular.Nucular(dir)
 
     def query(self):

File src/whoosh/support/bitvector.py

     >>> bv
     <BitVector 0000010000>
     
-    You can initialize the BitVector using an iterable of integers representing bit
-    positions to turn on.
+    You can initialize the BitVector using an iterable of integers representing
+    bit positions to turn on.
     
     >>> bv2 = BitVector(10, [2, 4, 7])
     >>> bv2
     True
     
     BitVector supports bit-wise logic operations & (and), | (or), and ^ (xor)
-    between itself and another BitVector of equal size, or itself and a collection of
-    integers (usually a set() or frozenset()).
+    between itself and another BitVector of equal size, or itself and a
+    collection of integers (usually a set() or frozenset()).
     
     >>> bv | bv2
     <BitVector 00101101000>
         return self._logic(operator.__xor__, other)
 
     def __invert__(self):
-        return BitVector(self.size, source=(x for x in xrange(self.size) if x not in self))
+        return BitVector(self.size, source=(x for x in xrange(self.size)
+                                            if x not in self))
 
     def count(self):
         """Returns the number of "on" bits in the bit array."""
         """Turns the bit at the given position on."""
 
         if index >= self.size:
-            raise IndexError("Position %s greater than the size of the vector" % repr(index))
+            raise IndexError("Position %s greater than the size of the vector"
+                             % repr(index))
         self.bits[index >> 3] |= 1 << (index & 7)
         self.bcount = None
 
         return n in self._back
 
     def __repr__(self):
-        return "<%s %s/%s>" % (self.__class__.__name__, len(self._back), self.size)
+        return "<%s %s/%s>" % (self.__class__.__name__, len(self._back),
+                               self.size)
 
     def __len__(self):
         return len(self._back)
         return self.__and__(other)
 
     def invert(self):
-        return BitSet(self.size, (x for x in xrange(self.size) if x not in self))
+        return BitSet(self.size, (x for x in xrange(self.size)
+                                  if x not in self))
 
     def __and__(self, other):
         return BitSet(self.size, self._back.intersection(other))

File src/whoosh/support/dawg.py

         return list(self)
 
     def edge(self, key, expand=True):
-        """Returns the node connected to the outgoing edge with the given label.
+        """Returns the node connected to the outgoing edge with the given
+        label.
         """
 
         raise NotImplementedError
             # Replacements
             for key in node:
                 if key != char:
-                    for w in _within(node.edge(key), word, dk, ii, sofar + key):
+                    for w in _within(node.edge(key), word, dk, ii,
+                                     sofar + key):
                         yield w
 
 

File src/whoosh/support/filelock.py

 
 """
 This module contains classes implementing exclusive locks for platforms with
-fcntl (UNIX and Mac OS X) and Windows. Whoosh originally used directory creation
-as a locking method, but it had the problem that if the program crashed the
-lock directory was left behind and would keep the index locked until it was
-cleaned up. Using OS-level file locks fixes this.
+fcntl (UNIX and Mac OS X) and Windows. Whoosh originally used directory
+creation as a locking method, but it had the problem that if the program
+crashed the lock directory was left behind and would keep the index locked
+until it was cleaned up. Using OS-level file locks fixes this.
 """
 
 import errno
 
 
 def try_for(fn, timeout=5.0, delay=0.1):
-    """Calls ``fn`` every ``delay`` seconds until it returns True or ``timeout``
-    seconds elapse. Returns True if the lock was acquired, or False if the
-    timeout was reached.
+    """Calls ``fn`` every ``delay`` seconds until it returns True or
+    ``timeout`` seconds elapse. Returns True if the lock was acquired, or False
+    if the timeout was reached.
 
     :param timeout: Length of time (in seconds) to keep retrying to acquire the
         lock. 0 means return immediately. Only used when blocking is False.

File src/whoosh/support/numeric.py

 # Instead of using the character set from the ascii85 algorithm, I put the
 # characters in order so that the encoded text sorts properly (my life would be
 # a lot easier if they had just done that from the start)
-_b85chars = "!$%&*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_abcdefghijklmnopqrstuvwxyz{|}~"
+_b85chars = ("!$%&*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+             "^_abcdefghijklmnopqrstuvwxyz{|}~")
 _b85dec = {}
 for i in range(len(_b85chars)):
     _b85dec[_b85chars[i]] = i

File src/whoosh/support/relativedelta.py

                 if yearday > 59:
                     self.leapdays = -1
             if yday:
-                ydayidx = [31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 366]
+                ydayidx = [31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
+                           366]
                 for idx, ydays in enumerate(ydayidx):
                     if yday <= ydays:
                         self.month = idx + 1

File src/whoosh/support/testing.py

         self.dir = None
 
     def _mkdir(self):
-        self.dir = os.path.join(self.parentdir, "tmp", self.basename + ".tmpix")
+        self.dir = os.path.join(self.parentdir, "tmp",
+                                self.basename + ".tmpix")
         if not os.path.exists(self.dir):
             os.makedirs(self.dir)
 
     """Returns True if the given object has __isabstractmethod__ == True.
     """
 
-    return hasattr(attr, "__isabstractmethod__") and getattr(attr, "__isabstractmethod__")
+    return (hasattr(attr, "__isabstractmethod__")
+            and getattr(attr, "__isabstractmethod__"))
 
 
 def check_abstract_methods(base, subclass):
         attr = getattr(base, attrname)
         if is_abstract_method(attr):
             oattr = getattr(subclass, attrname)
-            assert not is_abstract_method(oattr), "%s.%s not overridden" % (subclass.__name__, attrname)
+            if is_abstract_method(oattr):
+                raise Exception("%s.%s not overridden"
+                                % (subclass.__name__, attrname))

File src/whoosh/support/times.py

     None, meaning unspecified.
     """
 
-    units = frozenset(("year", "month", "day", "hour", "minute", "second", "microsecond"))
+    units = frozenset(("year", "month", "day", "hour", "minute", "second",
+                       "microsecond"))
 
     def __init__(self, year=None, month=None, day=None, hour=None, minute=None,
                  second=None, microsecond=None):
         if isinstance(year, datetime):
-            self.year, self.month, self.day = year.year, year.month, year.day
-            self.hour, self.minute, self.second = year.hour, year.minute, year.second
-            self.microsecond = year.microsecond
+            dt = year
+            self.year, self.month, self.day = dt.year, dt.month, dt.day
+            self.hour, self.minute, self.second = dt.hour, dt.minute, dt.second
+            self.microsecond = dt.microsecond
         else:
             if month is not None and (month < 1 or month > 12):
                 raise TimeError("month must be in 1..12")
                 raise TimeError("minute must be in 0..59")
             if second is not None and (second < 0 or second > 59):
                 raise TimeError("second must be in 0..59")
-            if microsecond is not None and (microsecond < 0 or microsecond > 999999):
+            if microsecond is not None and (microsecond < 0
+                                            or microsecond > 999999):
                 raise TimeError("microsecond must be in 0..999999")
 
             self.year, self.month, self.day = year, month, day
         datetime.datetime(2009, 5, 1, 0, 0, 0, 0)
         """
 
-        year, month, day, hour, minute, second, microsecond = \
-        self.year, self.month, self.day, self.hour, self.minute, self.second, self.microsecond
+        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
+                                 self.minute, self.second, self.microsecond)
 
-        if year is None:
+        if y is None:
             raise ValueError("Date has no year")
 
-        if month is None:
-            month = 1
-        if day is None:
-            day = 1
-        if hour is None:
-            hour = 0
-        if minute is None:
-            minute = 0
-        if second is None:
-            second = 0
-        if microsecond is None:
-            microsecond = 0
-        return datetime(year, month, day, hour, minute, second, microsecond)
+        if m is None:
+            m = 1
+        if d is None:
+            d = 1
+        if h is None:
+            h = 0
+        if mn is None:
+            mn = 0
+        if s is None:
+            s = 0
+        if ms is None:
+            ms = 0
+        return datetime(y, m, d, h, mn, s, ms)
 
     def ceil(self):
         """Returns a ``datetime`` version of this object with all unspecified
         datetime.datetime(2009, 5, 30, 23, 59, 59, 999999)
         """
 
-        year, month, day, hour, minute, second, microsecond = \
-        self.year, self.month, self.day, self.hour, self.minute, self.second, self.microsecond
+        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
+                                 self.minute, self.second, self.microsecond)
 
-        if year is None:
+        if y is None:
             raise ValueError("Date has no year")
 
-        if month is None:
-            month = 12
-        if day is None:
-            day = calendar.monthrange(year, month)[1]
-        if hour is None:
-            hour = 23
-        if minute is None:
-            minute = 59
-        if second is None:
-            second = 59
-        if microsecond is None:
-            microsecond = 999999
-        return datetime(year, month, day, hour, minute, second, microsecond)
+        if m is None:
+            m = 12
+        if d is None:
+            d = calendar.monthrange(y, m)[1]
+        if h is None:
+            h = 23
+        if mn is None:
+            mn = 59
+        if s is None:
+            s = 59
+        if ms is None:
+            ms = 999999
+        return datetime(y, m, d, h, mn, s, ms)
 
     def disambiguated(self, basedate):
         """Returns either a ``datetime`` or unambiguous ``timespan`` version
         
         >>> adt = adatetime(year=2009, month=10, day=31)
         >>> adt.disambiguated(datetime.now())
-        timespan(datetime.datetime(2009, 10, 31, 0, 0, 0, 0), datetime.datetime(2009, 10, 31, 23, 59 ,59, 999999)
+        timespan(datetime(2009, 10, 31, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
         """
 
         dt = self
         >>> ts
         timespan(adatetime(2009, 2, None, None, None, None, None), adatetime(2009, 10, None, None, None, None, None))
         >>> ts.disambiguated(datetime.now())
-        timespan(datetime.datetime(2009, 2, 28, 0, 0, 0, 0), datetime.datetime(2009, 10, 31, 23, 59 ,59, 999999)
+        timespan(datetime(2009, 2, 28, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
         """
 
         #- If year is in start but not end, use basedate.year for end
 
     if isinstance(at, datetime):
         return False
-    return at.hour is None and at.minute is None and at.second is None and at.microsecond is None
+    return (at.hour is None and at.minute is None and at.second is None
+            and at.microsecond is None)
 
 
 def is_ambiguous(at):
     if is_ambiguous(at) or isinstance(at, datetime):
         return at
     return datetime(year=at.year, month=at.month, day=at.day, hour=at.hour,
-                    minute=at.minute, second=at.second, microsecond=at.microsecond)
+                    minute=at.minute, second=at.second,
+                    microsecond=at.microsecond)

File src/whoosh/support/unicode.py

 
 # Note:   When comparing block names, casing, whitespace, hyphens,
 #         and underbars are ignored.
-#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
+#         For example, "Latin Extended-A" and "latin extended a" are equivalent
 #         For more information on the comparison of property values, 
 #            see UCD.html.
 #