Source

whoosh / scripts / make_checkpoint.py

#!python

# Make a "checkpoint" index, capturing the index format created by a certain
# version of Whoosh

from __future__ import print_function, with_statement
import os.path, random, sys
from datetime import datetime

from whoosh import fields, index
from whoosh.compat import u, xrange


# The target directory is the one required command-line argument; without
# it, show the usage line and exit with status 1.
try:
    indexdir = sys.argv[1]
except IndexError:
    print("USAGE: make_checkpoint.py <dir>")
    sys.exit(1)
print("Creating checkpoint index in", indexdir)

# Schema for the checkpoint index. Every field is stored except "tag" and
# "ngrams", which are passed as bare field classes rather than instances.
schema = fields.Schema(
    path=fields.ID(stored=True, unique=True),   # unique document key
    num=fields.NUMERIC(int, stored=True),       # integer value
    frac=fields.NUMERIC(float, stored=True),    # floating-point value
    dt=fields.DATETIME(stored=True),
    tag=fields.KEYWORD,
    title=fields.TEXT(stored=True),
    ngrams=fields.NGRAMWORDS,
)

# Vocabulary used to generate document titles and tags (20 words).
# Adjacent string literals are joined with no separator, so every line but
# the last must end in a space; without it the neighbouring words fuse
# (e.g. "indiajuliet", "romeosierra") and the list is two entries short.
words = u("alfa bravo charlie delta echo foxtrot golf hotel india "
          "juliet kilo lima mike november oskar papa quebec romeo "
          "sierra tango").split()

# Create the target directory if it is missing, then create the index in it.
if not os.path.exists(indexdir):
    os.makedirs(indexdir)
ix = index.create_in(indexdir, schema)

# Add 3 batches of 100 documents, each batch through its own writer
# (presumably so that each batch ends up in a separate segment -- confirm
# against the Whoosh writer documentation).
# "counter" numbers the documents across all batches; "frac" is a running
# float total that grows by 0.15 for every document.
counter = 0
frac = 0.0
for segnum in range(3):
    with ix.writer() as writer:
        for num in range(100):
            frac += 0.15
            # 100 randomly chosen words; the same text feeds both the
            # "title" and the "ngrams" fields.
            picks = (random.choice(words) for _ in xrange(100))
            title = " ".join(picks)

            writer.add_document(
                path=u("%s/%s" % (segnum, num)),
                num=counter,
                frac=frac,
                # One year per document, cycling through the 12 months.
                dt=datetime(year=2000 + counter, month=counter % 12 + 1,
                            day=15),
                tag=words[counter % len(words)],
                title=title,
                ngrams=title
            )
            counter += 1

# Delete one document from each batch in a separate writer, so that the
# checkpoint also contains deletions.
with ix.writer() as writer:
    for path in ("0/42", "1/6", "2/80"):
        deleted = writer.delete_by_term("path", path)
        print("Deleted", path, deleted)

# Report the number of documents added next to the index's document count.
remaining = ix.doc_count()
print(counter, remaining)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.