coady committed 4289de4

Support for full name imports in PyLucene.

  • Participants
  • Parent commit c0c4e93

Comments (0)

Files changed (9)

File examples/

 import lucene
+try:
+    from org.apache.lucene import document, index, queryParser, search, store, util
+    from org.apache.lucene.analysis import standard
+except ImportError:
+    document = index = queryParser = search = store = util = standard = lucene
 from lupyne import engine
 ### lucene ###
-analyzer = lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT)
+analyzer = standard.StandardAnalyzer(util.Version.LUCENE_CURRENT)
 # Store the index in memory:
-directory = lucene.RAMDirectory()
+directory = store.RAMDirectory()
 # To store an index on disk, use this instead:
-#Directory directory ="/tmp/testindex"))
-iwriter = lucene.IndexWriter(directory, analyzer, True, lucene.IndexWriter.MaxFieldLength(25000))
-doc = lucene.Document()
+#Directory directory ="/tmp/testindex"))
+iwriter = index.IndexWriter(directory, analyzer, True, index.IndexWriter.MaxFieldLength(25000))
+doc = document.Document()
 text = "This is the text to be indexed."
-doc.add(lucene.Field("fieldname", text, lucene.Field.Store.YES, lucene.Field.Index.ANALYZED))
+doc.add(document.Field("fieldname", text, document.Field.Store.YES, document.Field.Index.ANALYZED))
 # Now search the index:
-ireader = # read-only=true
-isearcher = lucene.IndexSearcher(ireader)
+ireader = # read-only=true
+isearcher = search.IndexSearcher(ireader)
 # Parse a simple query that searches for "text":
-parser = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, "fieldname", analyzer)
+parser = queryParser.QueryParser(util.Version.LUCENE_CURRENT, "fieldname", analyzer)
 query = parser.parse("text")
 hits =, None, 1000).scoreDocs
 assert len(hits) == 1

File examples/

 import lucene
+try:
+    from org.apache.lucene import index, search
+    from import spans
+except ImportError:
+    index = search = spans = lucene
 from lupyne.engine import Query
 ### lucene ###
-q1 = lucene.TermQuery(lucene.Term('text', 'lucene'))
-q2 = lucene.PhraseQuery()
-q2.add(lucene.Term('text', 'search'))
-q2.add(lucene.Term('text', 'engine'))
-q3 = lucene.BooleanQuery()
-q3.add(q1, lucene.BooleanClause.Occur.MUST)
-q3.add(q2, lucene.BooleanClause.Occur.MUST)
+q1 = search.TermQuery(index.Term('text', 'lucene'))
+q2 = search.PhraseQuery()
+q2.add(index.Term('text', 'search'))
+q2.add(index.Term('text', 'engine'))
+q3 = search.BooleanQuery()
+q3.add(q1, search.BooleanClause.Occur.MUST)
+q3.add(q2, search.BooleanClause.Occur.MUST)
 assert str(q3) == '+text:lucene +text:"search engine"'
-q1 = lucene.SpanTermQuery(lucene.Term('text', 'hello'))
-q2 = lucene.SpanTermQuery(lucene.Term('text', 'world'))
-q3 = lucene.SpanPositionRangeQuery(q1, 0, 10)
-q4 = lucene.SpanNearQuery([q1, q2], 0, True)
-q5 = lucene.SpanNotQuery(q3, q4)
+q1 = spans.SpanTermQuery(index.Term('text', 'hello'))
+q2 = spans.SpanTermQuery(index.Term('text', 'world'))
+q3 = spans.SpanPositionRangeQuery(q1, 0, 10)
+q4 = spans.SpanNearQuery([q1, q2], 0, True)
+q5 = spans.SpanNotQuery(q3, q4)
 assert str(q5) == 'spanNot(spanPosRange(text:hello, 0, 10), spanNear([text:hello, text:world], 0, true))'
 ### lupyne ###
 q = Query.term('text', 'lucene') & Query.phrase('text', 'search', 'engine')
-assert isinstance(q, lucene.BooleanQuery)
+assert isinstance(q, search.BooleanQuery)
 assert str(q) == '+text:lucene +text:"search engine"'
 q = Query.span('text', 'hello')[:10] - Query.near('text', 'hello', 'world')
-assert isinstance(q, lucene.SpanQuery)
+assert isinstance(q, spans.SpanQuery)
 assert str(q) == 'spanNot(spanPosRange(text:hello, 0, 10), spanNear([text:hello, text:world], 0, true))'

File examples/

 from datetime import date
 import lucene
+try:
+    from org.apache.lucene import search
+except ImportError:
+    search = lucene
 from lupyne import engine
 docs = [
 cities = ['San Francisco', 'Los Angeles', 'Portland']
 for index, distance in enumerate([1e3, 1e5, 2e5, 1e6]):
     query = indexer.fields['point'].within(-122.4, 37.7, distance=distance)
-    assert isinstance(query, lucene.BooleanQuery) and len(query) <= 4
+    assert isinstance(query, search.BooleanQuery) and len(query) <= 4
     assert set(hit['city'] for hit in == set(cities[:index])
 query = indexer.fields['location'].prefix('CA.San')

File examples/

 import lucene
+try:
+    from org.apache.lucene import search
+    from import PythonFieldComparator, PythonFieldComparatorSource
+except ImportError:
+    search = lucene
+    from lucene import PythonFieldComparator, PythonFieldComparatorSource
 from lupyne import engine
 colors = 'red', 'green', 'blue', 'cyan', 'magenta', 'yellow'
 ### lucene ###
-searcher = lucene.IndexSearcher(
-topdocs =, None, 10, lucene.Sort(lucene.SortField('color', lucene.SortField.STRING)))
+searcher = search.IndexSearcher(
+topdocs =, None, 10, search.Sort(search.SortField('color', search.SortField.STRING)))
 assert [searcher.doc(scoredoc.doc)['color'] for scoredoc in topdocs.scoreDocs] == sorted(colors)
-class ComparatorSource(lucene.PythonFieldComparatorSource):
-    class newComparator(lucene.PythonFieldComparator):
+class ComparatorSource(PythonFieldComparatorSource):
+    class newComparator(PythonFieldComparator):
         def __init__(self, name, numHits, sortPos, reversed):
-            lucene.PythonFieldComparator.__init__(self)
+            PythonFieldComparator.__init__(self)
    = name
             self.values = [None] * numHits
         def setNextReader(self, reader, base):
-            self.comparator = lucene.FieldCache.DEFAULT.getStrings(reader,
+            self.comparator = search.FieldCache.DEFAULT.getStrings(reader,
         def compare(self, slot1, slot2):
             return cmp(self.values[slot1], self.values[slot2])
         def setBottom(self, slot):
         def value(self, slot):
-sorter = lucene.Sort(lucene.SortField('color', ComparatorSource()))
+sorter = search.Sort(search.SortField('color', ComparatorSource()))
 # still must supply excessive doc count to use the sorter
-topdocs =, None, 10, sorter)
+topdocs =, None, 10, sorter)
 assert [searcher.doc(scoredoc.doc)['color'] for scoredoc in topdocs.scoreDocs] == sorted(colors)
 ### lupyne ###

File lupyne/engine/

 import collections
 import warnings
 import lucene
+try:
+    from java.lang import Double, Float, Long, Number, Object
+    from org.apache.lucene import document, search, util
+    from import grouping
+except ImportError:
+    from lucene import Double, Float, Long, Number, Object
+    document = search = util = grouping = lucene
 from .queries import Query
 class Field(object):, self.attrs = name, attrs
         if isinstance(store, bool):
             store = 'yes' if store else 'no'
- = lucene.Field.Store.valueOf(store.upper())
+ = document.Field.Store.valueOf(store.upper())
         if isinstance(index, bool):
-            self.index = lucene.Field.Index.toIndex(index, analyzed, omitNorms)
+            self.index = document.Field.Index.toIndex(index, analyzed, omitNorms)
-            self.index = lucene.Field.Index.valueOf(index.upper())
+            self.index = document.Field.Index.valueOf(index.upper())
         if isinstance(termvector, bool):
-            self.termvector = lucene.Field.TermVector.toTermVector(termvector, withOffsets, withPositions)
+            self.termvector = document.Field.TermVector.toTermVector(termvector, withOffsets, withPositions)
-            self.termvector = lucene.Field.TermVector.valueOf(termvector.upper())
+            self.termvector = document.Field.TermVector.valueOf(termvector.upper())
         next(Field.items(self, ' ')) # validate settings
     def items(self, *values):
         "Generate lucene Fields suitable for adding to a document."
         for value in values:
             if isinstance(value, basestring):
-                field = lucene.Field(, value,, self.index, self.termvector)
+                field = document.Field(, value,, self.index, self.termvector)
             elif isinstance(value, lucene.JArray_byte):
-                field = lucene.Field(, value)
+                field = document.Field(, value)
-                field = lucene.Field(, value, self.termvector)
+                field = document.Field(, value, self.termvector)
             for name, value in self.attrs.items():
                 setattr(field, name, value)
             yield field
         "Generate indexed component fields."
             for value in values:
-                yield lucene.Field(, value,, lucene.Field.Index.NO)
+                yield document.Field(, value,, document.Field.Index.NO)
         for value in values:
             for index, text in enumerate(self.values(value)):
-                yield lucene.Field(self.names[index], text, lucene.Field.Store.NO, self.index, self.termvector)
+                yield document.Field(self.names[index], text, document.Field.Store.NO, self.index, self.termvector)
     def prefix(self, value):
         "Return prefix query of the closest possible prefixed field."
         index = value.count(self.sep)
     def __init__(self, name, step=None, store=False, index=True):
         Field.__init__(self, name, store)
-        self.step = step or lucene.NumericUtils.PRECISION_STEP_DEFAULT
+        self.step = step or util.NumericUtils.PRECISION_STEP_DEFAULT
         self.index = index
     def items(self, *values):
         "Generate lucene NumericFields suitable for adding to a document."
         for value in values:
-            field = lucene.NumericField(, self.step,, self.index)
+            field = document.NumericField(, self.step,, self.index)
             if isinstance(value, float):
                 field.doubleValue = value
             yield field
     def numeric(self, cls, start, stop, lower, upper):
         if isinstance(start, float) or isinstance(stop, float):
-            start, stop = (value if value is None else lucene.Double(value) for value in (start, stop))
+            start, stop = (value if value is None else Double(value) for value in (start, stop))
             return cls.newDoubleRange(, self.step, start, stop, lower, upper)
         if start is not None:
-            start = None if start < lucene.Long.MIN_VALUE else lucene.Long(long(start))
+            start = None if start < Long.MIN_VALUE else Long(long(start))
         if stop is not None:
-            stop = None if stop > lucene.Long.MAX_VALUE else lucene.Long(long(stop))
+            stop = None if stop > Long.MAX_VALUE else Long(long(stop))
         return cls.newLongRange(, self.step, start, stop, lower, upper)
     def range(self, start, stop, lower=True, upper=False):
         "Return lucene NumericRangeQuery."
-        return self.numeric(lucene.NumericRangeQuery, start, stop, lower, upper)
+        return self.numeric(search.NumericRangeQuery, start, stop, lower, upper)
     def term(self, value):
         "Return range query to match single term."
         return self.range(value, value, upper=True)
     def filter(self, start, stop, lower=True, upper=False):
         "Return lucene NumericRangeFilter."
-        return self.numeric(lucene.NumericRangeFilter, start, stop, lower, upper)
+        return self.numeric(search.NumericRangeFilter, start, stop, lower, upper)
 class DateTimeField(NumericField):
     """Field which indexes datetimes as a NumericField of timestamps.
 def convert(value):
     "Return python object from java Object."
-    if not lucene.Number.instance_(value):
-        return value.toString() if lucene.Object.instance_(value) else value
-    value = lucene.Number.cast_(value)
-    return value.doubleValue() if lucene.Float.instance_(value) or lucene.Double.instance_(value) else int(value.longValue())
+    if not Number.instance_(value):
+        return value.toString() if Object.instance_(value) else value
+    value = Number.cast_(value)
+    return value.doubleValue() if Float.instance_(value) or Double.instance_(value) else int(value.longValue())
 class Hit(Document):
     "A Document from a search result, with :attr:`id`, :attr:`score`, and optional sort :attr:`keys`."
     def __init__(self, searcher, scoredocs, count=None, maxscore=None, fields=None):
         self.searcher, self.scoredocs = searcher, scoredocs
         self.count, self.maxscore = count, maxscore
-        self.fields = lucene.MapFieldSelector(fields) if isinstance(fields, collections.Iterable) else fields
+        self.fields = document.MapFieldSelector(fields) if isinstance(fields, collections.Iterable) else fields
     def __len__(self):
         return len(self.scoredocs)
     def __getitem__(self, index):
             scoredocs = self.scoredocs[start:stop] if stop - start < len(self) else self.scoredocs
             return type(self)(self.searcher, scoredocs, self.count, self.maxscore, self.fields)
         scoredoc = self.scoredocs[index]
-        keys = lucene.FieldDoc.cast_(scoredoc).fields if lucene.FieldDoc.instance_(scoredoc) else ()
+        keys = search.FieldDoc.cast_(scoredoc).fields if search.FieldDoc.instance_(scoredoc) else ()
         return Hit(self.searcher.doc(scoredoc.doc, self.fields), scoredoc.doc, scoredoc.score, keys)
     def ids(self):
     def __init__(self, searcher, field, query=None, count=None, sort=None):
         self.searcher, self.field = searcher, field
-        self.query = query or lucene.MatchAllDocsQuery()
-        self.sort = sort or lucene.Sort.RELEVANCE
+        self.query = query or search.MatchAllDocsQuery()
+        self.sort = sort or search.Sort.RELEVANCE
         if count is None:
-            collector = lucene.TermAllGroupsCollector(field)
-  , self.query, collector)
+            collector = grouping.TermAllGroupsCollector(field)
+  , self.query, collector)
             count = collector.groupCount
-        collector = lucene.TermFirstPassGroupingCollector(field, self.sort, count)
-, self.query, collector)
-        self.searchgroups = collector.getTopGroups(0, False).of_(lucene.SearchGroup)
+        collector = grouping.TermFirstPassGroupingCollector(field, self.sort, count)
+, self.query, collector)
+        self.searchgroups = collector.getTopGroups(0, False).of_(grouping.SearchGroup)
     def __len__(self):
         return self.searchgroups.size()
     def __iter__(self):
             yield searchgroup.groupValue.toString()
     def facets(self, filter):
         "Generate field values and counts which match given filter."
-        collector = lucene.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, self.sort, 1, False, False, False)
-, self.query, filter, collector)
+        collector = grouping.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, self.sort, 1, False, False, False)
+, self.query, filter, collector)
         for groupdocs in collector.getTopGroups(0).groups:
             yield groupdocs.groupValue.toString(), groupdocs.totalHits
     def groups(self, count=1, sort=None, scores=False, maxscore=False):
         :param maxscore: compute maximum score of all results
         sort = sort or self.sort
-        if sort == lucene.Sort.RELEVANCE:
+        if sort == search.Sort.RELEVANCE:
             scores = maxscore = True
-        collector = lucene.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, sort, count, scores, maxscore, True)
-, self.query, collector)
+        collector = grouping.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, sort, count, scores, maxscore, True)
+, self.query, collector)
         for groupdocs in collector.getTopGroups(0).groups:
             hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits, groupdocs.maxScore, getattr(self, 'fields', None))
             hits.value = groupdocs.groupValue.toString()

File lupyne/engine/

 import abc, collections
 import warnings
 import lucene
+try:
+    from import File, StringReader
+    from java.lang import Float
+    from java.util import HashMap
+    from org.apache.lucene import analysis, document, index, queryParser, search, store, util
+    from org.apache.lucene.analysis import standard, tokenattributes
+    from org.apache.lucene.index import memory
+    from import similar, spans
+    from org.apache.pylucene.analysis import PythonAnalyzer, PythonTokenFilter
+    from org.apache.pylucene.queryParser import PythonQueryParser
+except ImportError:
+    from lucene import File, StringReader, Float, HashMap, PythonAnalyzer, PythonTokenFilter, PythonQueryParser
+    analysis = document = index = queryParser = search = store = util = \
+    standard = tokenattributes = memory = similar = spans = lucene
 from .queries import Query, TermsFilter, SortField, Highlighter, FastVectorHighlighter, SpellChecker, SpellParser
 from .documents import Field, Document, Hits, Grouping
 from .spatial import DistanceComparator
     def __subclasshook__(cls, other):
         return not issubclass(other, collections.Iterable) or NotImplemented
-for cls in (basestring, lucene.TokenStream, lucene.JArray_byte):
+for cls in (basestring, analysis.TokenStream, lucene.JArray_byte):
 class closing(set):
     def analyzer(self, analyzer, version=None):
         if analyzer is None:
-            analyzer = lucene.StandardAnalyzer(version or lucene.Version.values()[-1])
+            analyzer = standard.StandardAnalyzer(version or util.Version.values()[-1])
         return analyzer
     def directory(self, directory):
         if directory is None:
-            directory = lucene.RAMDirectory()
+            directory = store.RAMDirectory()
         elif isinstance(directory, basestring):
-            directory =
+            directory =
         return directory
     def reader(self, reader):
         reader =
-        if isinstance(reader, lucene.IndexReader):
+        if isinstance(reader, index.IndexReader):
-            reader =, True)
+            reader =, True)
         return reader
 def copy(commit, dest):
     Optimized to use hard links if the destination is a file system path.
     src =
-    if isinstance(dest, lucene.Directory):
+    if isinstance(dest, store.Directory):
         for filename in commit.fileNames:
             src.copy(dest, filename, filename)
-        src = lucene.FSDirectory.cast_(src).directory.path
+        src = store.FSDirectory.cast_(src).directory.path
         os.path.isdir(dest) or os.makedirs(dest)
         for filename in commit.fileNames:
             paths = os.path.join(src, filename), os.path.join(dest, filename)
                 if not os.path.samefile(*paths):
-class TokenStream(lucene.TokenStream):
+class TokenStream(analysis.TokenStream):
     "TokenStream mixin with support for iteration and attributes cached as properties."
     def __iter__(self):
         return self
             return self
         raise StopIteration
     def __getattr__(self, name):
-        cls = getattr(lucene, name + 'Attribute').class_
+        cls = getattr(tokenattributes, name + 'Attribute').class_
         attr = self.getAttribute(cls) if self.hasAttribute(cls) else self.addAttribute(cls)
         setattr(self, name, attr)
         return attr
         return payload and getattr(, 'string_', None)
     def payload(self, data):
-        self.Payload.payload = lucene.Payload(lucene.JArray_byte(data))
+        self.Payload.payload = index.Payload(lucene.JArray_byte(data))
     def positionIncrement(self):
         "Position relative to the previous token."
     def type(self, text):
-class TokenFilter(lucene.PythonTokenFilter, TokenStream):
+class TokenFilter(PythonTokenFilter, TokenStream):
     """Create an iterable lucene TokenFilter from a TokenStream.
     Subclass and override :meth:`incrementToken` or :meth:`setattrs`.
     def __init__(self, input):
-        lucene.PythonTokenFilter.__init__(self, input)
+        PythonTokenFilter.__init__(self, input)
         self.input = input
     def incrementToken(self):
         "Advance to next token and return whether the stream is not empty."
     def setattrs(self):
         "Customize current token."
-class Analyzer(lucene.PythonAnalyzer):
+class Analyzer(PythonAnalyzer):
     """Return a lucene Analyzer which chains together a tokenizer and filters.
     :param tokenizer: lucene Tokenizer or Analyzer
     :param filters: lucene TokenFilters
     def __init__(self, tokenizer, *filters):
-        lucene.PythonAnalyzer.__init__(self)
+        PythonAnalyzer.__init__(self)
         self.tokenizer, self.filters = tokenizer, filters
     def tokenStream(self, field, reader):
-        tokens = self.tokenizer.tokenStream(field, reader) if isinstance(self.tokenizer, lucene.Analyzer) else self.tokenizer(reader)
+        tokens = self.tokenizer.tokenStream(field, reader) if isinstance(self.tokenizer, analysis.Analyzer) else self.tokenizer(reader)
         for filter in self.filters:
             tokens = filter(tokens)
         return tokens
     def tokens(self, text, field=None):
         "Return lucene TokenStream from text."
-        return self.tokenStream(field, lucene.StringReader(text))
+        return self.tokenStream(field, StringReader(text))
     def parse(self, query, field='', op='', version='', parser=None, **attrs):
         """Return parsed lucene Query.
         :param attrs: additional attributes to set on the parser
         # parsers aren't thread-safe (nor slow), so create one each time
-        args = [lucene.Version.valueOf('LUCENE_' + version.replace('.', '')) if version else lucene.Version.values()[-1]]
+        args = [util.Version.valueOf('LUCENE_' + version.replace('.', '')) if version else util.Version.values()[-1]]
         if isinstance(field, collections.Mapping):
-            boosts = lucene.HashMap()
+            boosts = HashMap()
             for key in field:
-                boosts.put(key, lucene.Float(field[key]))
+                boosts.put(key, Float(field[key]))
             args += list(field), self, boosts
             args += field, self
-        parser = (parser or lucene.QueryParser if isinstance(field, basestring) else lucene.MultiFieldQueryParser)(*args)
+        parser = (parser or queryParser.QueryParser if isinstance(field, basestring) else queryParser.MultiFieldQueryParser)(*args)
         if op:
-            parser.defaultOperator = getattr(lucene.QueryParser.Operator, op.upper())
+            parser.defaultOperator = getattr(queryParser.QueryParser.Operator, op.upper())
         for name, value in attrs.items():
             setattr(parser, name, value)
-        if isinstance(parser, lucene.MultiFieldQueryParser):
-            return lucene.MultiFieldQueryParser.parse(parser, query)
+        if isinstance(parser, queryParser.MultiFieldQueryParser):
+            return queryParser.MultiFieldQueryParser.parse(parser, query)
             return parser.parse(query)
-            if isinstance(parser, lucene.PythonQueryParser):
+            if isinstance(parser, PythonQueryParser):
 class IndexReader(object):
     def segments(self):
         "segment filenames with document counts"
-        return dict((lucene.SegmentReader.cast_(reader).segmentName, reader.numDocs()) for reader in self.sequentialSubReaders)
+        return dict((index.SegmentReader.cast_(reader).segmentName, reader.numDocs()) for reader in self.sequentialSubReaders)
     def copy(self, dest, query=None, exclude=None, merge=0):
         """Copy the index to the destination directory.
         Optimized to use hard links if the destination is a file system path.
         copy(self.indexCommit, dest)
         with contextlib.closing(IndexWriter(dest)) as writer:
             if query:
-                writer.delete(Query(lucene.MatchAllDocsQuery) - query)
+                writer.delete(Query(search.MatchAllDocsQuery) - query)
             if exclude:
             return len(writer)
     def count(self, name, value):
         "Return number of documents with given term."
-        return self.docFreq(lucene.Term(name, value))
+        return self.docFreq(index.Term(name, value))
     def names(self, option='all', **attrs):
         """Return field names, given option description.
         .. versionchanged:: 1.2 lucene 3.6 requires FieldInfo filter attributes instead of option
-        if hasattr(lucene.IndexReader, 'getFieldNames'):
+        if hasattr(index.IndexReader, 'getFieldNames'):
             return list(self.getFieldNames(getattr(self.FieldOption, option.upper())))
-        fieldinfos = lucene.ReaderUtil.getMergedFieldInfos(self.indexReader).iterator()
+        fieldinfos = util.ReaderUtil.getMergedFieldInfos(self.indexReader).iterator()
         return [ for fieldinfo in fieldinfos if all(getattr(fieldinfo, name) == attrs[name] for name in attrs)]
     def terms(self, name, value='', stop=None, counts=False, **fuzzy):
         """Generate a slice of term values, optionally with frequency counts.
         :param counts: include frequency counts
         :param fuzzy: optional keyword arguments for fuzzy terms
-        term = lucene.Term(name, value)
+        term = index.Term(name, value)
         if fuzzy:
             args = fuzzy.pop('minSimilarity', 0.5), fuzzy.pop('prefixLength', 0)
-            termenum = lucene.FuzzyTermEnum(self.indexReader, term, *args, **fuzzy)
+            termenum = search.FuzzyTermEnum(self.indexReader, term, *args, **fuzzy)
         elif '*' in value or '?' in value:
-            termenum = lucene.WildcardTermEnum(self.indexReader, term)
+            termenum = search.WildcardTermEnum(self.indexReader, term)
-            termenum = lucene.TermRangeTermEnum(self.indexReader, name, value, stop, True, False, None)
+            termenum = search.TermRangeTermEnum(self.indexReader, name, value, stop, True, False, None)
         with contextlib.closing(termenum):
             term = termenum.term()
             while term:
         :param type: int or float
         :param counts: include frequency counts
-        term = lucene.Term(name, chr(ord(' ') + step))
-        decode = lucene.NumericUtils.prefixCodedToLong
-        convert = lucene.NumericUtils.sortableLongToDouble if issubclass(type, float) else int
-        with contextlib.closing(lucene.PrefixTermEnum(self.indexReader, term)) as termenum:
+        term = index.Term(name, chr(ord(' ') + step))
+        decode = util.NumericUtils.prefixCodedToLong
+        convert = util.NumericUtils.sortableLongToDouble if issubclass(type, float) else int
+        with contextlib.closing(search.PrefixTermEnum(self.indexReader, term)) as termenum:
             term = termenum.term()
             while term:
                 value = convert(decode(term.text()))
                 term = and termenum.term()
     def docs(self, name, value, counts=False):
         "Generate doc ids which contain given term, optionally with frequency counts."
-        with contextlib.closing(self.termDocs(lucene.Term(name, value))) as termdocs:
+        with contextlib.closing(self.termDocs(index.Term(name, value))) as termdocs:
                 doc = termdocs.doc()
                 yield (doc, termdocs.freq()) if counts else doc
     def positions(self, name, value, payloads=False):
         "Generate doc ids and positions which contain given term, optionally only with payloads."
         array = lucene.JArray_byte('')
-        with contextlib.closing(self.termPositions(lucene.Term(name, value))) as termpositions:
+        with contextlib.closing(self.termPositions(index.Term(name, value))) as termpositions:
                 doc = termpositions.doc()
                 positions = (termpositions.nextPosition() for n in xrange(termpositions.freq()))
         :param positions: optionally include slice positions instead of counts
         :param payloads: optionally only include slice positions with payloads
-        spans = itertools.takewhile(, itertools.repeat(query.getSpans(self.indexReader)))
-        for doc, spans in itertools.groupby(spans, key=lucene.Spans.doc):
+        spans_ = itertools.takewhile(, itertools.repeat(query.getSpans(self.indexReader)))
+        for doc, spans_ in itertools.groupby(spans_, key=spans.Spans.doc):
             if payloads:
                 yield doc, [(span.start(), span.end(), [lucene.JArray_byte.cast_(data).string_ for data in span.payload]) \
-                    for span in spans if span.payloadAvailable]
+                    for span in spans_ if span.payloadAvailable]
             elif positions:
-                yield doc, [(span.start(), span.end()) for span in spans]
+                yield doc, [(span.start(), span.end()) for span in spans_]
-                yield doc, sum(1 for span in spans)
+                yield doc, sum(1 for span in spans_)
     def termvector(self, id, field, counts=False):
         "Generate terms for given doc id and field, optionally with frequency counts."
-        tfv = self.getTermFreqVector(id, field) or lucene.QueryTermVector([])
+        tfv = self.getTermFreqVector(id, field) or search.QueryTermVector([])
         return zip(tfv.terms, tfv.termFrequencies) if counts else iter(tfv.terms)
     def positionvector(self, id, field, offsets=False):
         "Generate terms and positions for given doc id and field, optionally with character offsets."
-        tpv = lucene.TermPositionVector.cast_(self.getTermFreqVector(id, field))
-        for index, term in enumerate(tpv.terms):
+        tpv = index.TermPositionVector.cast_(self.getTermFreqVector(id, field))
+        for idx, term in enumerate(tpv.terms):
             if offsets:
-                yield term, list(map(operator.attrgetter('startOffset', 'endOffset'), tpv.getOffsets(index)))
+                yield term, list(map(operator.attrgetter('startOffset', 'endOffset'), tpv.getOffsets(idx)))
-                yield term, list(tpv.getTermPositions(index))
+                yield term, list(tpv.getTermPositions(idx))
     def morelikethis(self, doc, *fields, **attrs):
         """Return MoreLikeThis query for document.
         :param fields: document fields to use, optional for termvectors
         :param attrs: additional attributes to set on the morelikethis object
-        mlt = lucene.MoreLikeThis(self.indexReader)
+        mlt = similar.MoreLikeThis(self.indexReader)
         mlt.fieldNames = fields or None
         for name, value in attrs.items():
             setattr(mlt, name, value)
-        return if isinstance(doc, basestring) else doc)
+        return if isinstance(doc, basestring) else doc)
     def overlap(self, left, right):
         "Return intersection count of cached filters."
-        count, bitset = 0, getattr(lucene, 'FixedBitSet', lucene.OpenBitSet)
+        count, bitset = 0, getattr(util, 'FixedBitSet', util.OpenBitSet)
         for reader in self.sequentialSubReaders:
             docsets = left.getDocIdSet(reader), right.getDocIdSet(reader)
-            if lucene.DocIdSet.EMPTY_DOCIDSET not in docsets:
+            if search.DocIdSet.EMPTY_DOCIDSET not in docsets:
                 bits = [bitset.cast_(docset).bits for docset in docsets]
-                count += lucene.BitUtil.pop_intersect(bits[0], bits[1], 0, min(map(len, bits)))
+                count += util.BitUtil.pop_intersect(bits[0], bits[1], 0, min(map(len, bits)))
         return int(count)
-class IndexSearcher(lucene.IndexSearcher, IndexReader):
+class IndexSearcher(search.IndexSearcher, IndexReader):
     """Inherited lucene IndexSearcher, with a mixed-in IndexReader.
     :param directory: directory path, lucene Directory, or lucene IndexReader
     def __init__(self, directory, analyzer=None):
         self.shared = closing()
-        lucene.IndexSearcher.__init__(self, self.shared.reader(directory))
+        search.IndexSearcher.__init__(self, self.shared.reader(directory))
         self.analyzer = self.shared.analyzer(analyzer)
         self.filters, self.sorters, self.spellcheckers = {}, {}, {}
         self.termsfilters, self.groupings = set(), {}
     def load(cls, directory, analyzer=None):
         "Open `IndexSearcher`_ with a lucene RAMDirectory, loading index into memory."
         ref = closing()
-        self = cls(lucene.RAMDirectory(, analyzer)
+        self = cls(store.RAMDirectory(, analyzer)
         return self
     def __del__(self):
         other = type(self)(self.indexReader.reopen(), self.analyzer)
         other.shared = self.shared
-        other.filters.update((key, value if isinstance(value, lucene.Filter) else dict(value)) for key, value in self.filters.items())
+        other.filters.update((key, value if isinstance(value, search.Filter) else dict(value)) for key, value in self.filters.items())
         for termsfilter in self.termsfilters:
         if filters:
         return Document(self.doc(id))
     def get(self, id, *fields):
         "Return `Document`_ with only selected fields loaded."
-        return Document(self.doc(id, lucene.MapFieldSelector(fields)))
+        return Document(self.doc(id, document.MapFieldSelector(fields)))
     def parse(self, query, spellcheck=False, **kwargs):
-        if isinstance(query, lucene.Query):
+        if isinstance(query, search.Query):
             return query
         if spellcheck:
             kwargs['parser'], kwargs['searcher'] = SpellParser, self
         :param options: additional :meth:`search` options
         if len(query) > 1:
-            return self.docFreq(lucene.Term(*query))
-        query = self.parse(*query, **options) if query else lucene.MatchAllDocsQuery()
-        collector = lucene.TotalHitCountCollector()
-        lucene.IndexSearcher.search(self, query, options.get('filter'), collector)
+            return self.docFreq(index.Term(*query))
+        query = self.parse(*query, **options) if query else search.MatchAllDocsQuery()
+        collector = search.TotalHitCountCollector()
+        search.IndexSearcher.search(self, query, options.get('filter'), collector)
         return collector.totalHits
     def collector(self, query, count=None, sort=None, reverse=False, scores=False, maxscore=False):
         weight = self.createNormalizedWeight(query) if hasattr(self, 'createNormalizedWeight') else query.weight(self)
         inorder = not weight.scoresDocsOutOfOrder()
         if count is None:
-            return lucene.CachingCollector.create(not inorder, True, float('inf'))
+            return search.CachingCollector.create(not inorder, True, float('inf'))
         count = min(count, self.maxDoc() or 1)
         if callable(sort) or sort is None:
-            return lucene.TopScoreDocCollector.create(count, inorder)
+            return search.TopScoreDocCollector.create(count, inorder)
         if isinstance(sort, basestring):
             sort = self.sorter(sort, reverse=reverse)
-        if not isinstance(sort, lucene.Sort):
-            sort = lucene.Sort(sort)
-        return lucene.TopFieldCollector.create(sort, count, True, scores, maxscore, inorder)
+        if not isinstance(sort, search.Sort):
+            sort = search.Sort(sort)
+        return search.TopFieldCollector.create(sort, count, True, scores, maxscore, inorder)
     def search(self, query=None, filter=None, count=None, sort=None, reverse=False, scores=False, maxscore=False, timeout=None, **parser):
         """Run query and return `Hits`_.
         :param timeout: stop search after elapsed number of seconds
         :param parser: :meth:`Analyzer.parse` options
-        query = lucene.MatchAllDocsQuery() if query is None else self.parse(query, **parser)
+        query = search.MatchAllDocsQuery() if query is None else self.parse(query, **parser)
         cache = collector = self.collector(query, count, sort, reverse, scores, maxscore)
-        args = [lucene.TimeLimitingCollector.getGlobalCounter()] if hasattr(lucene, 'Counter') else []
-        results = collector if timeout is None else lucene.TimeLimitingCollector(collector, *(args + [long(timeout * 1000)]))
+        args = [search.TimeLimitingCollector.getGlobalCounter()] if hasattr(util, 'Counter') else []
+        results = collector if timeout is None else search.TimeLimitingCollector(collector, *(args + [long(timeout * 1000)]))
-            lucene.IndexSearcher.search(self, query, filter, results)
+            search.IndexSearcher.search(self, query, filter, results)
         except lucene.JavaError as timeout:
-            if not lucene.TimeLimitingCollector.TimeExceededException.instance_(timeout.getJavaException()):
+            if not search.TimeLimitingCollector.TimeExceededException.instance_(timeout.getJavaException()):
-        if isinstance(cache, lucene.CachingCollector):
-            collector = lucene.TotalHitCountCollector()
+        if isinstance(cache, search.CachingCollector):
+            collector = search.TotalHitCountCollector()
             collector = self.collector(query, collector.totalHits or 1, sort, reverse, scores, maxscore)
         counts = collections.defaultdict(dict)
         if isinstance(query, basestring):
             query = self.parse(query)
-        if isinstance(query, lucene.Query):
-            query = lucene.QueryWrapperFilter(query)
-        if not isinstance(query, lucene.CachingWrapperFilter):
-            query = lucene.CachingWrapperFilter(query)
+        if isinstance(query, search.Query):
+            query = search.QueryWrapperFilter(query)
+        if not isinstance(query, search.CachingWrapperFilter):
+            query = search.CachingWrapperFilter(query)
         for key in keys:
             filters = self.filters.get(key)
             if key in self.groupings:
                 counts[key] = dict(self.groupings[key].facets(query))
-            elif isinstance(filters, lucene.Filter):
+            elif isinstance(filters, search.Filter):
                 counts[key] = self.overlap(query, filters)
                 name, value = (key, None) if isinstance(key, basestring) else key
                 yield word
     def match(self, document, *queries):
         "Generate scores for all queries against a given document mapping."
-        searcher = lucene.MemoryIndex()
+        searcher = memory.MemoryIndex()
         for name, value in document.items():
             if isinstance(value, basestring):
                 value = value, self.analyzer
-            elif isinstance(value, lucene.TokenStream):
+            elif isinstance(value, analysis.TokenStream):
                 value = value,
             searcher.addField(name, *value)
         for query in queries:
     def __init__(self, reader, analyzer=None):
         shared = closing()
-        if not lucene.MultiReader.instance_(reader):
-            reader = lucene.MultiReader(list(map(shared.reader, reader)))
+        if not index.MultiReader.instance_(reader):
+            reader = index.MultiReader(list(map(shared.reader, reader)))
             ref = closing([reader])
         IndexSearcher.__init__(self, reader, analyzer)
     def overlap(self, *filters):
         return sum(IndexReader(reader).overlap(*filters) for reader in self.sequentialSubReaders)
-class IndexWriter(lucene.IndexWriter):
+class IndexWriter(index.IndexWriter):
     """Inherited lucene IndexWriter.
     Supports setting fields parameters explicitly, so documents can be represented as dictionaries.
     :param version: lucene Version argument passed to IndexWriterConfig or StandardAnalyzer, default is latest
     :param attrs: additional attributes to set on IndexWriterConfig
-    __len__ = lucene.IndexWriter.numDocs
+    __len__ = index.IndexWriter.numDocs
     parse = IndexSearcher.__dict__['parse']
     def __init__(self, directory=None, mode='a', analyzer=None, version=None, **attrs):
         self.shared = closing()
         if version is None:
-            version = lucene.Version.values()[-1]
-        config = lucene.IndexWriterConfig(version, self.shared.analyzer(analyzer, version))
-        config.openMode = lucene.IndexWriterConfig.OpenMode.values()['wra'.index(mode)]
+            version = util.Version.values()[-1]
+        config = index.IndexWriterConfig(version, self.shared.analyzer(analyzer, version))
+        config.openMode = index.IndexWriterConfig.OpenMode.values()['wra'.index(mode)]
         for name, value in attrs.items():
             setattr(config, name, value)
-        config.indexDeletionPolicy = self.policy = lucene.SnapshotDeletionPolicy(config.indexDeletionPolicy)
-        lucene.IndexWriter.__init__(self,, config)
+        config.indexDeletionPolicy = self.policy = index.SnapshotDeletionPolicy(config.indexDeletionPolicy)
+        index.IndexWriter.__init__(self,, config)
         self.fields = {}
     def __del__(self):
         if hash(self):
         :param params: store,index,termvector options compatible with `Field`_
         self.fields[name] = cls(name, **params)
-    def document(self, document=(), boost=None, **terms):
+    def document(self, items=(), boost=None, **terms):
         "Return lucene Document from mapping of field names to one or multiple values."
-        doc = lucene.Document()
+        doc = document.Document()
         if boost is not None:
             warnings.warn('Document boosting has been removed from lucene 4; set Field boosts instead.', DeprecationWarning)
             doc.boost = boost
-        for name, values in dict(document, **terms).items():
+        for name, values in dict(items, **terms).items():
             if isinstance(values, Atomic):
                 values = values,
             for field in self.fields[name].items(*values):
     def update(self, name, value='', document=(), **terms):
         "Atomically delete documents which match given term and add the new :meth:`document` with optional boost."
         doc = self.document(document, **terms)
-        self.updateDocument(lucene.Term(name, *[value] if value else doc.getValues(name)), doc)
+        self.updateDocument(index.Term(name, *[value] if value else doc.getValues(name)), doc)
     def delete(self, *query, **options):
         """Remove documents which match given query or term.
         :param query: :meth:`` compatible query, or optimally a name and value
         :param options: additional :meth:`Analyzer.parse` options
-        parse = self.parse if len(query) == 1 else lucene.Term
+        parse = self.parse if len(query) == 1 else index.Term
         self.deleteDocuments(parse(*query, **options))
     def __iadd__(self, directory):
         "Add directory (or reader, searcher, writer) to index."
         ref = closing()
         directory =
-        self.addIndexes([directory if isinstance(directory, lucene.Directory) else])
+        self.addIndexes([directory if isinstance(directory, store.Directory) else])
         return self
     def snapshot(self, id=''):
     def update(self, value, document=(), **terms):
         "Atomically update document based on unique field."
         terms[self.field] = value
-        self.updateDocument(lucene.Term(self.field, value), self.document(document, **terms))
+        self.updateDocument(index.Term(self.field, value), self.document(document, **terms))
     def refresh(self, **caches):
         "Store refreshed searcher and synchronize :attr:`termsfilters`."
         sorter, segments = self.sorter(self.field), self.segments
         searcher = self.indexSearcher.reopen(**caches)
-        readers = [reader for reader in searcher.sequentialSubReaders if lucene.SegmentReader.cast_(reader).segmentName not in segments]
+        readers = [reader for reader in searcher.sequentialSubReaders if index.SegmentReader.cast_(reader).segmentName not in segments]
         terms = list(itertools.chain.from_iterable(IndexReader(reader).terms(self.field) for reader in readers))
         for filter, termsfilter in self.termsfilters.items():
             if terms:

File lupyne/engine/

 import heapq
 import threading
 import lucene
+    from java.lang import Integer
+    from java.util import Arrays, HashSet
+    from org.apache.lucene import document, index, search, util
+    from import highlight, spans, vectorhighlight
+    from org.apache.pylucene import search as search_
+    from org.apache.pylucene.queryParser import PythonQueryParser
+except ImportError:
+    from lucene import Integer, Arrays, HashSet, PythonQueryParser
+    document = index = search = util = highlight = spans = vectorhighlight = search_ = lucene
 class Query(object):
     """Inherited lucene Query, with dynamic base class acquisition.
         base.__init__(self, *args)
     def filter(self, cache=True):
         "Return lucene CachingWrapperFilter, optionally just QueryWrapperFilter."
-        if isinstance(self, lucene.PrefixQuery):
-            filter = lucene.PrefixFilter(self.getPrefix())
-        elif isinstance(self, lucene.TermRangeQuery):
-            filter = lucene.TermRangeFilter(self.field, self.lowerTerm, self.upperTerm, self.includesLower(), self.includesUpper())
-        elif isinstance(self, lucene.TermQuery):
-            filter = lucene.TermsFilter()
+        if isinstance(self, search.PrefixQuery):
+            filter = search.PrefixFilter(self.getPrefix())
+        elif isinstance(self, search.TermRangeQuery):
+            filter = search.TermRangeFilter(self.field, self.lowerTerm, self.upperTerm, self.includesLower(), self.includesUpper())
+        elif isinstance(self, search.TermQuery):
+            filter = search.TermsFilter()
-            filter = lucene.QueryWrapperFilter(self)
-        return lucene.CachingWrapperFilter(filter) if cache else filter
+            filter = search.QueryWrapperFilter(self)
+        return search.CachingWrapperFilter(filter) if cache else filter
     def terms(self):
         "Generate set of query term items."
-        terms = lucene.HashSet().of_(lucene.Term)
+        terms = HashSet().of_(index.Term)
         for term in terms:
             yield term.field(), term.text()
     def term(cls, name, value, boost=1.0):
         "Return lucene TermQuery."
-        self = cls(lucene.TermQuery, lucene.Term(name, value))
+        self = cls(search.TermQuery, index.Term(name, value))
         self.boost = boost
         return self
     def boolean(cls, occur, *queries, **terms):
-        self = BooleanQuery(lucene.BooleanQuery)
+        self = BooleanQuery(search.BooleanQuery)
         for query in queries:
             self.add(query, occur)
         for name, values in terms.items():
     def any(cls, *queries, **terms):
         "Return `BooleanQuery`_ (OR) from queries and terms."
-        return cls.boolean(lucene.BooleanClause.Occur.SHOULD, *queries, **terms)
+        return cls.boolean(search.BooleanClause.Occur.SHOULD, *queries, **terms)
     def all(cls, *queries, **terms):
         "Return `BooleanQuery`_ (AND) from queries and terms."
-        return cls.boolean(lucene.BooleanClause.Occur.MUST, *queries, **terms)
+        return cls.boolean(search.BooleanClause.Occur.MUST, *queries, **terms)
     def disjunct(cls, multiplier, *queries, **terms):
         "Return lucene DisjunctionMaxQuery from queries and terms."
-        self = cls(lucene.DisjunctionMaxQuery, lucene.Arrays.asList(queries), multiplier)
+        self = cls(search.DisjunctionMaxQuery, Arrays.asList(queries), multiplier)
         for name, values in terms.items():
             for value in ([values] if isinstance(values, basestring) else values):
                 self.add(cls.term(name, value))
     def span(cls, *term):
         "Return `SpanQuery`_ from term name and value or a MultiTermQuery."
         if len(term) <= 1:
-            return SpanQuery(lucene.SpanMultiTermQueryWrapper, *term)
-        return SpanQuery(lucene.SpanTermQuery, lucene.Term(*term))
+            return SpanQuery(spans.SpanMultiTermQueryWrapper, *term)
+        return SpanQuery(spans.SpanTermQuery, index.Term(*term))
     def near(cls, name, *values, **kwargs):
         """Return :meth:`SpanNearQuery <SpanQuery.near>` from terms.
     def prefix(cls, name, value):
         "Return lucene PrefixQuery."
-        return cls(lucene.PrefixQuery, lucene.Term(name, value))
+        return cls(search.PrefixQuery, index.Term(name, value))
     def range(cls, name, start, stop, lower=True, upper=False):
         "Return lucene RangeQuery, by default with a half-open interval."
-        return cls(lucene.TermRangeQuery, name, start, stop, lower, upper)
+        return cls(search.TermRangeQuery, name, start, stop, lower, upper)
     def phrase(cls, name, *values):
         "Return lucene PhraseQuery.  None may be used as a placeholder."
-        self = cls(lucene.PhraseQuery)
-        for index, value in enumerate(values):
+        self = cls(search.PhraseQuery)
+        for idx, value in enumerate(values):
             if value is not None:
-                self.add(lucene.Term(name, value), index)
+                self.add(index.Term(name, value), idx)
         return self
     def multiphrase(cls, name, *values):
         "Return lucene MultiPhraseQuery.  None may be used as a placeholder."
-        self = cls(lucene.MultiPhraseQuery)
-        for index, words in enumerate(values):
+        self = cls(search.MultiPhraseQuery)
+        for idx, words in enumerate(values):
             if isinstance(words, basestring):
                 words = [words]
             if words is not None:
-                self.add([lucene.Term(name, word) for word in words], index)
+                self.add([index.Term(name, word) for word in words], idx)
         return self
     def wildcard(cls, name, value):
         "Return lucene WildcardQuery."
-        return cls(lucene.WildcardQuery, lucene.Term(name, value))
+        return cls(search.WildcardQuery, index.Term(name, value))
     def fuzzy(cls, name, value, minimumSimilarity=0.5, prefixLength=0):
         "Return lucene FuzzyQuery."
-        return cls(lucene.FuzzyQuery, lucene.Term(name, value), minimumSimilarity, prefixLength)
+        return cls(search.FuzzyQuery, index.Term(name, value), minimumSimilarity, prefixLength)
     def __pos__(self):
         return Query.all(self)
     def __neg__(self):
-        return Query.boolean(lucene.BooleanClause.Occur.MUST_NOT, self)
+        return Query.boolean(search.BooleanClause.Occur.MUST_NOT, self)
     def __and__(self, other):
         return Query.all(self, other)
     def __rand__(self, other):
     def __getitem__(self, index):
         return self.getClauses()[index]
     def __iand__(self, other):
-        self.add(other, lucene.BooleanClause.Occur.MUST)
+        self.add(other, search.BooleanClause.Occur.MUST)
         return self
     def __ior__(self, other):
-        self.add(other, lucene.BooleanClause.Occur.SHOULD)
+        self.add(other, search.BooleanClause.Occur.SHOULD)
         return self
     def __isub__(self, other):
-        self.add(other, lucene.BooleanClause.Occur.MUST_NOT)
+        self.add(other, search.BooleanClause.Occur.MUST_NOT)
         return self
 class SpanQuery(Query):
     "Inherited lucene SpanQuery with additional span constructors."
     def filter(self, cache=True):
         "Return lucene CachingSpanFilter, optionally just SpanQueryFilter."
-        filter = lucene.SpanQueryFilter(self)
-        return lucene.CachingSpanFilter(filter) if cache else filter
+        filter = search.SpanQueryFilter(self)
+        return search.CachingSpanFilter(filter) if cache else filter
     def __getitem__(self, slc):
-        start, stop, step = slc.indices(lucene.Integer.MAX_VALUE)
+        start, stop, step = slc.indices(Integer.MAX_VALUE)
         assert step == 1, 'slice step is not supported'
-        return SpanQuery(lucene.SpanPositionRangeQuery, self, start, stop)
+        return SpanQuery(spans.SpanPositionRangeQuery, self, start, stop)
     def __sub__(self, other):
-        return SpanQuery(lucene.SpanNotQuery, self, other)
-    def __or__(*spans):
-        return SpanQuery(lucene.SpanOrQuery, spans)
-    def near(*spans, **kwargs):
+        return SpanQuery(spans.SpanNotQuery, self, other)
+    def __or__(*spans_):
+        return SpanQuery(spans.SpanOrQuery, spans_)
+    def near(*spans_, **kwargs):
         """Return lucene SpanNearQuery from SpanQueries.
         :param slop: default 0
         :param collectPayloads: default True
         args = map(kwargs.get, ('slop', 'inOrder', 'collectPayloads'), (0, True, True))
-        return SpanQuery(lucene.SpanNearQuery, spans, *args)
+        return SpanQuery(spans.SpanNearQuery, spans_, *args)
     def mask(self, name):
         "Return lucene FieldMaskingSpanQuery, which allows combining span queries from different fields."
-        return SpanQuery(lucene.FieldMaskingSpanQuery, self, name)
+        return SpanQuery(spans.FieldMaskingSpanQuery, self, name)
     def payload(self, *values):
         "Return lucene SpanPayloadCheckQuery from payload values."
-        base = lucene.SpanNearPayloadCheckQuery if lucene.SpanNearQuery.instance_(self) else lucene.SpanPayloadCheckQuery
-        return SpanQuery(base, self, lucene.Arrays.asList(list(map(lucene.JArray_byte, values))))
+        base = spans.SpanNearPayloadCheckQuery if spans.SpanNearQuery.instance_(self) else spans.SpanPayloadCheckQuery
+        return SpanQuery(base, self, Arrays.asList(list(map(lucene.JArray_byte, values))))
-class TermsFilter(lucene.CachingWrapperFilter):
+class TermsFilter(search.CachingWrapperFilter):
     """Caching filter based on a unique field and set of matching values.
     Optimized for many terms and docs, with support for incremental updates.
     Suitable for searching external metadata associated with indexed identifiers.
     ops = {'or': 'update', 'and': 'intersection_update', 'andNot': 'difference_update'}
     def __init__(self, field, values=()):
         assert lucene.VERSION >= '3.5', 'requires FixedBitSet set operations introduced in lucene 3.5'
-        lucene.CachingWrapperFilter.__init__(self, lucene.TermsFilter())
+        search.CachingWrapperFilter.__init__(self, search.TermsFilter())
         self.field = field
         self.values = set(values)
         self.readers = set()
     def filter(self, values, cache=True):
         "Return lucene TermsFilter, optionally using the FieldCache."
         if cache:
-            return lucene.FieldCacheTermsFilter(self.field, tuple(values))
-        filter, term = lucene.TermsFilter(), lucene.Term(self.field)
+            return search.FieldCacheTermsFilter(self.field, tuple(values))
+        filter, term = search.TermsFilter(), index.Term(self.field)
         for value in values:
         return filter
     def apply(self, filter, op, readers):
         for reader in readers:
-            bitset = lucene.FixedBitSet.cast_(self.getDocIdSet(reader))
+            bitset = util.FixedBitSet.cast_(self.getDocIdSet(reader))
             getattr(bitset, op)(filter.getDocIdSet(reader).iterator())
     def update(self, values, op='or', cache=True):
         """Update allowed values and corresponding cached bitsets.
         point = bisect.bisect_right(self.offsets, index) - 1
         return self.arrays[point][index - self.offsets[point]]
-class SortField(lucene.SortField):
+class SortField(search.SortField):
     """Inherited lucene SortField used for caching FieldCache parsers.
     :param name: field name
     def __init__(self, name, type='string', parser=None, reverse=False):
         type = self.typename = getattr(type, '__name__', type).capitalize()
         if parser is None:
-            parser = getattr(lucene.SortField, type.upper())
-        elif not lucene.FieldCache.Parser.instance_(parser):
-            base = getattr(lucene, 'Python{0}Parser'.format(type))
+            parser = getattr(search.SortField, type.upper())
+        elif not search.FieldCache.Parser.instance_(parser):
+            base = getattr(search_, 'Python{0}Parser'.format(type))
             namespace = {'parse' + type: staticmethod(parser)}
             parser = object.__class__(base.__name__, (base,), namespace)()
-        lucene.SortField.__init__(self, name, parser, reverse)
+        search.SortField.__init__(self, name, parser, reverse)
     def array(self, reader):
-        method = getattr(lucene.FieldCache.DEFAULT, 'get{0}s'.format(self.typename))
+        method = getattr(search.FieldCache.DEFAULT, 'get{0}s'.format(self.typename))
         return method(reader, self.field, *[self.parser][:bool(self.parser)])
     def comparator(self, reader):
         "Return indexed values from default FieldCache using the given top-level reader."
         readers = reader.sequentialSubReaders
-        if lucene.MultiReader.instance_(reader):
+        if index.MultiReader.instance_(reader):
             readers = itertools.chain.from_iterable(reader.sequentialSubReaders for reader in readers)
         arrays = list(map(self.array, readers))
         return arrays[0] if len(arrays) <= 1 else Comparator(arrays)
     def filter(self, start, stop, lower=True, upper=False):
         "Return lucene FieldCacheRangeFilter based on field and type."
-        method = getattr(lucene.FieldCacheRangeFilter, 'new{0}Range'.format(self.typename))
+        method = getattr(search.FieldCacheRangeFilter, 'new{0}Range'.format(self.typename))
         return method(self.field, self.parser, start, stop, lower, upper)
     def terms(self, filter, *readers):
         "Generate field cache terms from docs which match filter from all segments."
         for reader in readers:
             array, docset = self.array(reader), filter.getDocIdSet(reader)
-            for id in iter(docset.iterator().nextDoc, lucene.DocIdSetIterator.NO_MORE_DOCS):
+            for id in iter(docset.iterator().nextDoc, search.DocIdSetIterator.NO_MORE_DOCS):
                 yield array[id]
-class Highlighter(lucene.Highlighter):
+class Highlighter(highlight.Highlighter):
     """Inherited lucene Highlighter with stored analysis options.
     :param searcher: `IndexSearcher`_ used for analysis, scoring, and optionally text retrieval
     def __init__(self, searcher, query, field, terms=False, fields=False, tag='', formatter=None, encoder=None):
         if tag:
-            formatter = lucene.SimpleHTMLFormatter('<{0}>'.format(tag), '</{0}>'.format(tag))
-        scorer = (lucene.QueryTermScorer if terms else lucene.QueryScorer)(query, *(searcher.indexReader, field) * (not fields))
-        lucene.Highlighter.__init__(self, *filter(None, [formatter, encoder, scorer]))
+            formatter = highlight.SimpleHTMLFormatter('<{0}>'.format(tag), '</{0}>'.format(tag))
+        scorer = (highlight.QueryTermScorer if terms else highlight.QueryScorer)(query, *(searcher.indexReader, field) * (not fields))
+        highlight.Highlighter.__init__(self, *filter(None, [formatter, encoder, scorer]))
         self.searcher, self.field = searcher, field
-        self.selector = lucene.MapFieldSelector([field])
+        self.selector = document.MapFieldSelector([field])
     def fragments(self, doc, count=1):
         """Return highlighted text fragments.
             doc = self.searcher.doc(doc, self.selector)[self.field]
         return doc and list(self.getBestFragments(self.searcher.analyzer, self.field, doc, count))
-class FastVectorHighlighter(lucene.FastVectorHighlighter):
+class FastVectorHighlighter(vectorhighlight.FastVectorHighlighter):
     """Inherited lucene FastVectorHighlighter with stored query.
     Fields must be stored and have term vectors with offsets and positions.
     def __init__(self, searcher, query, field, terms=False, fields=False, tag='', fragListBuilder=None, fragmentsBuilder=None):
         if tag:
-            fragmentsBuilder = lucene.SimpleFragmentsBuilder(['<{0}>'.format(tag)], ['</{0}>'.format(tag)])
-        args = fragListBuilder or lucene.SimpleFragListBuilder(), fragmentsBuilder or lucene.SimpleFragmentsBuilder()
-        lucene.FastVectorHighlighter.__init__(self, not terms, not fields, *args)
+            fragmentsBuilder = vectorhighlight.SimpleFragmentsBuilder(['<{0}>'.format(tag)], ['</{0}>'.format(tag)])
+        args = fragListBuilder or vectorhighlight.SimpleFragListBuilder(), fragmentsBuilder or vectorhighlight.SimpleFragmentsBuilder()
+        vectorhighlight.FastVectorHighlighter.__init__(self, not terms, not fields, *args)
         self.searcher, self.field = searcher, field
         self.query = self.getFieldQuery(query)
     def fragments(self, id, count=1, size=100):
             groups = map(self.edits, edits, edits.values())
             edits = dict((edit, group[edit]) for group in groups for edit in group if edit not in previous)
-class SpellParser(lucene.PythonQueryParser):
+class SpellParser(PythonQueryParser):
     """Inherited lucene QueryParser which corrects spelling.
     Assign a searcher attribute or override :meth:`correct` implementation.
         "Return term with text replaced as necessary."
         field = term.field()
         for text in self.searcher.correct(field, term.text()):
-            return lucene.Term(field, text)
+            return index.Term(field, text)
         return term
     def rewrite(self, query):
         "Return term or phrase query with corrected terms substituted."
-        if lucene.TermQuery.instance_(query):
-            term = lucene.TermQuery.cast_(query).term
-            return lucene.TermQuery(self.correct(term))
-        query = lucene.PhraseQuery.cast_(query)
-        phrase = lucene.PhraseQuery()
+        if search.TermQuery.instance_(query):
+            term = search.TermQuery.cast_(query).term
+            return search.TermQuery(self.correct(term))
+        query = search.PhraseQuery.cast_(query)
+        phrase = search.PhraseQuery()
         for position, term in zip(query.positions, query.terms):
             phrase.add(self.correct(term), position)
         return phrase

File lupyne/

     import json
 import warnings
 import lucene
+    from org.apache.lucene import document, index, search, store
+except ImportError:
+    document = index = search = store = lucene
 import cherrypy
     from . import engine, client
     except exceptions as exc:
-        raise cherrypy.HTTPError(status, str(exc))
+        raise cherrypy.HTTPError(status, str(exc.getJavaException() if isinstance(exc, lucene.JavaError) else exc))
 class WebSearcher(object):
     """Dispatch root with a delegated Searcher.
     def sync(self, host, path=''):
         "Sync with remote index."
         path = '/' + '{0}/update/{1}/'.format(path, uuid.uuid1()).lstrip('/')
-        directory = lucene.FSDirectory.cast_(
+        directory = store.FSDirectory.cast_(
         resource = client.Resource(host)
         names = sorted(set(resource.put(path)).difference(os.listdir(directory)))
             self.sync(host, path)
             cherrypy.response.status = httplib.ACCEPTED
         reader = self.searcher.indexReader
-        readers = reader.sequentialSubReaders if lucene.MultiReader.instance_(reader) else [reader]
+        readers = reader.sequentialSubReaders if index.MultiReader.instance_(reader) else [reader]
         return dict((unicode(, reader.numDocs()) for reader in readers)
         if fields is None:
             fields = {}
-            hits.fields = lucene.MapFieldSelector(list(itertools.chain(fields, multi)))
+            hits.fields = document.MapFieldSelector(list(itertools.chain(fields, multi)))
         with HTTPError(httplib.BAD_REQUEST, AttributeError):
             groups = hits.groupby(searcher.comparator(*group.split(':')).__getitem__) if group else [hits]
         result['groups'], limit = [], options.get('group.limit', len(groups))
             result['groups'].append({'docs': [], 'count': len(hits), 'value': hits.value})
         if not group:
             result['docs'] = result.pop('groups')[0]['docs']
-        q = q or lucene.MatchAllDocsQuery()
+        q = q or search.MatchAllDocsQuery()
         if facets:
             facets = (tuple(facet.split(':')) if ':' in facet else facet for facet in facets)
             facets = result['facets'] = searcher.facets(q, *facets)
         if not name:
             return list(commit.fileNames)
         with HTTPError(httplib.NOT_FOUND, TypeError, AssertionError):
-            directory = lucene.FSDirectory.cast_(
+            directory = store.FSDirectory.cast_(
             assert name in commit.fileNames, 'file not referenced in commit'
         return cherrypy.lib.static.serve_download(os.path.join(directory, name))

File test/

 import bisect
 import contextlib
 import lucene
+try:
+    from import StringReader
+    from org.apache.lucene import analysis, document, search, store, util
+    from org.apache.lucene.analysis import miscellaneous, standard
+    from import grouping, highlight, vectorhighlight
+    from import PythonFilter
+except ImportError:
+    from lucene import StringReader, PythonFilter
+    analysis = document = search = store = util = miscellaneous = standard = grouping = highlight = vectorhighlight = lucene
 from lupyne import engine
 from . import fixture
     for message, category in itertools.izip_longest(messages, categories):
          assert issubclass(message.category, category), message
-class Filter(lucene.PythonFilter):
+class Filter(PythonFilter):
     "Broken filter to test errors are raised."
     def getDocIdSet(self, indexReader):
         assert False
     def testInterface(self):
         "Indexer and document interfaces."
         self.assertRaises(TypeError, engine.IndexSearcher)
-        analyzer = lucene.StandardAnalyzer(lucene.Version.values()[-1])
-        stemmer = engine.Analyzer(analyzer, lucene.PorterStemFilter, typeAsPayload)
+        analyzer = standard.StandardAnalyzer(util.Version.values()[-1])
+        stemmer = engine.Analyzer(analyzer, analysis.PorterStemFilter, typeAsPayload)
         for token in stemmer.tokens('hello'):
             assert token.positionIncrement == 1
-            assert engine.TokenFilter(lucene.EmptyTokenStream()).payload is None
+            assert engine.TokenFilter(miscellaneous.EmptyTokenStream()).payload is None
             assert token.term == 'hello'
             assert token.type == token.payload == '<ALPHANUM>'
             assert token.offset == (0, 5)
             token.offset, token.positionIncrement = (0, 0), 0
         assert str(stemmer.parse('hellos', field=['body', 'title'])) == 'body:hello title:hello'
         assert str(stemmer.parse('hellos', field={'body': 1.0, 'title': 2.0})) == 'body:hello title:hello^2.0'
-        indexer = engine.Indexer(analyzer=stemmer, version=lucene.Version.LUCENE_30, writeLockTimeout=100L)
+        indexer = engine.Indexer(analyzer=stemmer, version=util.Version.LUCENE_30, writeLockTimeout=100L)
         assert indexer.writeLockTimeout == 100
         self.assertRaises(lucene.JavaError, engine.Indexer,
         indexer.set('name', store=True, index=False, boost=2.0)
         for field in indexer.fields['name'].items('sample'):
-            assert isinstance(field, lucene.Field) and field.boost == 2.0
+            assert isinstance(field, document.Field) and field.boost == 2.0
         indexer.set('tag', store=True, index=True)
         searcher = indexer.indexSearcher
         query = engine.Query.multiphrase('text', ('hello', 'hi'), None, 'world')
         assert str(query).startswith('text:"(hello hi) ') and list(query.positions) == [0, 2]
         query = engine.Query.wildcard('text', '*')
-        assert str(query) == 'text:*' and isinstance(query, lucene.WildcardQuery)
-        assert str(lucene.MatchAllDocsQuery() | query) == '*:* text:*'
-        assert str(lucene.MatchAllDocsQuery() - query) == '*:* -text:*'
+        assert str(query) == 'text:*' and isinstance(query, search.WildcardQuery)
+        assert str(search.MatchAllDocsQuery() | query) == '*:* text:*'
+        assert str(search.MatchAllDocsQuery() - query) == '*:* -text:*'
         query = +query
         query &= engine.Query.fuzzy('text', 'hello')
         query |= engine.Query.fuzzy('text', 'hello', 0.1)
         query = engine.Query.span('text', 'world')
         assert str(query.mask('name')) == 'mask(text:world) as name'
         assert str(query.payload()) == 'spanPayCheck(text:world, payloadRef: )'
-        assert isinstance(query.filter(cache=False), lucene.SpanQueryFilter) and isinstance(query.filter(), lucene.CachingSpanFilter)
+        assert isinstance(query.filter(cache=False), search.SpanQueryFilter) and isinstance(query.filter(), search.CachingSpanFilter)
         query = engine.Query.disjunct(0.1, query, name='sample')
         assert str(query) == '(text:world | name:sample)~0.1'
         query = engine.Query.near('text', 'hello', ('tag', 'python'), slop=-1, inOrder=False)
         indexer +=
         indexer += self.tempdir
         assert len(indexer) == 3
-        indexer.add(text=lucene.WhitespaceTokenizer(lucene.StringReader('?')), name=lucene.JArray_byte('{}'))
+        indexer.add(text=analysis.WhitespaceTokenizer(StringReader('?')), name=lucene.JArray_byte('{}'))
         assert indexer[next('text', '?'))] == {'name': ['{}']}
         reader = engine.indexers.IndexReader(indexer.indexReader)
         searcher = engine.IndexSearcher.load(self.tempdir)
         engine.IndexSearcher.load( # ensure directory isn't closed
-        assert len(indexer) == len(searcher) and lucene.RAMDirectory.instance_(
+        assert len(indexer) == len(searcher) and store.RAMDirectory.instance_(
         assert indexer.filters == indexer.spellcheckers == {}
-        assert indexer.facets(lucene.MatchAllDocsQuery(), 'amendment')
+        assert indexer.facets(search.MatchAllDocsQuery(), 'amendment')
         assert indexer.suggest('amendment', '')
         assert list(indexer.filters) == list(indexer.spellcheckers) == ['amendment']
         indexer.delete('amendment', doc['amendment'])
         indexer.commit(filters=True, spellcheckers=True)
         assert reader.refCount == 0
         assert list(indexer.filters) == list(indexer.spellcheckers) == ['amendment']
-        doc['amendment'] = engine.Analyzer(lucene.WhitespaceTokenizer).tokens(doc['amendment'])
-        doc['date'] = engine.Analyzer(lucene.WhitespaceTokenizer).tokens(doc['date']), 2.0
+        doc['amendment'] = engine.Analyzer(analysis.WhitespaceTokenizer).tokens(doc['amendment'])
+        doc['date'] = engine.Analyzer(analysis.WhitespaceTokenizer).tokens(doc['date']), 2.0
         scores = list(searcher.match(doc, 'text:congress', 'text:law', 'amendment:27', 'date:19*'))
         assert 0.0 == scores[0] < scores[1] < scores[2] < scores[3] == 1.0
         searcher = engine.MultiSearcher([indexer.indexReader, self.tempdir])
         comparator = searcher.comparator('amendment')
         assert set(map(type, comparator)) == set([int])
         assert searcher is searcher.reopen()
-        assert searcher.facets(lucene.MatchAllDocsQuery(), 'amendment')['amendment'] == dict.fromkeys(map(str, range(1, 28)), 2)
+        assert searcher.facets(search.MatchAllDocsQuery(), 'amendment')['amendment'] == dict.fromkeys(map(str, range(1, 28)), 2)
         reader = searcher.indexReader
         del searcher
         self.assertRaises(lucene.JavaError, reader.isCurrent)
         assert len(hits) == 5 and hits.count == 8
         assert not any(map(math.isnan, hits.scores))
         assert hits.maxscore == max(hits.scores)
-        hits ='text:people', count=5, sort=lucene.Sort.INDEXORDER)
+        hits ='text:people', count=5, sort=search.Sort.INDEXORDER)
         assert sorted(hits.ids) == list(hits.ids)
         sort = engine.SortField('amendment', type=int)
         hits ='text:people', count=5, sort=sort)
         query = engine.Query.term('text', 'right', boost=2.0)
         assert query.boost == 2.0
         assert indexer.facets(str(query), 'amendment', 'article') == {'amendment': 12, 'article': 1}
-        self.assertRaises(TypeError, indexer.overlap, query.filter(), lucene.QueryWrapperFilter(query))
+        self.assertRaises(TypeError, indexer.overlap, query.filter(), search.QueryWrapperFilter(query))
         hits ='text:people', filter=query.filter())
         assert len(hits) == 4
         hit, ='date:192*')
         assert list(indexer.correct('text', 'write', distance=1, minSimilarity=0.7)) == ['writs', 'writ']
         assert list(indexer.correct('text', 'write', minSimilarity=0.9)) == ['writs', 'writ', 'crime', 'written']
         query = indexer.parse('text:write', spellcheck=True)
-        assert lucene.TermQuery.instance_(query) and str(query) == 'text:writs'
+        assert search.TermQuery.instance_(query) and str(query) == 'text:writs'
         query = indexer.parse('"hello world"', field='text', spellcheck=True)
-        assert lucene.PhraseQuery.instance_(query) and str(query) == 'text:"held would"'
+        assert search.PhraseQuery.instance_(query) and str(query) == 'text:"held would"'
         assert str(indexer.parse('vwxyz', field='text', spellcheck=True)) == 'text:vwxyz'
         with indexer.snapshot() as commit:
             self.assertRaises(lucene.JavaError, indexer.snapshot().__enter__)
         assert orange == 'CA.Orange' and facets[orange] > 10
         (field, facets), = indexer.facets(query, ('state.county', 'CA.*')).items()
         assert all(value.startswith('CA.') for value in facets) and set(facets) < set(indexer.filters[field])
-        if hasattr(lucene, 'TermFirstPassGroupingCollector'):
+        if hasattr(grouping, 'TermFirstPassGroupingCollector'):
             assert set(indexer.grouping('state', count=1)) < set(indexer.grouping('state')) == set(states)
-            grouping = indexer.grouping(field, query, sort=lucene.Sort(indexer.sorter(field)))
-            assert len(grouping) == 2 and list(grouping) == [la, orange]
-            for value, (name, count) in zip(grouping, grouping.facets(None)):
+            grouper = indexer.grouping(field, query, sort=search.Sort(indexer.sorter(field)))
+            assert len(grouper) == 2 and list(grouper) == [la, orange]
+            for value, (name, count) in zip(grouper, grouper.facets(None)):
                 assert value == name and count > 0
-            grouping = indexer.groupings[field] = indexer.grouping(field, engine.Query.term('state', 'CA'))
+            grouper = indexer.groupings[field] = indexer.grouping(field, engine.Query.term('state', 'CA'))
             assert indexer.facets(query, field)[field] == facets
-            hits = next(grouping.groups())
+            hits = next(grouper.groups())
             assert hits.value == 'CA.Los Angeles' and hits.count > 100 and len(hits) == 1
             hit, = hits
             assert hit.score in hit.keys
             assert hit['county'] == 'Los Angeles' and hits.maxscore >= hit.score > 0
-            hits = next(grouping.groups(count=2, sort=lucene.Sort(indexer.sorter('zipcode')), scores=True))
+            hits = next(grouper.groups(count=2, sort=search.Sort(indexer.sorter('zipcode')), scores=True))
             assert hits.value == 'CA.Los Angeles' and math.isnan(hits.maxscore) and len(hits) == 2
             assert all(hit.score > 0 and hit['zipcode'] > '90000' and hit['zipcode'] in hit.keys for hit in hits)
         for count in (None, len(indexer)):
             hits =, count=count, timeout=-1)
             assert len(hits) == 0 and hits.count is hits.maxscore is None
         self.assertRaises(lucene.JavaError,, filter=Filter())
-        directory = lucene.RAMDirectory()
+        directory = store.RAMDirectory()
         query = engine.Query.term('state', 'CA')
         size = indexer.copy(directory, query)
         searcher = engine.IndexSearcher(directory)
         assert indexer.count(query) == len(sizes) - len(ids)
         indexer.sorters['year'] = engine.SortField('Y-m-d', type=int, parser=lambda date: int(date.split('-')[0]))
         assert indexer.comparator('year')[:10] == [1791] * 10
-        cache = len(lucene.FieldCache.DEFAULT.cacheEntries)
+        cache = len(search.FieldCache.DEFAULT.cacheEntries)
         hits =, sort='year')
         assert [int(hit['amendment']) for hit in hits] == [1, 2, 3]
         hits =, sort='year', reverse=True)
         assert [int(hit['amendment']) for hit in hits] == [27, 26, 25]
         assert indexer.count(filter=indexer.sorters['year'].filter(None, 1792)) == 10
-        assert cache == len(lucene.FieldCache.DEFAULT.cacheEntries)
+        assert cache == len(search.FieldCache.DEFAULT.cacheEntries)
-        cache = len(lucene.FieldCache.DEFAULT.cacheEntries)
+        cache = len(search.FieldCache.DEFAULT.cacheEntries)
         assert list(indexer.comparator('year'))[-1] == 0
-        assert cache == len(lucene.FieldCache.DEFAULT.cacheEntries)
+        assert cache == len(search.FieldCache.DEFAULT.cacheEntries)
     def testNumeric(self):
         "Numeric fields."
         id = 3
         text = indexer[id]['text']
         query = '"persons, houses, papers"'
-        highlighter = indexer.highlighter(query, '', terms=True, fields=True, formatter=lucene.SimpleHTMLFormatter('*', '*'))
+        highlighter = indexer.highlighter(query, '', terms=True, fields=True, formatter=highlight.SimpleHTMLFormatter('*', '*'))
         fragments = highlighter.fragments(text, count=3)
         assert len(fragments) == 2 and fragments[0].count('*') == 2*3 and '*persons*' in fragments[1]
         highlighter = indexer.highlighter(query, '', terms=True)
-        highlighter.textFragmenter = lucene.SimpleFragmenter(200)
+        highlighter.textFragmenter = highlight.SimpleFragmenter(200)
         fragment, = highlighter.fragments(text, count=3)
         assert len(fragment) > len(text) and fragment.count('<B>persons</B>') == 2
         fragment, = indexer.highlighter(query, 'text', tag='em').fragments(id, count=3)
         assert len(fragment) < len(text) and fragment.index('<em>persons') < fragment.index('papers</em>')
         fragment, = indexer.highlighter(query, 'text').fragments(id)
         assert fragment.count('<b>') == fragment.count('</b>') == 1
-        highlighter = indexer.highlighter(query, 'text', fragListBuilder=lucene.SingleFragListBuilder())
+        highlighter = indexer.highlighter(query, 'text', fragListBuilder=vectorhighlight.SingleFragListBuilder())
         text, = highlighter.fragments(id)
         assert fragment in text and len(text) > len(fragment)
     def testNearRealTime(self):
         "Near real-time index updates."
-        indexer = engine.Indexer(version=lucene.Version.LUCENE_30, nrt=True)
+        indexer = engine.Indexer(version=util.Version.LUCENE_30, nrt=True)
         assert indexer.count() == 0 and not indexer.current