1. Jan Schrewe
  2. whoosh

Source

whoosh / tests / test_vectors.py

Matt Chaput ac0952b 
Matt Chaput adc9bef 
Matt Chaput 3a53d12 
Matt Chaput 3493100 
Matt Chaput 3a53d12 
Vinay Sajip f5c1a0a 
Matt Chaput 0444bc2 
Matt Chaput ac0952b 
Matt Chaput adc9bef 

Matt Chaput 940c4a5 








Matt Chaput 3493100 
Matt Chaput 940c4a5 


Matt Chaput 3493100 

Vinay Sajip f5c1a0a 

Matt Chaput 3493100 
Matt Chaput 940c4a5 
Matt Chaput 3493100 

Vinay Sajip f5c1a0a 
Matt Chaput 3493100 

Matt Chaput 940c4a5 


Matt Chaput 3493100 

Vinay Sajip f5c1a0a 

Matt Chaput 3493100 
Matt Chaput 940c4a5 
Matt Chaput 3493100 
Vinay Sajip f5c1a0a 

Matt Chaput 3493100 
Matt Chaput 940c4a5 
Matt Chaput 3493100 

Matt Chaput 940c4a5 
Vinay Sajip f5c1a0a 
Matt Chaput 3493100 
Vinay Sajip f5c1a0a 
Matt Chaput 940c4a5 
Vinay Sajip f5c1a0a 
Matt Chaput 940c4a5 
Matt Chaput 3493100 
Vinay Sajip f5c1a0a 
Matt Chaput 940c4a5 
Matt Chaput 3493100 
Matt Chaput 940c4a5 
Matt Chaput 0444bc2 
Matt Chaput 940c4a5 
Matt Chaput 0444bc2 



Matt Chaput 940c4a5 
Matt Chaput 0444bc2 



Matt Chaput 940c4a5 
Matt Chaput 0444bc2 

from __future__ import with_statement

from nose.tools import assert_equal  #@UnresolvedImport

from whoosh import fields, formats
from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.support.testing import TempIndex


def test_single_term():
    schema = fields.Schema(text=fields.TEXT(vector=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u("TEST TEST TEST"))
    with ix.searcher() as s:
        v = s.vector(0, "text")
        assert v.is_active()

def test_vector_reading():
    schema = fields.Schema(title=fields.TEXT,
                           content=fields.TEXT(vector=formats.Frequency()))

    with TempIndex(schema, "vectorreading") as ix:
        writer = ix.writer()
        writer.add_document(title=u("one"),
                            content=u("This is the story of the black hole story"))
        writer.commit()

        with ix.reader() as r:
            assert_equal(list(r.vector_as("frequency", 0, "content")),
                             [(u('black'), 1), (u('hole'), 1), (u('story'), 2)])

def test_vector_merge():
    schema = fields.Schema(title=fields.TEXT,
                           content=fields.TEXT(vector=formats.Frequency()))

    with TempIndex(schema, "vectormerge") as ix:
        writer = ix.writer()
        writer.add_document(title=u("one"),
                            content=u("This is the story of the black hole story"))
        writer.commit()

        writer = ix.writer()
        writer.add_document(title=u("two"),
                            content=u("You can read along in your book"))
        writer.commit()

        with ix.searcher() as s:
            r = s.reader()

            docnum = s.document_number(title=u("one"))
            vec = list(r.vector_as("frequency", docnum, "content"))
            assert_equal(vec, [(u('black'), 1), (u('hole'), 1), (u('story'), 2)])

            docnum = s.document_number(title=u("two"))

            vec = list(r.vector_as("frequency", docnum, "content"))
            assert_equal(vec, [(u('along'), 1), (u('book'), 1), (u('read'), 1)])

def test_vector_unicode():
    schema = fields.Schema(content=fields.TEXT(vector=formats.Frequency()))
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    writer.add_document(content=u("\u1234\u2345\u3456 \u4567\u5678\u6789"))
    writer.add_document(content=u("\u0123\u1234\u4567 \u4567\u5678\u6789"))
    writer.commit()

    writer = ix.writer()
    writer.add_document(content=u("\u2345\u3456\u4567 \u789a\u789b\u789c"))
    writer.add_document(content=u("\u0123\u1234\u4567 \u2345\u3456\u4567"))
    writer.commit()

    with ix.reader() as r:
        vec = list(r.vector_as("frequency", 0, "content"))
        assert_equal(vec, [(u('\u3456\u4567'), 1), (u('\u789a\u789b\u789c'), 1)])