Not able to search for non-English (Hindi) words

Issue #440 new
Rahul Shrivastava
created an issue

Hi, I have used Whoosh for English and it works fine, but when I index Hindi text and try to search, I get mixed results: for some words I get results and for others I do not.

Here is my search query

def _run_paged_search(ix, query, page, result_len):
    """Run *query* against *ix* and collect one page of hits.

    Returns ``(hits, pagecount)`` when the page has at least one scored
    hit, otherwise ``None`` so the caller can try another strategy.
    """
    with ix.searcher() as searcher:
        results = searcher.search_page(
            query,
            pagenum=page,
            pagelen=result_len,
            sortedby=[sorting_timestamp, scores],
            reverse=True,
            terms=True,
        )
        if results.scored_length() > 0:
            # Convert hits while the searcher is still open.
            return [append_to(hit) for hit in results], results.pagecount
    return None


def search_query(search_term=None, page=1, result_len=10):
    """Search the index for *search_term* and return one page of results.

    The term is first parsed with a plain multi-field parser. If that
    yields no hits, it is parsed again with ``FuzzyTerm`` as the term
    class and the fuzzy-term plugin enabled, and the search is retried.

    Args:
        search_term: The user's query string. ``None`` or ``''`` short-
            circuits to an empty result.
        page: 1-based page number passed to ``search_page`` as ``pagenum``.
        result_len: Page length passed to ``search_page`` as ``pagelen``.

    Returns:
        ``(results, pagecount)`` where ``results`` is a list built by
        calling ``append_to`` on each hit, or ``(None, 0)`` when there is
        no search term, the index does not exist, or nothing matched.
    """
    if not search_term:
        return None, 0
    if not index.exists_in(INDEX_DIR, indexname=INDEX_NAME):
        return None, 0
    ix = get_index()

    # First pass: exact term matching across the searchable fields.
    parser = qparser.MultifieldParser(
        [FIELD_TITLE, FIELD_PUBLISHER, FIELD_KEYWORDS, FIELD_TOPIC],
        ix.schema)
    # normalize() returns a new query instead of modifying the query in
    # place, so the result has to be kept.
    query = parser.parse(search_term).normalize()
    found = _run_paged_search(ix, query, page, result_len)
    if found is not None:
        return found

    # Second pass: nothing matched exactly, so retry with fuzzy terms.
    parser = qparser.MultifieldParser(
        [FIELD_TITLE, FIELD_PUBLISHER, FIELD_TOPIC, FIELD_KEYWORDS],
        ix.schema, termclass=FuzzyTerm)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    query = parser.parse(search_term).normalize()
    found = _run_paged_search(ix, query, page, result_len)
    if found is not None:
        return found
    return None, 0

Comments (0)

  1. Log in to comment