Source

DocIRHadoop / Query / queryParser.py

Full commit
#!/usr/bin/env python

from DocIRHadoop.Query import searchparser2

# Module-level alias for the builtin set type; the code in this file builds
# its sets through this name.
Set = set

class BoolQuery(searchparser2.SearchQueryParser):
    """A boolean query parser backed by an in-memory inverted index.

    The instance must be given an index (word -> set of document ids) via
    setIndex() and a document table via setDocs() before queries are run.

    NOTE(review): GetWord, GetWordWildcard and GetNot are presumably the
    hooks that searchparser2.SearchQueryParser invokes while evaluating a
    query -- confirm against the base class.
    """

    def setIndex(self, index):
        """Store a copy of *index* (made with ``index.copy()``).

        For a plain dict the copy is shallow: the mapping is duplicated but
        the per-word sets are still shared with the caller.
        """
        self.index = index.copy()

    def setDocs(self, docs):
        """Store a copy of *docs* (made with ``docs.copy()``).

        Only the keys of *docs* are used by this class (see GetNot).
        """
        self.docs = docs.copy()

    def doParsing(self, query):
        """Run *query* through ``self.Parse`` and return its result."""
        return self.Parse(query)

    def GetWord(self, word):
        """Return the set stored in the index for *word*.

        An empty set is returned when *word* is not in the index. On a hit
        the stored set object itself is returned, not a copy.
        """
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in self.index:
            return self.index[word]
        return Set()

    def GetWordWildcard(self, word):
        """Return the union of the index sets of every key starting with *word*."""
        matches = Set()
        for key in self.index:
            if key.startswith(word):
                # update() accepts any iterable, just like union() did.
                matches.update(self.index[key])
        return matches

    def GetNot(self, not_set):
        """Return every key of ``self.docs`` that is not in *not_set*."""
        # Named all_docs so the builtin all() is not shadowed.
        all_docs = Set(self.docs.keys())
        return all_docs.difference(not_set)
        

def test():
    """Run a handful of sample queries against a tiny index and print the results.

    Builds a BoolQuery with a four-word index and documents keyed 1..29,
    then prints whatever ``Parse`` returns for each sample query.
    """
    bq = BoolQuery()
    index = {
        'queen': Set([1, 4, 10]),
        'story': Set([1, 4, 10, 15]),
        'king': Set([4, 5, 10, 20]),
        'kill': Set([6]),
    }
    # Document table: ids 1..29, each mapped to itself.
    docs = dict((doc_id, doc_id) for doc_id in range(1, 30))
    bq.setIndex(index)
    bq.setDocs(docs)

    # Single-argument print(...) behaves the same on Python 2 and also
    # parses on Python 3, unlike the bare print statement.
    print(bq.Parse("queen"))
    print(bq.Parse("queen and king or kill"))
    print(bq.Parse("queen and (king or kill)"))
    print(bq.Parse("queen and king"))
    print(bq.Parse("not king"))
    print(bq.Parse("queen and not king"))
        
# Run the sample queries only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()