Commits

Jason Scheirer committed d04c057

Look up all matches of an exact length; work in progress on a better wordbag lookup

  • Participants
  • Parent commits 2710abd

Comments (0)

Files changed (1)

 class WordLookup(object):
     def __init__(self, filename):
         self._word_lookup = ddmaker()
+        self._wordbag_lookup = ddmaker()
         #self._ngram_index = collections.defaultdict(set)
         with open(filename, 'rb') as handle:
             for word in handle.readlines():
         for letter in word:
             lookup = lookup[letter]
         lookup[None] = None
+
+        bag_lookup = self._wordbag_lookup
+        for letter in sorted(word):
+            bag_lookup = bag_lookup[letter]
+        if not None in bag_lookup:
+            bag_lookup[None] = set([word])
+        else:
+            bag_lookup[None].add(word)
         #for s, e in itertools.combinations(range(len(word) + 1), 2):
         #    x = word[s:e]
         #    self._ngram_index[x].add(word)
         return None in lookup
     def words_with_prefix(self, prefix, exact_length=-1):
         def yield_words(prefix, lookup_table, exact_length=-1):
-            if exact_length == 0 or None in lookup_table:
+            if None in lookup_table:
                 yield prefix
-                if exact_length == 0:
-                    return
+            if exact_length == 0:
+                return
             for k, v in lookup_table.iteritems():
                 if k is not None:
                     for word in yield_words(prefix + k, v, 
             for prefix, lookup in lookups:
                 for word in yield_words(prefix, lookup, exact_length):
                     yield word
+    def matches(self, word):
+        for word in self.words_with_prefix(word, len(word)):
+            yield word
+    def matches_for_bag(self, letterbag, use_whole_bag=False):
+        seen = set()
+        for length in (xrange(3, len(letterbag) + 1)
+                            if not use_whole_bag
+                            else [len(letterbag)]):
+            for letterlist in itertools.permutations(letterbag, 
+                                                     length):
+                new_word = ''.join(letterlist)
+                if not new_word in seen:
+                    seen.add(new_word)
+                    for word in self.words_with_prefix(new_word, 
+                                                       length):
+                        yield word
 
 if __name__ == "__main__":
     import os
                                         os.path.abspath(
                                             __file__)),
                                      'words.txt'))
-    print time.time() - x
+    print "Loaded words:", time.time() - x
     for word in sys.argv[1:]:
         x = time.time()
         print "Word {:18} in dictionary: {}".format(word, 
                     'Yes' if lookup.is_word(word) else 'No')
         for prefixed in lookup.words_with_prefix(word, len(word)):
+            pass
             print "     * {}".format(prefixed)
         print "----"
         for prefixed in lookup.words_with_prefix(word):
+            pass
             print "     * {}".format(prefixed)
+        print "----"
+        for bagged in lookup.matches_for_bag(word, True):
+            pass
+            print "     * {}".format(bagged)
         print time.time() - x
+        print "========"