Mikhail Korobov avatar Mikhail Korobov committed 257f543

.prefixes() support

Comments (0)

Files changed (5)

     BytesDAWG keys():       6.347 ops/sec
     RecordDAWG keys():      6.428 ops/sec
 
+    DAWG.prefixes (hits):    0.729M ops/sec
+    DAWG.prefixes (mixed):   1.770M ops/sec
+    DAWG.prefixes (misses):  1.420M ops/sec
 
     RecordDAWG.keys(prefix="xxx"), avg_len(res)==415:       1.531K ops/sec
     RecordDAWG.keys(prefix="xxxxx"), avg_len(res)==17:      39.823K ops/sec
             bench(full_test_name, timer, descr, op_count, repeats)
 
 
-    # trie-specific benchmarks
+    # DAWG-specific benchmarks
     for struct_name, setup in structures[1:]:
-#        _bench_data = [
-#            ('hits', 'WORDS100k'),
-#            ('mixed', 'MIXED_WORDS100k'),
-#            ('misses', 'NON_WORDS100k'),
-#        ]
-#
-#        for meth in ['prefixes', 'iter_prefixes']:
-#            for name, data in _bench_data:
-#                bench(
-#                    '%s.%s (%s)' % (struct_name, meth, name),
-#                    timeit.Timer(
-#                        "for word in %s:\n"
-#                        "   for it in data.%s(word): pass" % (data, meth),
-#                        setup
-#                    ),
-#                    runs=3
-#                )
+        _bench_data = [
+            ('hits', 'WORDS100k'),
+            ('mixed', 'MIXED_WORDS100k'),
+            ('misses', 'NON_WORDS100k'),
+        ]
+
+        for meth in ['prefixes']:
+            for name, data in _bench_data:
+                bench(
+                    '%s.%s (%s)' % (struct_name, meth, name),
+                    timeit.Timer(
+                        "for word in %s:\n"
+                        "   data.%s(word)" % (data, meth),
+                        setup
+                    ),
+                    runs=3
+                )
 
         _bench_data = [
             ('xxx', 'avg_len(res)==415', 'PREFIXES_3_1k'),

dawg_python/dawgs.py

             for k, v in replaces.items()
         )
 
+    def prefixes(self, key):
+        '''
+        Returns a list with keys of this DAWG that are prefixes of the ``key``.
+        '''
+        res = []
+        index = self.dct.root()
+        if not isinstance(key, bytes):
+            key = key.encode('utf8')
+
+        pos = 1
+
+        for ch in bytearray(key):
+            index = self.dct.follow_char(ch, index)
+            if not index:
+                break
+
+            if self._has_value(index):
+                res.append(key[:pos].decode('utf8'))
+            pos += 1
+
+        return res
+
+
 
 class CompletionDAWG(DAWG):
     """

tests/test_dawg.py

         with pytest.raises(Exception) as e:
             d.load(path)
 
+    def test_prefixes(self):
+        d = self.dawg()
+        assert d.prefixes("foobarz") == ["f", "foo", "foobar"]
+        assert d.prefixes("x") == []
+        assert d.prefixes("bar") == ["bar"]
+
+
 
 #class TestIntDAWG(object):
 #

tests/test_payload_dawg.py

         d = self.dawg()
         assert d.items('foob') == [('foobar', b'data4')]
 
+    def test_prefixes(self):
+        d = self.dawg()
+        assert d.prefixes("foobarz") == ["foo", "foobar"]
+        assert d.prefixes("x") == []
+        assert d.prefixes("bar") == ["bar"]
+
 
 class TestRecordDAWG(object):
 
         assert sorted(d.keys('fo')) == ['foo', 'foo', 'foobar']
         assert d.keys('bar') == ['bar']
         assert d.keys('barz') == []
+
+    def test_prefixes(self):
+        d = self.dawg()
+        assert d.prefixes("foobarz") == ["foo", "foobar"]
+        assert d.prefixes("x") == []
+        assert d.prefixes("bar") == ["bar"]
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.