Commits

Mikhail Korobov committed cd7b2bb

iterkeys() and iteritems() methods

Comments (0)

Files changed (4)

                     runs=3
                 )
 
+            for meth in ['iterkeys', 'iteritems']:
+                bench(
+                    '%s.%s(prefix="%s"), %s' % (struct_name, meth, xxx, avg),
+                    timeit.Timer(
+                        "for word in %s: list(data.%s(word))" % (data, meth),
+                        setup
+                    ),
+                    'K ops/sec',
+                    op_count=1,
+                    runs=3
+                )
+
 if __name__ == '__main__':
     benchmark()
     #profiling()

dawg_python/dawgs.py

 
     def keys(self, prefix=""):
         b_prefix = prefix.encode('utf8')
-        index = self.dct.root()
         res = []
 
-        index = self.dct.follow_bytes(b_prefix, index)
+        index = self.dct.follow_bytes(b_prefix, self.dct.root())
         if index is None:
             return res
 
 
         return res
 
+    def iterkeys(self, prefix=""):
+        """
+        Generator counterpart of keys(): yields, one at a time, each key
+        the completer produces for ``prefix``.
+
+        ``prefix`` is encoded to UTF-8 before the lookup and every
+        yielded key is decoded from UTF-8. Nothing is yielded when
+        ``follow_bytes`` returns None for the prefix.
+        """
+        b_prefix = prefix.encode('utf8')
+        index = self.dct.follow_bytes(b_prefix, self.dct.root())
+        if index is None:
+            return
+
+        self.completer.start(index, b_prefix)
+
+        # NOTE(review): self.completer is instance state that start() resets;
+        # presumably two interleaved iterkeys() generators on the same object
+        # would disturb each other -- confirm before relying on that.
+        while self.completer.next():
+            yield self.completer.key.decode('utf8')
+
+
     def load(self, path):
         """
         Loads DAWG from a file.
             res.append(u_key)
         return res
 
+    def iterkeys(self, prefix=""):
+        """
+        Yields, one at a time, the key part of every completer entry
+        that starts with ``prefix``.
+
+        ``prefix`` may be bytes or text; text is encoded to UTF-8.
+        Each completer entry is cut at the first PAYLOAD_SEPARATOR and
+        the part before it is decoded from UTF-8 and yielded.
+        """
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+
+        index = self.dct.root()
+
+        # An empty prefix skips the lookup and completes from the root.
+        if prefix:
+            index = self.dct.follow_bytes(prefix, index)
+            # NOTE(review): this is a truthiness test, while the non-payload
+            # iterkeys() in this commit checks ``index is None``; presumably
+            # a followed index is never 0 -- confirm.
+            if not index:
+                return
+
+        self.completer.start(index, prefix)
+        while self.completer.next():
+            # .index() raises ValueError if the entry has no separator.
+            payload_idx = self.completer.key.index(PAYLOAD_SEPARATOR)
+            u_key = self.completer.key[:payload_idx].decode('utf8')
+            yield u_key
+
     def items(self, prefix=""):
         if not isinstance(prefix, bytes):
             prefix = prefix.encode('utf8')
         while self.completer.next():
             key, value = self.completer.key.split(PAYLOAD_SEPARATOR)
             res.append(
-                (key.decode('utf8'), a2b_base64(bytes(value))) # python 2.6 fix
+                (key.decode('utf8'), a2b_base64(bytes(value))) # bytes() cast is a python 2.6 fix
             )
 
         return res
 
+    def iteritems(self, prefix=""):
+        """
+        Yields, one at a time, a (key, value) tuple for every completer
+        entry that starts with ``prefix``.
+
+        ``prefix`` may be bytes or text; text is encoded to UTF-8.
+        Each entry is split on PAYLOAD_SEPARATOR: the first part is
+        decoded from UTF-8 to give the key, the second part is passed
+        through a2b_base64 to give the value.
+        """
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+
+        index = self.dct.root()
+        # An empty prefix skips the lookup and completes from the root.
+        if prefix:
+            index = self.dct.follow_bytes(prefix, index)
+            if not index:
+                return
+
+        self.completer.start(index, prefix)
+        while self.completer.next():
+            # The two-name unpacking requires exactly one separator per entry;
+            # any other count raises ValueError.
+            key, value = self.completer.key.split(PAYLOAD_SEPARATOR)
+            item = (key.decode('utf8'), a2b_base64(bytes(value))) # bytes() cast is a python 2.6 fix
+            yield item
+
 
     def _has_value(self, index):
         return self.dct.follow_bytes(PAYLOAD_SEPARATOR, index)
         res = super(RecordDAWG, self).items(prefix)
         return [(key, self._struct.unpack(val)) for (key, val) in res]
 
+    def iteritems(self, prefix=""):
+        """
+        Returns a generator of (key, unpacked_value) tuples: every value
+        coming from the parent class's iteritems(prefix) is passed
+        through ``self._struct.unpack``.
+        """
+        res = super(RecordDAWG, self).iteritems(prefix)
+        # Generator expression: values are unpacked only as the caller iterates.
+        return ((key, self._struct.unpack(val)) for (key, val) in res)
+

tests/test_dawg.py

         d = self.dawg()
         assert d.keys() == sorted(self.keys)
 
+    def test_iterkeys(self):
+        # iterkeys() with no prefix must yield the same keys, in the same
+        # order, as the list returned by keys().
+        d = self.dawg()
+        assert list(d.iterkeys()) == d.keys()
+
     def test_completion(self):
         d = self.dawg()
 

tests/test_payload_dawg.py

         d = self.dawg()
         assert d.keys() == ['bar', 'foobar', 'foo', 'foo'] # order?
 
+    def test_iterkeys(self):
+        # iterkeys() with no prefix must yield the same keys, in the same
+        # order, as the list returned by keys().
+        d = self.dawg()
+        assert list(d.iterkeys()) == d.keys()
+
     def test_key_completion(self):
         d = self.dawg()
         assert d.keys('fo') == ['foobar', 'foo', 'foo'] # order?
         d = self.dawg()
         assert sorted(d.items()) == sorted(self.DATA)
 
+    def test_iteritems(self):
+        # iteritems() must agree with items() for three cases: a prefix
+        # expected to yield nothing ('xxx'), a partial prefix ('fo') and
+        # the default empty prefix.
+        d = self.dawg()
+        assert list(d.iteritems('xxx')) == []
+        assert list(d.iteritems('fo')) == d.items('fo')
+        assert list(d.iteritems()) == d.items()
+
     def test_items_completion(self):
         d = self.dawg()
         assert d.items('foob') == [('foobar', b'data4')]