Mikhail Korobov avatar Mikhail Korobov committed 4fa4cef

RecordDAWG is fixed (using a poor hack: the payload data is encoded as base64)

Comments (0)

Files changed (4)

 
     $ tox
 
-from the source checkout. Tests should pass under python 2.6, 2.7, 3.2 and 3.3.
-
-.. note::
-
-    At the moment of writing the latest pip release (1.1) does not
-    support Python 3.3; in order to run tox tests under Python 3.3
-    find the "virtualenv_support" directory in site-packages
-    (of the env you run tox from) and place an sdist zip/tarball of the newer
-    pip (from github) there.
+from the source checkout. Tests should pass under Python 2.6, 2.7 and 3.2.
 
 .. _cython: http://cython.org
 .. _tox: http://tox.testrun.org
 import collections
 import struct
 
+from binascii import a2b_base64, b2a_base64
+
 cdef class DAWG:
     """
     Base DAWG wrapper.
     def __init__(self, arg=None):
         if arg is None:
             arg = []
-        self._build_from_keys_iterable(sorted(list(arg)))
+        self._build_from_iterable(sorted(list(arg)))
 
 
-    cdef _build_from_keys_iterable(self, iterable):
+    def _build_from_iterable(self, iterable):
         cdef DawgBuilder dawg_builder
 
         cdef bytes b_key
 # as a separator between utf8-encoded string and binary payload.
 DEF PAYLOAD_SEPARATOR = b'\xff'
 
-cdef class PayloadDAWG(CompletionDAWG):
+cdef class BytesDAWG(CompletionDAWG):
     """
     DAWG that is able to transparently store extra binary payload in keys;
     there may be several payloads for the same key.
 
         keys = (self._raw_key(d[0], d[1]) for d in arg)
 
-        super(PayloadDAWG, self).__init__(keys)
+        super(BytesDAWG, self).__init__(keys)
 
 
     cpdef bytes _raw_key(self, unicode key, bytes payload):
-        return key.encode('utf8') + PAYLOAD_SEPARATOR + payload
+        cdef bytes encoded_payload = b2a_base64(payload)
+        return key.encode('utf8') + PAYLOAD_SEPARATOR + encoded_payload
 
     cpdef bint b_has_key(self, bytes key) except -1:
         cdef BaseType index
         cdef BaseType index
         cdef list res = []
         cdef bytes payload
+        cdef bytes decoded_payload
 
         if not self._follow_key(key, &index):
             return res
             completer.Start(index)
             while completer.Next():
                 payload = completer.key()[:completer.length()]
-                res.append(payload)
+                decoded_payload = a2b_base64(payload)
+                res.append(decoded_payload)
         finally:
             del completer
 
 
     cpdef list items(self, unicode prefix=""):
         cdef bytes b_prefix = prefix.encode('utf8')
-        cdef bytes key, raw_key, value
+        cdef bytes key, raw_key, value, decoded_value
         cdef list res = []
 
         cdef BaseType index = self.dct.root()
             while completer.Next():
                 raw_key = completer.key()[:completer.length()]
                 key, value = raw_key.split(PAYLOAD_SEPARATOR, 1)
+                decoded_value = a2b_base64(value)
                 res.append(
-                    (key.decode('utf8'), value)
+                    (key.decode('utf8'), decoded_value)
                 )
 
         finally:
         return res
 
     cpdef list keys(self, unicode prefix=""):
-        keys, values = zip(*self.items(prefix))
-        return keys
+        items = self.items(prefix)
+        if not items:
+            return []
+        keys, values = zip(*items)
+        return list(keys)
 
 
-cdef class StructuredDAWG(PayloadDAWG):
+cdef class RecordDAWG(BytesDAWG):
     """
     DAWG that is able to transparently store binary payload in keys;
     there may be several payloads for the same key.
             arg = []
 
         keys = ((d[0], self._struct.pack(*d[1])) for d in arg)
-        super(StructuredDAWG, self).__init__(keys)
+        super(RecordDAWG, self).__init__(keys)
 
 
     cpdef list b_get_value(self, bytes key):
-        cdef list values = PayloadDAWG.b_get_value(self, key)
+        cdef list values = BytesDAWG.b_get_value(self, key)
         return [self._struct.unpack(val) for val in values]
 
 
     cpdef list items(self, unicode prefix=""):
-        cdef list items = super(StructuredDAWG, self).items(prefix)
+        cdef list items = BytesDAWG.items(self, prefix)
         return [(key, self._struct.unpack(val)) for (key, val) in items]
 
 
             iterable = arg
 
         iterable = sorted(iterable, key=operator.itemgetter(0))
-        self._build_from_key_value_iterable(iterable)
+        super(IntDict, self).__init__(iterable)
 
 
-    cpdef _build_from_key_value_iterable(self, iterable):
+    def _build_from_iterable(self, iterable):
         cdef DawgBuilder dawg_builder
 
         cdef bytes b_key

tests/test_dawg.py

     assert d.keys('b') == ['bar']
     assert d.keys('z') == []
 
+#def test_int_keys():
+#    payload = {'foo': 1, 'bar': 5, 'foobar': 3}
+#    d = dawg.IntDict(payload)
+#    assert d.keys() == payload.keys()
+
 def test_completion_dawg_saveload():
     keys = ['f', 'bar', 'foo', 'foobar']
 

tests/test_payload_dawg.py

 )
 
 def test_contains():
-    d = dawg.PayloadDAWG(DATA)
+    d = dawg.BytesDAWG(DATA)
     for key, val in DATA:
         assert key in d
 
 
 
 def test_getitem():
-    d = dawg.PayloadDAWG(DATA)
+    d = dawg.BytesDAWG(DATA)
 
     assert d['foo'] == [b'data1', b'data3']
     assert d['bar'] == [b'data2']
         d['x']
 
 
-def test_strutured_getitem():
-    d = dawg.StructuredDAWG("=3H", STRUCTURED_DATA)
+def test_record_getitem():
+    d = dawg.RecordDAWG("=3H", STRUCTURED_DATA)
     assert d['foo'] == [(3, 2, 0), (3, 2, 1)]
     assert d['bar'] == [(3, 1, 0)]
     assert d['foobar'] == [(6, 3, 0)]
+
+def test_record_items():
+    d = dawg.RecordDAWG("=3H", STRUCTURED_DATA)
+    assert sorted(d.items()) == sorted(STRUCTURED_DATA)
+
+def test_record_keys():
+    d = dawg.RecordDAWG("=3H", STRUCTURED_DATA)
+    assert sorted(d.keys()) == ['bar', 'foo', 'foo', 'foobar',]
+
+def test_record_keys_prefix():
+    d = dawg.RecordDAWG("=3H", STRUCTURED_DATA)
+    assert sorted(d.keys('fo')) == ['foo', 'foo', 'foobar']
+    assert d.keys('bar') == ['bar']
+    assert d.keys('barz') == []
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.