Mikhail Korobov committed 0c3edde

IntCompletionDAWG

Comments (0)

Files changed (3)

 * ``dawg.IntDAWG`` - ``dawg.DAWG`` subclass that maps unicode keys
   to integer values.
 
+* ``dawg.IntCompletionDAWG`` - ``dawg.CompletionDAWG`` subclass
+  that maps unicode keys to integer values.
+
 DAWG and CompletionDAWG
 -----------------------
 
     >>> d2.items()
     [(u'foo', (3, 2, 1)), (u'foo', (3, 2, 3)), (u'foo', (3, 2, 256))]
 
-IntDAWG
--------
+IntDAWG and IntCompletionDAWG
+-----------------------------
 
 ``IntDAWG`` is a ``{unicode -> int}`` mapping. It is possible to
 use ``RecordDAWG`` for this, but ``IntDAWG`` is natively
     >>> int_dawg[u'foo']
     1
 
+``IntCompletionDAWG`` supports all ``IntDAWG`` and ``CompletionDAWG`` methods,
+plus ``.items()`` and ``.iteritems()``, which return ``(key, value)`` pairs
+and accept an optional key prefix.
 
 Persistence
 -----------
 
 .. include:: ../AUTHORS.rst
 
-.. include:: ../CHANGES.rst
+.. include:: ../CHANGES.rst
     def _build_from_iterable(self, iterable):
         cdef DawgBuilder dawg_builder
         cdef bytes b_key
+        cdef int value
 
         for key in iterable:
+            if isinstance(key, tuple) or isinstance(key, list):
+                key, value = key
+                if value < 0:
+                    raise ValueError("Negative values are not supported")
+            else:
+                value = 0
+
             if isinstance(key, unicode):
                 b_key = key.encode('utf8')
             else:
                 b_key = key
 
-            if not dawg_builder.Insert(b_key, len(b_key), 0):
-                raise Error("Can't insert key %r" % b_key)
+            if not dawg_builder.Insert(b_key, len(b_key), value):
+                raise Error("Can't insert key %r (with value %r)" % (b_key, value))
 
         if not dawg_builder.Finish(&self.dawg):
             raise Error("dawg_builder.Finish error")
         if not _dictionary_builder.Build(self.dawg, &self.dct):
             raise Error("Can't build dictionary")
 
+
     def __contains__(self, key):
         if isinstance(key, unicode):
             return self.has_key(key)
             yield (key, self._struct.unpack(val))
 
 
+def _iterable_from_argument(arg):
+    if arg is None:
+        arg = []
+
+    if isinstance(arg, collections.Mapping):
+        return ((key, arg[key]) for key in arg)
+    else:
+        return arg
+
+DEF LOOKUP_ERROR = -1
+
 cdef class IntDAWG(DAWG):
     """
     Dict-like class based on DAWG.
         ``arg`` must be an iterable of tuples (unicode_key, int_value)
         or a dict {unicode_key: int_value}.
         """
-        if arg is None:
-            arg = []
-
-        if isinstance(arg, collections.Mapping):
-            iterable = ((key, arg[key]) for key in arg)
-        else:
-            iterable = arg
-
+        iterable = _iterable_from_argument(arg)
         super(IntDAWG, self).__init__(iterable, input_is_sorted)
 
-
-    def _build_from_iterable(self, iterable):
-        cdef DawgBuilder dawg_builder
-
-        cdef bytes b_key
-        for key, value in iterable:
-            if value < 0:
-                raise ValueError("Negative values are not supported")
-            b_key = key.encode('utf8')
-            dawg_builder.Insert(b_key, value)
-
-        cdef _dawg.Dawg dawg
-        dawg_builder.Finish(&dawg)
-        _dictionary_builder.Build(dawg, &(self.dct))
-
     def __getitem__(self, key):
-        cdef int res = self.get(key, -1)
-        if res == -1:
+        cdef int res = self.get(key, LOOKUP_ERROR)
+        if res == LOOKUP_ERROR:
             raise KeyError(key)
         return res
 
         else:
             res = self.b_get_value(key)
 
-        if res == -1:
+        if res == LOOKUP_ERROR:
             return default
         return res
 
 
     cpdef int b_get_value(self, bytes key):
         return self.dct.Find(key)
+
+
+# FIXME: code duplication.
+cdef class IntCompletionDAWG(CompletionDAWG):
+    """
+    Dict-like class based on DAWG.
+    It can store integer values for unicode keys and supports key completion.
+    """
+
+    def __init__(self, arg=None, input_is_sorted=False):
+        """
+        ``arg`` must be an iterable of tuples (unicode_key, int_value)
+        or a dict {unicode_key: int_value}.
+        """
+        iterable = _iterable_from_argument(arg)
+        super(IntCompletionDAWG, self).__init__(iterable, input_is_sorted)
+
+    def __getitem__(self, key):
+        cdef int res = self.get(key, LOOKUP_ERROR)
+        if res == LOOKUP_ERROR:
+            raise KeyError(key)
+        return res
+
+    cpdef get(self, key, default=None):
+        """
+        Return value for the given key or ``default`` if the key is not found.
+        """
+        cdef int res
+
+        if isinstance(key, unicode):
+            res = self.get_value(key)
+        else:
+            res = self.b_get_value(key)
+
+        if res == LOOKUP_ERROR:
+            return default
+        return res
+
+    cpdef int get_value(self, unicode key):
+        cdef bytes b_key = key.encode('utf8')
+        return self.dct.Find(b_key)
+
+    cpdef int b_get_value(self, bytes key):
+        return self.dct.Find(key)
+
+    cpdef list items(self, unicode prefix=""):
+        cdef bytes b_prefix = prefix.encode('utf8')
+        cdef BaseType index = self.dct.root()
+        cdef list res = []
+        cdef int value
+
+        if not self.dct.Follow(b_prefix, &index):
+            return res
+
+        cdef Completer completer
+        init_completer(completer, self.dct, self.guide)
+        completer.Start(index, b_prefix)
+
+        while completer.Next():
+            key = (<char*>completer.key()).decode('utf8')
+            value = completer.value()
+            res.append((key, value))
+
+        return res
+
+    def iteritems(self, unicode prefix=""):
+        cdef bytes b_prefix = prefix.encode('utf8')
+        cdef BaseType index = self.dct.root()
+        cdef int value
+
+        if not self.dct.Follow(b_prefix, &index):
+            return
+
+        cdef Completer completer
+        init_completer(completer, self.dct, self.guide)
+        completer.Start(index, b_prefix)
+
+        while completer.Next():
+            key = (<char*>completer.key()).decode('utf8')
+            value = completer.value()
+            yield key, value

tests/test_dawg.py

 
 class TestIntDAWG(object):
 
+    IntDAWG = dawg.IntDAWG
+
     def dawg(self):
         payload = {'foo': 1, 'bar': 5, 'foobar': 3}
-        d = dawg.IntDAWG(payload)
+        d = self.IntDAWG(payload)
         return payload, d
 
     def test_getitem(self):
         payload, d = self.dawg()
         data = d.tobytes()
 
-        d2 = dawg.IntDAWG()
+        d2 = self.IntDAWG()
         d2.frombytes(data)
         for key, value in payload.items():
             assert key in d2
         payload, _ = self.dawg()
 
         buf = BytesIO()
-        dawg.IntDAWG(payload).write(buf)
+        self.IntDAWG(payload).write(buf)
         buf.seek(0)
 
-        d = dawg.IntDAWG()
+        d = self.IntDAWG()
         d.read(buf)
 
         for key, value in payload.items():
 
     def test_int_value_ranges(self):
         for val in [0, 5, 2**16-1, 2**31-1]:
-            d = dawg.IntDAWG({'f': val})
+            d = self.IntDAWG({'f': val})
             assert d['f'] == val
 
         with pytest.raises(ValueError):
-            dawg.IntDAWG({'f': -1})
+            self.IntDAWG({'f': -1})
 
         with pytest.raises(OverflowError):
-            dawg.IntDAWG({'f': 2**32-1})
+            self.IntDAWG({'f': 2**32-1})
+
+
+class TestIntCompletionDAWG(TestIntDAWG):
+    IntDAWG = dawg.IntCompletionDAWG  # checks that all tests for IntDAWG pass
 
 
 class TestCompletionDAWG(object):
     def dawg(self):
         return dawg.CompletionDAWG(self.keys)
 
+    def empty_dawg(self):
+        return dawg.CompletionDAWG()
+
     def test_contains(self):
         d = self.dawg()
         for key in self.keys:
 
     def test_completion_dawg_saveload(self):
         buf = BytesIO()
-        dawg.CompletionDAWG(self.keys).write(buf)
+        self.dawg().write(buf)
         buf.seek(0)
 
-        d = dawg.CompletionDAWG()
+        d = self.empty_dawg()
         d.read(buf)
 
         for key in self.keys:
         d = dawg.CompletionDAWG([])
         assert d.keys() == []
 
+
+class TestIntCompletionDAWGComplete(TestCompletionDAWG):
+    keys = ['f', 'bar', 'foo', 'foobar']
+
+    def dawg(self):
+        return dawg.IntCompletionDAWG((k, len(k)) for k in self.keys)
+
+    def empty_dawg(self):
+        return dawg.IntCompletionDAWG()
+
+    def test_no_segfaults_on_empty_dawg(self):
+        d = dawg.IntCompletionDAWG([])
+        assert d.keys() == []
+
+    def test_items(self):
+        d = self.dawg()
+        items = d.items()
+        assert isinstance(items, list)
+        for key, value in items:
+            assert len(key) == value
+
+    def test_iteritems(self):
+        d = self.dawg()
+        for key, value in d.iteritems():
+            assert len(key) == value
+
+    def test_items_prefix(self):
+        d = self.dawg()
+        assert d.items('fo') == [('foo', 3), ('foobar', 6)]
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.