Commits

Jeethu Rao committed 928cd99

Minor code re-org, use the builtin json module in python, if available

  • Participants
  • Parent commits 2846ae8

Comments (0)

Files changed (6)

File bloom.py

-# -*- coding: utf-8 -*-
-'''
-A simple counting bloom filter implementation using the SHA256 hash function.
--- Jeethu Rao <jeethu@jeethurao.com>
-'''
-import array
-import hashlib
-import os
-import itertools
-import math
-from string import hexdigits
-import simplejson
-
-class BloomFilter( object ) :
-    HASH_ALGO = 'sha256'
-    DIGEST_SIZE = hashlib.new(HASH_ALGO).digest_size
-    M = 16384
-    K = 8
-
-    def __init__( self, m=None, k=None, ivs=None, value=None ) :
-        if m is not None :
-            self.M = m
-        if k is not None :
-            self.K = k
-        if value is not None :
-            self._filter = array.array('B',value)
-            l = len(self._filter)
-            if l < self.M :
-                self._filter.extend(itertools.repeat(0,self.M-l))
-        else :
-            self._filter = array.array('B',itertools.repeat(0,self.M))
-        self.calc_nbytes()
-        if ivs is None :
-            self.ivs = [os.urandom(16) for x in
-                        range((self.n_bytes*self.K/self.DIGEST_SIZE)-1)]
-            self.ivs.insert(0,'')
-        else :
-            self.ivs = ivs
-
-    def calc_nbytes( self ) :
-        n_bits = int(math.floor(math.log(self.M,2)))
-        self.n_bytes = int(math.ceil(n_bits/8.0))
-
-    def __eq__( self, other ) :
-        if self.K == other.K :
-            if self.M == other.M :
-                if self._filter == other._filter :
-                    return True
-        return False
-
-    def __ne__( self, other ) :
-        return not (self == other)
-
-    def __setstate__( self, d ) :
-        self.M = d['m']
-        self.K = d['k']
-        self._filter = array.array('B',d['filter'])
-        self.calc_nbytes()
-        self.ivs = d['ivs']
-
-    def __getstate__( self ) :
-        return {
-            'm'      : self.M,
-            'k'      : self.K,
-            'filter' : self._filter.tolist(),
-            'ivs'    : self.ivs,
-        }
-
-    def hash( self, d ) :
-        h_l,l = [], []
-        for iv in self.ivs :
-            h = hashlib.new(self.HASH_ALGO,iv)
-            h.update(d)
-            h_l.append(h.digest())
-        hashes = ''.join(h_l)
-        pos = 0
-        for x in range(self.K) :
-            v = 0
-            for n,x in enumerate(hashes[pos:pos+self.n_bytes]) :
-                v += ord(x) << (n*8)
-            l.append(v%self.M)
-            pos += self.n_bytes
-        return l
-
-    def full( self ) :
-        return sum(bool(x) for x in self._filter) == self.M
-
-    def empty( self ) :
-        return sum(bool(x) for x in self._filter) == 0
-
-    def __nonzero__( self ) :
-        return not self.empty()
-
-    def add( self, value ) :
-        hash = self.hash(value)
-        if all(self._filter[x] for x in hash) :
-            return
-        for x in hash :
-            self._filter[x] += 1
-            if self._filter[x] == 0 :   # Prevent overflow
-                self._filter[x] -= 1
-
-    def _remove( self, value, hash, cnt=0 ) :
-        for x in hash :
-            if self._filter[x] :
-                self._filter[x] -= 1
-        # Check if the value is still present
-        if cnt > 5 :
-            raise RuntimeError, "Can't remove the element from this filter"
-        if all(self._filter[x] for x in hash) :  
-            return self._remove( value, hash, cnt+1 )
-        return True
-
-    def remove( self, value ) :
-        hash = self.hash(value) 
-        return self._remove( value, hash )
-        
-    def __contains__( self, value ) :
-        return all(self._filter[x] for x in self.hash(value))
-
-    def serialize( self ) :
-        d = {
-            'm'      : self.M,
-            'k'      : self.K,
-        }
-        min_val = min(self._filter)
-        cfilter = [x-min_val for x in self._filter.tolist()]
-        if any(cfilter) :
-            d['filter'] = cfilter
-        if min_val :
-            d['min_val'] = min_val
-        if any(self.ivs) :
-            d['ivs'] = self.ivs
-        return d
-
-    @classmethod
-    def unserialize( cls, d ) :
-        min_val = d.get('min_val',0)
-        filter = [x+min_val for x in d.get('filter',[])]
-        if not filter :
-            filter = None
-        ivs = d.get('ivs',[''])
-        return cls( d['m'],d['k'],ivs,filter )
-
-    def toJSON( self ) :
-        return simplejson.dumps(self.serialize())
-
-    @classmethod
-    def fromJSON( cls, s ) :
-        d = simplejson.loads(s)
-        return cls.unserialize( d )
-
-HEX_MAP         = list('0123456789ABCDEF')
-REVERSE_HEX_MAP = dict(zip(HEX_MAP,range(16)))
-
-def _rle( s ) :
-    '''
-    Simple RLE to encode on wire data
-    If the run length is less than 11 chars,
-    its rather more efficient to use the string.
-    '''
-    for k, g in itertools.groupby(s) :
-        l = len(list(g))
-        if l < 11 :
-            yield k*l
-        else :
-            yield [k,l]
-
-def rle( s ) :
-    it = itertools.groupby(_rle(s), lambda x : isinstance(x, (str,unicode)))
-    for k,g in it :
-        if k is False :
-            for x in g :
-                yield x
-        else :
-            yield ''.join(g)
-
-def unrle( l ) :
-    '''
-    Undo the RLE
-    '''
-    o_l = []
-    for x in l :
-        if isinstance(x,list) :
-            o_l.append(x[0]*x[1])
-        else :
-            o_l.append(x)
-    return ''.join(o_l)
-
-
-class SimpleBloomFilter(BloomFilter) :
-    def __init__( self, capacity=100, err=0.1, ivs=None, value=None ) :
-        '''
-        @capacity: The capacity of the filter
-        @err: The tolerable false positives rate ( 0 < err < 0.5 )
-        '''
-        m, k = self.calc_mk( capacity, err )
-        BloomFilter.__init__( self, m, k, ivs, value )
-
-    @staticmethod
-    def calc_mk( capacity, err ) :
-        m = int(math.ceil(capacity * math.log(err) / math.log(1.0 / 2**math.log(2))))
-        k = int(math.floor(math.log(2) * m / capacity))
-        return m, k
-
-    def serialize( self ) :
-        d = BloomFilter.serialize( self )
-        cfilter = d.get('filter',[])
-        if cfilter :
-            max_value = max(cfilter)
-            if max_value < 16 :
-                filter = ''.join([HEX_MAP[x] for x in cfilter])
-            else :  # Its a byte value
-                l = []
-                for x in cfilter :
-                    l.append(HEX_MAP[x>>4])
-                    l.append(HEX_MAP[x&0x0F])
-                filter = ''.join(l)
-            d['filter'] = list(rle(filter))
-        return d
-
-    @classmethod
-    def unserialize( cls, d ) :
-        filter = unrle(d.get('filter',[]))
-        f_l = len(filter)
-        m = d['m']
-        if f_l == 0 :
-            cfilter = []
-        elif f_l == m :
-            cfilter = [REVERSE_HEX_MAP[x] for x in filter]
-        elif f_l == (m << 1) :
-            state = 0
-            v = 0
-            cfilter = []
-            for x in filter :
-                if state == 0 :
-                    v = REVERSE_HEX_MAP[x] << 4
-                else :
-                    v += REVERSE_HEX_MAP[x]
-                    cfilter.append(v)
-                state = (state + 1) & 1
-        else :
-            RuntimeError, "Inconsistent filter length %d for m: %d"%(f_l,m)
-        d['filter'] = cfilter
-        return BloomFilter.unserialize( d )

File js/test.html

         <script type="text/javascript" src="sha256.js"></script>
         <script type="text/javascript" src="bloom.js"></script>
         <script type="text/javascript" src="jsunity-0.1.js"></script>
-        <script type="text/javascript">
-            (function() {
-                BloomFilter.prototype.sha256 = sha256_digest;
-
-                function bloomTestSuite() {
-                    var data = {"filter": [["0", 153], "10000100001", ["0", 35], "1", ["0", 34], "10000001", ["0", 238]], "k": 3, "m": 480};
-
-                    function testUnrle() {
-                        var rle_data = [["A", 17], ["B", 8], "CCCCCC", ["D", 10], "EEEEEEEkjlfdkfkf", ["1", 8], "233333344444445555555"];
-                        data = 'AAAAAAAAAAAAAAAAABBBBBBBBCCCCCCDDDDDDDDDDEEEEEEEkjlfdkfkf11111111233333344444445555555';
-                        var decoded = BloomFilter.unrle(rle_data);
-                        assertEquals(decoded,data);
-                    }
-
-                    function testHash() {
-                        var bf = new BloomFilter(data);
-                        var h = bf.hash('Jeethu Rao');
-                        var l = [12,133,45];
-                        assertEquals(h.length,l.length);
-                        for(var i=0;i<l.length;i++) {
-                            assertEquals(h[i],l[i]);
-                        }
-                    }
-
-                    function testBloom() {
-                        var bf = new BloomFilter( data );
-                        var i=0;
-                        assertEquals(bf.empty(),false);
-                        bf.add('Jeethu Rao');
-                        assertEquals(bf.hasKey('Jeethu Rao'),true);
-                        bf.remove('Jeethu Rao');
-                        assertEquals(bf.hasKey('Jeethu Rao'),false);
-                        var data1 = {'filter':'','k':3,'m':400};
-                        var bf1 = new BloomFilter( data1 );
-                        assertEquals(bf1.empty(),true);
-                        var l = ['Jeethu Rao','Pria Chander','Thilak Raj Rao','Lakshminarayana Kamath'];
-                        for(i=0;i<l.length;i++) {
-                            bf1.add(l[i]);
-                        }
-                        assertEquals(bf1.empty(),false);
-                        for(i=0;i<l.length;i++) {
-                            assertEquals(bf1.hasKey(l[i]),true);
-                        }
-                        for(i=0;i<l.length;i++) {
-                            bf1.remove(l[i]);
-                        }
-                        for(i=0;i<l.length;i++) {
-                            assertEquals(bf1.hasKey(l[i]),false);
-                        }
-                        assertEquals(bf1.empty(),true);
-                    }
-                }
-                jsUnity.log = function(s) { 
-                    var node = document.createElement('div');
-                    node.innerHTML = s;
-                    document.body.appendChild(node);
-                };
-                jsUnity.error = function(s) {
-                    var node = document.createElement('div');
-                    node.style.color = 'red';
-                    node.innerHTML = s;
-                    document.body.appendChild(node);
-                };
-                var results = jsUnity.run(bloomTestSuite);
-            })();
-        </script>
+        <script type="text/javascript" src="test_bloom.js"></script>
     </body>
 </html>

File js/test_bloom.js

+(function() {
+    BloomFilter.prototype.sha256 = sha256_digest;
+
+    function bloomTestSuite() {
+        // List of most common words from Wikipedia 
+        // (http://en.wikipedia.org/wiki/Most_common_words_in_English)
+        var english_words =
+            [ "the", "be", "to", "of", "and", "a", "in", "that", "have", "I", "it", "for",
+            "not", "on", "with", "he", "as", "you", "do", "at", "this", "but", "his", "by",
+            "from", "they", "we", "say", "her", "she", "or", "an", "will", "my", "one", "all",
+            "would", "there", "their", "what", "so", "up", "out", "if", "about", "who", "get",
+            "which", "go", "me", "when", "make", "can", "like", "time", "no", "just", "him",
+            "know", "take", "person", "into", "year", "your", "good", "some", "could", "them",
+            "see", "other", "than", "then", "now", "look", "only", "come", "its", "over",
+            "think", "also", "back", "after", "use", "two", "how", "our", "work", "first", 
+            "well", "way", "even", "new", "want", "because", "any", "these", "give", "day", 
+            "most", "us" ];
+        var data = {"filter": [["0", 153], "10000100001", ["0", 35], "1", ["0", 34], "10000001", ["0", 238]], "k": 3, "m": 480};
+
+        function testUnrle() {
+            var rle_data = [["A", 17], ["B", 8], "CCCCCC", ["D", 10], "EEEEEEEkjlfdkfkf", ["1", 8], "233333344444445555555"];
+            data = 'AAAAAAAAAAAAAAAAABBBBBBBBCCCCCCDDDDDDDDDDEEEEEEEkjlfdkfkf11111111233333344444445555555';
+            var decoded = BloomFilter.unrle(rle_data);
+            assertEquals(decoded,data);
+        }
+
+        function testHash() {
+            var bf = new BloomFilter(data);
+            var h = bf.hash('Jeethu Rao');
+            var l = [12,133,45];
+            assertEquals(h.length,l.length);
+            for(var i=0;i<l.length;i++) {
+                assertEquals(h[i],l[i]);
+            }
+        }
+
+        function testBloom() {
+            var bf = new BloomFilter( data );
+            var i=0;
+            assertEquals(bf.empty(),false);
+            bf.add('Jeethu Rao');
+            assertEquals(bf.hasKey('Jeethu Rao'),true);
+            bf.remove('Jeethu Rao');
+            assertEquals(bf.hasKey('Jeethu Rao'),false);
+        }
+
+        function testBloomAdd() {
+            var bf = new BloomFilter({'filter':'','k':3,'m':400});
+            assertEquals(bf.empty(),true);
+            var l = english_words;
+            for(i=0;i<l.length;i++) {
+                bf.add(l[i]);
+            }
+            assertEquals(bf.empty(),false);
+            for(i=0;i<l.length;i++) {
+                assertEquals(bf.hasKey(l[i]),true);
+            }
+            for(i=0;i<l.length;i++) {
+                bf.remove(l[i]);
+            }
+            for(i=0;i<l.length;i++) {
+                assertEquals(bf.hasKey(l[i]),false);
+            }
+            assertEquals(bf.empty(),true);
+        }
+    }
+    jsUnity.log = function(s) { 
+        var node = document.createElement('div');
+        node.innerHTML = s;
+        document.body.appendChild(node);
+    };
+    jsUnity.error = function(s) {
+        var node = document.createElement('div');
+        node.style.color = 'red';
+        node.innerHTML = s;
+        document.body.appendChild(node);
+    };
+    var results = jsUnity.run(bloomTestSuite);
+})();
+

File python/bloom.py

+# -*- coding: utf-8 -*-
+'''
+A simple counting bloom filter implementation using the SHA256 hash function.
+-- Jeethu Rao <jeethu@jeethurao.com>
+'''
+import array
+import hashlib
+import os
+import itertools
+import math
+from string import hexdigits
+try :
+    import json
+except ImportError :
+    import simplejson as json
+
+class BloomFilter( object ) :
+    HASH_ALGO = 'sha256'
+    DIGEST_SIZE = hashlib.new(HASH_ALGO).digest_size
+    M = 16384
+    K = 8
+
+    def __init__( self, m=None, k=None, ivs=None, value=None ) :
+        if m is not None :
+            self.M = m
+        if k is not None :
+            self.K = k
+        if value is not None :
+            self._filter = array.array('B',value)
+            l = len(self._filter)
+            if l < self.M :
+                self._filter.extend(itertools.repeat(0,self.M-l))
+        else :
+            self._filter = array.array('B',itertools.repeat(0,self.M))
+        self.calc_nbytes()
+        if ivs is None :
+            self.ivs = [os.urandom(16) for x in
+                        range((self.n_bytes*self.K/self.DIGEST_SIZE)-1)]
+            self.ivs.insert(0,'')
+        else :
+            self.ivs = ivs
+
+    def calc_nbytes( self ) :
+        n_bits = int(math.floor(math.log(self.M,2)))
+        self.n_bytes = int(math.ceil(n_bits/8.0))
+
+    def __eq__( self, other ) :
+        if self.K == other.K :
+            if self.M == other.M :
+                if self._filter == other._filter :
+                    return True
+        return False
+
+    def __ne__( self, other ) :
+        return not (self == other)
+
+    def __setstate__( self, d ) :
+        self.M = d['m']
+        self.K = d['k']
+        self._filter = array.array('B',d['filter'])
+        self.calc_nbytes()
+        self.ivs = d['ivs']
+
+    def __getstate__( self ) :
+        return {
+            'm'      : self.M,
+            'k'      : self.K,
+            'filter' : self._filter.tolist(),
+            'ivs'    : self.ivs,
+        }
+
+    def hash( self, d ) :
+        h_l,l = [], []
+        for iv in self.ivs :
+            h = hashlib.new(self.HASH_ALGO,iv)
+            h.update(d)
+            h_l.append(h.digest())
+        hashes = ''.join(h_l)
+        pos = 0
+        for x in range(self.K) :
+            v = 0
+            for n,x in enumerate(hashes[pos:pos+self.n_bytes]) :
+                v += ord(x) << (n*8)
+            l.append(v%self.M)
+            pos += self.n_bytes
+        return l
+
+    def full( self ) :
+        return sum(bool(x) for x in self._filter) == self.M
+
+    def empty( self ) :
+        return sum(bool(x) for x in self._filter) == 0
+
+    def __nonzero__( self ) :
+        return not self.empty()
+
+    def add( self, value ) :
+        hash = self.hash(value)
+        if all(self._filter[x] for x in hash) :
+            return
+        for x in hash :
+            self._filter[x] += 1
+            if self._filter[x] == 0 :   # Prevent overflow
+                self._filter[x] -= 1
+
+    def _remove( self, value, hash, cnt=0 ) :
+        for x in hash :
+            if self._filter[x] :
+                self._filter[x] -= 1
+        # Check if the value is still present
+        if cnt > 5 :
+            raise RuntimeError, "Can't remove the element from this filter"
+        if all(self._filter[x] for x in hash) :  
+            return self._remove( value, hash, cnt+1 )
+        return True
+
+    def remove( self, value ) :
+        hash = self.hash(value) 
+        return self._remove( value, hash )
+        
+    def __contains__( self, value ) :
+        return all(self._filter[x] for x in self.hash(value))
+
+    def serialize( self ) :
+        d = {
+            'm'      : self.M,
+            'k'      : self.K,
+        }
+        min_val = min(self._filter)
+        cfilter = [x-min_val for x in self._filter.tolist()]
+        if any(cfilter) :
+            d['filter'] = cfilter
+        if min_val :
+            d['min_val'] = min_val
+        if any(self.ivs) :
+            d['ivs'] = self.ivs
+        return d
+
+    @classmethod
+    def unserialize( cls, d ) :
+        min_val = d.get('min_val',0)
+        filter = [x+min_val for x in d.get('filter',[])]
+        if not filter :
+            filter = None
+        ivs = d.get('ivs',[''])
+        return cls( d['m'],d['k'],ivs,filter )
+
+    def toJSON( self ) :
+        return json.dumps(self.serialize())
+
+    @classmethod
+    def fromJSON( cls, s ) :
+        d = json.loads(s)
+        return cls.unserialize( d )
+
+HEX_MAP         = list('0123456789ABCDEF')
+REVERSE_HEX_MAP = dict(zip(HEX_MAP,range(16)))
+
+def _rle( s ) :
+    '''
+    Simple RLE to encode on wire data
+    If the run length is less than 11 chars,
+    its rather more efficient to use the string.
+    '''
+    for k, g in itertools.groupby(s) :
+        l = len(list(g))
+        if l < 11 :
+            yield k*l
+        else :
+            yield [k,l]
+
+def rle( s ) :
+    it = itertools.groupby(_rle(s), lambda x : isinstance(x, (str,unicode)))
+    for k,g in it :
+        if k is False :
+            for x in g :
+                yield x
+        else :
+            yield ''.join(g)
+
+def unrle( l ) :
+    '''
+    Undo the RLE
+    '''
+    o_l = []
+    for x in l :
+        if isinstance(x,list) :
+            o_l.append(x[0]*x[1])
+        else :
+            o_l.append(x)
+    return ''.join(o_l)
+
+
+class SimpleBloomFilter(BloomFilter) :
+    def __init__( self, capacity=100, err=0.1, ivs=None, value=None ) :
+        '''
+        @capacity: The capacity of the filter
+        @err: The tolerable false positives rate ( 0 < err < 0.5 )
+        '''
+        m, k = self.calc_mk( capacity, err )
+        BloomFilter.__init__( self, m, k, ivs, value )
+
+    @staticmethod
+    def calc_mk( capacity, err ) :
+        m = int(math.ceil(capacity * math.log(err) / math.log(1.0 / 2**math.log(2))))
+        k = int(math.floor(math.log(2) * m / capacity))
+        return m, k
+
+    def serialize( self ) :
+        d = BloomFilter.serialize( self )
+        cfilter = d.get('filter',[])
+        if cfilter :
+            max_value = max(cfilter)
+            if max_value < 16 :
+                filter = ''.join([HEX_MAP[x] for x in cfilter])
+            else :  # Its a byte value
+                l = []
+                for x in cfilter :
+                    l.append(HEX_MAP[x>>4])
+                    l.append(HEX_MAP[x&0x0F])
+                filter = ''.join(l)
+            d['filter'] = list(rle(filter))
+        return d
+
+    @classmethod
+    def unserialize( cls, d ) :
+        filter = unrle(d.get('filter',[]))
+        f_l = len(filter)
+        m = d['m']
+        if f_l == 0 :
+            cfilter = []
+        elif f_l == m :
+            cfilter = [REVERSE_HEX_MAP[x] for x in filter]
+        elif f_l == (m << 1) :
+            state = 0
+            v = 0
+            cfilter = []
+            for x in filter :
+                if state == 0 :
+                    v = REVERSE_HEX_MAP[x] << 4
+                else :
+                    v += REVERSE_HEX_MAP[x]
+                    cfilter.append(v)
+                state = (state + 1) & 1
+        else :
+            RuntimeError, "Inconsistent filter length %d for m: %d"%(f_l,m)
+        d['filter'] = cfilter
+        return BloomFilter.unserialize( d )

File python/test_bloom.py

+#!/usr/bin/python
+import unittest
+import random
+import os
+from bloom import BloomFilter, SimpleBloomFilter
+import cPickle as pickle
+
+class BaseBFTest(unittest.TestCase) :
+    KLASS = None
+    N = 500
+    def get_filter( self, capacity, err ) :
+        raise NotImplemented
+
+    def test_all( self ) :
+        filter = self.get_filter(self.N,0.1)
+        print 'n=%s, m=%s,k=%s'%(self.N,filter.M,filter.K)
+        print 'the filter would be ~ %f KB in size'%(filter.M/1024.0)
+        ds = set([os.urandom(10) for x in xrange(self.N)])
+        map(filter.add,ds)
+        for x in ds :
+            self.assertTrue(x in filter)
+        self.assertFalse(filter.full())
+        print "Max value in the filter is: %s"%(max(filter._filter))
+        s = random.sample(ds,self.N/10)
+        cnt = 0
+        for x in s :
+            filter.remove(x)
+            self.assertFalse(x in filter, "%s"%cnt)
+        for x in ds :
+            filter.remove(x)
+        self.assertTrue(filter.empty())
+
+    def test_serialization( self ) :
+        filter = self.get_filter(self.N,0.1)
+        self._test_serialization(filter)
+        self._test_pickling(filter)
+        self._test_json(filter)
+        for x in xrange(self.N) :
+            filter.add(os.urandom(100))
+        self._test_serialization(filter)
+        self._test_pickling(filter)
+        self._test_json(filter)
+
+    def _test_serialization( self, filter ) :
+        d = filter.serialize()
+        print "serialized size: %s"%(len(d.get('filter',[])))
+        nf = self.KLASS.unserialize(d)
+        self.assertEqual(filter,nf)
+
+    def _test_pickling( self, filter ) :
+        s = pickle.dumps(filter)
+        f1 = pickle.loads(s)
+        self.assertEqual(filter,f1)
+
+    def _test_json( self, filter ) :
+        s = filter.toJSON()
+        f1 = self.KLASS.fromJSON(s)
+        self.assertEqual(filter,f1)
+
+class TestBloomFilter(BaseBFTest) :
+    KLASS = BloomFilter
+    def get_filter( self, capacity, err ) :
+        m,k = SimpleBloomFilter.calc_mk( capacity, err )
+        return self.KLASS( m, k )
+
+class TestSimpleBloomFilter(BaseBFTest) :
+    KLASS = SimpleBloomFilter
+    def get_filter( self, capacity, err ) :
+        return self.KLASS( capacity, err )
+
+del BaseBFTest
+
+if __name__ == '__main__' :
+    unittest.main()

File test_bloom.py

-#!/usr/bin/python
-import unittest
-import random
-import os
-from bloom import BloomFilter, SimpleBloomFilter
-import cPickle as pickle
-
-class BaseBFTest(unittest.TestCase) :
-    KLASS = None
-    N = 500
-    def get_filter( self, capacity, err ) :
-        raise NotImplemented
-
-    def test_all( self ) :
-        filter = self.get_filter(self.N,0.1)
-        print 'n=%s, m=%s,k=%s'%(self.N,filter.M,filter.K)
-        print 'the filter would be ~ %f KB in size'%(filter.M/1024.0)
-        ds = set([os.urandom(10) for x in xrange(self.N)])
-        map(filter.add,ds)
-        for x in ds :
-            self.assertTrue(x in filter)
-        self.assertFalse(filter.full())
-        print "Max value in the filter is: %s"%(max(filter._filter))
-        s = random.sample(ds,self.N/10)
-        cnt = 0
-        for x in s :
-            filter.remove(x)
-            self.assertFalse(x in filter, "%s"%cnt)
-        for x in ds :
-            filter.remove(x)
-        self.assertTrue(filter.empty())
-
-    def test_serialization( self ) :
-        filter = self.get_filter(self.N,0.1)
-        self._test_serialization(filter)
-        self._test_pickling(filter)
-        self._test_json(filter)
-        for x in xrange(self.N) :
-            filter.add(os.urandom(100))
-        self._test_serialization(filter)
-        self._test_pickling(filter)
-        self._test_json(filter)
-
-    def _test_serialization( self, filter ) :
-        d = filter.serialize()
-        print "serialized size: %s"%(len(d.get('filter',[])))
-        nf = self.KLASS.unserialize(d)
-        self.assertEqual(filter,nf)
-
-    def _test_pickling( self, filter ) :
-        s = pickle.dumps(filter)
-        f1 = pickle.loads(s)
-        self.assertEqual(filter,f1)
-
-    def _test_json( self, filter ) :
-        s = filter.toJSON()
-        f1 = self.KLASS.fromJSON(s)
-        self.assertEqual(filter,f1)
-
-class TestBloomFilter(BaseBFTest) :
-    KLASS = BloomFilter
-    def get_filter( self, capacity, err ) :
-        m,k = SimpleBloomFilter.calc_mk( capacity, err )
-        return self.KLASS( m, k )
-
-class TestSimpleBloomFilter(BaseBFTest) :
-    KLASS = SimpleBloomFilter
-    def get_filter( self, capacity, err ) :
-        return self.KLASS( capacity, err )
-
-del BaseBFTest
-
-if __name__ == '__main__' :
-    unittest.main()