Commits

George Sakkis committed aa84455

slopeone

  • Participants
  • Parent commits b4721f1

Comments (0)

Files changed (8)

File slopeone/BX-Book-Ratings.csv.bz2

Binary file added.

File slopeone/BX-Books.csv.bz2

Binary file added.

File slopeone/benchmark.py

+import sys, csv, time
+from collections import defaultdict
+from itertools import islice
+
+import orig_slopeone, slopeone, numpy_slopeone
+
+classes = [
+    #orig_slopeone.SlopeOne,
+    #orig_slopeone.SlopeOneNumpy, # XXX: dog slow
+    #slopeone.SlopeOne,
+    #slopeone.SlopeOne2,
+    #slopeone.SlopeOneSym,
+    #slopeone.SlopeOneSym2,
+    #numpy_slopeone.SlopeOneDense,
+    numpy_slopeone.SlopeOneSparse,
+]
+
class ItemsStorage(object):
    """Bidirectional item <-> dense-integer-id mapping with optional capacity.

    Items receive consecutive integer ids in insertion order.  Once
    ``maxsize`` distinct items are stored, further *new* items are rejected
    (``add_item`` returns None) while known items keep resolving normally.
    """

    def __init__(self, maxsize=None):
        # maxsize: maximum number of distinct items to keep (None = unbounded)
        self.items = []        # id -> item
        self.item2id = {}      # item -> id
        self.maxsize = maxsize

    def add_item(self, item):
        """Return the id of ``item``, registering it if new.

        Returns None when ``item`` is unknown and the storage is full.
        """
        # renamed from ``id`` to avoid shadowing the builtin
        item_id = self.item2id.get(item)
        if item_id is None:
            n = len(self.items)
            if self.maxsize is None or n < self.maxsize:
                self.item2id[item] = n
                self.items.append(item)
                item_id = n
        return item_id

    def id(self, item):
        """Return the id of a known ``item`` (raises KeyError if unknown)."""
        return self.item2id[item]

    def item(self, id):
        """Return the item registered under integer ``id``."""
        return self.items[id]

    def __len__(self):
        return len(self.items)
+
+
+def load_data(max_num_items=None):
+    """Load BX-Book-Ratings.csv into memory.
+
+    Returns ``(num_items, users_ratings)`` where ``users_ratings`` maps
+    user_id -> {item_id: rating}.  ISBNs are mapped to dense integer ids
+    via ItemsStorage; at most ``max_num_items`` distinct items are kept
+    (None = all).  Ratings of 0 are skipped.
+    """
+    users_ratings = defaultdict(dict)
+    items_storage = ItemsStorage(max_num_items)
+    # NOTE(review): Python 2 API (reader.next); the file handle is never
+    # explicitly closed.
+    reader = csv.reader(open('BX-Book-Ratings.csv'), delimiter=';')
+    reader.next() # skip header
+    for user_id, isbn, rating in reader:
+        # add_item returns None once the item capacity is exhausted
+        item_id = items_storage.add_item(isbn)
+        rating = int(rating)
+        if item_id is None or rating == 0:
+            continue
+        users_ratings[user_id][item_id] = rating
+    return len(items_storage), users_ratings
+
+
+def main(max_num_items, num_users, top=5):
+    """Benchmark every class in ``classes``: train once, then time predictions.
+
+    max_num_items: cap on distinct items loaded from the CSV.
+    num_users: number of users to compute recommendations for.
+    top: number of recommendations requested per user.
+    Timings go to stderr; per-user predictions go to ``<module.Class>.out``.
+    """
+    num_items, user_ratings = load_data(max_num_items)
+    print >> sys.stderr, '%d items / %d users / %d ratings' % (
+                                num_items, len(user_ratings),
+                                sum(len(r) for r in user_ratings.itervalues()))
+    for cls in classes:
+        name = '%s.%s' % (cls.__module__, cls.__name__)
+        print >> sys.stderr, name
+
+        # measure training time
+        start = time.time()
+        slopone = cls(num_items)
+        slopone.update(user_ratings.itervalues())
+        print >> sys.stderr, '  training time: %s' % (time.time() - start)
+        out = open('%s.out' % name, 'w')
+        if 0:
+            # flip to dump the trained model instead of timing predictions
+            slopone.dump(out)
+        else:
+            # measure prediction time over the first num_users users
+            start = time.time()
+            for itemid2rating in islice(user_ratings.itervalues(), num_users):
+                print >> out, slopone.recomendations(itemid2rating, top)
+            print >> sys.stderr, '  prediction time (top %d items for %d users): %s' % (
+                                            top, num_users, time.time() - start)
+        out.close()
+
+
+if __name__ == '__main__':
+    # usage: benchmark.py max_num_items num_users [top]
+    main(*map(int, sys.argv[1:]))

File slopeone/numpy_slopeone.py

+from collections import defaultdict
+from heapq import nlargest
+from itertools import izip
+from operator import itemgetter
+
+import numpy as np
+from scipy import sparse
+
+
+class SlopeOneDense(object):
+    """Slope One recommender backed by dense numpy matrices.
+
+    ``freqsmat[i, j]`` counts users that rated both items i and j;
+    ``diffsmat[i, j]`` holds the average rating difference (i minus j).
+    Memory is O(num_items**2), so this suits small item sets only.
+    """
+
+    def __init__(self, num_items):
+        # num_items: number of distinct item ids (matrix dimension)
+        self.num_items = num_items
+
+    def update(self, users):
+        """Train from an iterable of {item_id: rating} dicts, one per user."""
+        # collect the freqs and dicts in dict-of-dicts
+        freqdict = defaultdict(lambda: defaultdict(int))
+        diffdict = defaultdict(lambda: defaultdict(int))
+        for user in users:
+            itemid_ratings = sorted(user.iteritems(), key=itemgetter(0))
+            # pop the largest id each round so only pairs with i > j are
+            # accumulated (lower triangle); the rest is mirrored below
+            while itemid_ratings:
+                i, ri = itemid_ratings.pop()
+                freqs_i = freqdict[i]
+                diffs_i = diffdict[i]
+                for j, rj in itemid_ratings:
+                    freqs_i[j] += 1
+                    diffs_i[j] += ri - rj
+        self.freqsmat = freqsmat = np.zeros((self.num_items, self.num_items), dtype=int)
+        self.diffsmat = diffsmat = np.zeros((self.num_items, self.num_items), dtype=float)
+        # drain the dicts row by row; popitem/del free memory as we go
+        while freqdict:
+            i, freqdict_i = freqdict.popitem()
+            freqsmat[i].put(freqdict_i.keys(), freqdict_i.values())
+            diffdict_i = diffdict[i]
+            diffsmat[i].put(diffdict_i.keys(), diffdict_i.values())
+            del diffdict[i]
+        nonzero = freqsmat.nonzero()
+        # scale the diffs by freq
+        diffsmat[nonzero] /= freqsmat[nonzero]
+        # freqsmat is symmetric
+        freqsmat.T[nonzero] = freqsmat[nonzero]
+        # diffsmat is antisymmetric
+        diffsmat.T[nonzero] = -diffsmat[nonzero]
+
+    def dump(self, fileobj):
+        """Write ``i j freq diff`` lines for every nonzero pair (debug aid)."""
+        freqsmat, diffsmat = self.freqsmat, self.diffsmat
+        for i, j in izip(*freqsmat.nonzero()):
+            diff = diffsmat[i,j]
+            # convert -0.0 to 0.0
+            if diff == 0: diff = np.abs(diff)
+            print >> fileobj, i, j, freqsmat[i,j], diff
+
+    def recomendations(self, user, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not in ``user``.
+
+        ``user`` maps item_id -> rating; ``top`` keeps only the highest
+        predictions (None = all, sorted descending).
+        """
+        # ids and ratings of items already rated by user
+        user_itemids = user.keys()
+        # take the diff and freq columns corresponding to the user itemids
+        user_diffs_mat = self.diffsmat[:,user_itemids]
+        user_freqs_mat = self.freqsmat[:,user_itemids]
+        # for each candidate item compute the sum of co-occurrences with all
+        # user rated items
+        user_freqs_vec = user_freqs_mat.sum(axis=1)
+        # consider only the items with non-zero total freq excluding the already
+        # rated ones
+        candidate_ids = np.setdiff1d(user_freqs_vec.nonzero()[0], user_itemids)
+        if candidate_ids.size == 0:
+            return []
+        # memory-efficient and slightly faster version of:
+        # predictions_vec = (user_freqs_mat[candidate_ids] *
+        #                    (user_ratings_vec + user_diffs_mat[candidate_ids])
+        #                   ).sum(axis=1) / user_freqs_vec[candidate_ids]
+        predictions_mat = user_diffs_mat[candidate_ids]
+        predictions_mat += user.values()
+        predictions_mat *= user_freqs_mat[candidate_ids]
+        predictions_vec = predictions_mat.sum(axis=1)
+        predictions_vec /= user_freqs_vec[candidate_ids]
+        # translate from numpy to python-land and sort or find top largest
+        predictions = izip(predictions_vec, candidate_ids)
+        if top is not None:
+            return nlargest(top, predictions)
+        else:
+            return sorted(predictions, reverse=True)
+
+
+class SlopeOneSparse(object):
+    """Slope One recommender backed by scipy CSR sparse matrices.
+
+    Same model as SlopeOneDense but stores only the nonzero (i, j) pairs,
+    so it scales to large, sparse item sets.
+    """
+
+    def __init__(self, num_items):
+        # num_items: number of distinct item ids (matrix dimension)
+        self.num_items = num_items
+
+    def update(self, users):
+        """Train from an iterable of {item_id: rating} dicts, one per user."""
+        # collect the freqs and dicts in dict-of-dicts
+        freqdict = defaultdict(lambda: defaultdict(int))
+        diffdict = defaultdict(lambda: defaultdict(int))
+        for user in users:
+            itemid_ratings = sorted(user.iteritems(), key=itemgetter(0))
+            # pop the largest id each round: only pairs with i > j are kept
+            while itemid_ratings:
+                i, ri = itemid_ratings.pop()
+                freqs_i = freqdict[i]
+                diffs_i = diffdict[i]
+                for j, rj in itemid_ratings:
+                    freqs_i[j] += 1
+                    diffs_i[j] += ri - rj
+        # double the size of nonzero entries to account for the symmetric ones
+        nnz = 2 * sum(len(v) for v in freqdict.itervalues())
+        # (i, j) rows for nonzero freqs
+        # NOTE(review): uint16 caps item ids and per-pair freqs at 65535 --
+        # silent overflow beyond that; confirm dataset bounds before reuse.
+        ij = np.empty((2, nnz), dtype='uint16')
+        # nonzero freqs
+        freqs = np.empty(nnz, dtype='uint16')
+        # diffs for nonzero freqs
+        diffs = np.empty(nnz, dtype='float32')
+        # populate the lower triangle
+        start = 0
+        while freqdict:
+            i, freqdict_i = freqdict.popitem()
+            end = start + len(freqdict_i)
+            ij[0, start:end] = i
+            ij[1, start:end] = freqdict_i.keys()
+            freqs[start:end] = freqdict_i.values()
+            # keys iterate in the same order, so values stay aligned
+            diffs[start:end] = diffdict[i].values()
+            del diffdict[i]
+            start = end
+        # populate the upper triangle (Python 2 integer division)
+        nnz2 = nnz / 2
+        ij[:, nnz2:] = ij[[1,0],:nnz2]
+        freqs[nnz2:] = freqs[:nnz2]
+        # scale the diffs by freq
+        diffs[:nnz2] /= freqs[:nnz2]
+        # diffs is antisymmetric
+        diffs[nnz2:] = -diffs[:nnz2]
+        shape = (self.num_items, self.num_items)
+        self.freqsmat = sparse.csr_matrix((freqs, ij), shape=shape)
+        # negate the diffs because we'll be selecting itemids by rows instead
+        # of columns
+        self.diffsmat = sparse.csr_matrix((-diffs, ij), shape=shape)
+
+    def dump(self, fileobj):
+        """Write ``i j freq diff`` lines for every nonzero pair (debug aid)."""
+        freqsmat, diffsmat = self.freqsmat, self.diffsmat
+        for i, j in sorted(izip(*freqsmat.nonzero())):
+            print >> fileobj, i, j, freqsmat[i,j], float(diffsmat[i,j])
+
+    def recomendations(self, user, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not in ``user``.
+
+        ``user`` maps item_id -> rating.  Rows (not columns) are selected
+        here, which is why ``diffsmat`` was stored negated in update().
+        """
+        # ids and ratings of items already rated by user
+        user_itemids = user.keys()
+        # take the diff and freq rows corresponding to the user itemids
+        user_diffs_mat = self.diffsmat[user_itemids]
+        user_freqs_mat = self.freqsmat[user_itemids]
+        # for each candidate item compute the sum of co-occurrences with all
+        # user rated items
+        user_freqs_vec = user_freqs_mat.sum(axis=0)
+        # consider only the items with non-zero total freq excluding the already
+        # rated ones
+        candidate_ids = np.setdiff1d(np.asarray(user_freqs_vec.nonzero()[1]).ravel(),
+                                     user_itemids)
+        if candidate_ids.size == 0:
+            return []
+        # compute the prediction vector
+        predictions_mat = np.asarray(user_diffs_mat[:, candidate_ids].todense())
+        predictions_mat += np.array(user.values()).reshape((len(user), 1))
+        predictions_mat *= user_freqs_mat[:, candidate_ids].todense()
+        predictions_vec = predictions_mat.sum(axis=0)
+        predictions_vec /= np.asarray(user_freqs_vec[:, candidate_ids]).ravel()
+        # translate from numpy to python-land and sort or find top largest
+        predictions = izip(predictions_vec, candidate_ids)
+        if top is not None:
+            return nlargest(top, predictions)
+        else:
+            return sorted(predictions, reverse=True)

File slopeone/orig_slopeone.py

+import sys
+from heapq import nlargest
+from operator import itemgetter
+
+from numpy import int32, float32
+from scipy.sparse import dok_matrix
+
+
+class SlopeOne(object):
+    """Reference pure-dict Slope One implementation (baseline).
+
+    ``freqs[i][j]`` counts co-raters of items i and j; ``diffs[i][j]`` holds
+    the average rating difference (i minus j) after update() finishes.
+    """
+
+    def __init__(self, num_items):
+        # num_items is accepted for interface parity with the other
+        # implementations but is not used here
+        self.diffs = {}
+        self.freqs = {}
+
+    def recomendations(self, useritems, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not rated yet."""
+        preds, freqs = {}, {}
+        useritems = dict(useritems)
+        for item, rating in useritems.iteritems():
+            for diffitem, diffratings in self.diffs.iteritems():
+                try:
+                    freq = self.freqs[diffitem][item]
+                except KeyError:
+                    # diffitem never co-rated with item
+                    continue
+                preds.setdefault(diffitem, 0.0)
+                freqs.setdefault(diffitem, 0)
+                # weighted prediction: freq * (avg diff + user's rating)
+                preds[diffitem] += freq * (diffratings[item] + rating)
+                freqs[diffitem] += freq
+        predlist =  [(value / freqs[item], item)
+                     for item, value in preds.iteritems()
+                      if item not in useritems and freqs[item] > 0]
+        if top is not None:
+            predlist = nlargest(top, predlist)
+        else:
+            # descending by rating, then by item id
+            predlist.sort(key=lambda x: (-x[0], -x[1]))
+        return predlist
+
+    def update(self, userdata):
+        """Train from an iterable of {item: rating} dicts, one per user."""
+        for userratings in userdata:
+            for item1, rating1 in userratings.iteritems():
+                self.freqs.setdefault(item1, {})
+                self.diffs.setdefault(item1, {})
+                # full O(k^2) pass over each user's item pairs
+                for item2, rating2 in userratings.iteritems():
+                    if item2 == item1:
+                        continue
+                    self.freqs[item1].setdefault(item2, 0)
+                    self.diffs[item1].setdefault(item2, 0.0)
+                    self.freqs[item1][item2] += 1
+                    self.diffs[item1][item2] += rating1 - rating2
+        # normalize summed diffs into averages
+        for item1, ratings in self.diffs.iteritems():
+            for item2 in ratings:
+                ratings[item2] /= self.freqs[item1][item2]
+
+    def dump(self, fileobj=sys.stdout):
+        """Write ``i j freq diff`` lines in sorted order (debug aid)."""
+        diffs = self.diffs
+        key = itemgetter(0)
+        for i, rows in sorted(self.freqs.iteritems(), key=key):
+            diff_i = diffs[i]
+            for j, freq in sorted(rows.iteritems(), key=key):
+                print >> fileobj, i, j, freq, float(diff_i[j])
+
+
+class SlopeOneNumpy(object):
+    """Slope One over scipy dok_matrix storage (flagged 'dog slow' in the
+    benchmark's class list: per-element dok access dominates)."""
+
+    def __init__(self, num_items):
+        self.freqs = dok_matrix((num_items, num_items), dtype=int32)
+        self.diffs = dok_matrix((num_items, num_items), dtype=float32)
+
+    def recomendations(self, useritems, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not rated yet."""
+        resultpreds, resultfreqs = {}, {}
+        useritems = dict(useritems)
+        for id, rating in useritems.iteritems():
+            freqrow = self.freqs.getrow(id)
+            for freq, indice in zip(freqrow.data, freqrow.indices):
+                resultpreds.setdefault(indice, 0.0)
+                resultfreqs.setdefault(indice, 0)
+                resultpreds[indice] += freq * (self.diffs[indice, id] + rating)
+                resultfreqs[indice] += freq
+        # rated items (including diagonal hits) are filtered out here
+        predlist =  [(value / resultfreqs[item], item)
+                     for item, value in resultpreds.iteritems()
+                      if item not in useritems and resultfreqs[item] > 0]
+        if top is not None:
+            predlist = nlargest(top, predlist)
+        else:
+            predlist.sort(key=lambda x: (-x[0], -x[1]))
+        return predlist
+
+    def update(self, userdata):
+        """Train from an iterable of {item: rating} dicts, one per user.
+
+        Unlike the dict baseline, the diagonal (item1 == item2) is also
+        counted in freqs; it appears harmless since rated items are
+        excluded from predictions.
+        """
+        for userratings in userdata:
+            for item1, rating1 in userratings.iteritems():
+                for item2, rating2 in userratings.iteritems():
+                    self.freqs[item1, item2] += 1
+                    delta = float32(rating1 - rating2)
+                    if delta != 0:
+                        # there is a bug in scipy when assigning a 0 value
+                        # to an already-zero entry
+                        self.diffs[item1, item2] += delta
+        # normalize summed diffs into averages
+        for key, rating in self.diffs.iteritems():
+            self.diffs[key] = rating/self.freqs[key]

File slopeone/slopeone.py

+from __future__ import division
+import sys
+from collections import defaultdict
+from heapq import nlargest
+from operator import itemgetter
+
+
+class SlopeOne(object):
+    """Slope One on defaultdicts, storing both (i, j) and (j, i) entries.
+
+    ``freqs`` is symmetric; ``diffs`` is antisymmetric (diffs[i][j] ==
+    -diffs[j][i]).  After update(), diffs hold average differences.
+    """
+
+    def __init__(self, num_items):
+        # num_items accepted for interface parity; not used by this variant
+        self.freqs = defaultdict(lambda: defaultdict(int))
+        self.diffs = defaultdict(lambda: defaultdict(int))
+
+    def update(self, users):
+        """Train from an iterable of {item: rating} dicts, one per user."""
+        freqs, diffs = self.freqs, self.diffs
+        for user in users:
+            item_ratings = sorted(user.iteritems(), key=itemgetter(0))
+            # each pair is visited once; both orientations are written
+            while item_ratings:
+                item1, rating1 = item_ratings.pop()
+                item1_freqs = freqs[item1]
+                item1_diffs = diffs[item1]
+                for item2, rating2 in item_ratings:
+                    item1_freqs[item2] += 1
+                    freqs[item2][item1] += 1
+                    diff = rating1 - rating2
+                    # zero diffs are skipped; defaultdict returns 0 on read
+                    if diff:
+                        item1_diffs[item2] += diff
+                        diffs[item2][item1] -= diff
+        # normalize summed diffs into averages
+        for item1, ratings in diffs.iteritems():
+            item1_freqs = freqs[item1]
+            for item2 in ratings:
+                ratings[item2] /= item1_freqs[item2]
+
+    def recomendations(self, item2rating, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not rated yet."""
+        freqs, diffs = self.freqs, self.diffs
+        predfreqs = defaultdict(int)
+        preds = defaultdict(int)
+        for olditem in freqs:
+            if olditem in item2rating:
+                 continue
+            freqs_olditem = freqs[olditem]
+            for item in item2rating:
+                freq = freqs_olditem.get(item)
+                if freq is not None:
+                    predfreqs[olditem] += freq
+                    # NOTE(review): defaultdict read may insert a 0 entry here
+                    diffrating = diffs[olditem][item]
+                    preds[olditem] += freq * (item2rating[item] + diffrating)
+        results = ((preds[item]/predfreqs[item], item) for item in preds)
+        if top is not None:
+            results = nlargest(top, results)
+        else:
+            results = sorted(results, reverse=True)
+        return results
+
+    def dump(self, fileobj=sys.stdout):
+        """Write ``i j freq diff`` lines in sorted order (debug aid)."""
+        diffs = self.diffs
+        key = itemgetter(0)
+        for i, rows in sorted(self.freqs.iteritems(), key=key):
+            diff_i = diffs[i]
+            for j, freq in sorted(rows.iteritems(), key=key):
+                print >> fileobj, i, j, freq, float(diff_i[j])
+
+
+class SlopeOne2(SlopeOne):
+    """SlopeOne variant with a simpler but O(k^2)-per-user update loop.
+
+    Iterates every ordered (item1, item2) pair, including item1 == item2
+    (diagonal); inherits recomendations/dump from SlopeOne.
+    """
+
+    def update(self, users):
+        freqs, diffs = self.freqs, self.diffs
+        for user in users:
+            for item1, rating1 in user.iteritems():
+                item1_freqs = freqs[item1]
+                item1_diffs = diffs[item1]
+                for item2, rating2 in user.iteritems():
+                    item1_freqs[item2] += 1
+                    item1_diffs[item2] += rating1 - rating2
+        # normalize summed diffs into averages
+        for item1, ratings in diffs.iteritems():
+            item1_freqs = freqs[item1]
+            for item2 in ratings:
+                ratings[item2] /= item1_freqs[item2]
+
+
+class SlopeOneSym(SlopeOne):
+    """SlopeOne variant that stores only the lower triangle (item1 > item2).
+
+    Halves memory versus SlopeOne; recomendations() reconstructs the other
+    orientation on the fly (freq is symmetric, diff antisymmetric).
+    """
+
+    def update(self, users):
+        freqs, diffs = self.freqs, self.diffs
+        for user in users:
+            item_ratings = sorted(user.iteritems(), key=itemgetter(0))
+            # popping the largest id guarantees item1 > item2 for every pair
+            while item_ratings:
+                item1, rating1 = item_ratings.pop()
+                item1_freqs = freqs[item1]
+                item1_diffs = diffs[item1]
+                for item2, rating2 in item_ratings:
+                    item1_freqs[item2] += 1
+                    item1_diffs[item2] += rating1 - rating2
+        # normalize summed diffs into averages
+        for item1, ratings in diffs.iteritems():
+            item1_freqs = freqs[item1]
+            for item2 in ratings:
+                ratings[item2] /= item1_freqs[item2]
+
+    def recomendations(self, item2rating, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not rated yet."""
+        freqs, diffs = self.freqs, self.diffs
+        predfreqs = defaultdict(int)
+        preds = defaultdict(int)
+        for olditem in freqs:
+            if olditem in item2rating:
+                 continue
+            freqs_olditem = freqs[olditem]
+            for item in item2rating:
+                # only (larger, smaller) keys exist; pick the right orientation
+                if olditem > item:
+                    freq = freqs_olditem.get(item)
+                else:
+                    freq = freqs[item].get(olditem)
+                if freq is not None:
+                    predfreqs[olditem] += freq
+                    # diff is antisymmetric: negate when reading the mirror key
+                    if olditem > item:
+                        diffrating = diffs[olditem][item]
+                    else:
+                        diffrating = -diffs[item][olditem]
+                    preds[olditem] += freq * (item2rating[item] + diffrating)
+        results = ((preds[item]/predfreqs[item], item) for item in preds)
+        if top is not None:
+            results = nlargest(top, results)
+        else:
+            results = sorted(results, reverse=True)
+        return results
+
+
+class SlopeOneSym2(SlopeOneSym):
+    """SlopeOneSym variant: same lower-triangle model, but update() filters
+    pairs with a comparison instead of sorting and popping."""
+
+    def update(self, users):
+        freqs, diffs = self.freqs, self.diffs
+        for user in users:
+            for item1, rating1 in user.iteritems():
+                item1_freqs = freqs[item1]
+                item1_diffs = diffs[item1]
+                for item2 in user:
+                    # keep only item1 > item2 (lower triangle)
+                    if item2 >= item1:
+                        continue
+                    item1_freqs[item2] += 1
+                    item1_diffs[item2] += rating1 - user[item2]
+        # normalize summed diffs into averages
+        for item1, ratings in diffs.iteritems():
+            item1_freqs = freqs[item1]
+            for item2 in ratings:
+                ratings[item2] /= item1_freqs[item2]

File slopeone/slsparse.py

+from collections import defaultdict
+from heapq import nlargest
+from itertools import izip
+from operator import itemgetter
+
+import numpy as np
+
+
+class SlopeOneSparseSym(object):
+    """Slope One on parallel coordinate arrays (lower triangle only).
+
+    Stores rows > cols pairs in four aligned 1-D arrays: ``rows``, ``cols``,
+    ``freqs`` (co-rating counts) and ``diffs`` (average row-minus-col
+    rating difference).
+    """
+
+    def __init__(self, num_items):
+        # num_items: number of distinct item ids (prediction vector length)
+        self.num_items = num_items
+
+    def update(self, users):
+        """Train from an iterable of {item_id: rating} dicts, one per user."""
+        # collect the freqs and dicts in dict-of-dicts
+        freqdict = defaultdict(lambda: defaultdict(int))
+        diffdict = defaultdict(lambda: defaultdict(int))
+        for user in users:
+            itemid_ratings = sorted(user.iteritems(), key=itemgetter(0))
+            # pop the largest id each round: only pairs with i > j are kept
+            while itemid_ratings:
+                i, ri = itemid_ratings.pop()
+                freqs_i = freqdict[i]
+                diffs_i = diffdict[i]
+                for j, rj in itemid_ratings:
+                    freqs_i[j] += 1
+                    diffs_i[j] += ri - rj
+        # transform into arrays for rows, cols, freqs, diffs
+        freq_data = []; extend_freq_data = freq_data.extend
+        diffs = []; extend_diffs = diffs.extend
+        while freqdict:
+            i, freqdict_i = freqdict.popitem()
+            extend_freq_data((i, j, fj) for j, fj in freqdict_i.iteritems())
+            diffs_i = diffdict[i]
+            # iterate freqdict_i's keys so diffs stay aligned with freq_data
+            extend_diffs(diffs_i[j] for j in freqdict_i)
+            del diffdict[i]
+        freq_data = np.array(freq_data)
+        self.rows = freq_data[:,0]
+        self.cols = freq_data[:,1]
+        assert np.alltrue(self.rows > self.cols)
+        self.freqs = freq_data[:,2]
+        assert np.alltrue(self.freqs > 0)
+        # scale the diffs by freq
+        self.diffs = 1.0 / self.freqs * diffs
+
+    def dump(self, fileobj):
+        """Write ``i j freq diff`` lines in sorted order (debug aid)."""
+        for i, j, f, d in sorted(izip(self.rows, self.cols, self.freqs, self.diffs),
+                                 key=itemgetter(0,1)):
+            print >> fileobj, i, j, f, d
+
+    def recomendations(self, user, top=None):
+        """Return [(predicted_rating, item_id), ...] for items not in ``user``.
+
+        Scans the triangle twice: once with the user's items on the row
+        side, once on the column side (diff sign flips accordingly).
+        """
+        rows, cols = self.rows, self.cols
+        freqs = np.zeros(self.num_items, dtype=int)
+        ratings = freqs.astype(float)
+
+        # find which item ids in rows are equal to any of user keys
+        in_rows = np.zeros(len(rows), dtype=bool)
+        for item_id in user:
+            in_rows |= rows == item_id
+        # predict col item j from rated row item i: d = avg(r_i - r_j)
+        for i,j,f,d in izip(rows[in_rows], cols[in_rows],
+                            self.freqs[in_rows], self.diffs[in_rows]):
+            freqs[j] += f
+            ratings[j] += f * (user[i] - d)
+
+        # find which item ids in cols are equal to any of user keys
+        in_cols = np.zeros(len(cols), dtype=bool)
+        for item_id in user:
+            in_cols |= cols == item_id
+        # predict row item i from rated col item j: sign of d flips
+        for i,j,f,d in izip(rows[in_cols], cols[in_cols],
+                            self.freqs[in_cols], self.diffs[in_cols]):
+            freqs[i] += f
+            ratings[i] += f * (user[j] + d)
+
+        # normalize by freq; this will create NaNs for zero freqs
+        ratings /= freqs
+        # drop the NaNs and the known itemids
+        keep_ids = np.setdiff1d(np.isfinite(ratings).nonzero()[0], user.keys())
+        rating_itemids = izip(ratings[keep_ids], keep_ids)
+        if top is not None:
+            rating_itemids = nlargest(top, rating_itemids)
+        else:
+            rating_itemids = sorted(rating_itemids, reverse=True)
+        return rating_itemids

File slopeone/test_slopeone.py

+import unittest
+
+import orig_slopeone, slopeone, numpy_slopeone
+from benchmark import ItemsStorage
+
+
+class SlopeOneTest(unittest.TestCase):
+    """Cross-checks every Slope One implementation against hand-computed
+    expected recommendations on a small fixed fixture.
+
+    ``users`` holds per-user {book_id: rating} dicts; ``recommendations``
+    holds, per user, the expected (book_id, predicted_rating) list in
+    descending prediction order.
+    """
+
+    users = [
+        dict(b1=10, b2=5, b3=7, b4=0, b5=8),
+        dict(b1=3, b3=6, b5=9, b7=1),
+        dict(b2=4, b4=6, b6=8),
+        dict(b5=2, b6=3, b7=4, b8=5, b9=6),
+        dict(b8=10, b9=2, b10=8),
+        dict(b4=3, b5=5, b6=7),
+    ]
+
+    recommendations = [
+        [('b9', 12), ('b8', 11), ('b6', 6.8), ('b7', 5)],
+        [('b9', 8), ('b8', 7), ('b6', 7), ('b2', 8/3.), ('b4', 0)],
+        [('b1', 12.5), ('b9', 11), ('b8', 10), ('b3', 9.5), ('b7', 9.0), ('b5', 8.4)],
+        [('b10', 7.5), ('b3', 3), ('b1', 2), ('b2', -1), ('b4', -1.5)],
+        [('b7', 4.5), ('b6', 3.5), ('b5', 2.5)],
+        [('b9', 9.5), ('b8', 8.5), ('b1', 19/3.), ('b3', 16/3.), ('b7', 4), ('b2', 3.5)],
+    ]
+
+    def setUp(self):
+        # register every book id so ids are dense and shared across tests
+        self.items_storage = items_storage = ItemsStorage()
+        for user in self.users:
+            for book_id  in user.iterkeys():
+                items_storage.add_item(book_id)
+        # per-user {item_id: rating} dicts, as the implementations expect
+        self.ratings = map(self._itemid2rating, self.users)
+
+    def test_slope_one(self):
+        self._test(slopeone.SlopeOne)
+
+    def test_slope_one2(self):
+        self._test(slopeone.SlopeOne2)
+
+    def test_slope_one_sym(self):
+        self._test(slopeone.SlopeOneSym)
+
+    def test_slope_one_sym2(self):
+        self._test(slopeone.SlopeOneSym2)
+
+    def test_slope_one_dense(self):
+        self._test(numpy_slopeone.SlopeOneDense)
+
+    def test_slope_one_sparse(self):
+        self._test(numpy_slopeone.SlopeOneSparse)
+
+    def test_orig_slope_one(self):
+        self._test(orig_slopeone.SlopeOne)
+
+    def test_orig_slope_one_numpy(self):
+        self._test(orig_slopeone.SlopeOneNumpy)
+
+    def _itemid2rating(self, user):
+        # translate a {book_id: rating} dict into {item_id: rating}
+        return dict((self.items_storage.id(book_id), rating)
+                    for book_id, rating in user.iteritems())
+
+    def _test(self, cls):
+        """Train ``cls`` on the fixture and compare predictions for each
+        user at several ``top`` cutoffs against the expected lists."""
+        slopeone = cls(len(self.items_storage))
+        slopeone.update(self.ratings)
+        for user, recommendations in zip(self.users, self.recommendations):
+            for top in None, 1, 2, 3:
+                predictions = [
+                    (self.items_storage.item(item_id), rating)
+                    for rating, item_id in
+                    slopeone.recomendations(self._itemid2rating(user), top=top)
+                ]
+                self.assertEqual(predictions, recommendations[:top])
+
+
+class SmallSlopeOneTest(SlopeOneTest):
+    """Re-runs every inherited test method on a minimal 3-user fixture,
+    including a user with no recommendable items (empty expected list)."""
+
+    users = [
+        dict(b1=5, b2=3, b3=2),
+        dict(b1=3, b2=4),
+        dict(b2=2, b3=5),
+    ]
+
+    recommendations = [
+        [],
+        [('b3', 10/3.)],
+        [('b1', 13/3.)],
+    ]