Commits

Pierre Carbonnelle committed d83a357

use group_by instead of for_each for rank, running_sum

Comments (0)

Files changed (3)

     (f[Key]==aggregate(Value, for_each=For_each, order_by=Order_by)) <= q(Key, Value, For_each, Order_by)
 
 in knowledge base :
-    (f[Key]==_pyD_X1) <= f!°(Key, Value, Group_by, For_each, Order_by, _pyD_X1)
-        where f!° is a literal with aggregate attribute
+    (f[Key]==_pyD_X1) <= f!1°(Key, Value, Group_by, For_each, Order_by, _pyD_X1)
+        where f!1° is a literal with aggregate attribute
               whose argument list has no repetition of variables
-    f!(Key, Value, Group_by, For_each, Order_by) <= q(Key, Value, Group_by, For_each, Order_by)
-        where the argument list of f! has no repetition of variables
+    f!1(Key, Value, Group_by, For_each, Order_by) <= q(Key, Value, Group_by, For_each, Order_by)
+        where the argument list of f!1 has no repetition of variables
 
-resolution algorithm for f!°:
+resolution algorithm for f!1°:
     drop the last term
     variabilize Key that are not in Group_by (for rank, running_sum aggregation)
     find all f!1(Key, Value, Group_by, For_each, Order_by)

pyDatalog/examples/test.py

         #print ask(place['Jerry']==Rank) # TODO
         assert ask(place[Person]==1) == set([('Jerry',)])
 
-        (a_rank1[Z] == rank(for_each=Z, order_by=Z)) <= q(X, Y, Z)
+        (a_rank1[Z] == rank(group_by=Z, order_by=Z)) <= q(X, Y, Z)
         assert ask(a_rank1[X]==Y) == set([(1, 0), (2, 0), (4, 0)])
         assert ask(a_rank1[X]==0) == set([(1,), (2,), (4,)])
         assert ask(a_rank1[1]==X) == set([(0,)])
         assert ask(a_rank2[a,Y]==0) == set([('c',)])
 
         # rank
-        (a_rank[X,Y] == rank(for_each=(X,Y), order_by=Z)) <= q(X, Y, Z) & q(X,Y2,Z2)
+        (a_rank[X,Y] == rank(group_by=(X,Y), order_by=Z)) <= q(X, Y, Z) & q(X,Y2,Z2)
         assert ask(a_rank[X,Y]==Z) == set([('a', 'b', 0), ('a', 'c', 0), ('b', 'b', 0)])
         assert ask(a_rank[a,b]==1) == None
         assert ask(a_rank[a,b]==Y) == set([(0,)])
         assert ask(a_rank[X,Y]==1) == None
         """
         # reversed
-        (b_rank[X,Y] == rank(for_each=(X,Y2), order_by=-Z2)) <= q(X, Y, Z) & q(X,Y2,Z2)
+        (b_rank[X,Y] == rank(group_by=(X,Y2), order_by=-Z2)) <= q(X, Y, Z) & q(X,Y2,Z2)
         assert ask(b_rank[X,Y]==Z) == set([('a', 'b', 0), ('a', 'c', 1), ('b', 'b', 0)])
         assert ask(b_rank[a,b]==0) == set([()])
         assert ask(b_rank[a,b]==Y) == set([(0,)])
         +movement('Account2', 'date1', 10)
         +movement('Account2', 'date2', -5)
         
-        (balance[Account, Date] == running_sum(Amount, for_each=Account, order_by=Date)) <= movement(Account, Date, Amount)
+        (balance[Account, Date] == running_sum(Amount, group_by=Account, order_by=Date)) <= movement(Account, Date, Amount)
         
         assert ask(balance[Account, Date]==Amount) == set([('Account1', 'date1', 10),('Account1', 'date2', 2),('Account1', 'date3', 0),('Account2', 'date1', 10),('Account2', 'date2', 5)])
         assert ask(balance['Account1', Date]==Amount) == set([('date1', 10), ('date2', 2), ('date3', 0)])
         #TODO assert ask(balance[Account, 'date2']==Amount) #TODO !
         assert ask(balance[Account, Date]==0) == set([('Account1', 'date3')])
 
-        (a_run_sum1[Z] == running_sum(Z, for_each=Z, order_by=Z)) <= q(X, Y, Z)
+        (a_run_sum1[Z] == running_sum(Z, group_by=Z, order_by=Z)) <= q(X, Y, Z)
         assert ask(a_run_sum1[X]==Y) == set([(1, 1), (2, 2), (4, 4)])
         assert ask(a_run_sum1[X]==1) == set([(1,)])
         assert ask(a_run_sum1[1]==X) == set([(1,)])
         assert ask(a_run_sum1[1]==0) == None
 
         # running_sum
-        (a_run_sum[X,Y] == running_sum(Z, for_each=(Y), order_by=Z2)) <= q(X, Y, Z) & q(X,Y,Z2)
+        (a_run_sum[X,Y] == running_sum(Z, group_by=(Y), order_by=Z2)) <= q(X, Y, Z) & q(X,Y,Z2)
         assert ask(a_run_sum[X,Y]==Z) == set([('a', 'b', 2), ('a', 'c', 1), ('b', 'b', 6)])
         #assert ask(a_run_sum[b,b]==6) == set([()])
         #assert ask(a_run_sum[b,b]==Y) == set([(6,)])
         assert ask(a_run_sum[a,y]==Y) == None
 
         """
-        (b_run_sum[X,Y] == running_sum(Z, for_each=(X,Y2), order_by=-Z)) <= q(X, Y, Z) & q(X,Y2,Z)
+        (b_run_sum[X,Y] == running_sum(Z, group_by=(X,Y2), order_by=-Z)) <= q(X, Y, Z) & q(X,Y2,Z)
         assert ask(b_run_sum[X,Y]==Z) == set([('a', 'b', 2), ('a', 'c', 1), ('b', 'b', 4)])
         assert ask(b_run_sum[a,b]==2) == set([()])
         assert ask(b_run_sum[a,b]==Y) == set([(2,)])

pyDatalog/pyParser.py

             else:
                 return max(args)
         elif self._pyD_name in ('rank', 'rank_'):
-            return Rank_aggregate(None, for_each=kwargs.get('for_each', []), order_by=kwargs.get('order_by', []))
+            return Rank_aggregate(None, group_by=kwargs.get('group_by', []), order_by=kwargs.get('order_by', []))
         elif self._pyD_name in ('running_sum', 'running_sum_'):
-            return Running_sum(args[0], for_each=kwargs.get('for_each', []), order_by=kwargs.get('order_by', []))
+            return Running_sum(args[0], group_by=kwargs.get('group_by', []), order_by=kwargs.get('order_by', []))
         elif self._pyD_name == 'tuple_':
             return Tuple(args[0], order_by=kwargs.get('order_by', []))
         elif self._pyD_name in ('_len', 'len_'):
     """
     counter = util.Counter()
     
-    def __init__(self, Y=None, for_each=tuple(), order_by=tuple(), sep=None):
+    def __init__(self, Y=None, group_by=tuple(), for_each=tuple(), order_by=tuple(), sep=None):
         # convert a single expression to a 1-tuple, e.g. group_by=Z to group_by=(Z,); same for for_each and order_by
         self.Y = Y
+        self.group_by = (group_by,) if isinstance(group_by, Expression) else tuple(group_by)
         self.for_each = (for_each,) if isinstance(for_each, Expression) else tuple(for_each)
         self.order_by = (order_by,) if isinstance(order_by, Expression) else tuple(order_by)
         
         # try to recast expressions to variables
+        self.group_by = tuple([e.__dict__.get('variable', e) for e in self.group_by]) 
         self.for_each = tuple([e.__dict__.get('variable', e) for e in self.for_each]) 
         self.order_by = tuple([e.__dict__.get('variable', e) for e in self.order_by])
         
                 raise util.DatalogError("Error: argument missing in aggregate", None, None)
         
         # used to create literal.
-        self.args = ((Y,) if Y is not None else tuple()) + self.for_each + self.order_by + ((sep,) if sep is not None else tuple())
+        self.args = ((Y,) if Y is not None else tuple()) + self.group_by + self.for_each + self.order_by + ((sep,) if sep is not None else tuple())
         self.Y_arity = 1 if Y is not None else 0
         self.sep_arity = 1 if sep is not None else 0
         
         self.slice_for_each = [variables[variable._pyD_name] for variable in self.for_each]
         self.reversed_order_by = [variables[variable._pyD_name] for variable in self.order_by][::-1]
         self.reverse_order = [variable._pyD_negated for variable in new_terms[:-1]]
-        self.slice_group_by = [variables[variable._pyD_name] for variable in function._pyD_keys]
+        if isinstance(self, Rank_aggregate): # can't use required_kw because rank does not require group_by
+            self.slice_group_by = [variables[variable._pyD_name] for variable in self.group_by]
+        else:
+            self.slice_group_by = [variables[variable._pyD_name] for variable in function._pyD_keys]
         
         # return a literal without the result
         new_literal = Literal.make(new_name, new_terms[:-1], {})
             result.sort(key=lambda literal, i=i, self=self: literal[i].id,
                 reverse = self.reverse_order[i])
         # then sort per group_by
-        result.sort(key=lambda literal, self=self: [literal[i].id for i in self.slice_group_by])
+        result.sort(key=lambda literal, self=self: [id(literal[i]) for i in self.slice_group_by]) # faster than .id; ok for group_by
         pass
     
     def key(self, result):
             a._pyD_negated = not(a._pyD_negated)
 
 class Rank_aggregate(Aggregate):
-    """ represents rank_(for_each=Z, order_by=T)"""
+    """ represents rank_(group_by=Z, order_by=T)"""
     required_kw = ('order_by',)
     
-    def key(self, result):
-        """ return the grouping key of a result """
-        return list(result[i] for i in self.slice_for_each)
-    
-    def sort_result(self, result):
-        """ sort result according to the aggregate argument """
-        # first sort per order_by, allowing for _pyD_negated
-        for i in self.reversed_order_by:
-            result.sort(key=lambda literal, i=i, self=self: literal[i].id,
-                reverse = self.reverse_order[i])
-        # then sort per for_each
-        result.sort(key=lambda literal, self=self: [literal[i].id for i in self.slice_for_each])
-
     def add(self, row):
         self._value += 1
         return list(row) + [self._value-1]
         return None
 
 class Running_sum(Rank_aggregate):
-    """ represents running_sum(Y, for_each=Z, order_by=T"""
-    required_kw = ('Y', 'for_each', 'order_by')
+    """ represents running_sum(Y, group_by=Z, order_by=T)"""
+    required_kw = ('Y', 'group_by', 'order_by')
     
     def add(self, row):
         self._value += row[self.index_value].id
-        return list(row) + [self._value] #TODO
+        return list(row) + [self._value]
         
         
 """                             Parser methods                                                   """