Olemis Lang avatar Olemis Lang committed bb56cc3

GViz QL : Group values according to group by clause. Aggregation functions avg, count, sum ... (failures=5)

Comments (0)

Files changed (4)

trac-dev/gviz/TODO

 
 - GViz API QL: Implement `group by` clause.
 
+- GViz API QL: Match types for column supplied in to aggregate functions.
+
 - Add `limit` parameter for history methods in VersionControlRPC.
 
 - Implement data source provider for Ticket->Query

trac-dev/gviz/tracgviz/aggregate.py

 Licensed under the Apache License, Version 2.0 
 """
 __author__ = 'Olemis Lang'
-__all__ = 'avg count max min sum'.split()
+__all__ = ['agg_' + nm for nm in 'avg count max min sum'.split()]
+
+from itertools import count, ifilter, izip
+from math import fsum
 
 def notimpl(*args, **kwds):
   raise NotImplementedError('Aggregation not supported ... yet')
 
-avg = lambda *args, **kwds: notimpl(*args, **kwds)
-avg.return_type = 'number'
+def _iter_col(rows, getvalue, colnm):
+  # FIXME: Always wrap using ifilter ?
+  return ifilter(lambda x: x is not None, (getvalue(r, colnm) for r in rows))
 
-count = lambda *args, **kwds: notimpl(*args, **kwds)
-count.return_type = 'number'
+def agg_avg(rows, getvalue, colnm):
+  # Ignore null values
+  dataset = _iter_col(rows, getvalue, colnm)
+  c = [0]
+  s = fsum(c.__setitem__(0, idx) or value \
+      for idx, value in izip(count(1), dataset))
+  c = c[0]
+  return None if c == 0 else s/c
 
-max = lambda *args, **kwds: notimpl(*args, **kwds)
+agg_avg.return_type = 'number'
 
-min = lambda *args, **kwds: notimpl(*args, **kwds)
+def agg_count(rows, getvalue, colnm):
+  idx = 0
+  dataset = _iter_col(rows, getvalue, colnm)
+  for idx, _ in izip(count(1), dataset):
+    pass
+  return idx
 
-sum = lambda *args, **kwds: notimpl(*args, **kwds)
-sum.return_type = 'number'
+agg_count.return_type = 'number'
 
+def agg_max(rows, getvalue, colnm):
+  dataset = _iter_col(rows, getvalue, colnm)
+  try:
+    return max(dataset)
+  except ValueError:
+    # max() arg is an empty sequence
+    return None
+
+def agg_min(rows, getvalue, colnm):
+  dataset = _iter_col(rows, getvalue, colnm)
+  try:
+    return min(dataset)
+  except ValueError:
+    # max() arg is an empty sequence
+    return None
+
+def agg_sum(rows, getvalue, colnm):
+  dataset = _iter_col(rows, getvalue, colnm)
+  return fsum(dataset)
+
+agg_sum.return_type = 'number'
+

trac-dev/gviz/tracgviz/gvizql.py

   # Special markers 
 
   Sequence = Sequence
+  Tuple = Tuple
 
   @property
   def SyntaxError(self):
     colnm = colnm[1]
     col_schema = colnm['schema'](lambda colnm: (str(colnm), None) )
     try:
-      f = self.agg_funcs[funcnm]
+      f = self.agg_funcs['agg_' + funcnm]
     except KeyError:
       raise LookupError('Unknown aggregation function %s' % (funcnm,))
     return_type = getattr(f, 'return_type', None)
     func_label = '%s(%s)' % (funcnm, col_schema[0])
+
+    def eval_aggregate(row, getvalue):
+      if not isinstance(row, self.Tuple):
+        raise GVizRuntimeError("%s invoked without GROUP BY, PIVOT" % (funcnm,))
+      else:
+        getvalue = GVizQLClauseHandler._column_accessor(row.__schema__)
+        try:
+          return f(row.__group__, getvalue, col_schema[0])
+        except GVizRuntimeError:
+          # Target column is not available
+          raise GVizRuntimeError('%s(%s) : No such column in GROUP BY, PIVOT'% \
+              (funcnm, col_schema[0]) )
+
+    if not return_type:
+      def agg_schema(get_col_schema):
+        try:
+          return (func_label, get_col_schema(col_schema[0])[1])
+        except GVizRuntimeError:
+          # Target column is not available
+          raise GVizRuntimeError('%s(%s) : No such column in GROUP BY, PIVOT'% \
+              (funcnm, col_schema[0]) )
+
     return dict(
-        eval=self._unsupported, 
-        schema=(func_label, return_type) if return_type else 
-            lambda get_col_schema: (func_label, get_col_schema(col_schema[0])[1]),
+        eval=eval_aggregate,
+        schema=(func_label, return_type) if return_type else agg_schema,
         is_eval_callable=True,
         is_schema_callable=not bool(return_type)
       )

trac-dev/gviz/tracgviz/testing/test_gvizql.py

       = Columns =
       max(salary) number
       <BLANKLINE>
-      NotImplementedError  :  Feature not supported ... yet
+      GVizRuntimeError  :  max invoked without GROUP BY, PIVOT
 
       """,
   'Parsing FROM' : r"""
       = Row =
          dept = Marketing
 
-
       >>> parse("select dept, max(salary) group by   dept   ", \
       ...           'aggregate', 'cols')
       ... 
       lunchTime timeofday
       avg(salary) number
       count(age) number
-      <BLANKLINE>
-      NotImplementedError  :  Feature not supported ... yet
+      = Row =
+         lunchTime = 12:00:00
+         avg(salary) = 700.0
+         count(age) = 2
+      = Row =
+         lunchTime = 13:00:00
+         avg(salary) = 600.0
+         count(age) = 1
+      = Row =
+         lunchTime = 12:00:00
+         avg(salary) = 425.0
+         count(age) = 2
+      = Row =
+         lunchTime = 13:00:00
+         avg(salary) = 800.0
+         count(age) = 1
 
       """,
   'Parsing GROUP BY (complex)' : r"""
+      >>> parse("select isSenior, max(salary) group by isSenior", 
+      ...       'aggregate', 'cols')
+      ...
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable isSenior
+      Token.Punctuation ,
+      Token.Name.Builtin max
+      Token.Punctuation (
+      Token.Name.Variable salary
+      Token.Punctuation )
+      Token.Keyword.Reserved group by
+      Token.Name.Variable isSenior
+      *****
+      * Parsing
+      *****
+      ['isSenior']
+      ['isSenior', 'max(salary)']
+      *****
+      * Result
+      *****
+
+      >>> parse("select dept, avg(salary) group by dept",
+      ...       'aggregate', 'cols')
+      ...
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Builtin avg
+      Token.Punctuation (
+      Token.Name.Variable salary
+      Token.Punctuation )
+      Token.Keyword.Reserved group by
+      Token.Name.Variable dept
+      *****
+      * Parsing
+      *****
+      ['dept']
+      ['dept', 'avg(salary)']
+      *****
+      * Result
+      *****
+      = Columns =
+      dept string
+      avg(salary) number
+      = Row =
+         dept = Eng
+         avg(salary) = 700.0
+      = Row =
+         dept = Sales
+         avg(salary) = 375.0
+      = Row =
+         dept = Marketing
+         avg(salary) = 800.0
+
+      >>> parse("select dept, sum(salary) group by dept",
+      ...       'aggregate', 'cols')
+      ...
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Builtin sum
+      Token.Punctuation (
+      Token.Name.Variable salary
+      Token.Punctuation )
+      Token.Keyword.Reserved group by
+      Token.Name.Variable dept
+      *****
+      * Parsing
+      *****
+      ['dept']
+      ['dept', 'sum(salary)']
+      *****
+      * Result
+      *****
+      = Columns =
+      dept string
+      sum(salary) number
+      = Row =
+         dept = Eng
+         sum(salary) = 2100.0
+      = Row =
+         dept = Sales
+         sum(salary) = 750.0
+      = Row =
+         dept = Marketing
+         sum(salary) = 800.0
+
+      >>> parse(r'''select dept, count(isSenior) group by dept '''
+      ...       '''where isSenior = true''',
+      ...       'aggregate', 'filter', 'cols')
+      ...
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Builtin count
+      Token.Punctuation (
+      Token.Name.Variable isSenior
+      Token.Punctuation )
+      Token.Keyword.Reserved group by
+      Token.Name.Variable dept
+      Token.Keyword.Reserved where
+      Token.Name.Variable isSenior
+      Token.Operator.Comparison =
+      Token.Keyword.Constant true
+      *****
+      * Parsing
+      *****
+      ['dept']
+      isSenior = true
+      ['dept', 'count(isSenior)']
+      *****
+      * Result
+      *****
+      = Columns =
+      dept string
+      count(isSenior) number
+      = Row =
+         dept = Eng
+         count(isSenior) = 1
+      = Row =
+         dept = Sales
+         count(isSenior) = 1
+      = Row =
+         dept = Marketing
+         count(isSenior) = 1
+
+      >>> parse("select isSenior, min(salary) group by isSenior",
+      ...       'aggregate', 'cols')
+      ...
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable isSenior
+      Token.Punctuation ,
+      Token.Name.Builtin min
+      Token.Punctuation (
+      Token.Name.Variable salary
+      Token.Punctuation )
+      Token.Keyword.Reserved group by
+      Token.Name.Variable isSenior
+      *****
+      * Parsing
+      *****
+      ['isSenior']
+      ['isSenior', 'min(salary)']
+      *****
+      * Result
+      *****
+
       """,
   'Parsing GROUP BY (failures)' : r"""
       >>> parse("  select dept , lunchTime   group by dept "
       *****
       GVizRuntimeError  :  Column [lunchTime] should be added to GROUP BY, removed from SELECT, or aggregated in SELECT.
 
+      >>> parse("  select max(salary)", 'cols')
+      ... 
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Builtin max
+      Token.Punctuation (
+      Token.Name.Variable salary
+      Token.Punctuation )
+      *****
+      * Parsing
+      *****
+      ['max(salary)']
+      *****
+      * Result
+      *****
+      = Columns =
+      max(salary) number
+      <BLANKLINE>
+      GVizRuntimeError  :  max invoked without GROUP BY, PIVOT
+
       """,
   'Parsing PIVOT (simple)' : r"""
       >>> parse("select dept  pivot    dept, salary , lunchTime", 
       = Columns =
       max(salary) number Better paid
       <BLANKLINE>
-      NotImplementedError  :  Feature not supported ... yet
+      GVizRuntimeError  :  max invoked without GROUP BY, PIVOT
 
       >>> parse(r'''select lower(name) + '@example.com', upper(dept) 
       ...       label `lower(name) + '@example.com'` 'E-mail', 
       = Columns =
       max(salary) number Better paid
       <BLANKLINE>
-      NotImplementedError  :  Feature not supported ... yet
+      GVizRuntimeError  :  max invoked without GROUP BY, PIVOT
 
       >>> parse(r'''select lower(name) + '@example.com', upper(dept), 
       ...       isSenior, seniorityStartTime, salary * 0.05
       = Columns =
       2 * (max(empSalary) / max(empTax)) number
       <BLANKLINE>
-      NotImplementedError  :  Feature not supported ... yet
+      GVizRuntimeError  :  max invoked without GROUP BY, PIVOT
 
       >>> parse("select 12 * (year(seniorityStartTime) - year(hireDate)) + "
       ...       "month(seniorityStartTime) - month(hireDate)", 'cols')
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.