Commits

Olemis Lang committed 2720de9

GViz QL : Initial implementation of group by clause. Tests rewritten accordingly ... (failures=2)

  • Participants
  • Parent commits 7de29ed
  • Branches gviz_ql

Comments (0)

Files changed (3)

trac-dev/gviz/tracgviz/gvizql.py

 from weakref import proxy
 
 from tracgviz.api import DataTable
+from tracgviz.util import groupby
 from tracgviz.util.parsing import OperatorPrecedenceParser,  \
     InvalidParserConfiguration
 
     elif not isinstance(seq, Sequence):
       seq = Sequence([seq])
     self.cols_mapping = seq
-    logging.debug("Select list %s", seq)
     self.cols = [self._resolve_schema(c, lambda colnm: (str(colnm), None))[0] 
         for c in self.cols_mapping ] if seq is not None else None
 
   def transform(self, schema, data):
     r"""Create groups.
     """
-    self.unsupported()
+    get_col_value = self._column_accessor(schema)
+    get_col_schema = self._schema_accessor(schema)
+    new_schema = [self._resolve_schema(c, get_col_schema) \
+        for c in self.groups_def]
+    groups = groupby(data, 
+        key=lambda row: Tuple(self._eval_expr(row, c, get_col_value) \
+            for c in self.groups_def)
+      )
+    new_data = (setattr(newrow, '__schema__', schema) or \
+        setattr(newrow, '__group__', group) or newrow \
+            for newrow, group in groups)
+    return new_schema, new_data
 
 class GVizPivotClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 4, 'idx_eval': 3, 'keyw' : 'pivot'}
 class Sequence(list):
   pass
 
+class Tuple(tuple):
+  pass
+
 class GVizQLExpressionBuilder:
   r"""Stub class responsible for handling parser callback invocations 
   in order to build instances of `GVizQLExpression`.

trac-dev/gviz/tracgviz/testing/test_gvizql.py

       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      = Columns =
+      dept string
+      = Row =
+         dept = Eng
+      = Row =
+         dept = Sales
+      = Row =
+         dept = Marketing
 
 
       >>> parse("select dept, max(salary) group by   dept   ", \
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
-
+      NotImplementedError  :  Feature not supported ... yet
 
       >>> parse("select lunchTime, avg(salary), count(age) " \
       ...       "group by    isSenior,lunchTime", \
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      = Columns =
+      lunchTime timeofday
+      avg(salary) number
+      count(age) number
+      <BLANKLINE>
+      NotImplementedError  :  Feature not supported ... yet
 
       """,
   'Parsing GROUP BY (complex)' : r"""
       """,
+  'Parsing GROUP BY (failures)' : r"""
+      >>> parse("  select dept , lunchTime   group by dept "
+      ...       "   order by dept", 'sort', 'aggregate', 'cols')
+      ... 
+      *****
+      * Tokens
+      *****
+      Token.Keyword.Reserved select
+      Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
+      Token.Keyword.Reserved group by
+      Token.Name.Variable dept
+      Token.Keyword.Reserved order by
+      Token.Name.Variable dept
+      *****
+      * Parsing
+      *****
+      [('dept', True)]
+      ['dept']
+      ['dept', 'lunchTime']
+      *****
+      * Result
+      *****
+      GVizRuntimeError  :  Column [lunchTime] should be added to GROUP BY, removed from SELECT, or aggregated in SELECT.
+
+      """,
   'Parsing PIVOT (simple)' : r"""
       >>> parse("select dept  pivot    dept, salary , lunchTime", 
       ...           'pivot', 'cols')
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      GVizUnsupportedQueryOp  :  Unable to evaluate PIVOT clause. Either the whole clause or an specific feature is not supported yet.
 
 
       >>> parse("select lunchTime, sum(salary) group by lunchTime " \
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      GVizUnsupportedQueryOp  :  Unable to evaluate PIVOT clause. Either the whole clause or an specific feature is not supported yet.
 
       """,
   'Parsing ORDER BY (simple)' : r"""
          isSenior = False
          seniorityStartTime = None
 
-      >>> parse("  select dept , lunchTime   group by dept "
-      ...       "   order by dept", 'sort', 'aggregate', 'cols')
+      >>> parse("  select dept , lunchTime   group by dept, lunchTime "
+      ...       "   order by dept desc", 'sort', 'aggregate', 'cols')
       ... 
       *****
       * Tokens
       Token.Name.Variable lunchTime
       Token.Keyword.Reserved group by
       Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
       Token.Keyword.Reserved order by
       Token.Name.Variable dept
+      Token.Keyword desc
       *****
       * Parsing
       *****
-      [('dept', True)]
-      ['dept']
+      [('dept', False)]
       ['dept', 'lunchTime']
+      ['dept', 'lunchTime']
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      = Columns =
+      dept string
+      lunchTime timeofday
+      = Row =
+         dept = Sales
+         lunchTime = 12:00:00
+      = Row =
+         dept = Marketing
+         lunchTime = 13:00:00
+      = Row =
+         dept = Eng
+         lunchTime = 12:00:00
+      = Row =
+         dept = Eng
+         lunchTime = 13:00:00
 
       """,
   'Parsing ORDER BY (complex)' : r"""
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      NotImplementedError  :  Feature not supported ... yet
 
 
       >>> parse("  select dept , min(lunchTime)   group by dept "
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      NotImplementedError  :  Feature not supported ... yet
 
       """,
   'Parsing LIMIT' : r"""
       = Row =
          name = Dave
 
-      >>> parse("  select dept , lunchTime   group by dept "
-      ...       "   order by dept limit 100", 'cnt', 'cols', 'aggregate', 
-      ...                                     'sort')
+      >>> parse("  select dept , lunchTime   group by dept, lunchTime "
+      ...       "   order by dept , lunchTime desc limit 100", 
+      ...       'cnt', 'cols', 'aggregate', 'sort')
       ... 
       *****
       * Tokens
       Token.Name.Variable lunchTime
       Token.Keyword.Reserved group by
       Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
       Token.Keyword.Reserved order by
       Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
+      Token.Keyword desc
       Token.Keyword.Reserved limit
       Token.Literal.Number.Integer 100
       *****
       *****
       100
       ['dept', 'lunchTime']
-      ['dept']
-      [('dept', True)]
+      ['dept', 'lunchTime']
+      [('dept', True), ('lunchTime', False)]
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      = Columns =
+      dept string
+      lunchTime timeofday
+      = Row =
+         dept = Eng
+         lunchTime = 13:00:00
+      = Row =
+         dept = Eng
+         lunchTime = 12:00:00
+      = Row =
+         dept = Marketing
+         lunchTime = 13:00:00
+      = Row =
+         dept = Sales
+         lunchTime = 12:00:00
 
       """,
   'Parsing OFFSET' : r"""
          isSenior = False
          seniorityStartTime = None
 
-      >>> parse("  select dept , lunchTime   group by dept "
-      ...       " order by dept limit 25  offset 100 ", 'skip', 'cols', \
-      ...           'aggregate', 'cnt', 'sort')
+      >>> parse("  select dept , lunchTime   group by dept, lunchTime "
+      ...       " order by dept , lunchTime desc limit 2  offset 1 ", 
+      ...       'skip', 'cols', 'aggregate', 'cnt', 'sort')
       ... 
       *****
       * Tokens
       Token.Name.Variable lunchTime
       Token.Keyword.Reserved group by
       Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
       Token.Keyword.Reserved order by
       Token.Name.Variable dept
+      Token.Punctuation ,
+      Token.Name.Variable lunchTime
+      Token.Keyword desc
       Token.Keyword.Reserved limit
-      Token.Literal.Number.Integer 25
+      Token.Literal.Number.Integer 2
       Token.Keyword.Reserved offset
-      Token.Literal.Number.Integer 100
+      Token.Literal.Number.Integer 1
       *****
       * Parsing
       *****
-      100
+      1
       ['dept', 'lunchTime']
-      ['dept']
-      25
-      [('dept', True)]
+      ['dept', 'lunchTime']
+      2
+      [('dept', True), ('lunchTime', False)]
       *****
       * Result
       *****
-      GVizUnsupportedQueryOp  :  Unable to evaluate GROUP BY clause. Either the whole clause or an specific feature is not supported yet.
+      = Columns =
+      dept string
+      lunchTime timeofday
+      = Row =
+         dept = Eng
+         lunchTime = 12:00:00
+      = Row =
+         dept = Marketing
+         lunchTime = 13:00:00
 
       """,
   'Parsing LABEL (simple)' : r"""

trac-dev/gviz/tracgviz/util/__init__.py

 __all__ = 'BaseGVizHandler', 'GVizXMLRPCAdapter', 'dummy_request', \
           'convert_req_date', 'rpc_to_datetime', 'render_gviz_value', \
           'get_column_desc', 'TYPES_2_GVIZ', 'get_column_desc', \
-          'rpc_opt_sigs', 'REQFIELDS_DESC', 'REQFIELDS_DEFAULTS'
+          'rpc_opt_sigs', 'REQFIELDS_DESC', 'REQFIELDS_DEFAULTS', 'groupby'
 
 from trac.core import Component, ExtensionPoint, implements
 from trac.config import Option
       'datefmt'  : "%Y-%m-%d %H:%M:%S"
     }
 
class groupby(dict):
    r"""Guido inspired SQL-like GROUPBY class that also encapsulates the logic 
    in a Unix-like "sort | uniq".

    http://code.activestate.com/recipes/259173-groupby/

    PS: This version is slightly modified.

    The instance is a dictionary mapping every distinct key onto the list 
    of values collected for it, in the order they show up in `seq`. 
    Unlike a plain `dict`, iterating over the instance yields 
    `(key, values)` pairs instead of bare keys.

    :param seq:     iterable supplying the items to classify
    :param key:     callable used to compute the group key of an item. 
                    If omitted the item itself is the key, so it has to 
                    be hashable.
    :param value:   callable used to compute the object stored in the 
                    group for an item. If omitted the item itself is 
                    stored.
    """
    def __init__(self, seq, key=None, value=None):
        samekey = key is None
        samevalue = value is None
        for item in seq:
            # The item is its own key when no `key` callable is supplied 
            # (this used to read an undefined name and raise NameError).
            k = item if samekey else key(item)
            self.setdefault(k, []).append(item if samevalue else value(item))

    def __iter__(self):
        r"""Iterate over `(key, values)` pairs.
        """
        try:
            # Python 2 : lazy iterator, exactly what was bound before.
            return dict.iteritems(self)
        except AttributeError:
            # Python 3 : `iteritems` is gone, `items` returns a view.
            return iter(dict.items(self))
+