Commits

Olemis Lang committed f4e31f7

GViz QL: Data-driven parser for operator precedence grammars ... with tests

Comments (0)

Files changed (7)

trac-dev/gviz/setup.py

 	package_dir = {
 	        'tracgviz' : 'tracgviz',
 	        'tracgviz.testing' : 'tracgviz/testing',
+	        'tracgviz.util' : 'tracgviz/util',
 	        },
 	packages= PKG,
 	package_data={
 		'tracgviz': ['templates/*', 'htdocs/*', 
 		    'messages/es/LC_MESSAGES/*', '../CHANGES', '../COPYRIGHT', 
                     '../NOTICE', '../README', '../TODO'],
-		'tracgviz.testing': ['data/*',]
+		'tracgviz.testing': ['data/*',],
+		'tracgviz.util': []
 		},
 	include_package_data=True,
 	provides = ['tracgviz (%s)' % (latest,), 
-	            'tracgviz.testing (%s)' % (latest,), ],
+	            'tracgviz.testing (%s)' % (latest,),
+	            'tracgviz.util (%s)' % (latest,), ],
 	obsoletes = ['tracgviz (>=%s.0.0, <%s)' % \
 	                                    (versions[-1][0], latest), \
-	             'tracgviz.testing (>=%s.0.0, <%s)' % \
+	            'tracgviz.testing (>=%s.0.0, <%s)' % \
+	                                    (versions[-1][0], latest), \
+	            'tracgviz.util (>=%s.0.0, <%s)' % \
 	                                    (versions[-1][0], latest),],
 	entry_points = ENTRY_POINTS,
 	classifiers = cats,

trac-dev/gviz/tracgviz/gvizql.py

 class GVizQLClauseType(type):
   r"""Keep track of all GVizQL clause handlers installed in the 
   system, syntax order ...
-  
+
   >>> ','.join(ct.get_props('keyw') for ct in GVizQLClauseType.iterparse())
   'select,from,where,group by,pivot,order by,limit,offset,label,format,options'
-  
+
   ... as well as evaluation order.
-  
+
   >>> ','.join(ct.get_props('keyw') for ct in GVizQLClauseType.itereval())
   'from,group by,pivot,where,order by,offset,limit,select,format,options,label'
   """
   SYNTAX_ORDER = list()
   EVAL_ORDER = list()
   PROPS = ('idx_syntax', 'idx_eval', 'keyw')
-  
+
   def __new__(cls, name, bases, suite):
     r"""Keep track of all GVizQL clause handlers installed in the 
     system, as well as evaluation order.
       del suite['__abstract__']
     except KeyError:
       abstract = False
-    
+
     @classmethod
     def get_props(cls, propnm):
       try:
       except KeyError, exc:
         raise ValueError('Unsupported property %s', exc.message)
     suite['get_props'] = get_props
-    
+
     self = super(GVizQLClauseType, cls).__new__(cls, name, bases, suite)
     if not abstract:
       cnm = self.get_props('keyw')
       GVizQLClauseType.EVAL_ORDER.sort(None, \
                                     lambda x: x.get_props('idx_eval'))
     return self
-  
+
   @staticmethod
   def itereval():
     r"""Return a new iterator over the clause handlers stored in 
     `EVAL_ORDER`, i.e. following evaluation order.
     """
     handlers = GVizQLClauseType.EVAL_ORDER
     return iter(handlers)
-  
+
   @staticmethod
   def iterparse():
     r"""Return a new iterator over the clause handlers stored in 
     `SYNTAX_ORDER`, i.e. following syntax order.
     """
     handlers = GVizQLClauseType.SYNTAX_ORDER
     return iter(handlers)
-  
+
   @staticmethod
   def clauses():
     r"""Retrieved all the keywords identifying different clauses 
   r"""Objects used to define syntax and retrieve items inside a 
   clause. It's also responsible for performing the transformations 
   dictated by this clause on a data set.
-  
+
   Instances of this class have to override the following fields:
     - `_PROPS` :      a static dictionary containing the following keys
       * `idx_syntax`    a number indicating the relative syntactic 
   """
   __metaclass__ = GVizQLClauseType
   __abstract__ = True
-  
+
   ERR_MSGS = {
       'PARSING' : "%(clause)s clause is not supported yet.",
       'EVAL' :    "Unable to evaluate %(clause)s clause. " \
                   "Either the whole clause or an specific feature " \
                   "is not supported yet."
     }
-  
+
   def unsupported(self, at='PARSING', **vals):
     r"""Abort by reporting that this clause is not supported yet.

     @param at                       key of the `ERR_MSGS` template 
                                     to use. An unknown key yields an 
                                     empty message.
     @param vals                     values interpolated into the 
                                     template. The `clause` entry is 
                                     always overwritten with the 
                                     upper-cased clause keyword.
     @throw GVizUnsupportedQueryOp   always.
     """
     keyword = self.get_props('keyw')
     vals['clause'] = keyword.upper()
     template = self.ERR_MSGS.get(at, '')
     raise GVizUnsupportedQueryOp(template % vals)
-  
+
   def transform(self, schema, data):
     r"""Transform the result set as dictated by the clause included 
     in the GViz QL expression provided by the client. This 
     transformation *SHOULD* be performed by using iterators.
-    
+
     Note: Default behavior is to indicate that the clause is not 
     supported.
     """
 class GVizSelectClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 0, 'idx_eval': 7, 'keyw' : 'select'}
   KEYWORDS = ('select',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
         val = '<EOL>'
       raise GVizInvalidQuery("Syntax error or unsupported " \
                               "feature. Unexpected token %s." % (val,))
-  
+
   def transform(self, schema, data):
     r"""Arrange values considering the given column order.
     """
 class GVizFromClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 1, 'idx_eval': 0, 'keyw' : 'from'}
   KEYWORDS = ('from',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
     r"""Notify that base table has not been processed by data source. 
     This is made this way since it may hide a bug in the underlying 
     data source implementation.
-    
+
     FROM clause is irrelevant for transformations. This clause is 
     available so that the underlying data sources are able to handle 
     it. This feature will be supported in upcoming versions.
 class GVizWhereClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 2, 'idx_eval': 3, 'keyw' : 'where'}
   KEYWORDS = ('where',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
 class GVizGroupByClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 3, 'idx_eval': 1, 'keyw' : 'group by'}
   KEYWORDS = ('group', 'by')
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
 class GVizPivotClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 4, 'idx_eval': 2, 'keyw' : 'pivot'}
   KEYWORDS = ('pivot',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
 class GVizOrderByClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 5, 'idx_eval': 4, 'keyw' : 'order by'}
   KEYWORDS = ['asc', 'desc', 'order', 'by']
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
 class GVizLimitClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 6, 'idx_eval': 6, 'keyw' : 'limit'}
   KEYWORDS = ('limit',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
     else :
       raise GVizInvalidQuery("Syntax error: Integer number expected " \
                               "but %s found." % (val,))
-  
+
   def transform(self, schema, data):
     r"""Retrieve no more than `cnt` items.
     """
 class GVizOffsetClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 7, 'idx_eval': 5, 'keyw' : 'offset'}
   KEYWORDS = ('offset',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
     else :
       raise GVizInvalidQuery("Syntax error: Integer number expected " \
                               "but %s found." % (val,))
-  
+
   def transform(self, schema, data):
     r"""Skip the number of items determined by `skip` attribute.
     """
 class GVizLabelClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 8, 'idx_eval': 10, 'keyw' : 'label'}
   KEYWORDS = ('label',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
       else :
         raise GVizInvalidQuery("Syntax error: Column name expected " \
                                 "but %s found." % (val,))
-  
+
   def transform(self, schema, data):
     r"""(Add | modify) the schema in order to (include | update) 
     column labels.
-    
+
     Note: The process is not aborted if a label for a missing column 
           is specified.
     """
 class GVizFormatClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 9, 'idx_eval': 8, 'keyw' : 'format'}
   KEYWORDS = ('format',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
 class GVizOptionsClause(GVizQLClauseHandler):
   _PROPS = {'idx_syntax' : 10, 'idx_eval': 9, 'keyw' : 'options'}
   KEYWORDS = ('options',)
-  
+
   def __init__(self, ctx):
     r"""Initialize the expression using the enclosing parsing context
     (see docs for `GVizQLParsingContext`). Try to parse the items 
             (r'[a-zA-Z]\w*', Name.Variable, '#pop'),
           ],
       }
-  
+
   def __init__(self, *args, **kwds):
     r"""Initialize the parser. All the arguments are forwarded 
     untouched to the base class initializer.
     """
     super(GVizQLParser, self).__init__(*args, **kwds)
     # NOTE(review): the consumer of `noisy` is not visible in this 
     # file section -- confirm what the flag enables.
     self.noisy = True
-  
+
   def parse(self, tq):
     r"""Parse a GVizQL expression.
-    
+
     @param tq                       the GViz QL expression.
     @return                         an instance 
     @throw GVizInvalidQuery         if a syntax error or other error 
                                     that's not supported yet.
     """
     global GVizUnsupportedQueryOp, GVizInvalidQuery
-    from api import GVizUnsupportedQueryOp as GU, GVizInvalidQuery as GI
+    from tracgviz.api import GVizUnsupportedQueryOp as GU, GVizInvalidQuery as GI
     GVizUnsupportedQueryOp, GVizInvalidQuery = GU, GI
-    
+
     tkns = iter(self.get_tokens(tq))
     try:
       tkn, val = tkns.next()
       if not expr._handlers:            # Something was actually parsed ?
         raise GVizInvalidQuery("No token found: empty string ?")
       return expr
-  
+
   def get_tokens(self, tq):
     try:
       self._ctx = ctx = GVizQLParsingContext()
     finally:
       self.filters = []
       self._ctx = None
-    
+
 class GVizQLParsingContext():
   r"""Used to store global information generated at parsing time. 
   The parser adds the following fields to this object:
-    
+
     - stream : The token stream being processed.
     - parser : The object that's actually parsing the GViz QL expression.
   """
     for attr in 'parser stream'.split():
       setattr(self, attr, None)
     self._last = (None, None)
-  
+
   def filter(self, p, stream):
     r""" Store lookahead token.
     """
         continue
       self._last = t
       yield t
-    
+
   @property
   def last_token(self):
     r"""The value currently stored in `_last`: `(None, None)` right 
     after initialization, afterwards the latest token recorded by 
     `filter` before yielding it.
     """
     # The getter has to accept the instance; without `self` every 
     # access to this property fails with TypeError.
     return self._last
-  
+
 class GVizQLExpression:
   r"""Compiled GVizQL expression.
   """
     self._handlers[clause_nm] = ch
     for attrnm in ch.__dict__ :
       self._attrmap[attrnm] = clause_nm
-    
+
   def __getattr__(self, attrnm):
     r"""Resolve missing attributes by delegating to the clause 
     handler that registered the attribute name in `_attrmap`.

     @param attrnm           the name of the attribute being looked up
     @return                 the value of that attribute in the clause 
                             handler owning it
     @throw AttributeError   if no handler provides the attribute.
     """
     try:
       return getattr(self._handlers[self._attrmap[attrnm]], attrnm)
     except Exception:
       # Any lookup failure is reported as a missing attribute, but 
       # KeyboardInterrupt and SystemExit are not swallowed anymore 
       # (a bare `except` used to catch them as well).
       raise AttributeError("'%s' object has no attribute '%s'" % \
                                 (self.__class__.__name__, attrnm))
-  
+
   def itereval(self):
     r"""Iterate over the clause handlers present in this expression 
     following evaluation precedence.
       ch = self._handlers.get(ct.get_props('keyw'))
       if ch is not None :
         yield ch
-  
+
   def transform(self, schema, data):
     r"""Transform the result set as determined by the GVizQL 
     expression directives and clauses.
   r"""Prepare the data and schema to be supplied to an instance of 
   gviz_api.DataTable as determined by a GVizQL expression. This is 
   accomplished by wrapping the original data with multiple iterators.
-  
+
   @param provider   an instance of `IGVizDataProvider` interface 
                     responsible for providing the base result set 
                     (i.e. primary information) subsequently modified 
   from unittest import defaultTestLoader
   from string import whitespace
   import sys
-  
+
   from api import GVizInvalidQuery
   from testing.dutest import MultiTestLoader, DocTestLoader
-  
+
   def parse(expr, *attrs, **kwds):
     # Test lexical analysis
     print "*****\n* Tokens\n*****"
             print "= Row ="
             for val, col in izip(row, iter_schema()):
               print '  ', col[0], '=', val
-  
+
   l = MultiTestLoader([defaultTestLoader, \
                         DocTestLoader(
                             extraglobs=dict(parse=parse),

trac-dev/gviz/tracgviz/testing/test_parsing.py

+
+r"""Test cases and test data for parsers.
+"""
+__author__ = 'Olemis Lang'
+
+from pygments.token import *
+
+from tracgviz.util.parsing import OperatorPrecedenceParser as Parser, Any, \
+    EndMarker, NonTerminal
+
+# Modified version of sample operator precedence grammar in
+# The Theory of Parsing, Translation, and Compiling, A.V. Aho, J.B. Ullman
+#     Volume 1: Parsing
+#
+# Grammar
+# =======
+#
+#  1. E -> E + T
+#  2. E -> T
+#  3. T -> T * F
+#  4. T -> F
+#  5. F -> ( E )
+#  6. F -> a
+
+# Terminal symbols of the sample grammar written as 
+# (token type, value) pairs.
+# NOTE(review): `Any` presumably matches every value of the given 
+# token type, and `EndE` is presumably the end marker paired with 
+# start symbol `E` -- confirm in tracgviz.util.parsing.
+Multiply  = (Operator, '*')
+Add       = (Operator, '+')
+OpenP     = (Punctuation, '(')
+CloseP    = (Punctuation, ')')
+EndE      = (EndMarker, 'E')
+Var       = (Name, Any)
+
+# Precedence relations grouped by the first terminal of each pair: 
+# the inner dictionaries map a relation constant defined by the 
+# parser class to the list of terminals standing in that relation 
+# to the outer key.
+SAMPLE_GRAMMAR_PRECEDENCE = {
+    CloseP : {
+        Parser.MorePrecedence : [Multiply, Add, CloseP, EndE],
+      },
+    Var : {
+        Parser.MorePrecedence : [Multiply, Add, CloseP, EndE],
+      },
+    Multiply : {
+        Parser.MorePrecedence : [Multiply, Add, CloseP, EndE],
+        Parser.LessPrecedence : [OpenP, Var],
+      },
+    Add : {
+        Parser.MorePrecedence : [Add, CloseP, EndE],
+        Parser.LessPrecedence : [Multiply, OpenP, Var],
+      },
+    OpenP : {
+        Parser.LessPrecedence : [OpenP, Var, Multiply, Add,],
+        Parser.SamePrecedence : [CloseP, ],
+      },
+    EndE : {
+        Parser.LessPrecedence : [OpenP, Var, Multiply, Add,],
+      },
+  }
+
+def _flatten_precedence(table):
+    r"""Turn the nested mapping `{tkn1 : {relation : [tkn2, ...]}}` 
+    into a flat one of the form `{(tkn1, tkn2) : relation}`.
+    """
+    flat = dict()
+    for tkn1, relations in table.iteritems():
+        for prec, tokens in relations.iteritems():
+            for tkn2 in tokens:
+                flat[(tkn1, tkn2)] = prec
+    return flat
+
+SAMPLE_GRAMMAR_PRECEDENCE = _flatten_precedence(SAMPLE_GRAMMAR_PRECEDENCE)
+
+# Productions arranged as a tree of nested dictionaries. Each path 
+# spells the right-hand side of a production starting at its last 
+# symbol (e.g. `)`, non-terminal, `(` for production 5) and the 
+# `EndMarker` key closes the path and holds the production id. 
+# Non-terminals are not told apart: all of them are written 
+# `(NonTerminal, Any)`. Productions 2 and 4 have no entry in the tree.
+SAMPLE_GRAMMAR_PRODUCTIONS = {
+    (Name, Any) : {
+        EndMarker : '6',
+      },
+    (Punctuation, ')') : {
+        (NonTerminal, Any) : {
+            (Punctuation, '(') : {
+                EndMarker: '5',
+              }
+          }
+      },
+    (NonTerminal, Any) : {
+        (Operator, '*') :{
+            (NonTerminal, Any) : {
+                EndMarker: '3',
+              }
+          },
+        (Operator, '+') :{
+            (NonTerminal, Any) : {
+                EndMarker: '1',
+              }
+          },
+      },
+  }
+
+# Input
+# =====
+#
+# (a + a) * a
+
+# Token stream for the expression above, one (token type, value) 
+# pair per terminal. No end marker is included in the list.
+SAMPLE_INPUT_STREAM = [
+    (Punctuation, '('), (Name, 'a'), (Operator, '+'), (Name, 'a'), 
+    (Punctuation, ')'), (Operator, '*'), (Name, 'a'), 
+  ]
+
+# Doctests published through the standard `__test__` mapping so that 
+# doctest-based loaders pick them up. The example below configures a 
+# parser with the sample tables, parses `SAMPLE_INPUT_STREAM` and 
+# checks the ids of the productions in the order they were reported 
+# to the callback.
+__test__ = {
+  'Operator precedence grammar: Simple example' : r"""
+      >>> from tracgviz.util.parsing import OperatorPrecedenceParser
+      >>> p = OperatorPrecedenceParser()
+      >>> p.productions_tree = SAMPLE_GRAMMAR_PRODUCTIONS
+      >>> p.precedence = SAMPLE_GRAMMAR_PRECEDENCE
+      >>> p.start_state = 'E'
+
+      >>> stream = iter(SAMPLE_INPUT_STREAM)
+
+      >>> parse_order = []
+      >>> def store_parse_order(production_id, *args):
+      ...   parse_order.append(production_id)
+
+      >>> p.parse(stream, store_parse_order)
+      >>> ' '.join(parse_order)
+      '6 6 1 5 6 3'
+      """,
+  }
+

trac-dev/gviz/tracgviz/testing/util.py

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+# Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+r"""Utility classes.
+
+Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+Licensed under the Apache License, Version 2.0 
+"""
+__author__ = 'Olemis Lang'
+
+__all__ = 'dummy_request',
+
+from trac.web.api import Request
+from trac.web.chrome import Chrome
+from trac.web.main import RequestDispatcher
+
+from urlparse import urlparse
+
+def dummy_request(env, uname=None):
+    r"""Build a `Request` object for environment `env` that is not 
+    backed by a real HTTP request.
+
+    @param env      the environment the request is bound to
+    @param uname    if not `None`, the user name stored both in the 
+                    WSGI environ (`REMOTE_USER`) and in `req.authname`
+    @return         the request object. Redirections are turned into 
+                    no-ops and the lazy attribute callbacks are taken 
+                    from `RequestDispatcher` and `Chrome`.
+    """
+    def _noop(*args, **kwds):
+        return None
+
+    base_url = str(env._abs_href())
+    script_name = urlparse(str(env._abs_href())).path
+    environ = {'trac.base_url' : base_url, 'SCRIPT_NAME' : script_name}
+
+    # The no-op plays the role of the WSGI `start_response` callable.
+    req = Request(environ, _noop)
+    # Intercept redirection
+    req.redirect = _noop
+    # Setup user information
+    if uname is not None:
+        environ['REMOTE_USER'] = uname
+        req.authname = uname
+
+    dispatcher = RequestDispatcher(env)
+    chrome = Chrome(env)
+    callbacks = {
+        'authname': dispatcher.authenticate,
+        'chrome': chrome.prepare_request,
+        'hdf': dispatcher._get_hdf,
+        'perm': dispatcher._get_perm,
+        'session': dispatcher._get_session,
+        'tz': dispatcher._get_timezone,
+        'form_token': dispatcher._get_form_token,
+    }
+    req.callbacks.update(callbacks)
+    return req
+

trac-dev/gviz/tracgviz/util.py

-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-# Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
-#
-#   Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-
-r"""Helper (abstract) classes used to implement custom data sources,
-formatters, and protocol handlers.
-
-Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
-Licensed under the Apache License, Version 2.0 
-"""
-__author__ = 'Olemis Lang'
-
-# Public names of this module (each one listed exactly once).
-__all__ = 'BaseGVizHandler', 'GVizXMLRPCAdapter', 'dummy_request', \
-          'convert_req_date', 'rpc_to_datetime', 'render_gviz_value', \
-          'get_column_desc', 'TYPES_2_GVIZ', \
-          'rpc_opt_sigs', 'REQFIELDS_DESC', 'REQFIELDS_DEFAULTS'
-
-from trac.core import Component, ExtensionPoint, implements
-from trac.config import Option
-from trac.web.api import RequestDone, Request
-from trac.web.chrome import Chrome
-from trac.web.main import RequestDispatcher
-
-from tracrpc.api import XMLRPCSystem, Method
-
-from BaseHTTPServer import DEFAULT_ERROR_MESSAGE, BaseHTTPRequestHandler
-from datetime import datetime, date, time
-from itertools import takewhile, chain, imap, repeat, izip
-from xmlrpclib import DateTime
-
-from api import IGVizProtocolHandler, IGVizTableEncoder, \
-                IGVizDataProvider, IHashLibrary, GVizBadRequestError
-from testing.util import dummy_request
-
-__metaclass__ = type
-
-def send_response(req, status, response, mimetype='text/plain', \
-                    extra_headers=None):
-    r"""Send an HTTP response back to the caller.
-
-    @param req              the request object used to write the 
-                            response
-    @param status           HTTP status code
-    @param response         the response body. Its length is sent 
-                            in `Content-Length` header.
-    @param mimetype         value of `Content-Type` header
-    @param extra_headers    further headers to send: either a mapping, 
-                            a sequence of (name, value) pairs, or 
-                            `None` (the default) for no extra header
-    @throw RequestDone      always, once the response has been written.
-    """
-    req.send_response(status)
-    req.send_header('Content-Type', mimetype)
-    req.send_header('Content-Length', len(response))
-    # `None` replaces the former mutable default value (`dict()`).
-    for k, v in dict(extra_headers or ()).iteritems():
-        req.send_header(k, v)
-    req.end_headers()
-    req.write(response)
-    raise RequestDone()
-
-def send_std_error_response(req, status):
-    r"""Report an HTTP error to the caller. The HTML body is built 
-    by filling the standard `BaseHTTPServer` error template with the 
-    message and explanation registered for `status`.
-    """
-    message, explain = BaseHTTPRequestHandler.responses[status]
-    body = DEFAULT_ERROR_MESSAGE % {
-        'code' : status,
-        'message' : message,
-        'explain' : explain,
-      }
-    send_response(req, status, body, mimetype='text/html')
-
-class BaseGVizHandler(Component):
-    r"""This class encloses the functionality which might be present
-    in most versions of Google Visualization API. It can be reused by
-    specific subclasses implementing a specific protocol version
-    as defined by the Google Visualization API.
-    """
-    abstract = True
-    implements(IGVizProtocolHandler)
-    encoders = ExtensionPoint(IGVizTableEncoder)
-    hashlibs = ExtensionPoint(IHashLibrary)
-    
-    hash_name = Option('gviz', 'hash', default=None, 
-                        doc="""The algorithm used to generate a hash """
-                            """of the data sent back to the client. This """
-                            """feature is defined by Google """
-                            """Visualization API since version 0.5 so as """
-                            """to optimize the request / response """
-                            """mechanism to make rational use of the """
-                            """available bandwith.""")
-    
-    def _init_hash(self):
-        r"""Setup the secure hash algorithm.
-
-        Every hash library is asked for the properties of the 
-        configured algorithm and the one reporting the greatest value 
-        in the first item is kept in `_hlib` and used to create 
-        `hash_obj`. Both attributes are `None` if no algorithm is 
-        configured or no library was selected.
-        """
-        hash_name = self.hash_name
-        self._hlib = self.hash_obj = None
-        self.log.debug("IG: Config hash method : '%s'", hash_name)
-        if hash_name:
-          pr = -1
-          for hlib in self.hashlibs:
-            self.log.debug("IG: Processing : %s", hlib)
-            try:
-              cur_pr, _ = hlib.get_hash_properties(hash_name)
-            except TypeError:
-              # Nothing to unpack : handled as `algorithm unsupported`
-              self.log.debug("IG: %s doesnt support '%s'", hlib, hash_name)
-            else:
-              if cur_pr > pr:
-                self._hlib = hlib
-                pr = cur_pr
-        if self._hlib is not None:
-          self.hash_obj = self._hlib.new_hash_obj(hash_name)
-          self.log.info("IG: Hash method '%s' lib '%s'", hash_name, self._hlib)
-        else:
-          self.log.info("IG: Hash method 'None'")
-    
-    @staticmethod
-    def fmt_supports_version(encoder, version):
-        r"""Return whether a data table encoder supports a specific
-        version of Google Visualization API.
-
-        @param encoder  object whose `supported_versions()` method 
-                        returns pairs of the form (relation, version)
-        @param version  the protocol version being checked
-        @return         `True` if all the constraints hold
-        """
-        
-        rels = {
-                '>' : tuple.__gt__,
-                '<' : tuple.__lt__,
-                '>=' : tuple.__ge__,
-                '<=' : tuple.__le__,
-                '==' : tuple.__eq__,
-                '!=' : lambda x, y: x != y,
-               }
-        
-        versions = encoder.supported_versions()
-        # NOTE(review): the version declared by the encoder is the 
-        # left operand of each comparison -- confirm this is the 
-        # intended direction.
-        return all(rels[r](v, version) for r, v in versions)
-    
-    def find_encoder(self, fmt_id, version, mime_type=None):
-        r"""Find an encoder able to convert a data table contents into
-        a specific format, maybe having a well-known content-type.
-        
-        @param fmt_id the output format id
-        @param version the particular protocol `version` of Google
-                    Visualization API in use.
-        @param mime_type if specified then a best match is made to 
-                    return an encoder returning the specific content
-                    type requested by the caller.
-                    
-        @return the best match made according to the available 
-                    encoders or `None` if no such encoder could be
-                    found. This encoder *must* support the requested
-                    format and protocol version, and *should*
-                    use the requested content-type, but the later
-                    assertion *is not compulsory*.
-        @throw KeyError if there is no encoder at all for `fmt_id`.
-        """
-        # Consider every encoder registered for the format. The former 
-        # `takewhile` lookups gave up at the first encoder failing 
-        # the test, thereby missing suitable encoders listed after it.
-        supported = [e for e in self._fmt[fmt_id] \
-                        if self.fmt_supports_version(e, version)]
-        if not supported:
-            return None
-        if mime_type is not None:
-            for e in supported:
-                if e.get_content_type() == mime_type:
-                    return e
-        # No constraint (or no match) on content type : first wins.
-        return supported[0]
-    
-    def _init_fmt(self):
-        """Arrange the available format encoders, i.e. group them by 
-        format id in `_fmt` dictionary.
-        """
-        self._fmt = dict()
-        for e in self.encoders:
-            self._fmt.setdefault(e.get_format_id(), []).append(e)
-    
-    def __init__(self):
-        self._init_fmt()
-        self._init_hash()
-        
-    # TODO : Implement common features.
-
-class VoidRpcHandler:
-    r"""Placeholder for an XML-RPC handler that is not available. 
-    Every attribute lookup handled by `__getattr__` fails with an 
-    explanatory `AttributeError`.
-    """
-    def __getattr__(self, attrnm):
-        msg = "The requested XML-RPC handler cannot " \
-              "be found. Either it doesn't exist " \
-              "or the component is disabled. " \
-              "Contact your Trac administrator."
-        raise AttributeError(msg)
-
-#class Method(Method):
-#    r"""A faster XML-RPC method implementation since it returns 
-#    iterators instead of lists.
-#    """
-#    def __call__(self, req, args):
-#        req.perm.assert_permission(self.permission)
-#        result = self.callable(req, *args)
-#        # If result is null, return a zero
-#        if result is None:
-#            result = 0
-#        elif isinstance(result, dict):
-#            for key,val in result.iteritems():
-#                if isinstance(val, datetime.datetime):
-#                    result[key] = to_datetime(val)
-#            #pass
-#        elif not isinstance(result, basestring):
-#            # Try and convert result to a list
-#            try:
-#                result = (i for i in result)
-#            except TypeError:
-#                pass
-#        return (result,)
-
-class RPCHelperObject:
-    r"""A proxy class needed to assert the permissions handled by 
-    XMLRPCSystem, instead of using directly to the RPC method.
-    """
-    def __init__(self, rpc_obj):
-        r"""Wrap every method published by `rpc_obj`.
-
-        @param rpc_obj  the XML-RPC handler. It has to provide 
-                        `xmlrpc_methods()`, `xmlrpc_namespace()` 
-                        and `log`.
-        """
-        methods = (Method(rpc_obj, *mi) for mi in rpc_obj.xmlrpc_methods())
-        # Length of the `<namespace>.` prefix removed from method names
-        prefix_len = len(rpc_obj.xmlrpc_namespace()) + 1
-        
-        def method_wrapper(m):
-            # Invoke the `Method` object with the arguments packed in 
-            # a tuple and return the first item of its result.
-            wrapper = lambda req, *args: m(req, args)[0]
-            wrapper.__module__ = m.callable.__module__
-            wrapper.func_name = m.callable.__name__
-            return wrapper
-        # Wrappers are indexed by method name without the namespace.
-        self.__methods = dict([m.name[prefix_len:], method_wrapper(m)] \
-                                for m in methods)
-        rpc_obj.log.debug('IG: RPC methods %s', self.__methods)
-        self.__rpc_obj = rpc_obj
-    
-    def __getattr__(self, attrnm):
-        r"""Try to retrieve the XML-RPC method first. Otherwise return 
-        the attribute of the underlying XML-RPC object.
-        """
-        try:
-            return self.__methods[attrnm]
-        except KeyError:
-            return getattr(self.__rpc_obj, attrnm)
-
-class GVizXMLRPCAdapter(Component):
-    r"""Base class for components whose main purpose is to provide 
-    some data relying on an existing XML-RPC handler (i.e. a 
-    component implementing tracrpc.api.IXMLRPCHandler interface). 
-    The data source is meant to reuse the RPC provider namespace and
-    logic.
-    """
-    implements(IGVizDataProvider)
-    abstract = True
-    
-    def __init__(self):
-        r"""Assign the corresponding XML RPC handler to this data
-        source provider. 
-        
-        Note: Since Trac core system components hack the initializer,
-        further initialiation steps needed by sub-classes should be
-        coded by overriding `do_init` method.
-        """
-        # The namespace is taken from `xmlrpc_namespace` if the 
-        # component defines it, otherwise from `gviz_namespace`.
-        # NOTE(review): both values are joined with `.` hence they 
-        # are presumably sequences of names rather than plain 
-        # strings -- confirm against the subclasses.
-        try:
-            rpcns = '.'.join(self.xmlrpc_namespace())
-        except AttributeError:
-            rpcns = '.'.join(self.gviz_namespace())
-        self.log.debug('IG: RPC Namespace %s Ok', rpcns)
-        for rpc_provider in XMLRPCSystem(self.env).method_handlers:
-            # TODO : Implement a proper match for regex in gviz ns
-            if rpc_provider.xmlrpc_namespace() == rpcns:
-                # Substituted in order to reuse permissions asserted 
-                # by XMLRPCSystem.
-                # self._rpc_obj = rpc_provider
-                self._rpc_obj = RPCHelperObject(rpc_provider)
-                break
-        else:
-            # No handler matches the namespace : any further access 
-            # to the RPC object raises AttributeError.
-            self._rpc_obj = VoidRpcHandler()
-            self.log.info('IG: Missing XML-RPC handler %s' % (rpcns,))
-        # Run the optional initialization hook of the subclass.
-        try:
-            __init__ = self.do_init
-        except AttributeError:
-            pass
-        else:
-            __init__()
-
-def convert_req_date(when, fmt, req, xmlfmt=True):
-    r"""Convert a string to the corresponding datetime value using 
-    the specified format string.
-
-    @param when     the string to parse. If `None` the current time 
-                    is used instead.
-    @param fmt      format string understood by `datetime.strptime`
-    @param req      the request object providing the time zone 
-                    (`req.tz`) attached to the result
-    @param xmlfmt   if true the result is wrapped in an instance 
-                    of `xmlrpclib.DateTime`
-    @return         the datetime value
-    @throw GVizBadRequestError  if the conversion fails.
-    """
-    try:
-        if when is not None:
-            when = datetime.strptime(when, fmt)
-            when = when.replace(tzinfo=req.tz)
-        else:
-            when = datetime.now(tz=req.tz)
-        if xmlfmt:
-            when = DateTime(when)
-        return when
-    except Exception:
-        # KeyboardInterrupt and SystemExit are not reported as bad 
-        # requests anymore (a bare `except` used to catch them too).
-        raise GVizBadRequestError("Invalid datetime value or wrong date format.")
-
-def rpc_to_datetime(DT, req):
-    r"""Parse the text stored in `DT.value` (`xmlrpclib.DateTime` 
-    layout, i.e. `%Y%m%dT%H:%M:%S`) and return the resulting 
-    datetime object after attaching the time zone of request `req`.
-    """
-    naive = datetime.strptime(DT.value, '%Y%m%dT%H:%M:%S')
-    localized = naive.replace(tzinfo=req.tz)
-    return localized
-
-def __insert_many_id(id, _tuple): 
-    r"""Return a new tuple consisting of `id` followed by the items 
-    in `_tuple`.
-    """
-    return (id,) + _tuple
-
-def __insert_value_id(id, value): 
-    r"""Return the pair `(id, value)`.
-    """
-    return (id, value)
-
-def map_with_id(req, ids, func, ins, *iterables):
-    r"""Invoke `func(req, id, *args)` for each item in `ids` and 
-    chain the results after combining every value returned with the 
-    id it belongs to.
-
-    @param req          the request object handed over to `func`
-    @param ids          the identifiers to process
-    @param func         callable returning an iterable for each id
-    @param ins          callable `ins(id, value)` building the items 
-                        of the resulting sequence
-    @param iterables    optional sequences providing, position by 
-                        position, the extra arguments for `func`
-    @return             an iterator over all the items built by `ins`
-    """
-    extra_args = izip(*iterables) if iterables else repeat(tuple())
-    tagged = [imap(ins, repeat(x), func(req, x, *args)) \
-                for x, args in izip(ids, extra_args)]
-    return chain(*tagged)
-
-def map_many_with_id(req, ids, func, *iterables):
-    r"""Shortcut for `map_with_id` combining values by means of 
-    `__insert_many_id`, i.e. the id is prepended to each tuple 
-    returned by `func`.
-    """
-    return map_with_id(req, ids, func, __insert_many_id, *iterables)
-
-def map_value_with_id(req, ids, func, *iterables):
-    r"""Shortcut for `map_with_id` combining values by means of 
-    `__insert_value_id`, i.e. each value returned by `func` is 
-    paired with its id.
-    """
-    return map_with_id(req, ids, func, __insert_value_id, *iterables)
-
-# `strftime` patterns used by `render_gviz_value` to display the 
-# values of date-like GViz types (the keys of this dictionary).
-DEFAULT_DATE_FORMATS = {
-    'date' : "%Y-%m-%d",
-    'datetime' : "%Y-%m-%d %H:%M:%S",
-    'timeofday' : "%H:%M:%S",
-  }
-
-def render_gviz_value(value, gviz_type, table, req_or_env):
-  r"""Build the string used to display a value in GViz data sources.
-
-  @param value        the value to render
-  @param gviz_type    the GViz type of the value
-  @param table        object whose `SingleValueToJS` method renders 
-                      the values of types other than the date-like 
-                      ones listed in `DEFAULT_DATE_FORMATS`
-  @param req_or_env   either a request object or an environment. 
-                      In the later case a dummy request is created.
-  @return             the string. Failures while formatting date-like 
-                      values are reported as `(Unknown: <error>)`.
-  """
-  req = req_or_env
-  if not isinstance(req, Request):
-    # Assume it's an instance of Environment
-    req = dummy_request(req_or_env)
-  if gviz_type not in DEFAULT_DATE_FORMATS:
-    return table.SingleValueToJS(value, gviz_type)
-  date_fmt_str = DEFAULT_DATE_FORMATS[gviz_type]
-  try:
-    if isinstance(value, DateTime):
-      value = rpc_to_datetime(value, req)
-    elif isinstance(value, int):
-      # Integers are handled as timestamps
-      value = datetime.fromtimestamp(int(value or 0), req.tz)
-    return value.strftime(date_fmt_str)
-  except Exception, exc:
-    return '(Unknown: %s)' % (exc,)
-
-# GViz column type for each supported class of values. It is used by 
-# `get_column_desc` to infer the type of a column out of the class 
-# of a sample value.
-TYPES_2_GVIZ = {
-            type(None): 'string',
-            str : 'string',
-            unicode : 'string',
-            long : 'number',
-            int : 'number',
-            datetime : 'datetime',
-            date : 'date', 
-            time : 'timeofday',
-            DateTime : 'datetime',
-            bool : 'boolean',
-          }
-
-def get_column_desc(cursor, infer=False):
-  r"""Retrieve a sequence of tuples (name, type) describing 
-  the columns present in the results provider by a cursor object 
-  after executing a database query.
-
-  @param cursor   the cursor. Nothing is generated if its 
-                  `description` is empty.
-  @param infer    if true the columns having no type code are given 
-                  the GViz type matching (see `TYPES_2_GVIZ`) the 
-                  class of the value found in a sample row.
-  @return         a generator yielding (name, type) pairs. Names 
-                  supplied as byte strings are decoded (UTF-8).
-
-  Note: The sample row is retrieved by calling `cursor.fetchmany(1)`, 
-        so it is consumed from the cursor.
-  """
-  row = None
-  if cursor.description:
-    for i, d in enumerate(cursor.description):
-      name, type_code = d[:2]
-      if isinstance(name, str):
-        name = unicode(name, 'utf-8')
-      if type_code is None and infer:
-        if row is None:
-          try:
-            row, = cursor.fetchmany(1)
-          except Exception:
-            # No sample row (e.g. empty result set) : use empty 
-            # strings. KeyboardInterrupt and SystemExit are not 
-            # hidden anymore (a bare `except` used to catch them).
-            row = ('',) * len(list(cursor.description))
-        type_code = TYPES_2_GVIZ.get(row[i].__class__)
-      yield name, type_code
-
-def rpc_opt_sigs(ret_type, fixed_types=None, *opt_types):
-  r"""Generate tuples describing the signatures of an XML-RPC method 
-  whose arguments can take values in a set of optional types or 
-  be missing in the method call.
-  """
-  if fixed_types is None:
-    fixed_types = ()
-  else:
-    fixed_types = tuple(fixed_types)
-  
-  new_sig = (ret_type,) + fixed_types
-  yield new_sig
-  old_gen = [new_sig]
-  
-  for arg_types in opt_types:
-    new_gen = []
-    for sig in old_gen:
-      for arg_type in arg_types:
-        new_sig = sig + (arg_type,)
-        yield new_sig
-        new_gen.append(new_sig)
-    old_gen = new_gen
-
-REQFIELDS_DESC = {
-      'datefmt'  : "The syntax of %(args)s field%(plural)s. Here you "
-                            "can embed the directives supported by "
-                            "`time.strftime` function. The default "
-                            "behavior is to accept the well known "
-                            "format `yyyy-mm-dd HH:MM:SS` which is "
-                            "actually written like this "
-                            "`%%Y-%%m-%%d %%H:%%M:%%S`.",
-    }
-
-REQFIELDS_DEFAULTS = {
-      'datefmt'  : "%Y-%m-%d %H:%M:%S"
-    }
-

trac-dev/gviz/tracgviz/util/__init__.py

+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+# Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+r"""Helper (abstract) classes used to implement custom data sources,
+formatters, and protocol handlers.
+
+Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+Licensed under the Apache License, Version 2.0 
+"""
+__author__ = 'Olemis Lang'
+
+__all__ = 'BaseGVizHandler', 'GVizXMLRPCAdapter', 'dummy_request', \
+          'convert_req_date', 'rpc_to_datetime', 'render_gviz_value', \
+          'get_column_desc', 'TYPES_2_GVIZ', 'get_column_desc', \
+          'rpc_opt_sigs', 'REQFIELDS_DESC', 'REQFIELDS_DEFAULTS'
+
+from trac.core import Component, ExtensionPoint, implements
+from trac.config import Option
+from trac.web.api import RequestDone, Request
+from trac.web.chrome import Chrome
+from trac.web.main import RequestDispatcher
+
+from tracrpc.api import XMLRPCSystem, Method
+
+from BaseHTTPServer import DEFAULT_ERROR_MESSAGE, BaseHTTPRequestHandler
+from datetime import datetime, date, time
+from itertools import takewhile, chain, imap, repeat, izip
+from xmlrpclib import DateTime
+
+from tracgviz.api import IGVizProtocolHandler, IGVizTableEncoder, \
+                IGVizDataProvider, IHashLibrary, GVizBadRequestError
+from tracgviz.testing.util import dummy_request
+
+__metaclass__ = type
+
+def send_response(req, status, response, mimetype='text/plain', \
+                    extra_headers=dict()):
+    r"""Send an HTTP response back to the caller.
+
+    @param req the request object used to write the response
+    @param status the HTTP status code
+    @param response the response body; its `len` is sent as the
+                    `Content-Length` header
+    @param mimetype value of the `Content-Type` header
+    @param extra_headers mapping (or anything accepted by `dict`)
+                    holding further headers, sent after the two
+                    standard ones
+    @raise RequestDone always, once the body has been written
+    """
+    req.send_response(status)
+    req.send_header('Content-Type', mimetype)
+    req.send_header('Content-Length', len(response))
+    # A copy is iterated, so the shared default dict is never mutated.
+    for k, v in dict(extra_headers).iteritems():
+        req.send_header(k, v)
+    req.end_headers()
+    req.write(response)
+    raise RequestDone()
+
+def send_std_error_response(req, status):
+    r"""Reply with the stock HTML error page for `status`.
+
+    The short and long descriptions are looked up in
+    `BaseHTTPRequestHandler.responses` (a `KeyError` is raised for
+    unknown codes) and rendered with `DEFAULT_ERROR_MESSAGE`. The
+    page is then delivered through `send_response`.
+    """
+    short_msg, long_msg = BaseHTTPRequestHandler.responses[status]
+    page = DEFAULT_ERROR_MESSAGE % {
+            'code': status,
+            'message': short_msg,
+            'explain': long_msg,
+        }
+    send_response(req, status, page, mimetype='text/html')
+
+class BaseGVizHandler(Component):
+    r"""This class encloses the functionality which might be present
+    in most versions of Google Visualization API. It can be reused by
+    specific subclasses implementing a specific protocol version
+    as defined by the Google Visualization API.
+
+    On initialization the available table encoders are grouped by
+    format id (see `_init_fmt`) and the hash library configured in
+    `[gviz] hash` is selected (see `_init_hash`).
+    """
+    abstract = True
+    implements(IGVizProtocolHandler)
+    encoders = ExtensionPoint(IGVizTableEncoder)
+    hashlibs = ExtensionPoint(IHashLibrary)
+    
+    hash_name = Option('gviz', 'hash', default=None, 
+                        doc="""The algorithm used to generate a hash """
+                            """of the data sent back to the client. This """
+                            """feature is defined by Google """
+                            """Visualization API since version 0.5 so as """
+                            """to optimize the request / response """
+                            """mechanism to make rational use of the """
+                            """available bandwith.""")
+    
+    def _init_hash(self):
+        r"""Setup the secure hash algorithm.
+
+        Among the registered hash libraries, the one reporting the
+        highest priority for the configured algorithm is stored in 
+        `self._hlib`, and a hash object created by it is stored in 
+        `self.hash_obj`. Both are `None` if no algorithm is 
+        configured or no library supports it.
+        """
+        hash_name = self.hash_name
+        self._hlib = self.hash_obj = None
+        self.log.debug("IG: Config hash method : '%s'", hash_name)
+        if hash_name:
+          pr = -1
+          for hlib in self.hashlibs:
+            self.log.debug("IG: Processing : %s", hlib)
+            try:
+              cur_pr, _ = hlib.get_hash_properties(hash_name)
+            except TypeError:
+              # Unpacking failed. Presumably the library returns 
+              # `None` for unsupported algorithms -- TODO confirm.
+              self.log.debug("IG: %s doesnt support '%s'", hlib, hash_name)
+            else:
+              if cur_pr > pr:
+                self._hlib = hlib
+                pr = cur_pr
+        if self._hlib is not None:
+          self.hash_obj = self._hlib.new_hash_obj(hash_name)
+          self.log.info("IG: Hash method '%s' lib '%s'", hash_name, self._hlib)
+        else:
+          self.log.info("IG: Hash method 'None'")
+    
+    @staticmethod
+    def fmt_supports_version(encoder, version):
+        r"""Return whether a data table encoder supports a specific
+        version of Google Visualization API.
+
+        `encoder.supported_versions()` is expected to yield pairs 
+        `(relation, version_tuple)`. Every pair has to hold, and each
+        is evaluated as `version_tuple <relation> version`.
+        """
+        
+        rels = {
+                '>' : tuple.__gt__,
+                '<' : tuple.__lt__,
+                '>=' : tuple.__ge__,
+                '<=' : tuple.__le__,
+                '==' : tuple.__eq__,
+                '!=' : lambda x, y: x != y,
+               }
+        
+        versions = encoder.supported_versions()
+        # NOTE(review): the encoder's version is the left operand of 
+        # the comparison -- confirm this is the intended direction.
+        return all(rels[r](v, version) for r, v in versions)
+    
+    def find_encoder(self, fmt_id, version, mime_type=None):
+        r"""Find an encoder able to convert a data table contents into
+        a specific format, maybe having a well-known content-type.
+        
+        @param fmt_id the output format id
+        @param version the particular protocol `version` of Google
+                    Visualization API in use.
+        @param mime_type if specified then a best match is made to 
+                    return an encoder returning the specific content
+                    type requested by the caller.
+                    
+        @return the best match made according to the available 
+                    encoders or `None` if no such encoder could be
+                    found. This encoder *must* support the requested
+                    format and protocol version, and *should*
+                    use the requested content-type, but the later
+                    assertion *is not compulsory*.
+        @raise KeyError if no encoder at all is registered for 
+                    `fmt_id`
+        """
+        # Every encoder is checked. Stopping at the first mismatch (as 
+        # `takewhile` does) would hide suitable encoders registered 
+        # after an unsuitable one.
+        candidates = (e for e in self._fmt[fmt_id] \
+                        if self.fmt_supports_version(e, version))
+        try:
+            first = candidates.next()
+        except StopIteration:
+            return None
+        if mime_type is None or first.get_content_type() == mime_type:
+            return first
+        for e in candidates:
+            if e.get_content_type() == mime_type:
+                return e
+        # No encoder produces the requested content type: fall back to 
+        # the first one supporting both format and version.
+        return first
+    
+    def _init_fmt(self):
+        """Arrange the available format encoders.
+
+        Builds `self._fmt`, a dict mapping each format id to the list
+        of encoders declaring it, in extension point order.
+        """
+        self._fmt = dict()
+        for e in self.encoders:
+            self._fmt.setdefault(e.get_format_id(), []).append(e)
+    
+    def __init__(self):
+        self._init_fmt()
+        self._init_hash()
+        
+    # TODO : Implement common features.
+
+class VoidRpcHandler:
+    r"""Stand-in used when no XML-RPC handler is available: every 
+    attribute lookup that is not otherwise resolved fails with an 
+    explanatory `AttributeError`.
+    """
+    def __getattr__(self, attrnm):
+        reason = "The requested XML-RPC handler cannot be found. " \
+                 "Either it doesn't exist or the component is disabled. " \
+                 "Contact your Trac administrator."
+        raise AttributeError(reason)
+
+#class Method(Method):
+#    r"""A faster XML-RPC method implementation since it returns 
+#    iterators instead of lists.
+#    """
+#    def __call__(self, req, args):
+#        req.perm.assert_permission(self.permission)
+#        result = self.callable(req, *args)
+#        # If result is null, return a zero
+#        if result is None:
+#            result = 0
+#        elif isinstance(result, dict):
+#            for key,val in result.iteritems():
+#                if isinstance(val, datetime.datetime):
+#                    result[key] = to_datetime(val)
+#            #pass
+#        elif not isinstance(result, basestring):
+#            # Try and convert result to a list
+#            try:
+#                result = (i for i in result)
+#            except TypeError:
+#                pass
+#        return (result,)
+
+class RPCHelperObject:
+    r"""A proxy class needed to assert the permissions handled by 
+    XMLRPCSystem, instead of calling the RPC method directly.
+
+    Each method published by the wrapped handler is exposed as an 
+    attribute named after the method without its namespace prefix. 
+    Any other attribute is delegated to the wrapped handler.
+    """
+    def __init__(self, rpc_obj):
+        r"""Wrap every entry of `rpc_obj.xmlrpc_methods()` in a 
+        `Method` object and index the wrappers by short name.
+        """
+        methods = (Method(rpc_obj, *mi) for mi in rpc_obj.xmlrpc_methods())
+        # Skip the namespace and the separator following it.
+        prefix_len = len(rpc_obj.xmlrpc_namespace()) + 1
+        
+        def method_wrapper(m):
+            # Calls go through `Method` rather than the raw callable.
+            # The first item of its result is returned (presumably 
+            # `Method` wraps the value in a 1-tuple -- TODO confirm).
+            wrapper = lambda req, *args: m(req, args)[0]
+            wrapper.__module__ = m.callable.__module__
+            wrapper.func_name = m.callable.__name__
+            return wrapper
+        self.__methods = dict([m.name[prefix_len:], method_wrapper(m)] \
+                                for m in methods)
+        rpc_obj.log.debug('IG: RPC methods %s', self.__methods)
+        self.__rpc_obj = rpc_obj
+    
+    def __getattr__(self, attrnm):
+        r"""Try to retrieve the XML-RPC method first. Otherwise return 
+        the attribute of the underlying XML-RPC object.
+        """
+        try:
+            return self.__methods[attrnm]
+        except KeyError:
+            return getattr(self.__rpc_obj, attrnm)
+
+class GVizXMLRPCAdapter(Component):
+    r"""Base class for components whose main purpose is to provide 
+    some data relying on an existing XML-RPC handler (i.e. a 
+    component implementing tracrpc.api.IXMLRPCHandler interface). 
+    The data source is meant to reuse the RPC provider namespace and
+    logic.
+    """
+    implements(IGVizDataProvider)
+    abstract = True
+    
+    def __init__(self):
+        r"""Assign the corresponding XML RPC handler to this data
+        source provider. 
+        
+        Note: Since Trac core system components hack the initializer,
+        further initialization steps needed by sub-classes should be
+        coded by overriding `do_init` method.
+        """
+        # Prefer the XML-RPC namespace if the subclass declares one, 
+        # otherwise fall back to the GViz namespace.
+        # NOTE(review): the value is joined with '.', so it is 
+        # presumably a sequence of path segments -- confirm.
+        try:
+            rpcns = '.'.join(self.xmlrpc_namespace())
+        except AttributeError:
+            rpcns = '.'.join(self.gviz_namespace())
+        self.log.debug('IG: RPC Namespace %s Ok', rpcns)
+        for rpc_provider in XMLRPCSystem(self.env).method_handlers:
+            # TODO : Implement a proper match for regex in gviz ns
+            if rpc_provider.xmlrpc_namespace() == rpcns:
+                # Substituted in order to reuse permissions asserted 
+                # by XMLRPCSystem.
+                # self._rpc_obj = rpc_provider
+                self._rpc_obj = RPCHelperObject(rpc_provider)
+                break
+        else:
+            # Loop finished without `break`: no handler matched.
+            self._rpc_obj = VoidRpcHandler()
+            self.log.info('IG: Missing XML-RPC handler %s' % (rpcns,))
+        # Optional hook for subclasses (see the note in the docstring).
+        try:
+            __init__ = self.do_init
+        except AttributeError:
+            pass
+        else:
+            __init__()
+
+def convert_req_date(when, fmt, req, xmlfmt=True):
+    r"""Convert a string to the corresponding datetime value using 
+    the specified format string.
+
+    @param when the date string, or `None` to use the current time
+    @param fmt the `strptime` format used to parse `when`
+    @param req the request; its `tz` attribute is attached to the 
+                result as time zone
+    @param xmlfmt if true the result is wrapped in an 
+                `xmlrpclib.DateTime` object
+    @return a `datetime` (or `DateTime`) object
+    @raise GVizBadRequestError if the value cannot be converted
+    """
+    try:
+        if when is not None:
+            when = datetime.strptime(when, fmt)
+            when = when.replace(tzinfo=req.tz)
+        else:
+            when = datetime.now(tz=req.tz)
+        if xmlfmt:
+            when = DateTime(when)
+        return when
+    except Exception:
+        # `Exception` rather than a bare `except` so that 
+        # KeyboardInterrupt and SystemExit are not turned into a 
+        # bad request error.
+        raise GVizBadRequestError("Invalid datetime value or wrong date format.")
+
+def rpc_to_datetime(DT, req):
+    r"""Build a `datetime` out of the `xmlrpclib.DateTime` value `DT`.
+
+    `DT.value` is parsed using the `%Y%m%dT%H:%M:%S` layout and the 
+    time zone of request `req` (i.e. `req.tz`) is attached to the 
+    result. No time zone conversion is performed.
+    """
+    naive = datetime.strptime(DT.value, '%Y%m%dT%H:%M:%S')
+    localized = naive.replace(tzinfo=req.tz)
+    return localized
+
+def __insert_many_id(id, _tuple): 
+    r"""Return a copy of `_tuple` having `id` as its first item.
+    """
+    return (id,) + _tuple
+
+def __insert_value_id(id, value): 
+    r"""Return the pair `(id, value)`.
+    """
+    return (id, value)
+
+def map_with_id(req, ids, func, ins, *iterables):
+    r"""Call `func(req, id, *args)` once per item in `ids` and chain 
+    the results, combining each produced item with its id.
+
+    The extra `args` for the n-th id are the n-th items of every 
+    iterable in `iterables` (none if no iterables are supplied). 
+    Each item produced by `func` is combined with the id by calling 
+    `ins(id, item)`. All calls to `func` take place immediately, but
+    the returned iterator applies `ins` lazily.
+    """
+    if not iterables:
+        extra_args = repeat(())
+    else:
+        extra_args = izip(*iterables)
+    tagged = []
+    for item_id, args in izip(ids, extra_args):
+        tagged.append(imap(ins, repeat(item_id), func(req, item_id, *args)))
+    return chain(*tagged)
+
+def map_many_with_id(req, ids, func, *iterables):
+    r"""Same as `map_with_id` for functions producing tuples: the id 
+    is prepended to each tuple.
+    """
+    return map_with_id(req, ids, func, __insert_many_id, *iterables)
+
+def map_value_with_id(req, ids, func, *iterables):
+    r"""Same as `map_with_id` for functions producing single values: 
+    each value is paired with the id as `(id, value)`.
+    """
+    return map_with_id(req, ids, func, __insert_value_id, *iterables)
+
+# `strftime` patterns used by `render_gviz_value` to display values 
+# of the time related GViz types. Other types are not listed here.
+DEFAULT_DATE_FORMATS = {
+    'date' : "%Y-%m-%d",
+    'datetime' : "%Y-%m-%d %H:%M:%S",
+    'timeofday' : "%H:%M:%S",
+  }
+
+def render_gviz_value(value, gviz_type, table, req_or_env):
+  r"""Return a string used to display the values inside GViz data 
+  sources.
+
+  Values of types listed in `DEFAULT_DATE_FORMATS` are formatted with
+  the matching `strftime` pattern. Anything else is delegated to 
+  `table.SingleValueToJS`. If a time related value cannot be 
+  formatted, the string `(Unknown: <error>)` is returned instead of 
+  raising.
+
+  `req_or_env` may be a `Request`; otherwise it is assumed to be an 
+  environment and a dummy request is built from it.
+  """
+  if isinstance(req_or_env, Request):
+    req = req_or_env
+  else:
+    # Assume it's an instance of Environment
+    req = dummy_request(req_or_env)
+  try:
+    date_fmt_str = DEFAULT_DATE_FORMATS[gviz_type]
+  except KeyError:
+    # Not a time related type
+    return table.SingleValueToJS(value, gviz_type)
+  else:
+    try:
+      if isinstance(value, DateTime):
+        value = rpc_to_datetime(value, req)
+      elif isinstance(value, int):
+        # Integers are interpreted as POSIX timestamps in the time 
+        # zone of the request.
+        value = datetime.fromtimestamp(int(value or 0), req.tz)
+      return value.strftime(date_fmt_str)
+    except Exception, exc:
+      return '(Unknown: %s)' % (exc,)
+
+# Maps Python types to GViz type names. `get_column_desc` looks up the
+# exact class of a sample value here when inferring column types.
+TYPES_2_GVIZ = {
+            type(None): 'string',
+            str : 'string',
+            unicode : 'string',
+            long : 'number',
+            int : 'number',
+            datetime : 'datetime',
+            date : 'date', 
+            time : 'timeofday',
+            DateTime : 'datetime',
+            bool : 'boolean',
+          }
+
+def get_column_desc(cursor, infer=False):
+  r"""Retrieve a sequence of tuples (name, type) describing 
+  the columns present in the results provided by a cursor object 
+  after executing a database query.
+
+  Byte string names are decoded as UTF-8. If `infer` is true, the
+  type of columns whose type code is missing is inferred from the 
+  class of the value found in the first row (see `TYPES_2_GVIZ`). 
+  Nothing is generated if the cursor has no description.
+
+  NOTE(review): inference fetches one row from `cursor`. Presumably
+  callers do not need that row afterwards -- confirm.
+  """
+  row = None
+  if cursor.description:
+    for i, d in enumerate(cursor.description):
+      name, type_code = d[:2]
+      if isinstance(name, str):
+        name = unicode(name, 'utf-8')
+      if type_code is None and infer:
+        if row is None:
+          # The sample row is fetched once, and only if needed.
+          try:
+            row, = cursor.fetchmany(1)
+          except Exception:
+            # Best effort (e.g. empty result set): pretend all 
+            # values are strings. `Exception` rather than a bare 
+            # `except` so that KeyboardInterrupt and SystemExit
+            # are not swallowed.
+            row = ('',) * len(list(cursor.description))
+        type_code = TYPES_2_GVIZ.get(row[i].__class__)
+      yield name, type_code
+
+def rpc_opt_sigs(ret_type, fixed_types=None, *opt_types):
+  r"""Enumerate the signatures of an XML-RPC method accepting 
+  trailing optional arguments.
+
+  The first signature generated is `(ret_type,)` followed by 
+  `fixed_types`. Then, for each item in `opt_types` (a collection 
+  holding the types accepted by the next optional argument), every
+  signature generated in the previous step is extended with each of 
+  those types.
+  """
+  base = (ret_type,) + (() if fixed_types is None else tuple(fixed_types))
+  yield base
+
+  frontier = [base]
+  for choices in opt_types:
+    grown = []
+    for prefix in frontier:
+      for choice in choices:
+        sig = prefix + (choice,)
+        yield sig
+        grown.append(sig)
+    frontier = grown
+
+# Description templates for well known request fields. The text 
+# expects `args` and `plural` keys when formatted with `%`, hence the
+# doubled percent signs in the sample format.
+REQFIELDS_DESC = {
+      'datefmt'  : "The syntax of %(args)s field%(plural)s. Here you "
+                            "can embed the directives supported by "
+                            "`time.strftime` function. The default "
+                            "behavior is to accept the well known "
+                            "format `yyyy-mm-dd HH:MM:SS` which is "
+                            "actually written like this "
+                            "`%%Y-%%m-%%d %%H:%%M:%%S`.",
+    }
+
+# Default values for the request fields described above.
+REQFIELDS_DEFAULTS = {
+      'datefmt'  : "%Y-%m-%d %H:%M:%S"
+    }
+

trac-dev/gviz/tracgviz/util/parsing.py

+#!/usr/bin/env python
+
+# Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+r"""Generic parsing algorithms.
+
+Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
+Licensed under the Apache License, Version 2.0 
+"""
+__author__ = 'Olemis Lang'
+
+# Must be a sequence of names: a bare string would be iterated 
+# character by character by `from ... import *`.
+__all__ = ('OperatorPrecedenceParser',)
+__metaclass__ = type
+
+from itertools import ifilter
+import logging
+from pygments.token import *
+
+#------------------------------------------------------
+#   Operator precedence grammars
+#------------------------------------------------------
+
+class OperatorPrecedenceParser:
+  r"""
+  Shift-reduce parser for operator precedence grammars based on precedence
+  matrix.
+
+  Instances are configured by means of three attributes:
+
+  - `start_state` : default start symbol used by `parse`.
+  - `precedence` : dictionary mapping pairs of tokens
+    `((last_type, last_value), (type, value))` to one of the 
+    relationship tokens `MorePrecedence`, `SamePrecedence` or
+    `LessPrecedence`. `Any` may be used in place of either value.
+  - `productions_tree` : nested dictionaries keyed by 
+    `(type, value)` pairs (value may be `Any`), matched against the 
+    pushdown list from its top downwards. The production ID is 
+    stored under `EndMarker` key.
+  """
+
+  MorePrecedence = Token.Grammar.Relationship.MorePrecedence
+  SamePrecedence = Token.Grammar.Relationship.SamePrecedence
+  LessPrecedence = Token.Grammar.Relationship.LessPrecedence
+
+  def __init__(self):
+    r"""Initialize with empty precedence table and skeletal grammar.
+    """
+    self.start_state = None
+    self.precedence = {}
+    self.productions_tree = {}
+
+  def parse(self, stream, on_reduce, start_state=None):
+    r"""Parse a `stream` of tokens.
+
+    :param stream:      Iterator yielding `(token type, value)` pairs.
+    :param on_reduce:   Callable object (or `None`) invoked when a 
+                        reduction is performed according to some 
+                        grammar production.
+                        It receives production ID as first parameter.
+                        Subsequent positional arguments will be the
+                        `(token type, value)` pairs matched for each
+                        symbol in target production. It may return a 
+                        value. It will be propagated in the form of a 
+                        `NonTerminal` token and will be available in 
+                        subsequent reductions involving that token.
+    :param start_state: The name of a non-terminal symbol. This parameter 
+                        allows for parsing a subset of the target grammar.
+                        If missing parsing will start with topmost
+                        grammar state.
+    :return:            the last object returned by on_reduce callback
+    :raise SyntaxError: if invalid syntax is detected
+    :raise InvalidParserConfiguration: if no suitable start state is found
+    """
+    if start_state is None:
+      start_state = self.start_state
+    if start_state is None:
+      raise InvalidParserConfiguration(self, "Missing start state")
+    if on_reduce is None:
+      on_reduce = lambda *args: None
+    # Auxiliary variables
+    SHIFT_PREC = (self.SamePrecedence, self.LessPrecedence)
+    REDUCE_PREC = (self.MorePrecedence,)
+    # Initial state
+    pushdown_list = [(EndMarker, start_state)]
+    last_tkn, last_val = EndMarker, start_state
+    tkn = None        # Force reading next char from stream
+    while True:
+      # Precedence is only defined between terminals. If a 
+      # non-terminal is on top, compare against the symbol below it.
+      is_last_nt = last_tkn is NonTerminal
+      if is_last_nt:
+        last_tkn, last_val = pushdown_list[-2]
+      if tkn is None:
+        try:
+          tkn, val = stream.next()
+        except StopIteration:
+          tkn, val = (EndMarker, start_state)
+      if is_last_nt and last_tkn is EndMarker and tkn is EndMarker:
+        # Accept token stream !!!
+        return pushdown_list[-1][1]
+      # Exact values are tried before wildcards
+      candidates = ( ((last_tkn, lv) , (tkn, v)) \
+          for lv in (last_val, Any) for v in (val, Any) )
+      try:
+        precedence = ifilter(None, 
+            (self.precedence.get(c) for c in candidates)).next()
+      except StopIteration:
+        # Only the message is supplied: builtin SyntaxError would
+        # interpret a second argument as a location tuple of the form
+        # (filename, lineno, offset, text).
+        if tkn is EndMarker:
+          raise SyntaxError("Unexpected EOL")
+        else:
+          raise SyntaxError("Unexpected token %s" % (val,))
+      else:
+        logging.debug('Precedence %s,%s |--> %s,%s: %s', 
+            last_tkn, last_val, tkn, val, precedence)
+      if precedence in SHIFT_PREC:
+        # Shift
+        pushdown_list.append((tkn, val))
+        last_tkn, last_val = tkn, val
+        tkn = None        # Force reading next char from stream
+      elif precedence in REDUCE_PREC:
+        # Reduce. Current token is kept to be compared once again.
+        try:
+          prod_id, args = self._match_production(pushdown_list)
+        except LookupError:
+          # Values of non-terminals might not be strings
+          raise SyntaxError('Unexpected token %s' % (last_val,))
+        pushdown_list.append((NonTerminal, on_reduce(prod_id, *args)))
+        last_tkn, last_val = pushdown_list[-1]
+      else:
+        raise InvalidParserConfiguration(self, "Invalid precedence " + 
+            str(precedence))
+
+  def _match_token(self, reftkn, tkn):
+    r"""Match a token `tkn` against expected token `reftkn`.
+
+    `reftkn` is either `Any` or a pair `(type, value)` whose items 
+    may be `Any` as well. Types are compared by identity, and values
+    by equality.
+    """
+    if reftkn is Any:
+      return True
+    else:
+      _reftype, _refval = reftkn
+      _type, _val = tkn
+      return (_reftype is Any or _reftype is _type) and \
+          (_refval is Any or _refval == _val)
+
+  def _match_production(self, pushdown_list):
+    r"""Match production on reduce
+
+    The productions tree is walked using the symbols found in 
+    `pushdown_list`, starting at its top, until a node holding a 
+    production ID is reached. Matched symbols are removed from
+    `pushdown_list`.
+
+    :return:            the pair `(production ID, matched symbols)`
+    :raise LookupError: if the pushdown list matches no production
+    """
+    idx = 0
+    choices = self.productions_tree
+    while EndMarker not in choices:
+      idx -= 1
+      # IndexError (a LookupError) is raised if the list is exhausted
+      last_tkn, last_val = pushdown_list[idx]
+      logging.debug('Last token %s : %s', last_tkn, last_val)
+      choices = choices.get((last_tkn, last_val)) or choices.get((last_tkn, Any))
+      if choices is None:
+        raise LookupError("Could not match grammar against pushdown list")
+    # Loop exit guarantees that `EndMarker` is in `choices`
+    args = pushdown_list[idx:]
+    for _ in xrange(idx, 0):
+      pushdown_list.pop()
+    return choices[EndMarker], args
+
+#------------------------------------------------------
+#   Helper functions and objects
+#------------------------------------------------------
+
+# Intermediate tokens representing non-terminal grammar symbols
+NonTerminal = Token.Grammar.NonTerminal
+# Wildcard token used in token matching.
+Any         = Token.Grammar.Any
+# Token used to delimit segments in a token stream
+EndMarker   = Token.Grammar.EndMarker
+
+#------------------------------------------------------
+#   Exception classes
+#------------------------------------------------------
+
+class ParserError(RuntimeError):
+  r"""Error condition detected at parsing time.
+
+  The parser reporting the error is available in `parser` attribute.
+  """
+  def __init__(self, parser, *args):
+    r"""Initialize exception object with parser and arguments
+    """
+    self.parser = parser
+    RuntimeError.__init__(self, *args)
+
+  def __unicode__(self):
+    # Both placeholders need a conversion type: a lone trailing `%` 
+    # is an incomplete format and makes the operator fail.
+    return "Parser %s failed: %s" % (self.parser.__class__.__name__,
+        RuntimeError.__unicode__(self))
+
+class InvalidParserConfiguration(ParserError):
+  r"""Wrong parser configuration detected (e.g. missing start state 
+  or unknown precedence relationship).
+  """
+
+#------------------------------------------------------
+#   Global Testing
+#------------------------------------------------------
+
+from tracgviz.testing.test_parsing import __test__, SAMPLE_GRAMMAR_PRECEDENCE, \
+    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM
+
+def test_suite():
+  r"""Load the unit tests and doctests found in this module.
+
+  Doctests are run with a `parse` helper function in their globals 
+  and ELLIPSIS, NORMALIZE_WHITESPACE and REPORT_UDIFF options enabled.
+
+  NOTE(review): `parse` helper looks like it was written for the 
+  GVizQL parser module. `GVizQLParser`, `izip`, `TEST_DATA_GVIZSCHEMA`
+  and `TEST_DATA_GVIZDATA` are not imported in the visible part of 
+  this module -- confirm before relying on it.
+  """
+  from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, REPORT_UDIFF
+  from unittest import defaultTestLoader
+  from string import whitespace
+  import sys
+
+  from tracgviz.api import GVizInvalidQuery
+  from tracgviz.testing.dutest import MultiTestLoader, DocTestLoader
+
+  # logging.basicConfig(level=logging.DEBUG)
+
+  def parse(expr, *attrs, **kwds):
+    # Print the tokens found in `expr`. If `attrs` are specified,
+    # then parse the expression, print those attributes of the
+    # result, and finally print the outcome of applying it to the 
+    # test data set.
+
+    # Test lexical analysis
+    print "*****\n* Tokens\n*****"
+    newline = False
+    p = GVizQLParser()
+    p.noisy = False
+    for tkn, val in p.get_tokens(expr):
+      if tkn is not Whitespace:
+        if newline:
+          print
+        else:
+          newline = True
+        print tkn, val,
+    if not (tkn is Whitespace and val == '\n'):  # Check for EOL token.
+      raise AssertionError('Expected : %s , %r \n\nGot : %s , %r' % \
+                                  (Whitespace, u'\n', tkn, val))
+    if attrs :
+      # Test parsing and compilation
+      print
+      print "*****\n* Parsing\n*****",
+      from api import GVizUnsupportedQueryOp
+      try:
+        expr = p.parse(expr)
+      except GVizUnsupportedQueryOp :
+        print '\nNotSupported  :(',
+      except GVizInvalidQuery, exc :
+        print 
+        print exc.message
+      else:
+        for attrnm in attrs :
+          print
+          print getattr(expr, attrnm),
+        # Test evaluation and transformations on result sets
+        print
+        print "*****\n* Result\n*****",
+        try:
+          schema, data = expr.transform(TEST_DATA_GVIZSCHEMA, \
+                                        TEST_DATA_GVIZDATA)
+        except Exception, exc:
+          print
+          print exc.__class__.__name__, ' : ', exc.message,
+        else:
+          print
+          print "= Columns =",
+          # The schema is either a dict (name -> remaining fields) or
+          # a sequence of column descriptions.
+          if isinstance(schema, dict):
+            def iter_schema():
+              return ((k,) + v for k, v in schema.iteritems())
+          else:
+            iter_schema = lambda : schema
+          for col in iter_schema() :
+            print
+            print col[0], col[1],
+            # Third field is optional
+            try :
+              print col[2],
+            except IndexError :
+              pass
+          print 
+          for row in data :
+            print "= Row ="
+            for val, col in izip(row, iter_schema()):
+              print '  ', col[0], '=', val
+
+  l = MultiTestLoader([defaultTestLoader, \
+                        DocTestLoader(
+                            extraglobs=dict(parse=parse),
+                            optionflags=ELLIPSIS \
+                                        | NORMALIZE_WHITESPACE \
+                                        | REPORT_UDIFF \
+                          )])
+  return l.loadTestsFromModule(sys.modules[__name__])
+