Commits

Olemis Lang committed 620a50e

TracGViz : Explicit typecasts in XSV to GViz converter

  • Participants
  • Parent commits 9411fc5
  • Branches providers

Comments (0)

Files changed (4)

File trac-dev/gviz/TODO

 
 X Error message for GViz content providers when guessed mimetype is still `None`
 
+X Move `dutest` to `tests_require` rather than `install_requires`.
+
+X Naive typecasts in CSV to GViz MIME converter.
+
 ~ Write tests for data sources included in TracGViz 1.3.4 that are 
   not about version control.
 
 - Test error message for GViz content providers when guessed mimetype is 
   still `None`
 
-- Move `dutest` to `tests_require` rather than `install_requires`.
+- Typecasts in CSV to GViz MIME converter powered by PyICU.
 
-- Typecasts in CSV to GViz MIME converter.
+- Test typecasts in CSV to GViz MIME converter.
 
 - Use FSL to select files in GViz providers ?
 

File trac-dev/gviz/tracgviz/attachment.py

     @gviz_param('id', "an `id`, which uniquely identifies a resource " \
                       "within its realm (e.g. wiki page name or ticket ID).")
     @gviz_param('name', "the attachment file name.")
+    @gviz_param('colsdef', "Customize column types using 'type:format' "
+                           "expressions.")
     def get_data(self, req, tq, realm, id, name, **tqx):
         r"""Retrieve attachment contents if possible (i.e. if there 
         is a MIME converter available and able to transform the input 

File trac-dev/gviz/tracgviz/util/__init__.py

           'get_column_desc', 'TYPES_2_GVIZ', 'get_column_desc', \
           'rpc_opt_sigs', 'REQFIELDS_DESC', 'REQFIELDS_DEFAULTS', \
           'is_dyn_schema', 'compile_pattern', 'ObjectIntercept', \
-          'send_response', 'iter_table_data', 'StringIO', 'UTF8Recoder'
+          'send_response', 'iter_table_data', 'StringIO', 'UTF8Recoder', \
+          'GVizTypedRows'
 
 from trac.core import Component, ExtensionPoint, implements, TracError
 from trac.config import Option
 from trac.mimeview.api import Mimeview
 from trac.util.text import to_utf8
+from trac.util.translation import _
 from trac.web.api import RequestDone, Request
 from trac.web.chrome import Chrome
 from trac.web.main import RequestDispatcher
     implements(IGVizProtocolHandler)
     encoders = ExtensionPoint(IGVizTableEncoder)
     hashlibs = ExtensionPoint(IHashLibrary)
-    
+
     hash_name = Option('gviz', 'hash', default=None, 
                         doc="""The algorithm used to generate a hash """
                             """of the data sent back to the client. This """
                             """to optimize the request / response """
                             """mechanism to make rational use of the """
                             """available bandwith.""")
-    
+
     def _init_hash(self):
         r"""Setup the secure hash algorithm.
         """
                               hash_name, self._hlib, str(exc))
         else:
           self.log.info("IG: Hash method 'None'")
-    
+
     @staticmethod
     def fmt_supports_version(encoder, version):
         r"""Return whether a data table encoder supports a specific
         version of Google Visualization API."""
-        
+
         rels = {
                 '>' : tuple.__gt__,
                 '<' : tuple.__lt__,
                 '==' : tuple.__eq__,
                 '!=' : lambda x, y: x != y,
                }
-        
+
         versions = encoder.supported_versions()
         return all(rels[r](v, version) for r, v in versions)
-    
+
     def find_encoder(self, fmt_id, version, mime_type=None):
         r"""Find an encoder able to convert a data table contents into
         a specific format, maybe having a well-known content-type.
-        
+
         @param fmt_id the output format id
         @param version the particular protocol `version` of Google
                     Visualization API in use.
         @param mime_type if specified then a best match is made to 
                     return an encoder returning the specific content
                     type requested by the caller.
-                    
+
         @return the best match made according to the available 
                     encoders or `None` if no such encoder could be
                     found. This encoder *must* support the requested
                             encoders).next()
                 except StopIteration:
                     return first
-    
+
     def _init_fmt(self):
         """Arrange the available format encoders.
         """
         self._fmt = dict()
         for e in self.encoders:
             self._fmt.setdefault(e.get_format_id(), []).append(e)
-    
+
     def __init__(self):
         self._init_fmt()
         self._init_hash()
-        
+
     # TODO : Implement common features.
 
 class VoidRpcHandler:
     def __init__(self, rpc_obj):
         methods = (Method(rpc_obj, *mi) for mi in rpc_obj.xmlrpc_methods())
         prefix_len = len(rpc_obj.xmlrpc_namespace()) + 1
-        
+
         def method_wrapper(m):
             wrapper = lambda req, *args: m(req, args)[0]
             wrapper.__module__ = m.callable.__module__
                                 for m in methods)
         rpc_obj.log.debug('IG: RPC methods %s', self.__methods)
         self.__rpc_obj = rpc_obj
-    
+
     def __getattr__(self, attrnm):
         r"""Try to retrieve the XML-RPC method first. Otherwise return 
         the attribute of the underlying XML-RPC object.
     """
     implements(IGVizDataProvider)
     abstract = True
-    
+
     def __init__(self):
         r"""Assign the corresponding XML RPC handler to this data
         source provider. 
-        
+
         Note: Since Trac core system components hack the initializer,
         further initialization steps needed by sub-classes should be
         coded by overriding `do_init` method.
             pass
         else:
             __init__()
-        
+
         if self._is_rpc_datetime :    # Assume datetime as default
           self._rpc_date_impl = lambda dt, req : dt
         else :
           self._rpc_date_impl = lambda dt, req : DateTime(dt)
-    
+
     @property
     def _is_rpc_datetime(self):
         r"""Determine whether the underlying Rpc implementation 
         manipulates instances of `xmlrpclib.DateTime` or 
         `datetime.datetime` (>=1.0.6). 
-        
+
         Immediately after this change was committed, `RPC` entry was 
         added to the system information. So we check for the later 
         condition as a workaround.
           self.__dict__['_is_rpc_datetime'] = False
           return False
 
+
+class GVizSimpleTypedRows:
+    r"""Iterate over a sequence of rows (e.g. those produced by a CSV 
+    reader) and apply declared typecasts upon input values.
+
+    The signature is either a string of comma-separated `type:format` 
+    expressions (see `parse_signature`) or a list of ternary tuples of 
+    the form (`type`, `format`, `converter`). Values found in columns 
+    beyond the end of the signature are returned untouched.
+    """
+    def __init__(self, seq, signature=None):
+        r"""Initialize.
+
+        @param seq          iterable yielding rows (i.e. sequences of 
+                            values).
+        @param signature    column types specification. If it is a 
+                            string then it is parsed by 
+                            `parse_signature`. If missing then no 
+                            typecast is applied.
+        """
+        if isinstance(signature, basestring):
+            signature = self.parse_signature(signature)
+        self.signature = signature
+        self.seq = seq
+
+    def __iter__(self):
+        # Signature is optional, so `None` means no typecasts at all
+        converters = [x[2] for x in self.signature or ()]
+        DEFAULT_CONV = lambda x: x
+        for row in self.seq:
+            # Columns not covered by the signature keep their raw value
+            yield tuple(c(v) for v,c in izip(row, chain(converters, 
+                                                        repeat(DEFAULT_CONV))))
+
+    # Protected methods
+
+    @classmethod
+    def _sigitem(cls, (dtype, dformat)):
+        r"""Build the converter for a single column.
+
+        @param dtype        column type name. One of `string`, `number`, 
+                            `boolean`, `date`, `timeofday`, `datetime`.
+        @param dformat      `strptime` format used by `date`, 
+                            `timeofday` and `datetime`. Falls back to 
+                            a default per type if empty or `None`.
+        @return             a ternary tuple of the form 
+                            (`type`, `format`, `converter`). The 
+                            converter raises `GVizBadRequestError` on 
+                            values it cannot handle. For unknown types 
+                            it fails for every value.
+        """
+        def error(v):
+            raise GVizBadRequestError(_("Wrong value %(v)s for %(t)s %(f)s",
+                                        v=v, t=dtype, f=dformat or ''))
+        if dtype == 'string':
+            conv = lambda v: v
+        elif dtype == 'number':
+            def conv(v):
+                if not v:
+                    return None
+                # Prefer integers, fall back to floating point numbers
+                try:
+                    return int(v)
+                except ValueError:
+                    try:
+                        return float(v)
+                    except ValueError:
+                        error(v)
+        elif dtype == 'boolean':
+            def conv(v):
+                try:
+                    return {'true' : True, 'false' : False, '' : None}[v]
+                except KeyError:
+                    error(v)
+        elif dtype == 'date':
+            dformat = dformat or '%Y-%m-%d'
+            def conv(v):
+                if not v:
+                    return None
+                try:
+                    return datetime.strptime(v, dformat).date()
+                except ValueError:
+                    error(v)
+        elif dtype == 'timeofday':
+            dformat = dformat or '%H:%M:%S'
+            def conv(v):
+                if not v:
+                    return None
+                try:
+                    return datetime.strptime(v, dformat).time()
+                except ValueError:
+                    error(v)
+        elif dtype == 'datetime':
+            dformat = dformat or '%Y-%m-%d %H:%M:%S'
+            def conv(v):
+                if not v:
+                    return None
+                try:
+                    # Keep the whole timestamp. Truncating it to a date 
+                    # would drop the time of day declared by the column.
+                    return datetime.strptime(v, dformat)
+                except ValueError:
+                    error(v)
+        else:
+            conv = error
+
+        return dtype, dformat, conv
+
+    @classmethod
+    def parse_signature(cls, signature):
+        """Transform readable signature into executable signature.
+
+        @param signature    comma-separated list of `type:format` 
+                            expressions. Format is optional and type 
+                            defaults to `string` if empty.
+        @return             list of ternary tuples of the form 
+                            (`type`, `format`, `converter`).
+        """
+        # Split on the first colon only since formats often contain 
+        # colons themselves (e.g. `timeofday:%H:%M`)
+        tokens = (x.split(':', 1) for x in signature.split(','))
+        norm_tokens = ((x[0].strip() or 'string', x[1].strip()) if len(x) == 2 
+                       else (x[0].strip() or 'string', None) for x in tokens)
+        return [cls._sigitem(x) for x in norm_tokens]
+
+    def _iter_rows(self):
+        raise NotImplementedError
+
+
 class GVizContentProvider(Component):
     r"""Base class useful to implement data sources that convert 
     contents stored in a source file or buffer in order to feed GViz 
     """
     abstract = True
     implements(IGVizDataProvider)
-    
+
+    ROW_TYPECAST = GVizSimpleTypedRows
+
     # IGVizDataProvider methods
     def get_data_schema(self, req):
         r"""Provide the schema used to populate GViz data tables out 
                                   mimetype)
           raise NotImplementedError("Impossible to load data from `%s`" \
                                         % (mimetype,))
+        self.log.debug('[GViz] Cols before %s', cols)
+        # Force specified column types
+        colsdef = req.args.get('colsdef')
+        if colsdef:
+            parsed_cols = self.ROW_TYPECAST.parse_signature(colsdef)
+            for i, pc in enumerate(parsed_cols):
+                if i >= len(cols):
+                    break
+                c = list(cols[i])
+                c[1] = str(pc[0])
+                cols[i] = tuple(c)
+            data = self.ROW_TYPECAST(data, parsed_cols)
+
         req.args['_rawdata'] = data
+        self.log.debug('[GViz] Cols after %s', cols)
         return cols
-    
+
     def get_data(self, req, tq, **tqx):
         return req.args.get('_rawdata') or []
 
     # API methods
     def get_input_contents(self):
         r"""Retrieve the source used to feed the data table.
-        
+
         @return     a binary tuple of the form (`content`, `mimetype`) 
                     where `contents` is a file-like object or a 
                     buffer (string, unicode, ...) and `mimetype` is 
                     conversion.
         """
         raise NotImplementedError("Unknown source file or buffer")
-    
+
     def guess_mimetype(self, fd, fnm):
         r"""Try to guess the MIME type of the input file.
-        
+
         @param fd       input file object
         @param fnm      input file name
         @return         a ternary tuple of the form 
                         (`content`, `mimetype`, `rewinded`) where 
-                        
+
                         `content`   the input file object (i.e. `fd`) 
                                     if the file pointer could be 
                                     positioned at the beginning
     fixed_types = ()
   else:
     fixed_types = tuple(fixed_types)
-  
+
   new_sig = (ret_type,) + fixed_types
   yield new_sig
   old_gen = [new_sig]
-  
+
   for arg_types in opt_types:
     new_gen = []
     for sig in old_gen:
 def is_dyn_schema(provider):
   r"""Determine whether the schema defined by `provider` is static 
   or dynamic (i.e may change at run-time).
-  
+
   @param provider     an instance of `IGVizDataProvider` interface.
   @return             `True` if the schema may change at run-time
                       `False` otherwise (i.e. schema is static).
   r"""Iterate over the different rows present in a data table.
   Data in each row will be retrieved in the order determined by 
   the table's columns.
-  
+
   @param sort_keys    list of keys to sort by. For further details 
                       please read docstrings for 
                       `gviz_api.DataTable._PreparedData`.
   for row in table._PreparedData(sort_keys):
       yield tuple(row.get(col) for col in colnms)
 
+
 #--------------------------------------------------
 #   Intercepting attribute access
 #--------------------------------------------------
   """
   def __init__(self, obj, basecls):
     r"""Initialize.
-      
+
     @param obj        the real subject.
     @param basecls    replace reference to `self` before returning 
                       methods of this class.
 
 class IoIntercept(ObjectIntercept):
   r"""Write the contents of the HTTP response to a stream.
-  
+
   The following methods may be overridden :
     - raise_status_failed    :  Raise an exception if `HTTP OK` 
                                 (i.e. `200`) is not the status code 
   """
   def __init__(self, reqi, strm):
     r"""Initialize.
-    
+
     @param reqi         a request or intercept object.
     @param strm         the stream used to gather the HTTP response.
     """
   def _start_response(self, status, headers):
     r"""Ensure that the response code is `HTTP OK` (i.e. `200`).
     Start writing to the stream object.
-    
+
     Parameters are ignored.
     """
     if status.lower() != '200 ok':
 class RedirectIntercept(ObjectIntercept):
   r"""Redirect the request so that it be processed by another 
   request handler.
-  
+
   The following methods may be overridden :
     - raise_status_failed    :  Raise an exception if `HTTP OK` 
                                 (i.e. `200`) is not the status code 
     return ObjectIntercept.__getattr__(self, attrnm)
   def __init__(self, reqi, env, **params):
     r"""Initialize.
-    
+
     @param reqi         a request or intercept object.
     @param uri_suffix   the suffix of the destination URI (i.e. the 
                         part of it that's inside the web 
     self.path_info = uobj.path
     self.args = parse_qs(uobj.query)
     self.args.update(self.__params)
-    
+
     # Needed because Trac built-in protection against CSRF attacks 
     # is based on validating form tokens. If not set then form 
     # submissions (e.g. previews) will fail.
     self.args['__FORM_TOKEN'] = self._target.args.get('__FORM_TOKEN')
-    
+
     try:
       RequestDispatcher(self.__env).dispatch(self)
     except (RequestDone, TracError):

File trac-dev/gviz/tracgviz/xsv.py

 """
 __author__ = 'Olemis Lang'
 
+from csv import excel, QUOTE_MINIMAL, reader, writer
+from datetime import datetime, date, time
+from itertools import chain, repeat, izip
+
 from trac.core import Component, implements
 from trac.mimeview.api import IContentConverter, Mimeview
 from trac.util.translation import _
 
-from csv import writer, QUOTE_MINIMAL, reader
-from itertools import izip
-
 from tracgviz.api import IGVizTableEncoder
 from tracgviz.util import render_gviz_value, iter_table_data, StringIO, UTF8Recoder
 
+__metaclass__ = type
 __all__ = 'GVizCSVEncoder', 'GVizTSVEncoder', 'GvizXSVConverter'
 
 #--------------------------------------------------