Commits

Nate Coraor committed 1ae95b3 Merge

Merge next-stable to stable.

  • Parent commits cd6fec2, 1e7ce74
  • Branches stable
  • Tags release_2013.08.12

Files changed (340)

+[run]
+branch = True
+include = lib/galaxy/*
 job_conf.xml
 data_manager_conf.xml
 shed_data_manager_conf.xml
-visualizations_conf.xml
+config/visualizations/*.xml
 static/welcome.html.*
 static/welcome.html
 
 # Test output
 run_functional_tests.html
 test/tool_shed/tmp/*
+.coverage
+htmlcov
+run_unit_tests.html
 
 # Project files
 *.kpf
 
 HOW TO START
 ============
-Galaxy requires Python 2.5, 2.6 or 2.7. To check your python version, run:
+Galaxy requires Python 2.6 or 2.7. To check your python version, run:
 
 % python -V
 Python 2.7.3

File buildbot_setup.sh

 "
 
 SAMPLES="
+tool_conf.xml.sample
 datatypes_conf.xml.sample
 universe_wsgi.ini.sample
 tool_data_table_conf.xml.sample

File config/visualizations/circster.xml.sample

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE visualization SYSTEM "visualization.dtd">
+<visualization name="circster">
+    <data_sources>
+        <data_source>
+            <model_class>HistoryDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="hda">hda_ldda</to_param>
+        </data_source>
+        <data_source>
+            <model_class>LibraryDatasetDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="ldda">hda_ldda</to_param>
+        </data_source>
+    </data_sources>
+    <params>
+        <param type="visualization">id</param>
+        <param type="hda_or_ldda">dataset_id</param>
+        <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier>
+        <param type="dbkey">dbkey</param>
+    </params>
+    <template>circster.mako</template>
+    <render_location>_top</render_location>
+</visualization>

File config/visualizations/phyloviz.xml.sample

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE visualization SYSTEM "visualization.dtd">
+<visualization name="phyloviz">
+    <data_sources>
+        <data_source>
+            <model_class>HistoryDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Newick</test>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Nexus</test>
+            <to_param param_attr="id">dataset_id</to_param>
+        </data_source>
+    </data_sources>
+    <params>
+        <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param>
+        <param type="integer" default="0">tree_index</param>
+    </params>
+    <template>phyloviz.mako</template>
+    <render_location>_top</render_location>
+</visualization>

File config/visualizations/scatterplot.xml.sample

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE visualization SYSTEM "visualization.dtd">
+<visualization name="scatterplot">
+    <data_sources>
+        <data_source>
+            <model_class>HistoryDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">tabular.Tabular</test>
+            <to_param param_attr="id">dataset_id</to_param>
+        </data_source>
+    </data_sources>
+    <params>
+        <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param>
+    </params>
+    <template>scatterplot.mako</template>
+</visualization>

File config/visualizations/sweepster.xml.sample

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE visualization SYSTEM "visualization.dtd">
+<visualization name="sweepster">
+    <data_sources>
+        <data_source>
+            <model_class>HistoryDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="hda">hda_ldda</to_param>
+        </data_source>
+        <data_source>
+            <model_class>LibraryDatasetDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="ldda">hda_ldda</to_param>
+        </data_source>
+    </data_sources>
+    <params>
+        <param type="visualization" var_name_in_template="viz">visualization</param>
+        <param type="hda_or_ldda" var_name_in_template="dataset">dataset_id</param>
+        <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier>
+    </params>
+    <template>sweepster.mako</template>
+    <render_location>_top</render_location>
+</visualization>

File config/visualizations/trackster.xml.sample

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE visualization SYSTEM "visualization.dtd">
+<visualization name="trackster">
+    <!--not tested yet -->
+    <data_sources>
+        <data_source>
+            <model_class>HistoryDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="hda">hda_ldda</to_param>
+            <to_param param_attr="dbkey">dbkey</to_param>
+        </data_source>
+        <data_source>
+            <model_class>LibraryDatasetDatasetAssociation</model_class>
+            <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test>
+            <to_param param_attr="id">dataset_id</to_param>
+            <to_param assign="ldda">hda_ldda</to_param>
+        </data_source>
+    </data_sources>
+    <params>
+        <param type="visualization">id</param>
+        <param type="dataset">dataset_id</param>
+        <param type="genome_region">genome_region</param>
+        <param type="dbkey">dbkey</param>
+    </params>
+    <template_root>tracks</template_root>
+    <template>browser.mako</template>
+    <render_location>_top</render_location>
+</visualization>

File config/visualizations/visualization.dtd

+<!-- runnable on NIX with xmllint -->
+
+<!-- each visualization must have a template (all other elements are optional) -->
+<!ELEMENT visualization (data_sources*,params*,template_root*,template,link_text*,render_location*)>
+<!-- visualization name (e.g. 'trackster', 'scatterplot', etc.) is required -->
+<!ATTLIST visualization
+    name        CDATA #REQUIRED
+>
+
+<!ELEMENT data_sources (data_source*)>
+<!-- data sources are elements that describe what objects (HDAs, LDDAs, Job, User, etc.)
+        are applicable to a visualization. Often these are used to fetch applicable links
+        to the visualizations that use them.
+-->
+  <!ELEMENT data_source (model_class,(test|to_param)*)>
+    <!ELEMENT model_class (#PCDATA)>
+    <!-- model_class is currently the class name of the object you want to make a visualization
+          applicable to (e.g. HistoryDatasetAssociation). Currently only classes in galaxy.model
+          can be used.
+          REQUIRED and currently limited to: 'HistoryDatasetAssociation', 'LibraryDatasetDatasetAssociation'
+    -->
+    <!ELEMENT test (#PCDATA)>
+    <!-- tests help define under what conditions the visualization can be applied to the model_class/target.
+          Currently, all tests are OR'd and there is no logical grouping. Tests are run in order.
+        (text): the text of this element is what the given target will be compared to (REQUIRED)
+        type: what type of test to run (e.g. when the target is an HDA the test will often be of type 'isinstance'
+                and test whether the HDA's datatype is an instance of a class)
+                DEFAULT: string comparison.
+        test_attr: what attribute of the target object should be used in the test. For instance, 'datatype'
+                will attempt to get the HDA.datatype from a target HDA. If the given object doesn't have
+                that attribute the test will fail (with no error). test_attr can be dot separated attributes,
+                looking up each in turn. For example, if the target was a history, one could access the
+                history.user.email by setting test_attr to 'user.email' (why you would want that, I don't know)
+                DEFAULT: compare the object itself (and not any of its attributes)
+        result_type: if the result (the text of the element mentioned above) needs to be parsed into
+                something other than a string, result_type will tell the registry how to do this. E.g.
+                if result_type is 'datatype' the registry will assume the text is a datatype class name
+                and parse it into the proper class before the test (often 'isinstance') is run.
+                DEFAULT: no parsing (result should be a string)
+    -->
+    <!ATTLIST test
+        type        CDATA #IMPLIED
+        test_attr   CDATA #IMPLIED
+        result_type CDATA #IMPLIED
+    >
+
+    <!ELEMENT to_param (#PCDATA)>
+    <!-- to_param tells the registry how to parse the data_source into a query string param.
+          For example, an HDA data_source can set the to_param text to 'dataset_id' and param_attr to 'id', and
+            the target HDA (if it passes the tests) will be passed as "dataset_id=HDA.id"
+        (text): the query string param key this source will be parsed into (e.g. dataset_id)
+                REQUIRED
+        param_attr: the attribute of the data_source object to use as the value in the query string param.
+                E.g. param_attr='id' for an HDA data_source would use the (encoded) id.
+                NOTE: a to_param MUST have either a param_attr or assign
+        assign: you can use this to directly assign a value to a query string's param. E.g. if the
+                data_source is a LDDA we can set 'hda_or_ldda=ldda' using assign='ldda'.
+                NOTE: a to_param MUST have either a param_attr or assign
+    -->
+    <!ATTLIST to_param
+        param_attr  CDATA #IMPLIED
+        assign      CDATA #IMPLIED
+    >
+
+<!ELEMENT params ((param|param_modifier)*)>
+<!-- params describe what data will be sent to a visualization template and
+      how to convert them from a query string in a URL into variables usable in a template.
+      For example, 'dataset_id=NNN' in a query string can be parsed into an HDA and passed to the template.
+     param_modifiers are a special class of parameters that modify other params
+      (e.g. hda_ldda can be 'hda' or 'ldda' and modifies/informs dataset_id to fetch an HDA or LDDA)
+-->
+  <!ELEMENT param (#PCDATA)>
+  <!-- param tells the registry how to parse the query string param back into a resource/data_source.
+        For example, if a query string has "dataset_id=NNN" and the type is 'dataset', the registry
+        will attempt to fetch the hda with id of NNN from the database and pass it to the template.
+      (text): the query string param key this source will be parsed from (e.g. dataset_id)
+              REQUIRED
+      type: the type of the resource.
+              Can be: str (DEFAULT), bool, int, float, json, visualization, dbkey, dataset, or hda_ldda.
+      default: if a param is not passed on the query string (and is not required) OR the given param
+              fails to parse, this value is used instead.
+              DEFAULT: None
+      required: set this to true if the param is required for the template. Rendering will fail with an error
+              if the param hasn't been sent.
+              DEFAULT: false
+      csv: set this to true if the param is a comma separated list. The registry will attempt to
+              parse each value as the given type and send the result as a list to the template.
+              DEFAULT: false
+      constrain_to: (currently unused) constrain a param to a set of values, error if not valid.
+              DEFAULT: don't constrain
+      var_name_in_template: a new name for the resource/variable to use in the template. E.g. an initial
+              query string param key might be 'dataset_id' in the URL, the registry parses it into an HDA,
+              and if var_name_in_template is set to 'hda', the template will be able to access the HDA
+              with the variable name 'hda' (as in hda.title).
+              DEFAULT: keep the original query string name
+  -->
+  <!ATTLIST param
+      type        CDATA #IMPLIED
+      default     CDATA #IMPLIED
+      required    CDATA #IMPLIED
+      csv         CDATA #IMPLIED
+      constrain_to          CDATA #IMPLIED
+      var_name_in_template  CDATA #IMPLIED
+  >
+  <!-- param_modifiers are the same as param but have a REQUIRED 'modifies' attribute.
+        'modifies' must point to the param name (the text part of param element) that it will modify.
+        E.g. <param_modifier modifies="dataset_id">hda_ldda</param_modifier>
+  -->
+  <!ELEMENT param_modifier (#PCDATA)>
+  <!ATTLIST param_modifier
+      modifies    CDATA #REQUIRED
+      type        CDATA #IMPLIED
+      default     CDATA #IMPLIED
+      required    CDATA #IMPLIED
+      csv         CDATA #IMPLIED
+      constrain_to          CDATA #IMPLIED
+      var_name_in_template  CDATA #IMPLIED
+  >
+
+<!-- template_root: the directory to search for the template relative to templates/webapps/galaxy
+      (optional) DEFAULT: visualizations
+-->
+<!ELEMENT template_root (#PCDATA)>
+<!-- template: the template used to render the visualization. REQUIRED -->
+<!ELEMENT template (#PCDATA)>
+<!-- link_text: the text component of an html anchor displayed when the registry builds the link information -->
+<!ELEMENT link_text (#PCDATA)>
+<!-- render_location: used as the target attribute of the link to the visualization.
+      Can be 'galaxy_main', '_top', '_blank'. DEFAULT: 'galaxy_main'
+-->
+<!-- TODO: rename -> render_target -->
+<!ELEMENT render_location (#PCDATA)>
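
The DTD above describes how the registry turns a visualization's query string into template variables. The following is an illustrative, self-contained Python sketch of that conversion for a phyloviz-style params block; parse_params and the stubbed dataset lookup are hypothetical stand-ins, not registry code:

# Illustrative only: mimics the <param> semantics documented above.
def parse_params( specs, query_args, fetch_dataset=lambda encoded_id: '<HDA %s>' % encoded_id ):
    template_vars = {}
    for spec in specs:
        key = spec[ 'name' ]                                  # e.g. 'dataset_id'
        raw = query_args.get( key, spec.get( 'default' ) )
        if raw is None:
            if spec.get( 'required' ):
                raise ValueError( '%s is required' % key )
            continue
        kind = spec.get( 'type', 'str' )
        if kind in ( 'int', 'integer' ):
            value = int( raw )
        elif kind == 'dataset':
            value = fetch_dataset( raw )                      # stands in for the real HDA lookup
        else:
            value = raw                                       # DEFAULT: plain string
        # var_name_in_template renames the variable handed to the template
        template_vars[ spec.get( 'var_name_in_template', key ) ] = value
    return template_vars

# phyloviz-style specs: ?dataset_id=abc123 -> {'hda': '<HDA abc123>', 'tree_index': 0}
specs = [
    { 'name': 'dataset_id', 'type': 'dataset', 'required': True, 'var_name_in_template': 'hda' },
    { 'name': 'tree_index', 'type': 'integer', 'default': '0' },
]
print parse_params( specs, { 'dataset_id': 'abc123' } )
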
 mercurial = 2.2.3
 MySQL_python = 1.2.3c1
 numpy = 1.6.0
-pbs_python = 4.1.0
+pbs_python = 4.3.5
 psycopg2 = 2.0.13
 pycrypto = 2.5
 pysam = 0.4.2

File job_conf.xml.sample_advanced

             <param id="shell_hostname">foo.example.org</param>
             <param id="Job_Execution_Time">24:00:00</param>
         </destination>
-        <destination id="condor" runner="condor"/>
+        <destination id="condor" runner="condor">
+            <!-- With no params, jobs are submitted to the 'vanilla' universe with:
+                    notification = NEVER
+                    getenv = true
+                 Additional/override query ClassAd params can be specified with
+                 <param> tags.
+            -->
+            <param id="request_cpus">8</param>
+        </destination>
     </destinations>
     <tools>
         <!-- Tools can be configured to use specific destinations or handlers,

File lib/galaxy/actions/admin.py

 
 import logging
 from galaxy import util
-from galaxy.exceptions import MessageException
+from galaxy.exceptions import ActionInputError
 
 log = logging.getLogger( __name__ )
 
             except AssertionError:
                 create_amount = False
         if not params.name or not params.description:
-            raise MessageException( "Enter a valid name and a description.", type='error' )
+            raise ActionInputError( "Enter a valid name and a description." )
         elif self.sa_session.query( self.app.model.Quota ).filter( self.app.model.Quota.table.c.name==params.name ).first():
-            raise MessageException( "Quota names must be unique and a quota with that name already exists, so choose another name.", type='error' )
+            raise ActionInputError( "Quota names must be unique and a quota with that name already exists, so choose another name." )
         elif not params.get( 'amount', None ):
-            raise MessageException( "Enter a valid quota amount.", type='error' )
+            raise ActionInputError( "Enter a valid quota amount." )
         elif create_amount is False:
-            raise MessageException( "Unable to parse the provided amount.", type='error' )
+            raise ActionInputError( "Unable to parse the provided amount." )
         elif params.operation not in self.app.model.Quota.valid_operations:
-            raise MessageException( "Enter a valid operation.", type='error' )
+            raise ActionInputError( "Enter a valid operation." )
         elif params.default != 'no' and params.default not in self.app.model.DefaultQuotaAssociation.types.__dict__.values():
-            raise MessageException( "Enter a valid default type.", type='error' )
+            raise ActionInputError( "Enter a valid default type." )
         elif params.default != 'no' and params.operation != '=':
-            raise MessageException( "Operation for a default quota must be '='.", type='error' )
+            raise ActionInputError( "Operation for a default quota must be '='." )
         elif create_amount is None and params.operation != '=':
-            raise MessageException( "Operation for an unlimited quota must be '='.", type='error' )
+            raise ActionInputError( "Operation for an unlimited quota must be '='." )
         else:
             # Create the quota
             quota = self.app.model.Quota( name=params.name, description=params.description, amount=create_amount, operation=params.operation )
 
     def _rename_quota( self, quota, params ):
         if not params.name:
-            raise MessageException( 'Enter a valid name', type='error' )
+            raise ActionInputError( 'Enter a valid name' )
         elif params.name != quota.name and self.sa_session.query( self.app.model.Quota ).filter( self.app.model.Quota.table.c.name==params.name ).first():
-            raise MessageException( 'A quota with that name already exists', type='error' )
+            raise ActionInputError( 'A quota with that name already exists' )
         else:
             old_name = quota.name
             quota.name = params.name
 
     def _manage_users_and_groups_for_quota( self, quota, params ):
         if quota.default:
-            raise MessageException( 'Default quotas cannot be associated with specific users and groups', type='error' )
+            raise ActionInputError( 'Default quotas cannot be associated with specific users and groups' )
         else:
             in_users = [ self.sa_session.query( self.app.model.User ).get( x ) for x in util.listify( params.in_users ) ]
             in_groups = [ self.sa_session.query( self.app.model.Group ).get( x ) for x in util.listify( params.in_groups ) ]
             except AssertionError:
                 new_amount = False
         if not params.amount:
-            raise MessageException( 'Enter a valid amount', type='error' )
+            raise ActionInputError( 'Enter a valid amount' )
         elif new_amount is False:
-            raise MessageException( 'Unable to parse the provided amount', type='error' )
+            raise ActionInputError( 'Unable to parse the provided amount' )
         elif params.operation not in self.app.model.Quota.valid_operations:
-            raise MessageException( 'Enter a valid operation', type='error' )
+            raise ActionInputError( 'Enter a valid operation' )
         else:
             quota.amount = new_amount
             quota.operation = params.operation
 
     def _set_quota_default( self, quota, params ):
         if params.default != 'no' and params.default not in self.app.model.DefaultQuotaAssociation.types.__dict__.values():
-            raise MessageException( 'Enter a valid default type.', type='error' )
+            raise ActionInputError( 'Enter a valid default type.' )
         else:
             if params.default != 'no':
                 self.app.quota_agent.set_default_quota( params.default, quota )
 
     def _unset_quota_default( self, quota, params ):
         if not quota.default:
-            raise MessageException( "Quota '%s' is not a default." % quota.name, type='error' )
+            raise ActionInputError( "Quota '%s' is not a default." % quota.name )
         else:
             message = "Quota '%s' is no longer the default for %s users." % ( quota.name, quota.default[0].type )
             for dqa in quota.default:
             if q.default:
                 names.append( q.name )
         if len( names ) == 1:
-            raise MessageException( "Quota '%s' is a default, please unset it as a default before deleting it" % ( names[0] ), type='error' )
+            raise ActionInputError( "Quota '%s' is a default, please unset it as a default before deleting it" % ( names[0] ) )
         elif len( names ) > 1:
-            raise MessageException( "Quotas are defaults, please unset them as defaults before deleting them: " + ', '.join( names ), type='error' )
+            raise ActionInputError( "Quotas are defaults, please unset them as defaults before deleting them: " + ', '.join( names ) )
         message = "Deleted %d quotas: " % len( quotas )
         for q in quotas:
             q.deleted = True
             if not q.deleted:
                 names.append( q.name )
         if len( names ) == 1:
-            raise MessageException( "Quota '%s' has not been deleted, so it cannot be undeleted." % ( names[0] ), type='error' )
+            raise ActionInputError( "Quota '%s' has not been deleted, so it cannot be undeleted." % ( names[0] ) )
         elif len( names ) > 1:
-            raise MessageException( "Quotas have not been deleted so they cannot be undeleted: " + ', '.join( names ), type='error' )
+            raise ActionInputError( "Quotas have not been deleted so they cannot be undeleted: " + ', '.join( names ) )
         message = "Undeleted %d quotas: " % len( quotas )
         for q in quotas:
             q.deleted = False
             if not q.deleted:
                 names.append( q.name )
         if len( names ) == 1:
-            raise MessageException( "Quota '%s' has not been deleted, so it cannot be purged." % ( names[0] ), type='error' )
+            raise ActionInputError( "Quota '%s' has not been deleted, so it cannot be purged." % ( names[0] ) )
         elif len( names ) > 1:
-            raise MessageException( "Quotas have not been deleted so they cannot be undeleted: " + ', '.join( names ), type='error' )
+            raise ActionInputError( "Quotas have not been deleted so they cannot be undeleted: " + ', '.join( names ) )
         message = "Purged %d quotas: " % len( quotas )
         for q in quotas:
             # Delete UserQuotaAssociations

File lib/galaxy/app.py

         from tool_shed.galaxy_install.migrate.check import verify_tools
         verify_tools( self, db_url, kwargs.get( 'global_conf', {} ).get( '__file__', None ), self.config.database_engine_options )
         # Object store manager
-        self.object_store = build_object_store_from_config(self.config)
+        self.object_store = build_object_store_from_config(self.config, fsmon=True)
         # Setup the database engine and ORM
         from galaxy.model import mapping
         self.model = mapping.init( self.config.file_path,
         # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables.
         self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config,
                                                      tool_data_path=self.tool_data_tables.tool_data_path,
-                                                     from_shed_config=True )
+                                                     from_shed_config=False )
         # Initialize the job management configuration
         self.job_config = jobs.JobConfiguration(self)
         # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file.
         # Load genome indexer tool.
         load_genome_index_tools( self.toolbox )
         # visualizations registry: associates resources with visualizations, controls how to render
-        self.visualizations_registry = ( VisualizationsRegistry( self.config.root, self.config.visualizations_conf_path )
-                                         if self.config.visualizations_conf_path else None )
+        self.visualizations_registry = None
+        if self.config.visualizations_config_directory:
+            self.visualizations_registry = VisualizationsRegistry( self.config.root,
+                                                                   self.config.visualizations_config_directory )
         # Load security policy.
         self.security_agent = self.model.security_agent
         self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions )

File lib/galaxy/config.py

             tcf = kwargs[ 'tool_config_files' ]
         else:
             tcf = 'tool_conf.xml'
+        self.tool_filters = listify( kwargs.get( "tool_filters", [] ) )
+        self.tool_label_filters = listify( kwargs.get( "tool_label_filters", [] ) )
+        self.tool_section_filters = listify( kwargs.get( "tool_section_filters", [] ) )
         self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ]
+        self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None )
+        if self.shed_tool_data_path:
+            self.shed_tool_data_path = resolve_path( self.shed_tool_data_path, self.root )
+        else:
+            self.shed_tool_data_path = self.tool_data_path
         self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root )
         self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root )
         self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) )
+        self.hours_between_check = kwargs.get( 'hours_between_check', 12 )
         try:
-            self.hours_between_check = kwargs.get( 'hours_between_check', 12 )
-            if isinstance( self.hours_between_check, float ):
+            hbc_test = int( self.hours_between_check )
+            self.hours_between_check = hbc_test
+            if self.hours_between_check < 1 or self.hours_between_check > 24:
+                self.hours_between_check = 12
+        except:
+            try:
                 # Float values are supported for functional tests.
+                hbc_test = float( self.hours_between_check )
+                self.hours_between_check = hbc_test
                 if self.hours_between_check < 0.001 or self.hours_between_check > 24.0:
                     self.hours_between_check = 12.0
-            else:
-                if self.hours_between_check < 1 or self.hours_between_check > 24:
-                    self.hours_between_check = 12
-        except:
-            self.hours_between_check = 12
+            except:
+                self.hours_between_check = 12
         self.update_integrated_tool_panel = kwargs.get( "update_integrated_tool_panel", True )
         self.enable_data_manager_user_view = string_as_bool( kwargs.get( "enable_data_manager_user_view", "False" ) )
         self.data_manager_config_file = resolve_path( kwargs.get('data_manager_config_file', 'data_manager_conf.xml' ), self.root )
         self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",")
         self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",")
         self.brand = kwargs.get( 'brand', None )
+        self.welcome_url = kwargs.get( 'welcome_url', '/static/welcome.html' )
         # Configuration for the message box directly below the masthead.
         self.message_box_visible = kwargs.get( 'message_box_visible', False )
         self.message_box_content = kwargs.get( 'message_box_content', None )
         self.fluent_log = string_as_bool( kwargs.get( 'fluent_log', False ) )
         self.fluent_host = kwargs.get( 'fluent_host', 'localhost' )
         self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) )
-        # visualizations registry config path
-        self.visualizations_conf_path = kwargs.get( 'visualizations_conf_path', None )
+        # visualization registries config directory
+        self.visualizations_config_directory = kwargs.get( 'visualizations_config_directory', None )
 
     @property
     def sentry_dsn_public( self ):

File lib/galaxy/datatypes/binary.py

 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes import metadata
 from galaxy.datatypes.sniff import *
+import dataproviders
 
 log = logging.getLogger(__name__)
 
         trans.response.headers["Content-Disposition"] = 'attachment; filename="Galaxy%s-[%s].%s"' % (dataset.hid, fname, to_ext)
         return open( dataset.file_name )
 
+
 class Ab1( Binary ):
     """Class describing an ab1 binary sequence file"""
     file_ext = "ab1"
 
 Binary.register_unsniffable_binary_ext("ab1")
 
+
 class GenericAsn1Binary( Binary ):
     """Class for generic ASN.1 binary format"""
     file_ext = "asn1-binary"
 
 Binary.register_unsniffable_binary_ext("asn1-binary")
 
+
+@dataproviders.decorators.has_dataproviders
 class Bam( Binary ):
     """Class describing a BAM binary file"""
     file_ext = "bam"
             return dataset.peek
         except:
             return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+    # ------------- Dataproviders
+    # pipe through samtools view
+    #ALSO: (as Sam)
+    # bam does not use '#' to indicate comments/headers - we need to strip out those headers from the std. providers
+    #TODO:?? seems like there should be an easier way to do/inherit this - metadata.comment_char?
+    #TODO: incorporate samtools options to control output: regions first, then flags, etc.
+    @dataproviders.decorators.dataprovider_factory( 'line', dataproviders.line.FilteredLineDataProvider.settings )
+    def line_dataprovider( self, dataset, **settings ):
+        samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset )
+        settings[ 'comment_char' ] = '@'
+        return dataproviders.line.FilteredLineDataProvider( samtools_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'regex-line', dataproviders.line.RegexLineDataProvider.settings )
+    def regex_line_dataprovider( self, dataset, **settings ):
+        samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset )
+        settings[ 'comment_char' ] = '@'
+        return dataproviders.line.RegexLineDataProvider( samtools_source, **settings )
     
+    @dataproviders.decorators.dataprovider_factory( 'column', dataproviders.column.ColumnarDataProvider.settings )
+    def column_dataprovider( self, dataset, **settings ):
+        samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset )
+        settings[ 'comment_char' ] = '@'
+        return dataproviders.column.ColumnarDataProvider( samtools_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'dict', dataproviders.column.DictDataProvider.settings )
+    def dict_dataprovider( self, dataset, **settings ):
+        samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset )
+        settings[ 'comment_char' ] = '@'
+        return dataproviders.column.DictDataProvider( samtools_source, **settings )
+
+    # these can't be used directly - may need BamColumn, BamDict (Bam metadata -> column/dict)
+    # OR - see genomic_region_dataprovider
+    #@dataproviders.decorators.dataprovider_factory( 'dataset-column', dataproviders.column.ColumnarDataProvider.settings )
+    #def dataset_column_dataprovider( self, dataset, **settings ):
+    #    settings[ 'comment_char' ] = '@'
+    #    return super( Sam, self ).dataset_column_dataprovider( dataset, **settings )
+
+    #@dataproviders.decorators.dataprovider_factory( 'dataset-dict', dataproviders.column.DictDataProvider.settings )
+    #def dataset_dict_dataprovider( self, dataset, **settings ):
+    #    settings[ 'comment_char' ] = '@'
+    #    return super( Sam, self ).dataset_dict_dataprovider( dataset, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'header', dataproviders.line.RegexLineDataProvider.settings )
+    def header_dataprovider( self, dataset, **settings ):
+        # in this case we can use an option of samtools view to provide just what we need (w/o regex)
+        samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset, '-H' )
+        return dataproviders.line.RegexLineDataProvider( samtools_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'id-seq-qual', dataproviders.column.DictDataProvider.settings )
+    def id_seq_qual_dataprovider( self, dataset, **settings ):
+        settings[ 'indeces' ] = [ 0, 9, 10 ]
+        settings[ 'column_types' ] = [ 'str', 'str', 'str' ]
+        settings[ 'column_names' ] = [ 'id', 'seq', 'qual' ]
+        return self.dict_dataprovider( dataset, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'genomic-region', dataproviders.column.ColumnarDataProvider.settings )
+    def genomic_region_dataprovider( self, dataset, **settings ):
+        # GenomicRegionDataProvider currently requires a dataset as source - may not be necc.
+        #TODO:?? consider (at least) the possible use of a kwarg: metadata_source (def. to source.dataset),
+        #   or remove altogether...
+        #samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset )
+        #return dataproviders.dataset.GenomicRegionDataProvider( samtools_source, metadata_source=dataset,
+        #                                                        2, 3, 3, **settings )
+
+        # instead, set manually and use in-class column gen
+        settings[ 'indeces' ] = [ 2, 3, 3 ]
+        settings[ 'column_types' ] = [ 'str', 'int', 'int' ]
+        return self.column_dataprovider( dataset, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'genomic-region-dict', dataproviders.column.DictDataProvider.settings )
+    def genomic_region_dict_dataprovider( self, dataset, **settings ):
+        settings[ 'indeces' ] = [ 2, 3, 3 ]
+        settings[ 'column_types' ] = [ 'str', 'int', 'int' ]
+        settings[ 'column_names' ] = [ 'chrom', 'start', 'end' ]
+        return self.dict_dataprovider( dataset, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'samtools' )
+    def samtools_dataprovider( self, dataset, **settings ):
+        """Generic samtools interface - all options available through settings."""
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.dataset.SamtoolsDataProvider( dataset_source, **settings )
+
 Binary.register_sniffable_binary_format("bam", "bam", Bam)
 
+
 class H5( Binary ):
     """Class describing an HDF5 file"""
     file_ext = "h5"
 
 Binary.register_unsniffable_binary_ext("h5")
 
+
 class Scf( Binary ):
     """Class describing an scf binary sequence file"""
     file_ext = "scf"
 
 Binary.register_unsniffable_binary_ext("scf")
 
+
 class Sff( Binary ):
     """ Standard Flowgram Format (SFF) """
     file_ext = "sff"
 
 Binary.register_sniffable_binary_format("sff", "sff", Sff)
 
+
 class BigWig(Binary):
     """
     Accessing binary BigWig files from UCSC.
     
 Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig)
 
+
 class BigBed(BigWig):
     """BigBed support from UCSC."""
 
 
 Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed)
 
+
 class TwoBit (Binary):
     """Class describing a TwoBit format nucleotide file"""
     
             return dataset.peek
         except:
             return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size()))
+
+Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit)

File lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml

 <tool id="CONVERTER_bam_to_bigwig_0" name="Convert BAM to BigWig" version="1.0.0" hidden="true">
     <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <requirements>
+        <requirement type="package">ucsc_tools</requirement>
+        <requirement type="package">bedtools</requirement>
+    </requirements>
     <command>
         bedtools genomecov -bg -split -ibam $input -g $chromInfo 
 

File lib/galaxy/datatypes/converters/bed_gff_or_vcf_to_bigwig_converter.xml

 <tool id="CONVERTER_bed_gff_or_vcf_to_bigwig_0" name="Convert BED, GFF, or VCF to BigWig" version="1.0.0" hidden="true">
     <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+    <requirements>
+        <requirement type="package">ucsc_tools</requirement>
+        <requirement type="package">bedtools</requirement>
+    </requirements>
     <command>
         ## Remove comments and sort by chromosome.
         grep -v '^#' $input | sort -k1,1 | 

File lib/galaxy/datatypes/converters/interval_to_bigwig_converter.xml

 <tool id="CONVERTER_interval_to_bigwig_0" name="Convert Genomic Intervals To Coverage">
   <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
   <!-- Used on the metadata edit page. -->
+    <requirements>
+        <requirement type="package">ucsc_tools</requirement>
+        <requirement type="package">bedtools</requirement>
+    </requirements>
   <command>
 
     ## Remove comments and sort by chromosome.

File lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py

 
 from __future__ import division
 
-import sys, fileinput, optparse
+import optparse
 from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from galaxy.visualization.tracks.summary import *
-from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed
+eggs.require( "bx-python" )
 from bx.interval_index_file import Indexes
 
 def main():
-    
+
     # Read options, args.
     parser = optparse.OptionParser()
     parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 )
     parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 )
     (options, args) = parser.parse_args()
     input_fname, output_fname = args
-    
+
     # Make column indices 0-based.
     options.chrom_col -= 1
     options.start_col -= 1
     options.end_col -= 1
-    
+
     # Do conversion.
     index = Indexes()
     offset = 0
         chrom_end = int( feature[ options.end_col ] )
         index.add( chrom, chrom_start, chrom_end, offset )
         offset += len(line)
-            
+
     index.write( open(output_fname, "w") )
 
-if __name__ == "__main__": 
+if __name__ == "__main__":
     main()
-    
+

File lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py

 
 from __future__ import division
 
-import sys, fileinput, optparse
+import optparse
 from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from galaxy.visualization.tracks.summary import *
-from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed
+eggs.require( "bx-python" )
 from bx.interval_index_file import Indexes
 
 def main():
-    
+
     # Read options, args.
     parser = optparse.OptionParser()
     (options, args) = parser.parse_args()
     input_fname, output_fname = args
-    
+
     # Do conversion.
     index = Indexes()
     offset = 0
         start = int( start ) - 1
         index.add( chrom, start, start + 1, offset )
         offset += len( line )
-            
+
     index.write( open(output_fname, "w") )
 
-if __name__ == "__main__": 
+if __name__ == "__main__":
     main()
-    
+

File lib/galaxy/datatypes/converters/sam_to_bigwig_converter.xml

 <tool id="CONVERTER_sam_to_bigwig_0" name="Convert SAM to BigWig" version="1.0.0" hidden="true">
+    <requirements>
+        <requirement type="package">ucsc_tools</requirement>
+        <requirement type="package">samtools</requirement>
+        <requirement type="package">bedtools</requirement>
+    </requirements>
     <command>
         samtools view -bh $input | bedtools genomecov -bg -split -ibam stdin -g $chromInfo 
 

File lib/galaxy/datatypes/data.py

 import tempfile
 import zipfile
 from cgi import escape
+from inspect import isclass
 from galaxy import util
 from galaxy.datatypes.metadata import MetadataElement #import directly to maintain ease of use in Datatype class definitions
 from galaxy.util import inflector
 from galaxy.util.odict import odict
 from galaxy.util.sanitize_html import sanitize_html
 
+import dataproviders
+
 from galaxy import eggs
 eggs.require( "Paste" )
 import paste
                 cls.metadata_spec.update( base.metadata_spec ) #add contents of metadata spec of base class to cls
         metadata.Statement.process( cls )
 
+@dataproviders.decorators.has_dataproviders
 class Data( object ):
     """
     Base class for all datatypes.  Implements basic interfaces as well
     def has_resolution(self):
         return False
 
-
+    def matches_any( self, target_datatypes ):
+        """
+        Check if this datatype is of any of the target_datatypes or is
+        a subtype thereof.
+        """
+        datatype_classes = tuple( [ datatype if isclass( datatype ) else datatype.__class__ for datatype in target_datatypes ] )
+        return isinstance( self, datatype_classes )
     def merge( split_files, output_file):
         """
             Merge files with copy.copyfileobj() will not hit the
             return [ 'trackster', 'circster' ]
         return []
 
+    # ------------- Dataproviders
+    def has_dataprovider( self, data_format ):
+        """
+        Returns True if `data_format` is available in `dataproviders`.
+        """
+        return ( data_format in self.dataproviders )
+
+    def dataprovider( self, dataset, data_format, **settings ):
+        """
+        Base dataprovider factory for all datatypes that returns the proper provider
+        for the given `data_format` or raises a `NoProviderAvailable`.
+        """
+        if self.has_dataprovider( data_format ):
+            return self.dataproviders[ data_format ]( self, dataset, **settings )
+        raise dataproviders.exceptions.NoProviderAvailable( self, data_format )
+
+    @dataproviders.decorators.dataprovider_factory( 'base' )
+    def base_dataprovider( self, dataset, **settings ):
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.base.DataProvider( dataset_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'chunk', dataproviders.chunk.ChunkDataProvider.settings )
+    def chunk_dataprovider( self, dataset, **settings ):
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.chunk.ChunkDataProvider( dataset_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'chunk64', dataproviders.chunk.Base64ChunkDataProvider.settings )
+    def chunk64_dataprovider( self, dataset, **settings ):
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.chunk.Base64ChunkDataProvider( dataset_source, **settings )
+
+
+@dataproviders.decorators.has_dataproviders
 class Text( Data ):
     file_ext = 'txt'
     line_class = 'line'
         f.close()
     split = classmethod(split)
 
+    # ------------- Dataproviders
+    @dataproviders.decorators.dataprovider_factory( 'line', dataproviders.line.FilteredLineDataProvider.settings )
+    def line_dataprovider( self, dataset, **settings ):
+        """
+        Returns an iterator over the dataset's lines (that have been `strip`ed)
+        optionally excluding blank lines and lines that start with a comment character.
+        """
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.line.FilteredLineDataProvider( dataset_source, **settings )
+
+    @dataproviders.decorators.dataprovider_factory( 'regex-line', dataproviders.line.RegexLineDataProvider.settings )
+    def regex_line_dataprovider( self, dataset, **settings ):
+        """
+        Returns an iterator over the dataset's lines
+        optionally including/excluding lines that match one or more regex filters.
+        """
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.line.RegexLineDataProvider( dataset_source, **settings )
+
+
 class GenericAsn1( Text ):
     """Class for generic ASN.1 text format"""
     file_ext = 'asn1'
 
+
 class LineCount( Text ):
     """
     Dataset contains a single line with a single integer that denotes the
     """
     pass
 
+
 class Newick( Text ):
     """New Hampshire/Newick Format"""
     file_ext = "nhx"

File lib/galaxy/datatypes/dataproviders/__init__.py

+
+#TODO: ---- This is a work in progress ----
+"""
+Dataproviders are iterators with context managers that provide data to some
+consumer datum by datum.
+
+As well as subclassing and overriding to get the proper data, Dataproviders
+can be piped from one to the other.
+..example::
+
+.. note:: be careful to NOT pipe providers into subclasses of those providers.
+    Subclasses provide all the functionality of their superclasses,
+    so there's generally no need.
+
+.. note:: be careful when using piped providers that accept the same keywords
+    in their __init__ functions (such as limit or offset) to pass those
+    keywords to the proper (often final) provider. The errors that result
+    otherwise can be hard to diagnose.
+"""
+import decorators
+import exceptions
+
+import base
+import chunk
+import line
+import column
+import external
+import dataset
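
As a short, self-contained illustration of the keyword note above, the sketch below uses only the base module (see base.py later in this changeset): LimitedOffsetDataProvider inherits filter_fn from FilteredDataProvider, so a single provider can filter and paginate an ordinary file. The path and predicate are arbitrary examples:

from galaxy.datatypes.dataproviders import base

def first_matching_lines( path, substring, limit=10 ):
    # keep only lines containing `substring`, then return at most `limit` of them;
    # limit/offset stay on the final (outermost) provider
    provider = base.LimitedOffsetDataProvider( open( path ), limit=limit, offset=0,
        filter_fn=lambda line: line if substring in line else None )
    return list( provider )

# e.g. first_matching_lines( 'run_functional_tests.html', 'error' )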

File lib/galaxy/datatypes/dataproviders/base.py

+"""
+Base class(es) for all DataProviders.
+"""
+# there's a blurry line between functionality here and functionality in datatypes module
+# attempting to keep parsing to a minimum here and focus on chopping/pagination/reformat(/filtering-maybe?)
+#   and using as much pre-computed info/metadata from the datatypes module as possible
+# also, this shouldn't be a replacement/re-implementation of the tool layer
+#   (which provides traceability/versioning/reproducibility)
+
+from collections import deque
+import exceptions
+
+_TODO = """
+hooks into datatypes (define providers inside datatype modules) as factories
+capture tell() when provider is done
+    def stop( self ): self.endpoint = source.tell(); raise StopIteration()
+implement __len__ sensibly where it can be (would be good to have where we're giving some progress - '100 of 300')
+    seems like sniffed files would have this info
+unit tests
+add datum entry/exit point methods: possibly decode, encode
+    or create a class that pipes source through - how would decode work then?
+
+incorporate existing visualization/dataproviders
+some of the sources (esp. in datasets) don't need to be re-created
+YAGNI: InterleavingMultiSourceDataProvider, CombiningMultiSourceDataProvider
+
+datasets API entry point:
+    kwargs should be parsed from strings 2 layers up (in the DatasetsAPI) - that's the 'proper' place for that.
+    but how would it know how/what to parse if it doesn't have access to the classes used in the provider?
+        Building a giant list by sweeping all possible dprov classes doesn't make sense
+    For now - I'm burying them in the class __init__s - but I don't like that
+"""
+
+import logging
+log = logging.getLogger( __name__ )
+
+
+# ----------------------------------------------------------------------------- base classes
+class HasSettings( type ):
+    """
+    Metaclass for data providers that allows defining and inheriting
+    a dictionary named 'settings'.
+
+    Useful for allowing class level access to expected variable types
+    passed to class `__init__` functions so they can be parsed from a query string.
+    """
+    # yeah - this is all too acrobatic
+    def __new__( cls, name, base_classes, attributes ):
+        settings = {}
+        # get settings defined in base classes
+        for base_class in base_classes:
+            base_settings = getattr( base_class, 'settings', None )
+            if base_settings:
+                settings.update( base_settings )
+        # get settings defined in this class
+        new_settings = attributes.pop( 'settings', None )
+        if new_settings:
+            settings.update( new_settings )
+        attributes[ 'settings' ] = settings
+        return type.__new__( cls, name, base_classes, attributes )
+
+
+# ----------------------------------------------------------------------------- base classes
+class DataProvider( object ):
+    """
+    Base class for all data providers. Data providers:
+        (a) have a source (which must be another file-like object)
+        (b) implement both the iterator and context manager interfaces
+        (c) do not allow write methods
+            (but otherwise implement the other file object interface methods)
+    """
+    # a definition of expected types for keyword arguments sent to __init__
+    #   useful for controlling how query string dictionaries can be parsed into correct types for __init__
+    #   empty in this base class
+    __metaclass__ = HasSettings
+    settings = {}
+
+    def __init__( self, source, **kwargs ):
+        """
+        :param source: the source that this iterator will loop over.
+            (Should implement the iterable interface and ideally have the
+            context manager interface as well)
+        """
+        self.source = self.validate_source( source )
+
+    def validate_source( self, source ):
+        """
+        Is this a valid source for this provider?
+
+        :raises InvalidDataProviderSource: if the source is considered invalid.
+
+        Meant to be overridden in subclasses.
+        """
+        if not source or not hasattr( source, '__iter__' ):
+            # that's by no means a thorough check
+            raise exceptions.InvalidDataProviderSource( source )
+        return source
+
+    #TODO: (this might cause problems later...)
+    #TODO: some providers (such as chunk's seek and read) rely on this... remove
+    def __getattr__( self, name ):
+        if name == 'source':
+            # if we're inside this fn, source hasn't been set - provide some safety just for this attr
+            return None
+        # otherwise, try to get the attr from the source - allows us to get things like provider.encoding, etc.
+        if hasattr( self.source, name ):
+            return getattr( self.source, name )
+        # raise the proper error
+        return self.__getattribute__( name )
+
+    # write methods should not be allowed
+    def truncate( self, size ):
+        raise NotImplementedError( 'Write methods are purposely disabled' )
+    def write( self, string ):
+        raise NotImplementedError( 'Write methods are purposely disabled' )
+    def writelines( self, sequence ):
+        raise NotImplementedError( 'Write methods are purposely disabled' )
+
+    #TODO: route read methods through next?
+    #def readline( self ):
+    #    return self.next()
+    def readlines( self ):
+        return [ line for line in self ]
+
+    # iterator interface
+    def __iter__( self ):
+        # it's generators all the way up, Timmy
+        with self as source:
+            for datum in self.source:
+                yield datum
+    def next( self ):
+        return self.source.next()
+
+    # context manager interface
+    def __enter__( self ):
+        # make the source's context manager interface optional
+        if hasattr( self.source, '__enter__' ):
+            self.source.__enter__()
+        return self
+    def __exit__( self, *args ):
+        # make the source's context manager interface optional, call on source if there
+        if hasattr( self.source, '__exit__' ):
+            self.source.__exit__( *args )
+        # alternately, call close()
+        elif hasattr( self.source, 'close' ):
+            self.source.close()
+
+    def __str__( self ):
+        """
+        String representation for easier debugging.
+
+        Will call `__str__` on its source so this will display piped dataproviders.
+        """
+        # we need to protect against recursion (in __getattr__) if self.source hasn't been set
+        source_str = str( self.source ) if hasattr( self, 'source' ) else ''
+        return '%s(%s)' %( self.__class__.__name__, str( source_str ) )
+
+
+class FilteredDataProvider( DataProvider ):
+    """
+    Passes each datum through a filter function and yields it if that function
+    returns a non-`None` value.
+
+    Also maintains counters:
+        - `num_data_read`: how many data have been consumed from the source.
+        - `num_valid_data_read`: how many data have been returned from `filter`.
+        - `num_data_returned`: how many data this provider has yielded.
+    """
+    # not useful here - we don't want functions over the query string
+    #settings.update({ 'filter_fn': 'function' })
+
+    def __init__( self, source, filter_fn=None, **kwargs ):
+        """
+        :param filter_fn: a lambda or function that will be passed a datum and
+            return either the (optionally modified) datum or None.
+        """
+        super( FilteredDataProvider, self ).__init__( source, **kwargs )
+        self.filter_fn = filter_fn if hasattr( filter_fn, '__call__' ) else None
+        # count how many data we got from the source
+        self.num_data_read = 0
+        # how many valid data have we gotten from the source
+        #   IOW, data that's passed the filter and been either provided OR have been skipped due to offset
+        self.num_valid_data_read = 0
+        # how many lines have been provided/output
+        self.num_data_returned = 0
+
+    def __iter__( self ):
+        parent_gen = super( FilteredDataProvider, self ).__iter__()
+        for datum in parent_gen:
+            self.num_data_read += 1
+            datum = self.filter( datum )
+            if datum != None:
+                self.num_valid_data_read += 1
+                self.num_data_returned += 1
+                yield datum
+
+    #TODO: may want to squash this into DataProvider
+    def filter( self, datum ):
+        """
+        When given a datum from the provider's source, return None if the datum
+        'does not pass' the filter or is invalid. Return the datum if it's valid.
+
+        :param datum: the datum to check for validity.
+        :returns: the datum, a modified datum, or None
+
+        Meant to be overridden.
+        """
+        if self.filter_fn:
+            return self.filter_fn( datum )
+        # also can be overridden entirely
+        return datum
+
+
+class LimitedOffsetDataProvider( FilteredDataProvider ):
+    """
+    A provider that uses the counters from FilteredDataProvider to limit the
+    number of data and/or skip `offset` number of data before providing.
+
+    Useful for grabbing sections from a source (e.g. pagination).
+    """
+    # define the expected types of these __init__ arguments so they can be parsed out from query strings
+    settings = {
+        'limit' : 'int',
+        'offset': 'int'
+    }
+
+    #TODO: may want to squash this into DataProvider
+    def __init__( self, source, offset=0, limit=None, **kwargs ):
+        """
+        :param offset:  the number of data to skip before providing.
+        :param limit:   the final number of data to provide.
+        """
+        super( LimitedOffsetDataProvider, self ).__init__( source, **kwargs )
+
+        # how many valid data to skip before we start outputting data - must be positive
+        #   (diff to support neg. indeces - must be pos.)
+        self.offset = max( offset, 0 )
+
+        # how many valid data to return - must be positive (None indicates no limit)
+        self.limit = limit
+        if self.limit != None:
+            self.limit = max( self.limit, 0 )
+
+    def __iter__( self ):
+        """
+        Iterate over the source until `num_valid_data_read` is greater than
+        `offset`, begin providing data, and stop when `num_data_returned`
+        is greater than `limit`.
+        """
+        parent_gen = super( LimitedOffsetDataProvider, self ).__iter__()
+        for datum in parent_gen:
+
+            if self.limit != None and self.num_data_returned > self.limit:
+                break
+
+            if self.num_valid_data_read > self.offset:
+                yield datum
+            else:
+                # wot a cheezy way of doing this...
+                self.num_data_returned -= 1
+
+    #TODO: skipping lines is inefficient - somehow cache file position/line_num pair and allow provider
+    #   to seek to a pos/line and then begin providing lines
+    # the important catch here is that we need to have accurate pos/line pairs
+    #   in order to preserve the functionality of limit and offset
+    #if file_seek and len( file_seek ) == 2:
+    #    seek_pos, new_line_num = file_seek
+    #    self.seek_and_set_curr_line( seek_pos, new_line_num )
+
+    #def seek_and_set_curr_line( self, file_seek, new_curr_line_num ):
+    #    self.seek( file_seek, os.SEEK_SET )
+    #    self.curr_line_num = new_curr_line_num
+
+
+class MultiSourceDataProvider( DataProvider ):
+    """
+    A provider that iterates over a list of given sources and provides data
+    from one after another.
+
+    An iterator over iterators.
+    """
+    def __init__( self, source_list, **kwargs ):
+        """
+        :param source_list: an iterator of iterables
+        """
+        self.source_list = deque( source_list )
+
+    def __iter__( self ):
+        """
+        Iterate over the source_list, then iterate over the data in each source.
+
+        Skip a given source in `source_list` if it is `None` or invalid.
+        """
+        for source in self.source_list:
+            # just skip falsy sources
+            if not source:
+                continue
+            try:
+                self.source = self.validate_source( source )
+            except exceptions.InvalidDataProviderSource, invalid_source:
+                continue
+
+            parent_gen = super( MultiSourceDataProvider, self ).__iter__()
+            for datum in parent_gen:
+                yield datum
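
Because DataProvider declares HasSettings as its metaclass, a subclass only lists the settings it adds and inherits the rest from its bases. A minimal sketch with a hypothetical subclass (not part of this module):

from galaxy.datatypes.dataproviders import base

class WindowedDataProvider( base.LimitedOffsetDataProvider ):
    # only declare what this subclass adds; 'limit' and 'offset' merge in via HasSettings
    settings = {
        'window_size' : 'int',
    }

print WindowedDataProvider.settings
# -> {'window_size': 'int', 'limit': 'int', 'offset': 'int'} (key order may vary)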

File lib/galaxy/datatypes/dataproviders/chunk.py

+"""
+Chunk (N number of bytes at M offset to a source's beginning) provider.
+
+Primarily for file sources but usable by any iterator that has both
+seek and read( N ).
+"""
+import os
+import base64
+
+import base
+import exceptions
+
+_TODO = """
+"""
+
+import logging
+log = logging.getLogger( __name__ )
+
+
+# -----------------------------------------------------------------------------
+class ChunkDataProvider( base.DataProvider ):
+    """
+    Data provider that yields chunks of data from its file.
+
+    Note: this version does not account for lines and works with Binary datatypes.
+    """
+    MAX_CHUNK_SIZE = 2**16
+    DEFAULT_CHUNK_SIZE = MAX_CHUNK_SIZE
+    settings = {
+        'chunk_index'   : 'int',
+        'chunk_size'    : 'int'
+    }
+
+    #TODO: subclass from LimitedOffsetDataProvider?
+    # see web/framework/base.iterate_file, util/__init__.file_reader, and datatypes.tabular
+    def __init__( self, source, chunk_index=0, chunk_size=DEFAULT_CHUNK_SIZE, **kwargs ):
+        """
+        :param chunk_index: if a source can be divided into N number of
+            `chunk_size` sections, this is the index of which section to
+            return.
+        :param chunk_size:  how large are the desired chunks to return
+            (gen. in bytes).
+        """
+        super( ChunkDataProvider, self ).__init__( source, **kwargs )
+        self.chunk_size = int( chunk_size )
+        self.chunk_pos = int( chunk_index ) * self.chunk_size
+
+    def validate_source( self, source ):
+        """
+        Does the given source have both the methods `seek` and `read`?
+        :raises InvalidDataProviderSource: if not.
+        """
+        source = super( ChunkDataProvider, self ).validate_source( source )
+        if( ( not hasattr( source, 'seek' ) )
+        or  ( not hasattr( source, 'read' ) ) ):
+            raise exceptions.InvalidDataProviderSource( source )
+        return source
+
+    def __iter__( self ):
+        # not reeeally an iterator per se
+        self.__enter__()
+        self.source.seek( self.chunk_pos, os.SEEK_SET )
+        chunk = self.encode( self.source.read( self.chunk_size ) )
+        yield chunk
+        self.__exit__()
+
+    def encode( self, chunk ):
+        """
+        Called on the chunk before returning.
+
+        Override to modify, encode, or decode chunks.
+        """
+        return chunk
+
+
+class Base64ChunkDataProvider( ChunkDataProvider ):
+    """
+    Data provider that yields chunks of base64 encoded data from its file.
+    """
+    def encode( self, chunk ):
+        """
+        Return chunks encoded in base 64.
+        """
+        return base64.b64encode( chunk )
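
A minimal usage sketch for the providers above: read the second 1 KB slice of an ordinary file and get it back base64 encoded (the path is just an example):

from galaxy.datatypes.dataproviders import chunk

source = open( 'static/welcome.html' )
provider = chunk.Base64ChunkDataProvider( source, chunk_index=1, chunk_size=1024 )
for encoded in provider:      # yields exactly one base64-encoded chunk
    print len( encoded )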

File lib/galaxy/datatypes/dataproviders/column.py

+"""
+Providers that provide lists of lists generally where each line of a source
+is further subdivided into multiple data (e.g. columns from a line).
+"""
+
+import line
+
+_TODO = """
+move ColumnarDataProvider parsers to more sensible location
+
+TransposedColumnarDataProvider: provides each column as a single array
+    - see existing visualizations/dataprovider/basic.ColumnDataProvider
+"""
+
+import logging
+log = logging.getLogger( __name__ )
+
+
+# ----------------------------------------------------------------------------- base classes
+class ColumnarDataProvider( line.RegexLineDataProvider ):
+    """
+    Data provider that provides a list of columns from the lines of its source.
+
+    Columns are returned in the order given in indeces, so this provider can
+    re-arrange columns.
+
+    If any desired index is outside the actual number of columns
+    in the source, this provider will None-pad the output and you are guaranteed
+    the same number of columns as the number of indeces asked for (even if they
+    are filled with None).
+    """
+    settings = {
+        'indeces'       : 'list:int',
+        'column_count'  : 'int',
+        'column_types'  : 'list:str',
+        'parse_columns' : 'bool',
+        'deliminator'   : 'str'
+    }
+
+    def __init__( self, source, indeces=None,
+            column_count=None, column_types=None, parsers=None, parse_columns=True,
+            deliminator='\t', **kwargs ):
+        """
+        :param indeces: a list of indeces of columns to gather from each row
+            Optional: will default to `None`.
+            If `None`, this provider will return all rows (even when a
+                particular row contains more/less than oth