Commits

Brad Chapman committed 6f3ee27 Merge

Pull from Galaxy release: October 23, 2012

Files changed (276)

community_wsgi.ini.sample

 session_key = galaxysessions
 session_secret = changethisinproduction
 
-# Galaxy session security
+# -- Users and Security
+
+# Galaxy encodes various internal values when these values will be output in
+# some format (for example, in a URL or cookie).  You should set a key to be
+# used by the algorithm that encodes and decodes these values.  It can be any
+# string.  If left unchanged, anyone could construct a cookie that would grant
+# them access to others' sessions.
 id_secret = changethisinproductiontoo
 
+# User authentication can be delegated to an upstream proxy server (usually
+# Apache).  The upstream proxy should set a REMOTE_USER header in the request.
+# Enabling remote user disables regular logins.  For more information, see:
+# http://wiki.g2.bx.psu.edu/Admin/Config/Apache%20Proxy
+#use_remote_user = False
+
 # Configuration for debugging middleware
 debug = true
 use_lint = false
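
The comment block added above stresses that id_secret must be changed before production use. Below is a minimal sketch of a startup check along those lines; the file name and the [app:main] section name are assumptions based on Galaxy's usual sample config layout, not part of this commit.

    import ConfigParser

    def id_secret_is_safe(ini_path='community_wsgi.ini'):
        parser = ConfigParser.SafeConfigParser()
        parser.read(ini_path)
        try:
            id_secret = parser.get('app:main', 'id_secret')
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            return False
        # The sample value is public, so keeping it lets anyone forge encoded ids.
        return id_secret not in ('', 'changethisinproductiontoo')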

datatypes_conf.xml.sample

       <display file="ucsc/bigwig.xml" />
       <display file="igb/bigwig.xml" />
     </datatype>
+    <datatype extension="chrint" type="galaxy.datatypes.interval:ChromatinInteractions" display_in_upload="True">
+      <converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/>
+      <converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/>
+      <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
+    </datatype>
     <!-- MSI added Datatypes -->
     <datatype extension="csv" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="true" /> <!-- FIXME: csv is 'tabular'ized data, but not 'tab-delimited'; the class used here is intended for 'tab-delimited' -->
     <!-- End MSI added Datatypes -->

lib/galaxy/config.py

 import sys, os, tempfile
 import logging, logging.config
 import ConfigParser
+from datetime import timedelta
 from galaxy.util import string_as_bool, listify, parse_xml
 
 from galaxy import eggs
         self.output_size_limit = int( kwargs.get( 'output_size_limit', 0 ) )
         self.retry_job_output_collection = int( kwargs.get( 'retry_job_output_collection', 0 ) )
         self.job_walltime = kwargs.get( 'job_walltime', None )
+        self.job_walltime_delta = None
+        if self.job_walltime is not None:
+            h, m, s = [ int( v ) for v in self.job_walltime.split( ':' ) ]
+            self.job_walltime_delta = timedelta( 0, s, 0, 0, m, h )
         self.admin_users = kwargs.get( "admin_users", "" )
         self.mailing_join_addr = kwargs.get('mailing_join_addr',"galaxy-announce-join@bx.psu.edu")
         self.error_email_to = kwargs.get( 'error_email_to', None )
             amqp_config = {}
         for k, v in amqp_config:
             self.amqp[k] = v
+        self.biostar = kwargs.get( 'biostar', None )
         self.running_functional_tests = string_as_bool( kwargs.get( 'running_functional_tests', False ) )
+        # Experimental: these options are not enabled by default and hide
+        # non-production code.
+        # api_folders controls whether the API exposes the /folders section.
+        self.api_folders = string_as_bool( kwargs.get( 'api_folders', False ) )
+        # This is for testing new library browsing capabilities.
+        self.new_lib_browse = string_as_bool( kwargs.get( 'new_lib_browse', False ) )
+
     def __read_tool_job_config( self, global_conf_parser, section, key ):
         try:
             tool_runners_config = global_conf_parser.items( section )
         if self.migrated_tools_config not in tool_configs:
             tool_configs.append( self.migrated_tools_config )
         for path in tool_configs:
-            if not os.path.isfile( path ):
+            if not os.path.exists( path ):
                 raise ConfigurationError("File not found: %s" % path )
         if not os.path.isfile( self.datatypes_config ):
             raise ConfigurationError("File not found: %s" % self.datatypes_config )
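
The job_walltime handling added above turns an "HH:MM:SS" string into a timedelta using positional arguments. Because timedelta's positional order is (days, seconds, microseconds, milliseconds, minutes, hours), the values look reversed; here is that parsing in isolation, with made-up walltimes.

    from datetime import timedelta

    def parse_walltime(walltime):
        # Mirrors the parsing above; assumes the string is always "HH:MM:SS".
        h, m, s = [int(v) for v in walltime.split(':')]
        return timedelta(0, s, 0, 0, m, h)

    assert parse_walltime('36:00:00') == timedelta(hours=36)
    assert parse_walltime('00:30:15') == timedelta(minutes=30, seconds=15)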

lib/galaxy/datatypes/assembly.py

             if long_reads:
                  gen_msg = gen_msg + ' Long Reads'
             if len(gen_msg) > 0:
-		    gen_msg = 'Uses: ' + gen_msg
+                gen_msg = 'Uses: ' + gen_msg
         except:
             log.debug( "Velveth could not read Log file in %s" % efp)
         log.debug( "Velveth log info  %s" % gen_msg)

lib/galaxy/datatypes/data.py

         TODO: Do we need to merge gzip files using gzjoin? cat seems to work,
         but might be brittle. Need to revisit this.
         """
-        if len(split_files) == 1:
+        if not split_files:
+            raise ValueError('Asked to merge zero files as %s' % output_file)
+        elif len(split_files) == 1:
             cmd = 'mv -f %s %s' % ( split_files[0], output_file )
         else:
             cmd = 'cat %s > %s' % ( ' '.join(split_files), output_file )
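
The change above adds a guard for the zero-file case ahead of the existing move/concatenate logic. A shell-free sketch of the same behaviour (merge_parts is an illustrative name, not Galaxy API):

    import shutil

    def merge_parts(split_files, output_file):
        if not split_files:
            raise ValueError('Asked to merge zero files as %s' % output_file)
        if len(split_files) == 1:
            # A single part can simply be renamed into place.
            shutil.move(split_files[0], output_file)
            return
        with open(output_file, 'wb') as out:
            for part in split_files:
                with open(part, 'rb') as src:
                    shutil.copyfileobj(src, out)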

lib/galaxy/datatypes/interval.py

     
     file_ext = "chrint"
     
-    column_names = [ 'Chrom', 'Start1', 'End1', 'Start2', 'End2', 'Value' ]
+    column_names = [ 'Chrom1', 'Start1', 'End1', 'Chrom2', 'Start2', 'End2', 'Value' ]
     
     """Add metadata elements"""
-    MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter )
+    MetadataElement( name="chrom1Col", default=1, desc="Chrom1 column", param=metadata.ColumnParameter )
     MetadataElement( name="start1Col", default=2, desc="Start1 column", param=metadata.ColumnParameter )
     MetadataElement( name="end1Col", default=3, desc="End1 column", param=metadata.ColumnParameter )
-    MetadataElement( name="start2Col", default=2, desc="Start2 column", param=metadata.ColumnParameter )
-    MetadataElement( name="end2Col", default=3, desc="End2 column", param=metadata.ColumnParameter )
-    MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False )
+    MetadataElement( name="chrom2Col", default=4, desc="Chrom2 column", param=metadata.ColumnParameter )
+    MetadataElement( name="start2Col", default=5, desc="Start2 column", param=metadata.ColumnParameter )
+    MetadataElement( name="end2Col", default=6, desc="End2 column", param=metadata.ColumnParameter )
+    MetadataElement( name="valueCol", default=7, desc="Value column", param=metadata.ColumnParameter )
+
+    MetadataElement( name="columns", default=7, desc="Number of columns", readonly=True, visible=False )
     
     def sniff( self, filename ):
         return False
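
The corrected metadata above implies a seven-column, tab-separated layout for the new chrint datatype. An invented example line and a quick consistency check against the column names:

    COLUMN_NAMES = ['Chrom1', 'Start1', 'End1', 'Chrom2', 'Start2', 'End2', 'Value']

    example_line = 'chr1\t1000\t2000\tchr1\t5000\t6000\t3.5'
    fields = example_line.split('\t')
    assert len(fields) == len(COLUMN_NAMES) == 7
    record = dict(zip(COLUMN_NAMES, fields))
    assert record['Chrom2'] == 'chr1'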

lib/galaxy/datatypes/sniff.py

     """
     fd, temp_name = tempfile.mkstemp()
     fp = os.fdopen( fd, "wt" )
-    i = 0
-    for i, line in enumerate( file( fname, "U" ), 1 ):
+    i = None
+    for i, line in enumerate( file( fname, "U" ) ):
         fp.write( "%s\n" % line.rstrip( "\r\n" ) )
     fp.close()
+    if i is None:
+        i = 0
+    else:
+        i += 1
     if in_place:
         shutil.move( temp_name, fname )
         # Return number of lines in file.
     regexp = re.compile( patt )
     fd, temp_name = tempfile.mkstemp()
     fp = os.fdopen( fd, "wt" )
-    i = 0
-    for i, line in enumerate( file( fname ), 1):
+    i = None
+    for i, line in enumerate( file( fname ) ):
         line  = line.rstrip( '\r\n' )
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     fp.close()
+    if i is None:
+        i = 0
+    else:
+        i += 1        
     if in_place:
         shutil.move( temp_name, fname )
         # Return number of lines in file.
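
Both hunks above switch to enumerate() without a start argument (the two-argument form only exists in Python 2.6+) and derive the line count afterwards, treating the empty-file case explicitly. The counting pattern in isolation, with made-up inputs:

    def count_lines(lines):
        i = None
        for i, _line in enumerate(lines):
            pass
        if i is None:          # nothing iterated: empty input
            return 0
        return i + 1           # enumerate starts at 0, so add one

    assert count_lines([]) == 0
    assert count_lines(['a', 'b', 'c']) == 3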

lib/galaxy/jobs/__init__.py

                 dataset.info = message
                 dataset.set_size()
                 dataset.dataset.set_total_size()
+                dataset.mark_unhidden()
                 if dataset.ext == 'auto':
                     dataset.extension = 'data'
                 # Update (non-library) job output datasets through the object store
                 # job's state:
                 if job.states.ERROR == job.state:
                     dataset.blurb = "error"
+                    dataset.mark_unhidden()
                 elif dataset.has_data():
                     # If the tool was expected to set the extension, attempt to retrieve it
                     if dataset.ext == 'auto':

lib/galaxy/jobs/actions/post.py

     @classmethod
     def execute(cls, app, sa_session, action, job, replacement_dict):
         for dataset_assoc in job.output_datasets:
-            if action.output_name == '' or dataset_assoc.name == action.output_name:
+            if dataset_assoc.dataset.state != dataset_assoc.dataset.states.ERROR and ( action.output_name == '' or dataset_assoc.name == action.output_name ):
                 dataset_assoc.dataset.visible=False
 
     @classmethod

lib/galaxy/jobs/handler.py

         if ( None != job.get_job_runner_name() ):
             runner_name = (job.get_job_runner_name().split(":",1))[0]
             if ( isinstance( job, model.Job ) ):
-    	        log.debug( "stopping job %d in %s runner" %( job.get_id(), runner_name ) )
+                log.debug( "stopping job %d in %s runner" %( job.get_id(), runner_name ) )
             elif ( isinstance( job, model.Task ) ):
                 log.debug( "Stopping job %d, task %d in %s runner" 
                          % ( job.get_job().get_id(), job.get_id(), runner_name ) )

lib/galaxy/jobs/runners/local.py

 import logging
 import subprocess
 import tempfile
+import datetime
 from Queue import Queue
 import threading
 
                                              preexec_fn = os.setpgrp )
                     job_wrapper.set_runner( 'local:///', proc.pid )
                     job_wrapper.change_state( model.Job.states.RUNNING )
-                    if self.app.config.output_size_limit > 0:
-                        sleep_time = 1
-                        while proc.poll() is None:
+                    sleep_time = 1
+                    job_start = datetime.datetime.now()
+                    while proc.poll() is None:
+                        if self.app.config.output_size_limit > 0:
                             for outfile, size in job_wrapper.check_output_sizes():
                                 if size > self.app.config.output_size_limit:
                                     # Error the job immediately
-                                    job_wrapper.fail( 'Job output grew too large (greater than %s), please try different job parameters or' \
+                                    job_wrapper.fail( 'Job output grew too large (greater than %s), please try different job parameters' \
                                         % nice_size( self.app.config.output_size_limit ) )
                                     log.warning( 'Terminating job %s due to output %s growing larger than %s limit' \
                                         % ( job_wrapper.job_id, os.path.basename( outfile ), nice_size( self.app.config.output_size_limit ) ) )
                                     # Then kill it
-                                    os.killpg( proc.pid, 15 )
-                                    sleep( 1 )
-                                    if proc.poll() is None:
-                                        os.killpg( proc.pid, 9 )
-                                    proc.wait() # reap
+                                    self._terminate( proc )
                                     log.debug( 'Job %s (pid %s) terminated' % ( job_wrapper.job_id, proc.pid ) )
                                     return
                                 sleep( sleep_time )
-                                if sleep_time < 8:
-                                    # So we don't stat every second
-                                    sleep_time *= 2
+                        if self.app.config.job_walltime_delta is not None:
+                            time_executing = datetime.datetime.now() - job_start
+                            if time_executing > self.app.config.job_walltime_delta:
+                                # Error the job immediately
+                                job_wrapper.fail( 'Job ran longer than maximum allowed execution time (%s), please try different job parameters' \
+                                    % self.app.config.job_walltime )
+                                log.warning( 'Terminating job %s since walltime has been reached' % job_wrapper.job_id )
+                                # Then kill it
+                                self._terminate( proc )
+                                log.debug( 'Job %s (pid %s) terminated' % ( job_wrapper.job_id, proc.pid ) )
+                                return
+                        if sleep_time < 8:
+                            # So we don't stat every second
+                            sleep_time *= 2
                     # Reap the process and get the exit code.
                     exit_code = proc.wait()
                     stdout_file.seek( 0 )
         # local jobs can't be recovered
         job_wrapper.change_state( model.Job.states.ERROR, info = "This job was killed when Galaxy was restarted.  Please retry the job." )
 
+    def _terminate( self, proc ):
+        os.killpg( proc.pid, 15 )
+        sleep( 1 )
+        if proc.poll() is None:
+            os.killpg( proc.pid, 9 )
+        return proc.wait() # reap
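
The new _terminate() helper centralises the usual Unix shutdown escalation: SIGTERM to the whole process group, a short grace period, then SIGKILL, then a wait() to reap the child. A standalone sketch of that pattern follows; the sleep 60 child is only for illustration, and this is Unix-specific.

    import os
    import signal
    import subprocess
    from time import sleep

    def terminate_group(proc):
        os.killpg(proc.pid, signal.SIGTERM)   # polite request to the whole group
        sleep(1)                              # same grace period as above
        if proc.poll() is None:
            os.killpg(proc.pid, signal.SIGKILL)
        return proc.wait()                    # reap so no zombie is left behind

    child = subprocess.Popen(['sleep', '60'], preexec_fn=os.setpgrp)
    print(terminate_group(child))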

lib/galaxy/jobs/runners/pbs.py

         # set the default server during startup
         self.default_pbs_server = None
         self.determine_pbs_server( 'pbs:///' )
-        self.job_walltime = None
-        if self.app.config.job_walltime is not None:
-            h, m, s = [ int( v ) for v in self.app.config.job_walltime.split( ':' ) ]
-            self.job_walltime = timedelta( 0, s, 0, 0, m, h )
         self.monitor_thread = threading.Thread( target=self.monitor )
         self.monitor_thread.start()
         self.work_queue = Queue()
         galaxy_job_id = job_wrapper.get_id_tag()
         log.debug("(%s) submitting file %s" % ( galaxy_job_id, job_file ) )
         log.debug("(%s) command is: %s" % ( galaxy_job_id, command_line ) )
-        job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
-        pbs.pbs_disconnect(c)
 
-        # check to see if it submitted
-        if not job_id:
+        tries = 0
+        while tries < 5:
+            job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
+            tries += 1
+            if job_id:
+                pbs.pbs_disconnect(c)
+                break
             errno, text = pbs.error()
-            log.debug( "(%s) pbs_submit failed, PBS error %d: %s" % (galaxy_job_id, errno, text) )
+            log.warning( "(%s) pbs_submit failed (try %d/5), PBS error %d: %s" % (galaxy_job_id, tries, errno, text) )
+            time.sleep(2)
+        else:
+            log.error( "(%s) All attempts to submit job failed" % galaxy_job_id )
             job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
             return
 
                     fail = False
                     for outfile, size in pbs_job_state.job_wrapper.check_output_sizes():
                         if size > self.app.config.output_size_limit:
-                            pbs_job_state.fail_message = 'Job output grew too large (greater than %s), please try different job parameters or' \
+                            pbs_job_state.fail_message = 'Job output grew too large (greater than %s), please try different job parameters' \
                                 % nice_size( self.app.config.output_size_limit )
                             log.warning( '(%s/%s) Dequeueing job due to output %s growing larger than %s limit' \
                                 % ( galaxy_job_id, job_id, os.path.basename( outfile ), nice_size( self.app.config.output_size_limit ) ) )
                             break
                     if fail:
                         continue
-                if self.job_walltime is not None:
+                if self.app.config.job_walltime_delta is not None:
                     # Check the job's execution time
                     if status.get( 'resources_used', False ):
                         # resources_used may not be in the status for new jobs
                         h, m, s = [ int( i ) for i in status.resources_used.walltime.split( ':' ) ]
                         time_executing = timedelta( 0, s, 0, 0, m, h )
-                        if time_executing > self.job_walltime:
-                            pbs_job_state.fail_message = 'Job ran longer than maximum allowed execution time (%s), please try different job parameters or' \
+                        if time_executing > self.app.config.job_walltime_delta:
+                            pbs_job_state.fail_message = 'Job ran longer than maximum allowed execution time (%s), please try different job parameters' \
                                 % self.app.config.job_walltime
                             log.warning( '(%s/%s) Dequeueing job since walltime has been reached' \
                                 % ( galaxy_job_id, job_id ) )
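
The reworked submission above leans on Python's while/else: the else branch runs only when the loop condition goes false without a break, i.e. when every attempt failed. The retry shape in isolation (submit_once is a stand-in callable, not a PBS API):

    import time

    def submit_with_retries(submit_once, max_tries=5, delay=2):
        tries = 0
        job_id = None
        while tries < max_tries:
            job_id = submit_once()
            tries += 1
            if job_id:
                break          # success skips the else below
            time.sleep(delay)
        else:
            # Only reached when all attempts failed (no break occurred).
            raise RuntimeError('all %d submission attempts failed' % max_tries)
        return job_id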

lib/galaxy/jobs/splitters/multi.py

     try:
         working_directory = job_wrapper.working_directory
         task_dirs = [os.path.join(working_directory, x) for x in os.listdir(working_directory) if x.startswith('task_')]
+        assert task_dirs, "Should be at least one sub-task!"
         # TODO: Output datasets can be very complex. This doesn't handle metadata files
         outputs = job_wrapper.get_output_hdas_and_fnames()
         pickone_done = []
                 # Just include those files f in the output list for which the 
                 # file f exists; some files may not exist if a task fails.
                 output_files = [ f for f in output_files if os.path.exists(f) ]
-                log.debug('files %s ' % output_files)
-                output_type.merge(output_files, output_file_name)
-                log.debug('merge finished: %s' % output_file_name)
-                pass # TODO: merge all the files
+                if output_files:
+                    log.debug('files %s ' % output_files)
+                    if len(output_files) < len(task_dirs):
+                        log.debug('merging only %i out of expected %i files for %s'
+                                  % (len(output_files), len(task_dirs), output_file_name))
+                    output_type.merge(output_files, output_file_name)
+                    log.debug('merge finished: %s' % output_file_name)
+                else:
+                    msg = 'nothing to merge for %s (expected %i files)' \
+                          % (output_file_name, len(task_dirs))
+                    log.debug(msg)
+                    stderr += msg + "\n"
             elif output in pickone_outputs:
                 # just pick one of them
                 if output not in pickone_done:

lib/galaxy/model/__init__.py

                     return source
             return ( None, None )
         return get_source( self )
-
+    @property
+    def source_dataset_chain( self ):
+        def _source_dataset_chain( dataset, lst ):
+            try:
+                cp_from_ldda = dataset.copied_from_library_dataset_dataset_association
+                if cp_from_ldda:
+                    lst.append( (cp_from_ldda, "(Data Library)") )
+                    return _source_dataset_chain( cp_from_ldda, lst )
+            except Exception, e:
+                log.warning( e )
+            try:
+                cp_from_hda  = dataset.copied_from_history_dataset_association
+                if cp_from_hda:
+                    lst.append( (cp_from_hda, cp_from_hda.history.name) )
+                    return _source_dataset_chain( cp_from_hda, lst )
+            except Exception, e:
+                log.warning( e )
+            return lst
+        return _source_dataset_chain( self, [] )
+    @property
+    def creating_job( self ):
+        creating_job_associations = None
+        if self.creating_job_associations:
+            creating_job_associations = self.creating_job_associations
+        else:
+            inherit_chain = self.source_dataset_chain
+            if inherit_chain:
+                creating_job_associations = inherit_chain[-1][0].creating_job_associations
+        if creating_job_associations:
+            return creating_job_associations[0].job
+        return None
     def get_display_applications( self, trans ):
         return self.datatype.get_display_applications_by_dataset( self, trans )
 
     def can_reset_metadata( self ):
         return self.status == self.installation_status.INSTALLED
     @property
+    def can_uninstall( self ):
+        return self.status != self.installation_status.UNINSTALLED
+    @property
+    def can_deactivate( self ):
+        return self.status not in [ self.installation_status.DEACTIVATED, self.installation_status.UNINSTALLED ]
+    @property
+    def can_reinstall_or_activate( self ):
+        return self.deleted
+    @property
     def includes_tools( self ):
         return self.metadata and 'tools' in self.metadata
     @property

lib/galaxy/model/mapping.py

             Dataset, 
             primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
         # .history defined in History mapper
+        copied_from_history_dataset_association=relation( 
+            HistoryDatasetAssociation, 
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
+            remote_side=[HistoryDatasetAssociation.table.c.id],
+            uselist=False ),
         copied_to_history_dataset_associations=relation( 
             HistoryDatasetAssociation, 
-            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
-            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ) ),
+        copied_from_library_dataset_dataset_association=relation( 
+            LibraryDatasetDatasetAssociation, 
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            uselist=False ),
         copied_to_library_dataset_dataset_associations=relation( 
             LibraryDatasetDatasetAssociation, 
-            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
-            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
         implicitly_converted_datasets=relation( 
             ImplicitlyConvertedDatasetAssociation, 
             primaryjoin=( ImplicitlyConvertedDatasetAssociation.table.c.hda_parent_id == HistoryDatasetAssociation.table.c.id ) ),
         library_dataset = relation( LibraryDataset,
         primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
         user=relation( User.mapper ),
+        copied_from_library_dataset_dataset_association=relation( 
+            LibraryDatasetDatasetAssociation, 
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
+            remote_side=[LibraryDatasetDatasetAssociation.table.c.id],
+            uselist=False ),
         copied_to_library_dataset_dataset_associations=relation( 
             LibraryDatasetDatasetAssociation, 
-            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
-            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
+        copied_from_history_dataset_association=relation( 
+            HistoryDatasetAssociation, 
+            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id  ),
+            uselist=False ),
         copied_to_history_dataset_associations=relation( 
             HistoryDatasetAssociation, 
-            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
-            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
+            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
         implicitly_converted_datasets=relation( 
             ImplicitlyConvertedDatasetAssociation, 
             primaryjoin=( ImplicitlyConvertedDatasetAssociation.table.c.ldda_parent_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
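
The mapping rework above replaces backref-created relations with explicitly named ones; the self-referential "copied from" side needs remote_side so SQLAlchemy knows which end of the self-join is the parent. A generic declarative sketch of that idiom (not Galaxy's actual mapping):

    from sqlalchemy import Column, ForeignKey, Integer, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import relationship, sessionmaker

    Base = declarative_base()

    class Item(Base):
        __tablename__ = 'item'
        id = Column(Integer, primary_key=True)
        copied_from_id = Column(Integer, ForeignKey('item.id'))
        # remote_side marks the parent end of the self-join; uselist=False
        # makes the attribute a scalar, as in the mapping above.
        copied_from = relationship('Item', remote_side=[id], uselist=False)

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    original = Item()
    copy = Item(copied_from=original)
    session.add_all([original, copy])
    session.commit()
    assert copy.copied_from is original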

lib/galaxy/model/migrate/versions/0033_published_cols_for_histories_and_workflows.py

 db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) )
 
 def upgrade():
-	print __doc__
-	metadata.reflect()
+    print __doc__
+    metadata.reflect()
 
-	# Create published column in history table.
-	History_table = Table( "history", metadata, autoload=True )
-	c = Column( "published", Boolean, index=True )
-	try:
-	    c.create( History_table )
-	    assert c is History_table.c.published
-	except Exception, e:
-	    print "Adding published column to history table failed: %s" % str( e )
-	    log.debug( "Adding published column to history table failed: %s" % str( e ) )
+    # Create published column in history table.
+    History_table = Table( "history", metadata, autoload=True )
+    c = Column( "published", Boolean, index=True )
+    try:
+        c.create( History_table )
+        assert c is History_table.c.published
+    except Exception, e:
+        print "Adding published column to history table failed: %s" % str( e )
+        log.debug( "Adding published column to history table failed: %s" % str( e ) )
     
     
-	# Create index for published column in history table.
-	try:
-	    i = Index( "ix_history_published", History_table.c.published )
-	    i.create()
-	except:
-	    # Mysql doesn't have a named index, but alter should work
-	    History_table.c.published.alter( unique=False )
+    # Create index for published column in history table.
+    try:
+        i = Index( "ix_history_published", History_table.c.published )
+        i.create()
+    except:
+        # Mysql doesn't have a named index, but alter should work
+        History_table.c.published.alter( unique=False )
     
-	# Create published column in stored workflows table.
-	StoredWorkflow_table = Table( "stored_workflow", metadata, autoload=True )
-	c = Column( "published", Boolean, index=True )
-	try:
-	    c.create( StoredWorkflow_table )
-	    assert c is StoredWorkflow_table.c.published
-	except Exception, e:
-	    print "Adding published column to stored_workflow table failed: %s" % str( e )
-	    log.debug( "Adding published column to stored_workflow table failed: %s" % str( e ) )
+    # Create published column in stored workflows table.
+    StoredWorkflow_table = Table( "stored_workflow", metadata, autoload=True )
+    c = Column( "published", Boolean, index=True )
+    try:
+        c.create( StoredWorkflow_table )
+        assert c is StoredWorkflow_table.c.published
+    except Exception, e:
+        print "Adding published column to stored_workflow table failed: %s" % str( e )
+        log.debug( "Adding published column to stored_workflow table failed: %s" % str( e ) )
 
-	# Create index for published column in stored workflows table.
-	try:
-	    i = Index( "ix_stored_workflow_published", StoredWorkflow_table.c.published )
-	    i.create()
-	except:
-	    # Mysql doesn't have a named index, but alter should work
-	    StoredWorkflow_table.c.published.alter( unique=False )
+    # Create index for published column in stored workflows table.
+    try:
+        i = Index( "ix_stored_workflow_published", StoredWorkflow_table.c.published )
+        i.create()
+    except:
+        # Mysql doesn't have a named index, but alter should work
+        StoredWorkflow_table.c.published.alter( unique=False )
 
-	# Create importable column in page table.
-	Page_table = Table( "page", metadata, autoload=True )
-	c = Column( "importable", Boolean, index=True )
-	try:
-		c.create( Page_table )
-		assert c is Page_table.c.importable
-	except Exception, e:
-		print "Adding importable column to page table failed: %s" % str( e )
-		log.debug( "Adding importable column to page table failed: %s" % str( e ) )
-		
-	# Create index for importable column in page table.
-	try:
-	    i = Index( "ix_page_importable", Page_table.c.importable )
-	    i.create()
-	except:
-	    # Mysql doesn't have a named index, but alter should work
-		Page_table.c.importable.alter( unique=False )
+    # Create importable column in page table.
+    Page_table = Table( "page", metadata, autoload=True )
+    c = Column( "importable", Boolean, index=True )
+    try:
+        c.create( Page_table )
+        assert c is Page_table.c.importable
+    except Exception, e:
+        print "Adding importable column to page table failed: %s" % str( e )
+        log.debug( "Adding importable column to page table failed: %s" % str( e ) )
+        
+    # Create index for importable column in page table.
+    try:
+        i = Index( "ix_page_importable", Page_table.c.importable )
+        i.create()
+    except:
+        # Mysql doesn't have a named index, but alter should work
+        Page_table.c.importable.alter( unique=False )
 
 def downgrade():
-	metadata.reflect()
+    metadata.reflect()
 
-	# Drop published column from history table.
-	History_table = Table( "history", metadata, autoload=True )
-	try:
-	    History_table.c.published.drop()
-	except Exception, e:
-	    print "Dropping column published from history table failed: %s" % str( e )
-	    log.debug( "Dropping column published from history table failed: %s" % str( e ) )
+    # Drop published column from history table.
+    History_table = Table( "history", metadata, autoload=True )
+    try:
+        History_table.c.published.drop()
+    except Exception, e:
+        print "Dropping column published from history table failed: %s" % str( e )
+        log.debug( "Dropping column published from history table failed: %s" % str( e ) )
     
-	# Drop published column from stored_workflow table.
-	StoredWorkflow_table = Table( "stored_workflow", metadata, autoload=True )
-	try:
-	    StoredWorkflow_table.c.published.drop()
-	except Exception, e:
-	    print "Dropping column published from stored_workflow table failed: %s" % str( e )
-	    log.debug( "Dropping column published from stored_workflow table failed: %s" % str( e ) )
-	
+    # Drop published column from stored_workflow table.
+    StoredWorkflow_table = Table( "stored_workflow", metadata, autoload=True )
+    try:
+        StoredWorkflow_table.c.published.drop()
+    except Exception, e:
+        print "Dropping column published from stored_workflow table failed: %s" % str( e )
+        log.debug( "Dropping column published from stored_workflow table failed: %s" % str( e ) )
+    
     # Drop importable column from page table.
-	Page_table = Table( "page", metadata, autoload=True )
-	try:
-		Page_table.c.importable.drop()
-	except Exception, e:
-		print "Dropping column importable from page table failed: %s" % str( e )
-		log.debug( "Dropping column importable from page table failed: %s" % str( e ) )
+    Page_table = Table( "page", metadata, autoload=True )
+    try:
+        Page_table.c.importable.drop()
+    except Exception, e:
+        print "Dropping column importable from page table failed: %s" % str( e )
+        log.debug( "Dropping column importable from page table failed: %s" % str( e ) )

lib/galaxy/model/migrate/versions/0043_visualization_sharing_tagging_annotating.py

         # Add column.
         deleted_column.create( Visualiation_table )
         assert deleted_column is Visualiation_table.c.deleted
-	    
+        
         # Fill column with default value.
         cmd = "UPDATE visualization SET deleted = %s" % default_false
         db_session.execute( cmd )
     except Exception, e:
         print "Adding index 'ix_visualization_deleted' failed: %s" % str( e )
         log.debug( "Adding index 'ix_visualization_deleted' failed: %s" % str( e ) )
-	    
+        
     try:
         # Add column.
         importable_column.create( Visualiation_table )
     except Exception, e:
         print "Adding index 'ix_visualization_importable' failed: %s" % str( e )
         log.debug( "Adding index 'ix_visualization_importable' failed: %s" % str( e ) )
-	    
+        
     try:
-	    slug_column.create( Visualiation_table )
-	    assert slug_column is Visualiation_table.c.slug
+        slug_column.create( Visualiation_table )
+        assert slug_column is Visualiation_table.c.slug
     except Exception, e:
         print "Adding slug column to visualization table failed: %s" % str( e )
         log.debug( "Adding slug column to visualization table failed: %s" % str( e ) )
     except Exception, e:
         print "Adding index 'ix_visualization_slug' failed: %s" % str( e )
         log.debug( "Adding index 'ix_visualization_slug' failed: %s" % str( e ) )
-	    
+        
     try:
         # Add column.
         published_column.create( Visualiation_table )
 
     # Drop columns for supporting sharing from visualization table.
     try:
-	    Visualiation_table.c.deleted.drop()
+        Visualiation_table.c.deleted.drop()
     except Exception, e:
         print "Dropping deleted column from visualization table failed: %s" % str( e )
         log.debug( "Dropping deleted column from visualization table failed: %s" % str( e ) )
 
     try:
-	    Visualiation_table.c.importable.drop()
+        Visualiation_table.c.importable.drop()
     except Exception, e:
         print "Dropping importable column from visualization table failed: %s" % str( e )
         log.debug( "Dropping importable column from visualization table failed: %s" % str( e ) )
 
     try:
-	    Visualiation_table.c.slug.drop()
+        Visualiation_table.c.slug.drop()
     except Exception, e:
         print "Dropping slug column from visualization table failed: %s" % str( e )
         log.debug( "Dropping slug column from visualization table failed: %s" % str( e ) )
 
     try:
-	    Visualiation_table.c.published.drop()
+        Visualiation_table.c.published.drop()
     except Exception, e:
         print "Dropping published column from visualization table failed: %s" % str( e )
         log.debug( "Dropping published column from visualization table failed: %s" % str( e ) )

lib/galaxy/model/migrate/versions/0051_imported_col_for_jobs_table.py

 db_session = scoped_session( sessionmaker( bind=migrate_engine, autoflush=False, autocommit=True ) )
 
 def upgrade():
-	print __doc__
-	metadata.reflect()
+    print __doc__
+    metadata.reflect()
 
-	# Create and initialize imported column in job table.
-	Jobs_table = Table( "job", metadata, autoload=True )
-	c = Column( "imported", Boolean, default=False, index=True )
-	try:
-	    # Create
-	    c.create( Jobs_table )
-	    assert c is Jobs_table.c.imported
-	    
-	    # Initialize.
-	    if migrate_engine.name == 'mysql' or migrate_engine.name == 'sqlite': 
-	        default_false = "0"
-	    elif migrate_engine.name == 'postgres':
-	        default_false = "false"
-	    db_session.execute( "UPDATE job SET imported=%s" % default_false )
-	    
-	except Exception, e:
-	    print "Adding imported column to job table failed: %s" % str( e )
-	    log.debug( "Adding imported column to job table failed: %s" % str( e ) )
+    # Create and initialize imported column in job table.
+    Jobs_table = Table( "job", metadata, autoload=True )
+    c = Column( "imported", Boolean, default=False, index=True )
+    try:
+        # Create
+        c.create( Jobs_table )
+        assert c is Jobs_table.c.imported
+        
+        # Initialize.
+        if migrate_engine.name == 'mysql' or migrate_engine.name == 'sqlite': 
+            default_false = "0"
+        elif migrate_engine.name == 'postgres':
+            default_false = "false"
+        db_session.execute( "UPDATE job SET imported=%s" % default_false )
+        
+    except Exception, e:
+        print "Adding imported column to job table failed: %s" % str( e )
+        log.debug( "Adding imported column to job table failed: %s" % str( e ) )
     
 def downgrade():
-	metadata.reflect()
+    metadata.reflect()
 
-	# Drop imported column from job table.
-	Jobs_table = Table( "job", metadata, autoload=True )
-	try:
-	    Jobs_table.c.imported.drop()
-	except Exception, e:
-	    print "Dropping column imported from job table failed: %s" % str( e )
-	    log.debug( "Dropping column imported from job table failed: %s" % str( e ) )
+    # Drop imported column from job table.
+    Jobs_table = Table( "job", metadata, autoload=True )
+    try:
+        Jobs_table.c.imported.drop()
+    except Exception, e:
+        print "Dropping column imported from job table failed: %s" % str( e )
+        log.debug( "Dropping column imported from job table failed: %s" % str( e ) )

lib/galaxy/security/__init__.py

 
     def get_actions_for_items( self, trans, action, permission_items ):
         # TODO: Rename this; it's a replacement for get_item_actions, but it
-        # doesn't represent what it's really confusing.
+        # doesn't represent what it's really doing, which is confusing.
         # TODO: Make this work for other classes besides lib_datasets.
         # That should be as easy as checking the type and writing a query for each;
         # we're avoiding using the SQLAlchemy backrefs because they can cause lots
                                  % ( item.library_dataset_id, len( base_result ), 
                                      len( new_result ) ) )
                 log.debug( "get_actions_for_items: Test end" )
-            except Exception as e:
+            except Exception, e:
                 log.debug( "Exception in test code: %s" % e )
 
         return ret_permissions
             return True, ''
         action = self.permitted_actions.DATASET_ACCESS
 
-        # SM: TODO: This is for timing debug. Delete it later.
-        from datetime import datetime, timedelta
-        query_start = datetime.now()
         lddas = self.sa_session.query( self.model.LibraryDatasetDatasetAssociation ) \
                                .join( "library_dataset" ) \
                                .filter( self.model.LibraryDataset.folder == folder ) \
                                .join( "dataset" ) \
                                .options( eagerload_all( "dataset.actions" ) ) \
                                .all()
-        query_end = datetime.now()
-        query_delta = query_end - query_start
-        #log.debug( "Check folder contents: join query time: %d.%.6d sec" % 
-        #         ( query_delta.seconds, query_delta.microseconds ) )
 
         for ldda in lddas:
             ldda_access_permissions = self.get_item_actions( action, ldda.dataset )

lib/galaxy/tool_shed/install_manager.py

                         break
             if found:
                 break      
-        full_path = os.path.abspath( os.path.join( root, name ) )
+        full_path = str( os.path.abspath( os.path.join( root, name ) ) )
         tool = self.toolbox.load_tool( full_path )
         return generate_tool_guid( repository_clone_url, tool )
     def get_proprietary_tool_panel_elems( self, latest_tool_migration_script_number ):

lib/galaxy/tool_shed/migrate/versions/0006_tools.py

+"""
+The following tools have been eliminated from the distribution:
+FASTQ to BAM, SAM to FASTQ, BAM Index Statistics, Estimate Library
+Complexity, Insertion size metrics for PAIRED data, SAM/BAM Hybrid
+Selection Metrics, bam/sam Cleaning, Add or Replace Groups, Replace
+SAM/BAM Header, Paired Read Mate Fixer, Mark Duplicate reads,
+SAM/BAM Alignment Summary Metrics, SAM/BAM GC Bias Metrics, and
+Reorder SAM/BAM.  The tools are now available in the repository
+named picard from the main Galaxy tool shed at
+http://toolshed.g2.bx.psu.edu, and will be installed into your
+local Galaxy instance at the location discussed above by running
+the following command.
+"""
+
+import sys
+
+def upgrade():
+    print __doc__
+def downgrade():
+    pass

lib/galaxy/tools/__init__.py

         self.workflows_by_id = {}
         # In-memory dictionary that defines the layout of the tool panel.
         self.tool_panel = odict()
+        self.index = 0
         # File that contains the XML section and tool tags from all tool panel config files integrated into a
         # single file that defines the tool panel layout.  This file can be changed by the Galaxy administrator
         # (in a way similar to the single tool_conf.xml file in the past) to alter the layout of the tool panel.
         self.tool_root_dir = tool_root_dir
         self.app = app
         self.init_dependency_manager()
-        for config_filename in listify( config_filenames ):
+        config_filenames = listify( config_filenames )
+        for config_filename in config_filenames:
+            if os.path.isdir( config_filename ):
+                directory_contents = sorted( os.listdir( config_filename ) )
+                directory_config_files = [ config_file for config_file in directory_contents if config_file.endswith( ".xml" ) ]
+                config_filenames.remove( config_filename )
+                config_filenames.extend( directory_config_files )
+        for config_filename in config_filenames:
             try:
                 self.init_tools( config_filename )
             except:
             tool_path = self.tool_root_dir
         # Only load the panel_dict under certain conditions.
         load_panel_dict = not self.integrated_tool_panel_config_has_contents
-        for index, elem in enumerate( root ):
+        for _, elem in enumerate( root ):
+            index = self.index
+            self.index += 1
             if parsing_shed_tool_conf:
                 config_elems.append( elem )
             if elem.tag == 'tool':
                 # isn't bogus. If we have two infinite values, then 
                 # the start must be -inf and the end must be +inf. 
                 # So at least warn about this situation:
-                if ( math.isinf( exit_code.range_start ) and 
-                     math.isinf( exit_code.range_end ) ):
+                if ( util.isinf( exit_code.range_start ) and 
+                     util.isinf( exit_code.range_end ) ):
                     log.warning( "Tool exit_code range %s will match on "
                                + "all exit codes" % code_range )
                 self.stdio_exit_codes.append( exit_code )
                     values = input_values[ input.name ]
                     current = values["__current_case__"]
                     wrap_values( input.cases[current].inputs, values )
+                elif isinstance( input, DataToolParameter ) and input.multiple:
+                    values = input_values[ input.name ]
+                    input_values[ input.name ] = \
+                        [DatasetFilenameWrapper( value,
+                                                 datatypes_registry = self.app.datatypes_registry,
+                                                 tool = self,
+                                                 name = input.name ) for value in values]
                 elif isinstance( input, DataToolParameter ):
                     ## FIXME: We're populating param_dict with conversions when 
                     ##        wrapping values, this should happen as a separate 
         Find any additional datasets generated by a tool and attach (for 
         cases where number of outputs is not known in advance).
         """
-        primary_datasets = {}
+        new_primary_datasets = {}
+        try:
+            json_file = open( os.path.join( job_working_directory, jobs.TOOL_PROVIDED_JOB_METADATA_FILE ), 'r' )
+            for line in json_file:
+                line = simplejson.loads( line )
+                if line.get( 'type' ) == 'new_primary_dataset':
+                    new_primary_datasets[ os.path.split( line.get( 'filename' ) )[-1] ] = line
+        except Exception, e:
+            log.debug( "Error opening galaxy.json file: %s" % e )
         # Loop through output file names, looking for generated primary 
         # datasets in form of:
         #     'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)'
+        primary_datasets = {}
         for name, outdata in output.items():
             filenames = []
             if 'new_file_path' in self.app.config.collect_outputs_from:
                 primary_data.info = outdata.info
                 primary_data.init_meta( copy_from=outdata )
                 primary_data.dbkey = dbkey
-                primary_data.set_meta()
-                primary_data.set_peek()
                 # Associate new dataset with job
                 job = None
                 for assoc in outdata.creating_job_associations:
                     self.sa_session.add( assoc )
                     self.sa_session.flush()
                 primary_data.state = outdata.state
+                #add tool/metadata provided information
+                new_primary_datasets_attributes = new_primary_datasets.get( os.path.split( filename )[-1] )
+                if new_primary_datasets_attributes:
+                    dataset_att_by_name = dict( ext='extension' )
+                    for att_set in [ 'name', 'info', 'ext', 'dbkey' ]:
+                        dataset_att_name = dataset_att_by_name.get( att_set, att_set )
+                        setattr( primary_data, dataset_att_name, new_primary_datasets_attributes.get( att_set, getattr( primary_data, dataset_att_name ) ) )           
+                primary_data.set_meta()
+                primary_data.set_peek()
                 self.sa_session.add( primary_data )
                 self.sa_session.flush()
                 outdata.history.add_dataset( primary_data )
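
collect_primary_datasets() above now also reads the tool-provided metadata file, one JSON object per line, keeping the entries whose type is new_primary_dataset keyed by the basename of their filename. A sketch of just that parsing step, with invented sample lines (the real code uses the simplejson egg; the stdlib json module behaves the same here):

    import json
    import os

    sample_lines = [
        '{"type": "new_primary_dataset", "filename": "/tmp/job1/extra_output.txt", "name": "extra", "ext": "txt"}',
        '{"type": "dataset", "dataset_id": 42, "stdout": "done"}',
    ]

    new_primary_datasets = {}
    for raw_line in sample_lines:
        entry = json.loads(raw_line)
        if entry.get('type') == 'new_primary_dataset':
            new_primary_datasets[os.path.split(entry.get('filename'))[-1]] = entry

    assert list(new_primary_datasets) == ['extra_output.txt']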

lib/galaxy/tools/actions/index_genome.py

         job.user_id = incoming['user']
         start_job_state = job.state # should be job.states.NEW
         job.state = job.states.WAITING # we need to set job state to something other than NEW,
-        							   # or else when tracking jobs in db it will be picked up
-        							   # before we have added input / output parameters
+                                       # or else when tracking jobs in db it will be picked up
+                                       # before we have added input / output parameters
         trans.sa_session.add( job )
 
         # Create dataset that will serve as archive.

lib/galaxy/tools/genome_index/__init__.py

 from __future__ import with_statement
 
-import os, shutil, logging, tempfile, json, tarfile
+import os, shutil, logging, tempfile, tarfile
 
 from galaxy import model, util
 from galaxy.web.framework.helpers import to_unicode
 from galaxy.web.base.controller import UsesHistoryMixin
 from galaxy.tools.data import ToolDataTableManager
 
+import pkg_resources
+pkg_resources.require("simplejson")
+import simplejson
+
 log = logging.getLogger(__name__)
 
 def load_genome_index_tools( toolbox ):
             fp = open( gitd.dataset.get_file_name(), 'r' )
             deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first()
             try:
-                logloc = json.load( fp )
+                logloc = simplejson.load( fp )
             except ValueError:
                 deferred.state = app.model.DeferredJob.states.ERROR
                 sa_session.add( deferred )
         import hashlib
         md5 = hashlib.md5()
         with open( filename, 'rb' ) as f: 
-            for chunk in iter( lambda: f.read( 8192 ), b'' ): 
+            for chunk in iter( lambda: f.read( 8192 ), '' ):
                  md5.update( chunk )
         return md5.digest()
 

lib/galaxy/tools/genome_index/index_genome.py

 """
 from __future__ import with_statement
 
-import optparse, sys, os, tempfile, time, subprocess, shlex, json, tarfile, shutil
+import optparse, sys, os, tempfile, time, subprocess, shlex, tarfile, shutil
+
+import pkg_resources
+pkg_resources.require("simplejson")
+import simplejson
 
 class ManagedIndexer():
     def __init__( self, output_file, infile, workingdir, rsync_url, tooldata ):
         return result
     
     def _flush_files( self ):
-        json.dump( self.locations, self.outfile )
+        simplejson.dump( self.locations, self.outfile )
         self.outfile.close()
         self.logfile.close()
     

lib/galaxy/tools/parameters/basic.py

         for col in column_list:
             if col != 'None':
                 if type(col) == type(()) and len(col) == 2: # fiddled
-                    options.append((col[1],'c' + col[0],False))
+                    options.append((col[1],col[0],False))
                 else:
                     options.append( ( 'c' + col, col, False ) )
         return options
             return value
         elif isinstance( value, DummyDataset ):
             return None
+        elif isinstance( value, list) and len(value) > 0 and isinstance( value[0], DummyDataset):
+            return None
+        elif isinstance( value, list ):
+            return ",".join( [ val if isinstance( val, str ) else str(val.id) for val in value] )
         return value.id
 
     def to_python( self, value, app ):
         # indicates that the dataset is optional, while '' indicates that it is not.
         if value is None or value == '' or value == 'None':
             return value
+        if isinstance(value, str) and value.find(",") > -1:
+            values = value.split(",")
+            # TODO: Optimize. -John
+            return [app.model.context.query( app.model.HistoryDatasetAssociation ).get( int( val ) ) for val in values]
         return app.model.context.query( app.model.HistoryDatasetAssociation ).get( int( value ) )
 
     def to_param_dict_string( self, value, other_values={} ):
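
The changes above let a multiple-dataset parameter serialise a list of datasets as a comma-separated string of ids and turn it back into objects later. The round trip in miniature, where FakeHDA stands in for HistoryDatasetAssociation and a plain dict replaces the database query:

    class FakeHDA(object):
        def __init__(self, id):
            self.id = id

    def to_string(value):
        if isinstance(value, list):
            return ','.join(str(v.id) for v in value)
        return str(value.id)

    def from_string(value, lookup):
        if ',' in value:
            return [lookup[int(v)] for v in value.split(',')]
        return lookup[int(value)]

    lookup = dict((i, FakeHDA(i)) for i in (3, 7))
    assert to_string([lookup[3], lookup[7]]) == '3,7'
    assert from_string('3,7', lookup) == [lookup[3], lookup[7]]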

lib/galaxy/tools/parameters/dynamic_options.py

     
     def parse_file_fields( self, reader ):
         rval = []
+        field_count = None
         for line in reader:
             if line.startswith( '#' ) or ( self.line_startswith and not line.startswith( self.line_startswith ) ):
                 continue
             if line:
                 fields = line.split( self.separator )
                 if self.largest_index < len( fields ):
+                    if not field_count:
+                        field_count = len( fields )
+                    elif field_count != len( fields ):
+                        try:
+                            name = reader.name
+                        except AttributeError:
+                            name = "a configuration file"
+                        # Perhaps this should be an error, but even a warning is useful.
+                        log.warn( "Inconsistent number of fields (%i vs %i) in %s using separator %r, check line: %r" % \
+                                  ( field_count, len( fields ), name, self.separator, line ) )
                     rval.append( fields )
         return rval
     

lib/galaxy/tools/search/__init__.py

 from galaxy.eggs import require
 from galaxy.web.framework.helpers import to_unicode
-# Whoosh is compatible with Python 2.5+ Try to import Whoosh and set flag to indicate whether tool search is enabled.
-try:
-    require( "Whoosh" )
+require( "Whoosh" )
 
-    from whoosh.filedb.filestore import RamStorage
-    from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
-    from whoosh.index import Index
-    from whoosh.scoring import BM25F
-    from whoosh.qparser import MultifieldParser
-    tool_search_enabled = True
-    schema = Schema( id = STORED, title = TEXT, description = TEXT, help = TEXT )
-except ImportError, e:
-    tool_search_enabled = False
-    schema = None
+from whoosh.filedb.filestore import RamStorage
+from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
+from whoosh.index import Index
+from whoosh.scoring import BM25F
+from whoosh.qparser import MultifieldParser
+schema = Schema( id = STORED, title = TEXT, description = TEXT, help = TEXT )
 
 class ToolBoxSearch( object ):
     """
         Create a searcher for `toolbox`. 
         """
         self.toolbox = toolbox
-        self.enabled = tool_search_enabled
-        if tool_search_enabled:
-            self.build_index()
+        self.build_index()
         
     def build_index( self ):
         self.storage = RamStorage()
         writer.commit()
         
     def search( self, query, return_attribute='id' ):
-        if not tool_search_enabled:
-            return []
         # Change field boosts for searcher to place more weight on title, description than help.
         searcher = self.index.searcher( \
                         weighting=BM25F( field_B={ 'title_B' : 3, 'description_B' : 2, 'help_B' : 1 } \
                                     ) )
         # Set query to search title, description, and help.
         parser = MultifieldParser( [ 'title', 'description', 'help' ], schema = schema )
-        results = searcher.search( parser.parse( query ), minscore=2.0 )
+        results = searcher.search( parser.parse( query ) )
         return [ result[ return_attribute ] for result in results ]

lib/galaxy/util/__init__.py

 except ImportError:
     from md5 import new as md5
 
+try:
+    from math import isinf
+except ImportError:
+    INF = float( 'inf' )
+    NEG_INF = -INF
+    ISINF_LIST = [ INF, NEG_INF ]
+    isinf = lambda x: x in ISINF_LIST
+
 from galaxy import eggs
 import pkg_resources
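
The fallback above covers interpreters where math.isinf is unavailable (it appeared in Python 2.6). A quick sanity check that the membership-test substitute agrees with the real function on the interesting values, NaN included (NaN compares unequal to everything, so the in test correctly returns False):

    import math

    INF = float('inf')
    NEG_INF = -INF
    isinf_fallback = lambda x: x in [INF, NEG_INF]

    for value in [INF, NEG_INF, 0.0, 1e308, -1e308, float('nan')]:
        assert isinf_fallback(value) == math.isinf(value)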
 

lib/galaxy/util/shed_util.py

     app.toolbox.shed_tool_confs[ index ] = shed_tool_conf_dict
     # Write the current in-memory version of the integrated_tool_panel.xml file to disk.
     app.toolbox.write_integrated_tool_panel_config_file()
-    if app.toolbox_search.enabled:
-        # If search support for tools is enabled, index the new installed tools.
-        app.toolbox_search = ToolBoxSearch( app.toolbox )
+    app.toolbox_search = ToolBoxSearch( app.toolbox )
 def alter_config_and_load_prorietary_datatypes( app, datatypes_config, relative_install_dir, deactivate=False, override=True ):
     """
     Parse a proprietary datatypes config (a datatypes_conf.xml file included in an installed tool shed repository) and
             can_generate_dependency_metadata = False
             tool_dependency_name = elem.get( 'name', None )
             if tool_dependency_name and tool_dependency_version:
-                for tool_dict in metadata_dict[ 'tools' ]:
+                for tool_dict in metadata_dict.get( 'tools', [] ):
                     requirements = tool_dict.get( 'requirements', [] )
                     for requirement_dict in requirements:
                         req_name = requirement_dict.get( 'name', None )
                 # <environment_variable name="R_SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
                 env_var_name = env_var_elem.get( 'name', None )
                 if env_var_name:
-                    for tool_dict in metadata_dict[ 'tools' ]:
+                    for tool_dict in metadata_dict.get( 'tools', [] ):
                         requirements = tool_dict.get( 'requirements', [] )
                         for requirement_dict in requirements:
                             # {"name": "R_SCRIPT_PATH", "type": "set_environment", "version": null}
                     metadata_dict[ 'readme' ] = relative_path_to_readme
                 # See if we have a tool config.
                 elif name not in NOT_TOOL_CONFIGS and name.endswith( '.xml' ):
-                    full_path = os.path.abspath( os.path.join( root, name ) )
+                    full_path = str( os.path.abspath( os.path.join( root, name ) ) )
                     if os.path.getsize( full_path ) > 0:
                         if not ( check_binary( full_path ) or check_image( full_path ) or check_gzip( full_path )[ 0 ]
                                  or check_bz2( full_path )[ 0 ] or check_zip( full_path ) ):
                                 element_tree_root = element_tree.getroot()
                                 is_tool = element_tree_root.tag == 'tool'
                             except Exception, e:
-                                print "Error parsing %s", full_path, ", exception: ", str( e )
+                                log.debug( "Error parsing %s, exception: %s" % ( full_path, str( e ) ) )
                                 is_tool = False
                             if is_tool:
                                 tool, valid, error_message = load_tool_from_config( app, full_path )
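The detection logic above first rules out binary, image and compressed files, then parses the candidate XML and treats it as a tool only when the root element is <tool>. Stripped of Galaxy's helpers (check_binary, check_image, load_tool_from_config), the core test is roughly the following sketch (hypothetical function, standard library only):

    import os
    from xml.etree import ElementTree

    def looks_like_tool_config( path ):
        """Rough check: a non-empty .xml file whose root element is <tool>."""
        if not path.endswith( '.xml' ) or os.path.getsize( path ) == 0:
            return False
        try:
            root = ElementTree.parse( path ).getroot()
        except Exception:
            # Unparseable XML is logged and skipped in the code above.
            return False
        return root.tag == 'tool'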
             # Handle tool dependency installation via other means here (future).
         if tool_dependencies_dict:
             metadata_dict[ 'tool_dependencies' ] = tool_dependencies_dict
+    else:
+        log.debug( "Name, version and type from the <requirement> tag do not match the information in the tool_dependencies.xml file. Tool dependencies will be ignored." )
     if tool_dependencies_dict:
         if original_tool_dependencies_dict:
             # We're generating metadata on an update pulled to a tool shed repository installed into a Galaxy instance, so handle changes to
     # Update the config_elems of the in-memory shed_tool_conf_dict.
     shed_tool_conf_dict[ 'config_elems' ] = config_elems
     trans.app.toolbox.shed_tool_confs[ index ] = shed_tool_conf_dict
-    if trans.app.toolbox_search.enabled:
-        # If search support for tools is enabled, index tools.
-        trans.app.toolbox_search = ToolBoxSearch( trans.app.toolbox )
+    trans.app.toolbox_search = ToolBoxSearch( trans.app.toolbox )
     if uninstall:
         # Write the current in-memory version of the integrated_tool_panel.xml file to disk.
         trans.app.toolbox.write_integrated_tool_panel_config_file()

lib/galaxy/visualization/data_providers/basic.py

         # set up the response, column lists
         response = {}
         response[ 'data' ] = data = [ [] for column in columns ]
-        response[ 'meta' ] = meta = [ { 'min': None, 'max': None } for column in columns ]
+        response[ 'meta' ] = meta = [{
+            'min'   : None,
+            'max'   : None,
+            'count' : 0,
+            'sum'   : 0
+        } for column in columns ]
         
         column_types = [ self.original_dataset.metadata.column_types[ column ] for column in columns ]
         
                 except: return None
             return val
         
+        returning_data = False
         f = open( self.original_dataset.file_name )
         #TODO: add f.seek if given fptr in kwargs
         for count, line in enumerate( f ):
                 continue
             if ( count - start_val ) >= max_vals:
                 break
+
+            returning_data = True
             
             fields = line.split()
             fields_len = len( fields )
             #NOTE: this will return None/null for aberrant column values (including bad indices)
             for index, column in enumerate( columns ):
                 column_val = None
+                column_type = column_types[ index ]
                 if column < fields_len:
-                    column_val = cast_val( fields[ column ], column_types[ index ] )
+                    column_val = cast_val( fields[ column ], column_type )
                     if column_val != None:
-                        if( meta[ index ][ 'min' ] == None
-                        or  column_val < meta[ index ][ 'min' ] ):
-                            meta[ index ][ 'min' ] = column_val
-                        if( meta[ index ][ 'max' ] == None
-                        or  column_val > meta[ index ][ 'max' ] ):
-                            meta[ index ][ 'max' ] = column_val
+                        
+                        # if numeric, maintain min, max, sum
+                        if( column_type == 'float' or column_type == 'int' ):
+                            if( ( meta[ index ][ 'min' ] == None ) or ( column_val < meta[ index ][ 'min' ] ) ):
+                                meta[ index ][ 'min' ] = column_val
+                                
+                            if( ( meta[ index ][ 'max' ] == None ) or ( column_val > meta[ index ][ 'max' ] ) ):
+                                meta[ index ][ 'max' ] = column_val
+                            
+                            meta[ index ][ 'sum' ] += column_val
+                            
+                # maintain a count - for other stats
+                meta[ index ][ 'count' ] += 1
                 data[ index ].append( column_val )
             
         response[ 'endpoint' ] = dict( last_line=( count - 1 ), file_ptr=f.tell() )
         f.close()
 
+        if not returning_data: return None
+        
+        for index, meta in enumerate( response[ 'meta' ] ):
+            column_type = column_types[ index ]
+            count = meta[ 'count' ]
+            
+            if( ( column_type == 'float' or column_type == 'int' )
+            and   count ):
+                meta[ 'mean' ] = float( meta[ 'sum' ] ) / count
+                
+                sorted_data = sorted( response[ 'data' ][ index ] )
+                # median: for an even count average the two middle values,
+                # for an odd count take the middle value
+                middle_index = count // 2
+                if count % 2 == 0:
+                    meta[ 'median' ] = sum( sorted_data[ ( middle_index - 1 ) : ( middle_index + 1 ) ] ) / 2.0
+                else:
+                    meta[ 'median' ] = sorted_data[ middle_index ]
+
+        # NOTE: metadata_data_lines is not a reliable source for the total line count; an explicit EOF indicator is needed.
         return response
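The provider now accumulates count and sum alongside min and max while streaming the file, then derives mean and median per numeric column once the loop finishes. The same bookkeeping in isolation, as a hypothetical helper that considers only parsed numeric values:

    def column_stats( values ):
        """Compute min/max/sum/count, mean and median for a list of numbers."""
        stats = { 'min': None, 'max': None, 'sum': 0, 'count': 0 }
        for val in values:
            if val is None:
                continue
            if stats[ 'min' ] is None or val < stats[ 'min' ]:
                stats[ 'min' ] = val
            if stats[ 'max' ] is None or val > stats[ 'max' ]:
                stats[ 'max' ] = val
            stats[ 'sum' ] += val
            stats[ 'count' ] += 1
        if stats[ 'count' ]:
            stats[ 'mean' ] = float( stats[ 'sum' ] ) / stats[ 'count' ]
            ordered = sorted( v for v in values if v is not None )
            middle = stats[ 'count' ] // 2
            if stats[ 'count' ] % 2 == 0:
                stats[ 'median' ] = ( ordered[ middle - 1 ] + ordered[ middle ] ) / 2.0
            else:
                stats[ 'median' ] = ordered[ middle ]
        return stats

    # e.g. column_stats( [ 3, 1, None, 2, 4 ] )
    # -> { 'min': 1, 'max': 4, 'sum': 10, 'count': 4, 'mean': 2.5, 'median': 2.5 }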

lib/galaxy/visualization/data_providers/genome.py

             chrom = chrom_info[ 'chrom' ]
             chrom_len = chrom_info[ 'len' ]
             chrom_data = self.get_data( chrom, 0, chrom_len, **kwargs )
-            if chrom_data:
-                chrom_data[ 'region' ] = "%s:%i-%i" % ( chrom, 0, chrom_len )
-                genome_data.append( chrom_data )
+            # FIXME: data providers probably should never return None.
+            # Some data providers return None when there's no data, so
+            # create a dummy dict if necessary.
+            if not chrom_data:
+                chrom_data = {
+                    'data': None
+                }
+            chrom_data[ 'region' ] = "%s:%i-%i" % ( chrom, 0, chrom_len )
+            genome_data.append( chrom_data )
 
         return {
             'data': genome_data,
         
         tabix = ctabix.Tabixfile(bgzip_fname, index_filename=self.converted_dataset.file_name)
         
-        # If chrom not in data, try alternative.
-        if chrom not in tabix.contigs:
+        # Get iterator using either naming scheme.
+        iterator = iter( [] )
+        if chrom in tabix.contigs:
+            iterator = tabix.fetch(reference=chrom, start=start, end=end)
+        else:
+            # Try alternative naming scheme.
             chrom = _convert_between_ucsc_and_ensemble_naming( chrom )
-        
-        return tabix.fetch(reference=chrom, start=start, end=end)
+            if chrom in tabix.contigs:
+                iterator = tabix.fetch(reference=chrom, start=start, end=end)
+
+        return iterator
+
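The rewritten get_iterator falls back to an empty iterator instead of failing when neither chromosome spelling is present in the index. The conversion it leans on (Galaxy's _convert_between_ucsc_and_ensemble_naming) essentially toggles the UCSC 'chr' prefix; a rough, hypothetical stand-in for that idea (the real helper may differ in detail):

    def _toggle_chr_prefix( chrom ):
        # UCSC names carry a 'chr' prefix ('chr1'); Ensembl names do not ('1').
        if chrom.startswith( 'chr' ):
            return chrom[ 3: ]
        return 'chr' + chrom

    # Usage mirroring the fallback above (tabix as in the surrounding code):
    # for name in ( chrom, _toggle_chr_prefix( chrom ) ):
    #     if name in tabix.contigs:
    #         return tabix.fetch( reference=name, start=start, end=end )
    # return iter( [] )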
                 
     def write_data_to_file( self, regions, filename ):
         out = open( filename, "w" )
         """
         Provides
         """
-        
+
         rval = []
         message = None
         for count, line in enumerate( iterator ):
             feature = line.split()
             length = len( feature )
             
-            s1 = int( feature[1] ), 
-            e1 = int( feature[2] ),
-            c = feature[3],
-            s2 = int( feature[4] ),
-            e2 = int( feature[5] ),
+            s1 = int( feature[1] )
+            e1 = int( feature[2] )
+            c = feature[3]
+            s2 = int( feature[4] )
+            e2 = int( feature[5] )
             v = float( feature[6] )
 
             # Feature initialization.
         return { 'data': rval, 'message': message }
 
     def get_default_max_vals( self ):
-        return 50000;
+        return 100000
     
 class ChromatinInteractionsTabixDataProvider( TabixDataProvider, ChromatinInteractionsDataProvider ):
-    def get_iterator( self, chrom, start, end ):
+    def get_iterator( self, chrom, start=0, end=sys.maxint ):
         """
         """
         # Widen the fetched window so that interactions beginning before the requested region are still included.
-        start = max( 0, int( start) - 1000000 )
+        span = int( end ) - int( start )
+        filter_start = max( 0, int( start ) - span - span/2 )
         def filter( iter ):
             for line in iter:
                 feature = line.split()
-                s1 = int( feature[1] ), 
-                e1 = int( feature[2] ),
+                s1 = int( feature[1] ) 
+                e1 = int( feature[2] )
                 c = feature[3]
-                s2 = int( feature[4] ),
-                e2 = int( feature[5] ),
-                if ( ( c == chrom ) and ( s1 < end and e1 > start ) and ( s2 < end and e2 > start ) ):
+                s2 = int( feature[4] )
+                e2 = int( feature[5] )
+                #if ( s1 <= filter_end and e1 >= filter_start ) and ( s2 <= filter_end and e2 >= filter_start ) and ( max( s1, s2 ) - min( e1, e2 ) <= span * 2 ):
+                if ( ( s1 + s2 ) / 2 <= end ) and ( ( e1 + e2 ) / 2 >= start ) and ( c == chrom ):
                     yield line
-        return filter( TabixDataProvider.get_iterator( self, chrom, start, end ) )
+        return filter( TabixDataProvider.get_iterator( self, chrom, filter_start, end ) )
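The provider now widens the tabix fetch window backwards by one and a half times the requested span (filter_start) so that interactions anchored upstream are still fetched, then keeps only lines whose averaged anchor interval overlaps the requested window and whose second anchor sits on the requested chromosome. A condensed, hypothetical restatement of that predicate:

    def keep_interaction( fields, chrom, start, end ):
        """Mirror of the filter above for one split interaction line."""
        s1, e1 = int( fields[1] ), int( fields[2] )
        c = fields[3]
        s2, e2 = int( fields[4] ), int( fields[5] )
        # Average the two anchors' starts and ends, then test for overlap.
        return ( c == chrom ) and ( ( s1 + s2 ) / 2 <= end ) and ( ( e1 + e2 ) / 2 >= start )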
                
 #        
 # -- Helper methods. --

lib/galaxy/visualization/data_providers/phyloviz/baseparser.py

-import json
+import pkg_resources
+pkg_resources.require("simplejson")
+import simplejson
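The parser now obtains simplejson through Galaxy's egg machinery rather than importing the stdlib json module, which does not exist on Python 2.5. Outside of the egg system, the usual portable pattern is a guarded import, for example:

    # Common fallback pattern (Galaxy itself uses its egg/pkg_resources setup here):
    try:
        import simplejson as json
    except ImportError:
        import json  # stdlib json, available on Python 2.6+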
 
 class Node(object):
     """Node class of PhyloTree, which represents a clade in a phylogenetic tree"""
 
     def toJson(self, jsonDict):
         """Convenience method to get a json string from a python json dict"""
-        return json.dumps(jsonDict)
+        return simplejson.dumps(jsonDict)
 
     def _writeJsonToFile(self, filepath, json):
         """Writes the file out to the system"""

lib/galaxy/visualization/data_providers/phyloviz/newickparser.py

+from __future__ import with_statement
 from baseparser import Base_Parser, PhyloTree
 import re
 

lib/galaxy/visualization/data_providers/phyloviz/nexusparser.py

+from __future__ import with_statement
 from newickparser import Newick_Parser
 import re
 

lib/galaxy/web/base/controller.py

 from paste.httpexceptions import *
 from galaxy.exceptions import *
 from galaxy.model import NoConverterException, ConverterDependencyException
+from galaxy.datatypes.interval import ChromatinInteractions
 
 from Cheetah.Template import Template