Commits

Brad Chapman committed 88b17d3 Merge

Update from galaxy-central

Files changed (342)

 
 # Database stuff
 database/beaker_sessions
+database/community_files
 database/compiled_templates
 database/files
 database/pbs
 Not all dependencies are included for the tools provided in the sample
 tool_conf.xml.  A full list of external dependencies is available at:
 
-http://bitbucket.org/galaxy/galaxy-central/wiki/ToolDependencies
+http://wiki.g2.bx.psu.edu/Admin/Tools/Tool%20Dependencies

community_wsgi.ini.sample

 # path to sendmail
 sendmail_path = /usr/sbin/sendmail
 
+# For use by email messages sent from the tool shed
+#smtp_server = smtp.your_tool_shed_server
+#email_from = your_tool_shed_email@server
+
+# The URL linked by the "Support" link in the "Help" menu.
+#support_url = http://wiki.g2.bx.psu.edu/Support
+
 # Write thread status periodically to 'heartbeat.log' (careful, uses disk space rapidly!)
 ## use_heartbeat = True
 

contrib/collect_sge_job_timings.sh

+#!/bin/sh
+
+##
+## CHANGE ME to galaxy's database name
+## 
+DATABASE=galaxyprod
+
+##
+## AWK script to extract the relevant fields of SGE's qacct report
+##   and write them all in one line.
+AWKSCRIPT='
+$1=="jobnumber" { job_number = $2 }
+$1=="qsub_time" { qsub_time = $2 }
+$1=="start_time" { start_time = $2 }
+$1=="end_time" { end_time = $2 
+        print job_number, qsub_time, start_time, end_time
+}
+'
+
+FIFO=$(mktemp -u) || exit 1
+mkfifo "$FIFO" || exit 1
+
+##
+## Write the SGE/QACCT job report into a pipe
+## (later will be loaded into a temporary table)
+qacct -j |
+    egrep "jobnumber|qsub_time|start_time|end_time" |
+    sed 's/  */\t/'  |
+    awk -v FS="\t" -v OFS="\t" "$AWKSCRIPT" |
+    grep -v -- "-/-" > "$FIFO" &
+
+##
+##  The SQL to generate the report
+##
+SQL="
+--
+-- Temporary table which contains the qsub/start/end times, based on SGE's qacct report.
+--
+CREATE TEMPORARY TABLE sge_times (
+  sge_job_id INTEGER PRIMARY KEY,
+  qsub_time TIMESTAMP WITHOUT TIME ZONE,
+  start_time TIMESTAMP WITHOUT TIME ZONE,
+  end_time TIMESTAMP WITHOUT TIME ZONE
+);
+
+COPY sge_times FROM '$FIFO' ;
+
+--
+-- Temporary table which contains a unified view of all galaxy jobs.
+-- For each job:
+--   the user name, total input size (bytes), input file types, DBKEY,
+--   creation time, update time, and SGE job runner parameters.
+-- If a job had more than one input file, some per-file values (e.g. DBKEY)
+-- may be inaccurate, as one is chosen arbitrarily.
+CREATE TEMPORARY TABLE job_input_sizes AS
+SELECT
+ job.job_runner_external_id as job_runner_external_id,
+ min(job.id) as job_id,
+ min(job.create_time) as job_create_time,
+ min(job.update_time) as job_update_time,
+ min(galaxy_user.email) as email,
+ min(job.tool_id) as tool_name,
+-- This hack requires a custom user-defined aggregate function, so it is commented out for now:
+-- textcat_all(hda.extension || ' ') as file_types,
+ sum(dataset.file_size) as total_input_size,
+ count(dataset.file_size) as input_dataset_count,
+ min(job.job_runner_name) as job_runner_name,
+-- This hack tries to extract the DBKEY attribute from the metadata JSON string
+ min(substring(encode(metadata,'escape') from '\"dbkey\": \\\\[\"(.*?)\"\\\\]')) as dbkey
+FROM
+ job,
+ galaxy_user,
+ job_to_input_dataset,
+ history_dataset_association hda,
+ dataset
+WHERE
+ job.user_id = galaxy_user.id
+ AND
+ job.id = job_to_input_dataset.job_id
+ AND
+ hda.id = job_to_input_dataset.dataset_id
+ AND
+ dataset.id = hda.dataset_id
+ AND
+ job.job_runner_external_id is not NULL
+GROUP BY
+ job.job_runner_external_id;
+
+
+--
+-- Join the two temporary tables, create a nice report
+--
+SELECT
+ job_input_sizes.job_runner_external_id as sge_job_id,
+ job_input_sizes.job_id as galaxy_job_id,
+ job_input_sizes.email,
+ job_input_sizes.tool_name,
+-- ## SEE previous query for commented-out filetypes field
+-- job_input_sizes.file_types,
+ job_input_sizes.job_runner_name as sge_params,
+ job_input_sizes.dbkey,
+ job_input_sizes.total_input_size,
+ job_input_sizes.input_dataset_count,
+ job_input_sizes.job_update_time - job_input_sizes.job_create_time as galaxy_total_time,
+ sge_times.end_time - sge_times.qsub_time as sge_total_time,
+ sge_times.start_time - sge_times.qsub_time as sge_waiting_time,
+ sge_times.end_time - sge_times.start_time as sge_running_time,
+ job_input_sizes.job_create_time as galaxy_job_create_time
+-- ## no need to show the exact times, the deltas (above) are informative enough
+-- job_input_sizes.job_update_time as galaxy_job_update_time,
+-- sge_times.qsub_time as sge_qsub_time,
+-- sge_times.start_time as sge_start_time,
+-- sge_times.end_time as sge_end_time
+FROM
+ job_input_sizes
+LEFT OUTER JOIN
+ sge_times
+ON (job_input_sizes.job_runner_external_id = sge_times.sge_job_id)
+ORDER BY
+ galaxy_job_create_time
+ 
+"
+
+echo "$SQL" | psql --pset "footer=off" -F"  " -A --quiet "$DATABASE"
+
+

datatypes_conf.xml.sample

             <!-- <display file="gbrowse/gbrowse_gff.xml" inherit="True" /> -->
         </datatype>
         <datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/>
-        <datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/>
+        <datatype extension="gif" type="galaxy.datatypes.images:Gif" mimetype="image/gif"/>
         <datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/>
         <datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"/>
+        <datatype extension="h5" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream"/>
         <datatype extension="html" type="galaxy.datatypes.images:Html" mimetype="text/html"/>
         <datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true">
             <converter file="interval_to_bed_converter.xml" target_datatype="bed"/>
         </datatype>
         <datatype extension="picard_interval_list" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/>
         <datatype extension="gatk_interval" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/>
-        <datatype extension="gatk_dbsnp" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="True"/>
-        <datatype extension="jpg" type="galaxy.datatypes.images:Image" mimetype="image/jpeg"/>
+        <datatype extension="gatk_dbsnp" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="jpg" type="galaxy.datatypes.images:Jpg" mimetype="image/jpeg"/>
+        <datatype extension="tiff" type="galaxy.datatypes.images:Tiff" mimetype="image/tiff"/>
+        <datatype extension="bmp" type="galaxy.datatypes.images:Bmp" mimetype="image/bmp"/>
+        <datatype extension="im" type="galaxy.datatypes.images:Im" mimetype="image/im"/>
+        <datatype extension="pcd" type="galaxy.datatypes.images:Pcd" mimetype="image/pcd"/>
+        <datatype extension="pcx" type="galaxy.datatypes.images:Pcx" mimetype="image/pcx"/>
+        <datatype extension="ppm" type="galaxy.datatypes.images:Ppm" mimetype="image/ppm"/>
+        <datatype extension="psd" type="galaxy.datatypes.images:Psd" mimetype="image/psd"/>
+        <datatype extension="xbm" type="galaxy.datatypes.images:Xbm" mimetype="image/xbm"/>
+        <datatype extension="xpm" type="galaxy.datatypes.images:Xpm" mimetype="image/xpm"/>
+        <datatype extension="rgb" type="galaxy.datatypes.images:Rgb" mimetype="image/rgb"/>
+        <datatype extension="pbm" type="galaxy.datatypes.images:Pbm" mimetype="image/pbm"/>
+        <datatype extension="pgm" type="galaxy.datatypes.images:Pgm" mimetype="image/pgm"/>
+        <datatype extension="eps" type="galaxy.datatypes.images:Eps" mimetype="image/eps"/>
+        <datatype extension="rast" type="galaxy.datatypes.images:Rast" mimetype="image/rast"/>
         <datatype extension="laj" type="galaxy.datatypes.images:Laj"/>
         <datatype extension="lav" type="galaxy.datatypes.sequence:Lav" display_in_upload="true"/>
         <datatype extension="maf" type="galaxy.datatypes.sequence:Maf" display_in_upload="true">
         </datatype>
         <datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/>
         <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" />
-        <datatype extension="png" type="galaxy.datatypes.images:Image" mimetype="image/png"/>
+        <datatype extension="png" type="galaxy.datatypes.images:Png" mimetype="image/png"/>
         <datatype extension="qual" type="galaxy.datatypes.qualityscore:QualityScore" />
         <datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/>
         <datatype extension="qualillumina" type="galaxy.datatypes.qualityscore:QualityScoreIllumina" display_in_upload="true"/>
         <datatype extension="svg" type="galaxy.datatypes.images:Image" mimetype="image/svg+xml"/>
         <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/>
         <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/>
-	<datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/>
+        <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/>
         <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/>
         <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/>
+        <datatype extension="cisml" type="galaxy.datatypes.xml:CisML" mimetype="application/xml" display_in_upload="true"/>
         <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" mimetype="application/xml" display_in_upload="true"/>
         <datatype extension="vcf" type="galaxy.datatypes.tabular:Vcf" display_in_upload="true">
             <converter file="vcf_to_bgzip_converter.xml" target_datatype="bgzip"/>
             <converter file="vcf_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/>
-            <converter file="vcf_to_summary_tree_converter.xml" target_datatype="summary_tree"/>    
+            <converter file="vcf_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
         </datatype>
         <datatype extension="wsf" type="galaxy.datatypes.wsf:SnpFile" display_in_upload="true"/>
         <datatype extension="velvet" type="galaxy.datatypes.assembly:Velvet" display_in_upload="false"/>
     </registration>
     <sniffers>
         <!--
-          The order in which Galaxy attempts to determine data types is 
-          important because some formats are much more loosely defined 
-          than others.  The following list should be the most rigidly 
-          defined format first, followed by next-most rigidly defined, 
+          The order in which Galaxy attempts to determine data types is
+          important because some formats are much more loosely defined
+          than others.  The following list should be the most rigidly
+          defined format first, followed by next-most rigidly defined,
           and so on.
         -->
         <sniffer type="galaxy.datatypes.tabular:Vcf"/>
         <sniffer type="galaxy.datatypes.tabular:Pileup"/>
         <sniffer type="galaxy.datatypes.interval:Interval"/>
         <sniffer type="galaxy.datatypes.tabular:Sam"/>
+        <sniffer type="galaxy.datatypes.images:Jpg"/>
+        <sniffer type="galaxy.datatypes.images:Png"/>
+        <sniffer type="galaxy.datatypes.images:Tiff"/>
+        <sniffer type="galaxy.datatypes.images:Bmp"/>
+        <sniffer type="galaxy.datatypes.images:Gif"/>
+        <sniffer type="galaxy.datatypes.images:Im"/>
+        <sniffer type="galaxy.datatypes.images:Pcd"/>
+        <sniffer type="galaxy.datatypes.images:Pcx"/>
+        <sniffer type="galaxy.datatypes.images:Ppm"/>
+        <sniffer type="galaxy.datatypes.images:Psd"/>
+        <sniffer type="galaxy.datatypes.images:Xbm"/>
+        <sniffer type="galaxy.datatypes.images:Xpm"/>
+        <sniffer type="galaxy.datatypes.images:Rgb"/>
+        <sniffer type="galaxy.datatypes.images:Pbm"/>
+        <sniffer type="galaxy.datatypes.images:Pgm"/>
+        <sniffer type="galaxy.datatypes.images:Xpm"/>
+        <sniffer type="galaxy.datatypes.images:Eps"/>
+        <sniffer type="galaxy.datatypes.images:Rast"/>
         <!--
         Keep this commented until the sniff method in the assembly.py
         module is fixed to not read the entire file.
 ; eggs.g2.bx.psu.edu).  Probably only useful to Galaxy developers at
 ; Penn State.  This file is used by scripts/dist-scramble.py
 ;
-; More information: http://bitbucket.org/galaxy/galaxy-central/wiki/Config/Eggs
+; More information: http://wiki.g2.bx.psu.edu/Admin/Config/Eggs
 ;
 
 [hosts]
 ;
 ; This file is version controlled and should not be edited by hand!
 ; For more information, see:
-;   http://bitbucket.org/galaxy/galaxy-central/wiki/Config/Eggs
+;   http://wiki.g2.bx.psu.edu/Admin/Config/Eggs
 ;
 
 [general]

lib/galaxy/app.py

 import galaxy.model
 import galaxy.datatypes.registry
 import galaxy.security
+import galaxy.quota
 from galaxy.tags.tag_handler import GalaxyTagHandler
 from galaxy.tools.imp_exp import load_history_imp_exp_tools
 from galaxy.sample_tracking import external_service_types
         #Load security policy
         self.security_agent = self.model.security_agent
         self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions )
+        # Load quota management
+        if self.config.enable_quotas:
+            self.quota_agent = galaxy.quota.QuotaAgent( self.model )
+        else:
+            self.quota_agent = galaxy.quota.NoQuotaAgent( self.model )
         # Heartbeat and memdump for thread / heap profiling
         self.heartbeat = None
         self.memdump = None

lib/galaxy/config.py

         # web API
         self.enable_api = string_as_bool( kwargs.get( 'enable_api', False ) )
         self.enable_openid = string_as_bool( kwargs.get( 'enable_openid', False ) )
+        self.enable_quotas = string_as_bool( kwargs.get( 'enable_quotas', False ) )
         self.tool_path = resolve_path( kwargs.get( "tool_path", "tools" ), self.root )
         self.tool_data_path = resolve_path( kwargs.get( "tool_data_path", "tool-data" ), os.getcwd() )
         self.len_file_path = kwargs.get( "len_file_path", resolve_path(os.path.join(self.tool_data_path, 'shared','ucsc','chrom'), self.root) )
         self.external_service_type_path = resolve_path( kwargs.get( 'external_service_type_path', 'external_service_types' ), self.root )
         # Tasked job runner.
         self.use_tasked_jobs = string_as_bool( kwargs.get( 'use_tasked_jobs', False ) )
+        self.local_task_queue_workers = int(kwargs.get("local_task_queue_workers", 2))
         # The transfer manager and deferred job queue
         self.enable_beta_job_managers = string_as_bool( kwargs.get( 'enable_beta_job_managers', 'False' ) )
-        self.local_task_queue_workers = int(kwargs.get("local_task_queue_workers", 2))
+        # Per-user Job concurrency limitations
+        self.user_job_limit = int( kwargs.get( 'user_job_limit', 0 ) )
         self.default_cluster_job_runner = kwargs.get( 'default_cluster_job_runner', 'local:///' )
         self.pbs_application_server = kwargs.get('pbs_application_server', "" )
         self.pbs_dataset_server = kwargs.get('pbs_dataset_server', "" )
         self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "wormbase,tair,modencode_worm,modencode_fly,sgd_yeast" ).lower().split(",")
         self.genetrack_display_sites = kwargs.get( 'genetrack_display_sites', "main,test" ).lower().split(",")
         self.brand = kwargs.get( 'brand', None )
+        self.support_url = kwargs.get( 'support_url', 'http://wiki.g2.bx.psu.edu/Support' )
         self.wiki_url = kwargs.get( 'wiki_url', 'http://g2.trac.bx.psu.edu/' )
-        self.bugs_email = kwargs.get( 'bugs_email', None )
         self.blog_url = kwargs.get( 'blog_url', None )
         self.screencasts_url = kwargs.get( 'screencasts_url', None )
         self.library_import_dir = kwargs.get( 'library_import_dir', None )
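
The new options above are read from the `[app:main]` section of Galaxy's ini file and parsed with the same helpers used for the existing settings. Below is a minimal sketch of that parsing pattern; the kwargs values are hypothetical and `string_as_bool` is a stand-in for `galaxy.util.string_as_bool`.

    # Sketch only: hypothetical ini values, stand-in for galaxy.util.string_as_bool.
    def string_as_bool( value ):
        return str( value ).lower() in ( 'true', 'yes', 'on' )

    kwargs = { 'enable_quotas': 'True', 'user_job_limit': '4' }
    enable_quotas = string_as_bool( kwargs.get( 'enable_quotas', False ) )  # True
    user_job_limit = int( kwargs.get( 'user_job_limit', 0 ) )               # 4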

lib/galaxy/datatypes/binary.py

 log = logging.getLogger(__name__)
 
 # Currently these supported binary data types must be manually set on upload
-unsniffable_binary_formats = [ 'ab1', 'scf' ]
+unsniffable_binary_formats = [ 'ab1', 'scf', 'h5' ]
 
 class Binary( data.Data ):
     """Binary data"""
             return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) )
     def get_track_type( self ):
         return "ReadTrack", {"data": "bai", "index": "summary_tree"}
-    
+
+class H5( Binary ):
+    """Class describing an HDF5 file"""
+    file_ext = "h5"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek  = "Binary h5 file" 
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Binary h5 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
 class Scf( Binary ):
     """Class describing an scf binary sequence file"""
     file_ext = "scf"
         Binary.__init__( self, **kwd )
         self._magic = 0x8789F2EB
         self._name = "BigBed"
-        
     def get_track_type( self ):
         return "LineTrack", {"data_standalone": "bigbed"}
 
                 return True
         except IOError:
             return False
-        
     def set_peek(self, dataset, is_multi_byte=False):
         if not dataset.dataset.purged:
             dataset.peek = "Binary TwoBit format nucleotide file"
             dataset.blurb = data.nice_size(dataset.get_size())
         else:
             return super(TwoBit, self).set_peek(dataset, is_multi_byte)
-    
     def display_peek(self, dataset):
         try:
             return dataset.peek

lib/galaxy/datatypes/checkers.py

-import os, gzip, re, gzip, zipfile, binascii, bz2
+import os, gzip, re, zipfile, binascii, bz2, imghdr
 from galaxy import util
 
+try:
+    import Image as PIL
+except ImportError:
+    try:
+        from PIL import Image as PIL
+    except:
+        PIL = None
+
+def check_image( file_path ):
+    if PIL is not None:
+        try:
+            im = PIL.open( file_path )
+        except:
+            return False
+        if im:
+            return im
+        return False
+    # Fall back to the standard library's imghdr module when PIL is unavailable.
+    return imghdr.what( file_path ) is not None
+
 def check_html( file_path, chunk=None ):
     if chunk is None:
         temp = open( file_path, "U" )
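
`check_image` returns a PIL Image object when PIL can open the file (or `True` when only `imghdr` is available), and `False` otherwise, so callers can treat the result both as a truth value and as an already-opened image. A usage sketch, assuming a Galaxy checkout on the Python path and a hypothetical file path:

    # Usage sketch for check_image; the path is hypothetical.
    from galaxy.datatypes.checkers import check_image

    image = check_image( '/tmp/uploaded_file.dat' )
    if image:
        print 'file appears to be an image'
    else:
        print 'file is not a recognized image'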

lib/galaxy/datatypes/images.py

 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes import metadata
 from galaxy.datatypes.sniff import *
+from galaxy.datatypes.util.image_util import *
 from urllib import urlencode, quote_plus
 import zipfile
-import os, subprocess, tempfile
+import os, subprocess, tempfile, imghdr
+
+try:
+    import Image as PIL
+except ImportError:
+    try:
+        from PIL import Image as PIL
+    except:
+        PIL = None
 
 log = logging.getLogger(__name__)
 
+# TODO: Uploading image files of various types is supported in Galaxy, but on
+# the main public instance, display_in_upload is not set for these data
+# types in datatypes_conf.xml because we do not allow image files to be uploaded
+# there.  There is currently no API feature that allows uploading files outside
+# of a data library ( where it requires either the upload_paths or upload_directory
+# option to be enabled, which is not the case on the main public instance ).  Because
+# of this, we're currently safe, but when the API is enhanced to allow other uploads,
+# we need to ensure that the implementation is such that image files cannot be uploaded
+# to our main public instance.
+
 class Image( data.Data ):
     """Class describing an image"""
     def set_peek( self, dataset, is_multi_byte=False ):
         else:
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
+    def sniff( self, filename ):
+        # First check whether PIL is available
+        if PIL is not None:
+            try:
+                im = PIL.open( filename )
+                im.close()
+                return True
+            except:
+                return False
+        # Fall back to the standard library's imghdr module
+        return imghdr.what( filename ) is not None
+
+class Jpg( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in jpg format."""
+        return check_image_type( filename, ['JPEG'], image )
+
+class Png( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in png format."""
+        return check_image_type( filename, ['PNG'], image )
+    
+class Tiff( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in tiff format."""
+        return check_image_type( filename, ['TIFF'], image )
+    
+class Bmp( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in bmp format."""
+        return check_image_type( filename, ['BMP'], image )
+
+class Gif( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in gif format."""
+        return check_image_type( filename, ['GIF'], image )
+
+class Im( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in im format."""
+        return check_image_type( filename, ['IM'], image )
+
+class Pcd( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in pcd format."""
+        return check_image_type( filename, ['PCD'], image )        
+
+class Pcx( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in pcx format."""
+        return check_image_type( filename, ['PCX'], image )
+
+class Ppm( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in ppm format."""
+        return check_image_type( filename, ['PPM'], image )        
+
+class Psd( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in psd format."""
+        return check_image_type( filename, ['PSD'], image )        
+
+class Xbm( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in XBM format."""
+        return check_image_type( filename, ['XBM'], image )        
+
+class Xpm( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in XPM format."""
+        return check_image_type( filename, ['XPM'], image )        
+
+class Rgb( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in RGB format."""
+        return check_image_type( filename, ['RGB'], image )
+
+class Pbm( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in PBM format"""
+        return check_image_type( filename, ['PBM'], image )
+
+class Pgm( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in PGM format"""
+        return check_image_type( filename, ['PGM'], image )
+
+class Eps( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in eps format."""
+        return check_image_type( filename, ['EPS'], image )        
+
+class Rast( Image ):
+    def sniff(self, filename, image=None):
+        """Determine if the file is in rast format"""
+        return check_image_type( filename, ['RAST'], image )
 
 class Pdf( Image ):
     def sniff(self, filename):
-        """Determine if the file is in pdf format.
-        """
+        """Determine if the file is in pdf format."""
         headers = get_headers(filename, None, 1)
         try:
             if headers[0][0].startswith("%PDF"):
                 "nobutton": "false",
                 "urlpause" :"100",
                 "debug": "false",
-                "posturl": quote_plus( "history_add_to?%s" % "&".join( [ "%s=%s" % ( key, value ) for key, value in { 'history_id': dataset.history_id, 'ext': 'maf', 'name': 'GMAJ Output on data %s' % dataset.hid, 'info': 'Added by GMAJ', 'dbkey': dataset.dbkey, 'copy_access_from': dataset.id }.items() ] ) )
+                "posturl": "history_add_to?%s" % "&".join( map( lambda x: "%s=%s" % ( x[0], quote_plus( str( x[1] ) ) ), [ ( 'copy_access_from', dataset.id), ( 'history_id', dataset.history_id ), ( 'ext', 'maf' ), ( 'name', 'GMAJ Output on data %s' % dataset.hid ), ( 'info', 'Added by GMAJ' ), ( 'dbkey', dataset.dbkey ) ] ) ) 
                 }
                 class_name = "edu.psu.bx.gmaj.MajApplet.class"
                 archive = "/static/gmaj/gmaj.jar"
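
Each new Image subclass sniffs by delegating to `check_image_type` with the PIL format name it expects, so a JPEG is claimed by `Jpg` but rejected by `Png`. A small sketch, assuming a Galaxy environment and a hypothetical file path:

    # Sniffing sketch; the path is hypothetical.
    from galaxy.datatypes.images import Jpg, Png

    path = '/tmp/photo.jpg'
    print Jpg().sniff( path )   # True for a JPEG file
    print Png().sniff( path )   # False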

lib/galaxy/datatypes/registry.py

                 'tabular'     : tabular.Tabular(),
                 'taxonomy'    : tabular.Taxonomy(),
                 'txt'         : data.Text(),
-                'wig'         : interval.Wiggle()
+                'wig'         : interval.Wiggle(),
+                'xml'         : xml.GenericXml(),
             }
             self.mimetypes_by_extension = { 
                 'ab1'         : 'application/octet-stream',
                 'tabular'     : 'text/plain',
                 'taxonomy'    : 'text/plain',
                 'txt'         : 'text/plain',
-                'wig'         : 'text/plain'
+                'wig'         : 'text/plain',
+                'xml'         : 'application/xml',
             }
         # super supertype fix for input steps in workflows.
         if 'data' not in self.datatypes_by_extension:
                 binary.Bam(),
                 binary.Sff(),
                 xml.BlastXml(),
+                xml.GenericXml(),
                 sequence.Maf(),
                 sequence.Lav(),
                 sequence.csFasta(),

lib/galaxy/datatypes/util/gff_util.py

             intervals_copy.append( interval.copy() )
         return GFFFeature(self.reader, self.chrom_col, self.feature_col, self.start_col, self.end_col, self.strand_col,
                           self.score_col, self.strand, intervals=intervals_copy )
+                          
+    def lines( self ):
+        lines = []
+        for interval in self.intervals:
+            lines.append( '\t'.join( interval.fields ) )
+        return lines
+            
                         
 class GFFIntervalToBEDReaderWrapper( NiceReaderWrapper ):
     """ 
             #finally:
             #raw_size += len( self.current_line )
             
+            # Ignore comments.
+            if isinstance( interval, Comment ):
+                continue
+            
             # If interval not associated with feature, break.
             group = interval.attributes.get( 'group', None )
             # GFF test:

lib/galaxy/datatypes/util/image_util.py

+"""
+Provides utilities for working with image files.
+"""
+import logging, imghdr
+
+try:
+    import Image as PIL
+except ImportError:
+    try:
+        from PIL import Image as PIL
+    except:
+        PIL = None
+
+log = logging.getLogger(__name__)
+
+def image_type( filename, image=None ):
+    format = ''
+    if PIL is not None:
+        if image is not None:
+            format = image.format
+        else:
+            try:
+                im = PIL.open( filename )
+                format = im.format
+                im.close()
+            except:
+                return False
+    else:
+        format = imghdr.what( filename )
+        if format is not None:
+            format = format.upper()
+        else:
+            return False
+    return format
+
+def check_image_type( filename, types, image=None ):
+    format = image_type( filename, image )
+    return format in types
+
+def get_image_ext( file_path, image ):
+    # Determine the Galaxy extension from the detected image format.
+    format = image_type( file_path, image )
+    if format in [ 'JPG','JPEG' ]:
+        return 'jpg'
+    if format == 'PNG':
+        return 'png'
+    if format == 'TIFF':
+        return 'tiff'
+    if format == 'BMP':
+        return 'bmp'
+    if format == 'GIF':
+        return 'gif'
+    if format == 'IM':
+        return 'im'
+    if format == 'PCD':
+        return 'pcd'
+    if format == 'PCX':
+        return 'pcx'
+    if format == 'PPM':
+        return 'ppm'
+    if format == 'PSD':
+        return 'psd'
+    if format == 'XBM':
+        return 'xbm'
+    if format == 'XPM':
+        return 'xpm'
+    if format == 'RGB':
+        return 'rgb'
+    if format == 'PBM':
+        return 'pbm'
+    if format == 'PGM':
+        return 'pgm'
+    if format == 'EPS':
+        return 'eps'
+    return None
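
`image_type` returns the detected format name (e.g. 'PNG', 'JPEG') or `False`, `check_image_type` tests the result against an allowed list, and `get_image_ext` maps it to a Galaxy extension. A usage sketch, assuming a Galaxy environment and a hypothetical path:

    # Usage sketch for the image_util helpers; the path is hypothetical.
    from galaxy.datatypes.util.image_util import image_type, check_image_type, get_image_ext

    path = '/tmp/chart.png'
    print image_type( path )                    # 'PNG', or False if unrecognized
    print check_image_type( path, [ 'PNG' ] )   # True
    print get_image_ext( path, None )           # 'png'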

lib/galaxy/datatypes/xml.py

 
 log = logging.getLogger(__name__)
 
-class BlastXml( data.Text ):
+class GenericXml( data.Text ):
+    """Base format class for any XML file."""
+    file_ext = "xml"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text"""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'XML data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is XML or not
+        
+        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
+        >>> GenericXml().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
+        >>> BlastXml().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'interval.interval' )
+        >>> GenericXml().sniff( fname )
+        False
+        """
+        #TODO - Use a context manager on Python 2.5+ to close handle
+        handle = open(filename)
+        line = handle.readline()
+        handle.close()
+        
+        #TODO - Is there a more robust way to do this?
+        return line.startswith('<?xml ')
+
+
+class BlastXml( GenericXml ):
     """NCBI Blast XML Output data"""
     file_ext = "blastxml"
 
         return True
         
 
-class MEMEXml( data.Text ):
+class MEMEXml( GenericXml ):
     """MEME XML Output data"""
     file_ext = "memexml"
 
             dataset.blurb = 'file purged from disk'
     def sniff( self, filename ):
         return False
+
+class CisML( GenericXml ):
+    """CisML XML data""" #see: http://www.ncbi.nlm.nih.gov/pubmed/15001475
+    file_ext = "cisml"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text"""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'CisML data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def sniff( self, filename ):
+        return False
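
`GenericXml.sniff` only inspects the first line for an XML declaration, which is cheap but means XML files without a declaration are not auto-detected. A minimal sketch, assuming a Galaxy environment and writing a hypothetical test file:

    # Sniffing sketch for GenericXml; the file is created here for illustration.
    from galaxy.datatypes.xml import GenericXml

    path = '/tmp/example.xml'
    open( path, 'w' ).write( '<?xml version="1.0"?>\n<doc/>\n' )
    print GenericXml().sniff( path )   # True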

lib/galaxy/jobs/__init__.py

 from galaxy.util.expressions import ExpressionContext
 from galaxy.jobs.actions.post import ActionBox
 
+from sqlalchemy.sql.expression import and_, or_
+
 import pkg_resources
 pkg_resources.require( "PasteDeploy" )
 
                                 .options( lazyload( "external_output_metadata" ), lazyload( "parameters" ) ) \
                                 .filter( model.Job.state == model.Job.states.NEW ).all()
         else:
+            # Get job objects and append to watch queue for any which were
+            # previously waiting
+            for job_id in self.waiting_jobs:
+                jobs_to_check.append( self.sa_session.query( model.Job ).get( job_id ) )
             try:
                 while 1:
                     message = self.queue.get_nowait()
                     jobs_to_check.append( self.sa_session.query( model.Job ).get( job_id ) )
             except Empty:
                 pass
-            # Get job objects and append to watch queue for any which were
-            # previously waiting
-            for job_id in self.waiting_jobs:
-                jobs_to_check.append( self.sa_session.query( model.Job ).get( job_id ) )
         # Iterate over new and waiting jobs and look for any that are 
         # ready to run
         new_waiting_jobs = []
                 elif job_state == JOB_DELETED:
                     log.info( "job %d deleted by user while still queued" % job.id )
                 elif job_state == JOB_ADMIN_DELETED:
-                    job.info( "job %d deleted by admin while still queued" % job.id )
+                    log.info( "job %d deleted by admin while still queued" % job.id )
                 else:
                     log.error( "unknown job state '%s' for job %d" % ( job_state, job.id ) )
                     if not self.track_jobs_in_database:
             return JOB_DELETED
         elif job.state == model.Job.states.ERROR:
             return JOB_ADMIN_DELETED
+        elif self.app.config.enable_quotas:
+            quota = self.app.quota_agent.get_quota( job.user )
+            if quota is not None:
+                try:
+                    usage = self.app.quota_agent.get_usage( user=job.user, history=job.history )
+                    if usage > quota:
+                        return JOB_WAIT
+                except AssertionError, e:
+                    pass # No history, should not happen with an anon user
         for dataset_assoc in job.input_datasets + job.input_library_datasets:
             idata = dataset_assoc.dataset
             if not idata:
             elif idata.state != idata.states.OK and not ( idata.state == idata.states.SETTING_METADATA and job.tool_id is not None and job.tool_id == self.app.datatypes_registry.set_external_metadata_tool.id ):
                 # need to requeue
                 return JOB_WAIT
+        return self.__check_user_jobs( job )
+
+    def __check_user_jobs( self, job ):
+        if not self.app.config.user_job_limit:
+            return JOB_READY
+        if job.user:
+            user_jobs = self.sa_session.query( model.Job ) \
+                            .options( lazyload( "external_output_metadata" ), lazyload( "parameters" ) ) \
+                            .filter( and_( model.Job.user_id == job.user.id,
+                                           or_( model.Job.state == model.Job.states.RUNNING,
+                                                model.Job.state == model.Job.states.QUEUED ) ) ).all()
+        elif job.galaxy_session:
+            user_jobs = self.sa_session.query( model.Job ) \
+                            .options( lazyload( "external_output_metadata" ), lazyload( "parameters" ) ) \
+                            .filter( and_( model.Job.session_id == job.galaxy_session.id,
+                                           or_( model.Job.state == model.Job.states.RUNNING,
+                                                model.Job.state == model.Job.states.QUEUED ) ) ).all()
+        else:
+            log.warning( 'Job %s is not associated with a user or session so job concurrency limit cannot be checked.' % job.id )
+            return JOB_READY
+        if len( user_jobs ) >= self.app.config.user_job_limit:
+            return JOB_WAIT
         return JOB_READY
             
     def put( self, job_id, tool ):
 
 class JobWrapper( object ):
     """
-    Wraps a 'model.Job' with convience methods for running processes and 
+    Wraps a 'model.Job' with convenience methods for running processes and 
     state management.
     """
     def __init__( self, job, queue ):
         self.sa_session = self.app.model.context
         self.extra_filenames = []
         self.command_line = None
+        # Tool versioning variables
+        self.version_string_cmd = None
+        self.version_string = ""
         self.galaxy_lib_dir = None
         # With job outputs in the working directory, we need the working
         # directory to be set before prepare is run, or else premature deletion
         param_dict = self.tool.params_from_strings( param_dict, self.app )
         return param_dict
         
+    def get_version_string_path( self ):
+        return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id))
+        
     def prepare( self ):
         """
         Prepare the job to run by creating the working directory and the
             extra_filenames.append( param_filename )
         self.param_dict = param_dict
         self.extra_filenames = extra_filenames
+        self.version_string_cmd = self.tool.version_string_cmd
         return extra_filenames
 
     def fail( self, message, exception=False ):
             job.state = job.states.ERROR
         else:
             job.state = job.states.OK
+        if self.version_string_cmd:
+            version_filename = self.get_version_string_path()
+            if os.path.exists(version_filename):
+                self.version_string = open(version_filename).read()
+                os.unlink(version_filename)
+            
         if self.app.config.outputs_to_working_directory:
             for dataset_path in self.get_output_fnames():
                 try:
                 dataset.blurb = 'done'
                 dataset.peek  = 'no peek'
                 dataset.info  = context['stdout'] + context['stderr']
+                dataset.tool_version = self.version_string
                 dataset.set_size()
                 if context['stderr']:
                     dataset.blurb = "error"
                              tool=self.tool, stdout=stdout, stderr=stderr )
         job.command_line = self.command_line
 
+        bytes = 0
         # Once datasets are collected, set the total dataset size (includes extra files)
-        for dataset_assoc in job.output_datasets + job.output_library_datasets:
+        for dataset_assoc in job.output_datasets:
             dataset_assoc.dataset.dataset.set_total_size()
+            bytes += dataset_assoc.dataset.dataset.get_total_size()
+
+        if job.user:
+            job.user.total_disk_usage += bytes
 
         # fix permissions
         for path in [ dp.real_path for dp in self.get_output_fnames() ]:
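
The new `__check_user_jobs` gate counts a user's queued and running jobs and returns `JOB_WAIT` once that count reaches `user_job_limit` (0 disables the check). A condensed sketch of just that decision, with hypothetical counts and locally defined state constants:

    # Condensed sketch of the per-user concurrency decision; counts are hypothetical.
    JOB_WAIT, JOB_READY = 'wait', 'ready'

    def check_user_jobs( active_job_count, user_job_limit ):
        if not user_job_limit:
            return JOB_READY
        if active_job_count >= user_job_limit:
            return JOB_WAIT
        return JOB_READY

    print check_user_jobs( 3, 4 )   # ready
    print check_user_jobs( 4, 4 )   # wait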

lib/galaxy/jobs/runners/__init__.py

 import os, os.path
 
 class BaseJobRunner( object ):
-
     def build_command_line( self, job_wrapper, include_metadata=False ):
         """
-        Compose the sequence of commands neccesary to execute a job. This will
+        Compose the sequence of commands necessary to execute a job. This will
         currently include:
             - environment settings corresponding to any requirement tags
             - command line taken from job wrapper
         # occur
         if not commands:
             return None
+        # Prepend version string
+        if job_wrapper.version_string_cmd:
+            commands = "%s &> %s; " % ( job_wrapper.version_string_cmd, job_wrapper.get_version_string_path() ) + commands
         # Prepend dependency injection
         if job_wrapper.dependency_shell_commands:
             commands = "; ".join( job_wrapper.dependency_shell_commands + [ commands ] ) 
+        
         # Append metadata setting commands, we don't want to overwrite metadata
         # that was copied over in init_meta(), as per established behavior
         if include_metadata and self.app.config.set_metadata_externally:
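
With the version-string change, `build_command_line` prepends a command that captures the tool's version output into the `GALAXY_VERSION_STRING_<job id>` file that `JobWrapper.finish` later reads. A sketch of the resulting composite command, using hypothetical tool and path names:

    # Sketch of the composed command line; tool name and paths are hypothetical.
    version_string_cmd = 'mytool --version'
    version_path = '/galaxy/database/tmp/GALAXY_VERSION_STRING_42'
    commands = 'mytool --input in.dat --output out.dat'

    commands = "%s &> %s; " % ( version_string_cmd, version_path ) + commands
    print commands
    # mytool --version &> /galaxy/database/tmp/GALAXY_VERSION_STRING_42; mytool --input in.dat --output out.dat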

lib/galaxy/jobs/runners/drmaa.py

             jt.nativeSpecification = native_spec
 
         script = drm_template % (job_wrapper.galaxy_lib_dir, os.path.abspath( job_wrapper.working_directory ), command_line)
-        fh = file( jt.remoteCommand, "w" )
-        fh.write( script )
-        fh.close()
-        os.chmod( jt.remoteCommand, 0750 )
+        try:
+            fh = file( jt.remoteCommand, "w" )
+            fh.write( script )
+            fh.close()
+            os.chmod( jt.remoteCommand, 0750 )
+        except:
+            job_wrapper.fail( "failure preparing job script", exception=True )
+            log.exception("failure running job %s" % job_wrapper.get_id_tag())
+            return                          
 
         # job was deleted while we were preparing it
         if job_wrapper.get_state() == model.Job.states.DELETED:

lib/galaxy/jobs/runners/pbs.py

 configured properly.  Galaxy's "scramble" system should make this installation
 simple, please follow the instructions found at:
 
-    http://bitbucket.org/galaxy/galaxy-central/wiki/Config/Cluster
+    http://wiki.g2.bx.psu.edu/Admin/Config/Performance/Cluster
 
 Additional errors may follow:
 %s

lib/galaxy/jobs/runners/sge.py

 "scramble" system should make this installation simple, please follow the
 instructions found at:
 
-  http://bitbucket.org/galaxy/galaxy-central/wiki/Config/Cluster
+  http://wiki.g2.bx.psu.edu/Admin/Config/Performance/Cluster
 
 Additional errors may follow:
 %s

lib/galaxy/model/__init__.py

 Naming: try to use class names that have a distinct plural form so that
 the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
 """
-
+import pkg_resources
+pkg_resources.require( "simplejson" )
+import simplejson
 import galaxy.datatypes
 from galaxy.util.bunch import Bunch
 from galaxy import util
         # Relationships
         self.histories = []
         self.credentials = []
-        
+
     def set_password_cleartext( self, cleartext ):
         """Set 'self.password' to the digest of 'cleartext'."""
         self.password = new_secure_hash( text_type=cleartext )
     @property
     def safe_username(self):
         return self.username.lower().replace(" ", "-")
-    
+    def get_disk_usage( self, nice_size=False ):
+        rval = 0
+        if self.disk_usage is not None:
+            rval = self.disk_usage
+        if nice_size:
+            rval = galaxy.datatypes.data.nice_size( rval )
+        return rval
+    def set_disk_usage( self, bytes ):
+        self.disk_usage = bytes
+    total_disk_usage = property( get_disk_usage, set_disk_usage )
+    def calculate_disk_usage( self ):
+        dataset_ids = []
+        total = 0
+        # this can be a huge number and can run out of memory, so we avoid the mappers
+        db_session = object_session( self )
+        for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id ).yield_per( 1000 ):
+            for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
+                if not hda.dataset.id in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
+                    dataset_ids.append( hda.dataset.id )
+                    total += hda.dataset.get_total_size()
+        return total
+
 class Job( object ):
     """
     A job represents a request to run a tool given input datasets, tool 
         self.job_runner_external_id = None
         self.post_job_actions = []
         self.imported = False
-        
+
     def add_parameter( self, name, value ):
         self.parameters.append( JobParameter( name, value ) )
     def add_input_dataset( self, name, dataset ):
     def __init__( self, name, value ):
         self.name = name
         self.value = value
-          
+
 class JobToInputDatasetAssociation( object ):
     def __init__( self, name, dataset ):
         self.name = name
         self.dataset = dataset
-        
+
 class JobToOutputDatasetAssociation( object ):
     def __init__( self, name, dataset ):
         self.name = name
         self.output_name = output_name
         self.action_arguments = action_arguments
         self.workflow_step = workflow_step
-        
+
 class PostJobActionAssociation( object ):
     def __init__(self, pja, job):
         self.job = job
         elif self.library_dataset_dataset_association:
             return self.library_dataset_dataset_association
         return None
-        
+
 class JobExportHistoryArchive( object ):
     def __init__( self, job=None, history=None, dataset=None, compressed=False, \
                   history_attrs_filename=None, datasets_attrs_filename=None, 
         self.history_attrs_filename = history_attrs_filename
         self.datasets_attrs_filename = datasets_attrs_filename
         self.jobs_attrs_filename = jobs_attrs_filename
-        
+
 class JobImportHistoryArchive( object ):
     def __init__( self, job=None, history=None, archive_dir=None ):
         self.job = job
             return True
         else:
             return False
-        
+
 class Group( object ):
     def __init__( self, name = None ):
         self.name = name
         self.group = group
 
 class History( object, UsesAnnotations ):
+    api_collection_visible_keys = ( 'id', 'name' )
+    api_element_visible_keys = ( 'id', 'name' )
     def __init__( self, id=None, name=None, user=None ):
         self.id = id
         self.name = name or "Unnamed history"
             self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
         else:
             self.galaxy_sessions.append( association )
-    def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid = True ):
+    def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
         if isinstance( dataset, Dataset ):
             dataset = HistoryDatasetAssociation(dataset=dataset)
             object_session( self ).add( dataset )
         else:
             if set_hid:
                 dataset.hid = self._next_hid()
+        if quota and self.user:
+            self.user.total_disk_usage += dataset.quota_amount( self.user )
         dataset.history = self
         if genome_build not in [None, '?']:
             self.genome_build = genome_build
             name = self.name
         if not target_user:
             target_user = self.user
+        quota = True
+        if target_user == self.user:
+            quota = False
         new_history = History( name=name, user=target_user )
         db_session = object_session( self )
         db_session.add( new_history )
         db_session.flush()
-        
+
         # Copy annotation.
         self.copy_item_annotation( db_session, self.user, self, target_user, new_history )
 
             hdas = self.active_datasets
         for hda in hdas:
             # Copy HDA.
-            new_hda = hda.copy( copy_children=True, target_history=new_history )
-            new_history.add_dataset( new_hda, set_hid = False )
+            new_hda = hda.copy( copy_children=True )
+            new_history.add_dataset( new_hda, set_hid = False, quota=quota )
             db_session.add( new_hda )
-            db_session.flush()            
+            db_session.flush()
             # Copy annotation.
             self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
         new_history.hid_counter = self.hid_counter
         if isinstance(history_name, str):
             history_name = unicode(history_name, 'utf-8')
         return history_name
+
+    def get_api_value( self, view='collection', value_mapper = None ):
+        if value_mapper is None:
+            value_mapper = {}
+        rval = {}
+        try:
+            visible_keys = self.__getattribute__( 'api_' + view + '_visible_keys' )
+        except AttributeError:
+            raise Exception( 'Unknown API view: %s' % view )
+        for key in visible_keys:
+            try:
+                rval[key] = self.__getattribute__( key )
+                if key in value_mapper:
+                    rval[key] = value_mapper.get( key )( rval[key] )
+            except AttributeError:
+                rval[key] = None
+        return rval
     @property
     def get_disk_size_bytes( self ):
         return self.get_disk_size( nice_size=False )
         self.type = type
         self.deleted = deleted
 
+class UserQuotaAssociation( object ):
+    def __init__( self, user, quota ):
+        self.user = user
+        self.quota = quota
+
+class GroupQuotaAssociation( object ):
+    def __init__( self, group, quota ):
+        self.group = group
+        self.quota = quota
+
+class Quota( object ):
+    valid_operations = ( '+', '-', '=' )
+    def __init__( self, name="", description="", amount=0, operation="=" ):
+        self.name = name
+        self.description = description
+        if amount is None:
+            self.bytes = -1
+        else:
+            self.bytes = amount
+        self.operation = operation
+    def get_amount( self ):
+        if self.bytes == -1:
+            return None
+        return self.bytes
+    def set_amount( self, amount ):
+        if amount is None:
+            self.bytes = -1
+        else:
+            self.bytes = amount
+    amount = property( get_amount, set_amount )
+    @property
+    def display_amount( self ):
+        if self.bytes == -1:
+            return "unlimited"
+        else:
+            return util.nice_size( self.bytes )
+
+class DefaultQuotaAssociation( Quota ):
+    types = Bunch(
+        UNREGISTERED = 'unregistered',
+        REGISTERED = 'registered'
+    )
+    def __init__( self, type, quota ):
+        assert type in self.types.__dict__.values(), 'Invalid type'
+        self.type = type
+        self.quota = quota
+
 class DatasetPermissions( object ):
     def __init__( self, action, dataset, role ):
         self.action = action
     file_name = property( get_file_name, set_file_name )
     @property
     def extra_files_path( self ):
-        if self._extra_files_path: 
+        if self._extra_files_path:
             path = self._extra_files_path
         else:
             path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
     """A base class for all 'dataset instances', HDAs, LDAs, etc"""
     states = Dataset.states
     permitted_actions = Dataset.permitted_actions
-    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None, 
+    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, tool_version=None, extension=None,
                   dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                   parent_id=None, validation_errors=None, visible=True, create_dataset=False, sa_session=None ):
         self.name = name or "Unnamed dataset"
         self.info = info
         self.blurb = blurb
         self.peek = peek
+        self.tool_version = tool_version
         self.extension = extension
         self.designation = designation
         self.metadata = metadata or dict()
         return dbkey[0]
     def set_dbkey( self, value ):
         if "dbkey" in self.datatype.metadata_spec:
-            if not isinstance(value, list): 
+            if not isinstance(value, list):
                 self.metadata.dbkey = [value]
-            else: 
+            else:
                 self.metadata.dbkey = value
     dbkey = property( get_dbkey, set_dbkey )
     def change_datatype( self, new_ext ):
     def set_size( self ):
         """Returns the size of the data on disk"""
         return self.dataset.set_size()
+    def get_total_size( self ):
+        return self.dataset.get_total_size()
+    def set_total_size( self ):
+        return self.dataset.set_total_size()
     def has_data( self ):
         """Detects whether there is any data"""
         return self.dataset.has_data()
         # See if we can convert the dataset
         if target_ext not in self.get_converter_types():
             raise NoConverterException("Conversion from '%s' to '%s' not possible" % (self.extension, target_ext) )
-        
         deps = {}
         # List of string of dependencies
         try:
             depends_list = trans.app.datatypes_registry.converter_deps[self.extension][target_ext]
         except KeyError:
             depends_list = []
-        
         # See if converted dataset already exists
         converted_dataset = self.get_converted_files_by_type( target_ext )
         if converted_dataset:
             return converted_dataset
-        
         # Conversion is possible but hasn't been done yet, run converter.
         # Check if we have dependencies
-        
         try:
             for dependency in depends_list:
                 dep_dataset = self.get_converted_dataset(trans, dependency)
                     raise ConverterDependencyException("A dependency (%s) was in an error state." % dependency)
                 elif dep_dataset.state != trans.app.model.Job.states.OK:
                     # Pending
-                    return None                    
-                
+                    return None
                 deps[dependency] = dep_dataset
         except NoConverterException:
             raise NoConverterException("A dependency (%s) is missing a converter." % dependency)
         except KeyError:
             pass # No deps
-            
         assoc = ImplicitlyConvertedDatasetAssociation( parent=self, file_type=target_ext, metadata_safe=False )
         new_dataset = self.datatype.convert_dataset( trans, self, target_ext, return_output=True, visible=False, deps=deps, set_output_history=False ).values()[0]
         new_dataset.name = self.name
         self.history = history
         self.copied_from_history_dataset_association = copied_from_history_dataset_association
         self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
-    def copy( self, copy_children = False, parent_id = None, target_history = None ):
+    def copy( self, copy_children = False, parent_id = None ):
         hda = HistoryDatasetAssociation( hid=self.hid, 
                                          name=self.name, 
                                          info=self.info, 
                                          blurb=self.blurb, 
                                          peek=self.peek, 
+                                         tool_version=self.tool_version, 
                                          extension=self.extension, 
                                          dbkey=self.dbkey, 
                                          dataset = self.dataset, 
                                          visible=self.visible, 
                                          deleted=self.deleted, 
                                          parent_id=parent_id, 
-                                         copied_from_history_dataset_association=self,
-                                         history = target_history )
+                                         copied_from_history_dataset_association=self )
         object_session( self ).add( hda )
         object_session( self ).flush()
         hda.set_size()
                                                  info=self.info,
                                                  blurb=self.blurb, 
                                                  peek=self.peek, 
+                                                 tool_version=self.tool_version, 
                                                  extension=self.extension, 
                                                  dbkey=self.dbkey, 
                                                  dataset=self.dataset, 
         return hda_name
     def get_access_roles( self, trans ):
         return self.dataset.get_access_roles( trans )
+    def quota_amount( self, user ):
+        """
+        If the user has multiple instances of this dataset, it will not affect their disk usage statistic.
+        """
+        rval = 0
+        # Anon users are handled just by their single history size.
+        if not user:
+            return rval
+        # Gets an HDA and its children's disk usage, if the user does not already have an association of the same dataset
+        if not self.dataset.library_associations and not self.purged and not self.dataset.purged:
+            for hda in self.dataset.history_associations:
+                if hda.id == self.id:
+                    continue
+                if not hda.purged and hda.history and hda.history.user and hda.history.user == user:
+                    break
+            else:
+                rval += self.get_total_size()
+        for child in self.children:
+            rval += child.get_disk_usage( user )
+        return rval
+    def get_api_value( self, view='collection' ):
+        # Since this class is a proxy to rather complex attributes we want to
+        # display in other objects, we can't use the simpler method used by
+        # other model classes.
+        hda = self
+        rval = dict( name = hda.name,
+                     deleted = hda.deleted,
+                     visible = hda.visible,
+                     state = hda.state,
+                     file_size = int( hda.get_size() ),
+                     data_type = hda.ext,
+                     genome_build = hda.dbkey,
+                     misc_info = hda.info,
+                     misc_blurb = hda.blurb )
+        for name, spec in hda.metadata.spec.items():
+            val = hda.metadata.get( name )
+            if isinstance( val, MetadataFile ):
+                val = val.file_name
+            elif isinstance( val, list ):
+                val = ', '.join( [str(v) for v in val] )
+            rval['metadata_' + name] = val
+        return rval
 
 class HistoryDatasetAssociationDisplayAtAuthorization( object ):
     def __init__( self, hda=None, user=None, site=None ):
             name = unicode( name, 'utf-8' )
         return name
     def get_api_value( self, view='collection' ):
-        rval = super( LibraryFolder, self ).get_api_value( vew=view )
+        rval = super( LibraryFolder, self ).get_api_value( view=view )
         info_association, inherited = self.get_info_association()
         if info_association:
             if inherited:
             for field in template.fields:
                 tmp_dict[field['label']] = content[field['name']]
             template_data[template.name] = tmp_dict
-        
+
         rval = dict( name = ldda.name,
                      file_name = ldda.file_name,
                      uploaded_by = ldda.user.email,
                                          info=self.info,
                                          blurb=self.blurb, 
                                          peek=self.peek, 
+                                         tool_version=self.tool_version, 
                                          extension=self.extension, 
                                          dbkey=self.dbkey, 
                                          dataset=self.dataset, 
                                                  info=self.info, 
                                                  blurb=self.blurb, 
                                                  peek=self.peek, 
+                                                 tool_version=self.tool_version, 
                                                  extension=self.extension, 
                                                  dbkey=self.dbkey, 
                                                  dataset=self.dataset, 
             else:
                 return template.get_widgets( trans.user )
         return []
+    def templates_dict( self ):
+        """
+        Returns a dict of template info
+        """
+        template_data = {}
+        for temp_info in self.info_association:
+            template = temp_info.template
+            content = temp_info.info.content
+            tmp_dict = {}
+            for field in template.fields:
+                tmp_dict[field['label']] = content[field['name']]
+            template_data[template.name] = tmp_dict
+        return template_data
+    def templates_json( self ):
+        return simplejson.dumps( self.templates_dict() )
+
     def get_display_name( self ):
         """
         LibraryDatasetDatasetAssociation name can be either a string or a unicode object.
             self.histories.append( GalaxySessionToHistoryAssociation( self, history ) )
         else:
             self.histories.append( association )
-    
+    def get_disk_usage( self ):
+        if self.disk_usage is None:
+            return 0
+        return self.disk_usage
+    def set_disk_usage( self, bytes ):
+        self.disk_usage = bytes
+    total_disk_usage = property( get_disk_usage, set_disk_usage )
+
 class GalaxySessionToHistoryAssociation( object ):
     def __init__( self, galaxy_session, history ):
         self.galaxy_session = galaxy_session
         self.id = None
         self.instance_id = None
         self.state = None
-        
+
 class UCI( object ):
     def __init__( self ):
         self.id = None
         self.state = None
         self.public_dns = None
         self.availability_zone = None
-        
+
 class CloudStore( object ):
     def __init__( self ):
         self.id = None
         self.user = None
         self.size = None
         self.availability_zone = None
-        
+
 class CloudSnapshot( object ):
     def __init__( self ):
         self.id = None
         self.user = None
         self.store_id = None
         self.snapshot_id = None
-        
+
 class CloudProvider( object ):
     def __init__( self ):
         self.id = None
         self.has_cycles = None
         self.has_errors = None
         self.steps = []
-        
+
 class WorkflowStep( object ):
     def __init__( self ):
         self.id = None
         self.position = None
         self.input_connections = []
         self.config = None
-        
+
 class WorkflowStepConnection( object ):
     def __init__( self ):
         self.output_step_id = None
     def __init__( self, workflow_step, output_name):
         self.workflow_step = workflow_step
         self.output_name = output_name
-        
+
 class StoredWorkflowUserShareAssociation( object ):
     def __init__( self ):
         self.stored_workflow = None
 class FormDefinitionCurrent( object ):