Commits

Brad Chapman committed bc077be Merge

Upgrade to latest galaxy-central; September 7, 2011 release

  • Parent commits 1577c4d, 9207ba3

Files changed (498)

File .hgignore

File contents unchanged.

File buildbot_setup.sh

 datatypes_conf.xml.sample
 universe_wsgi.ini.sample
 tool_data_table_conf.xml.sample
+migrated_tools_conf.xml.sample
+tool-data/shared/ensembl/builds.txt.sample
+tool-data/shared/igv/igv_build_sites.txt.sample
+tool-data/shared/ncbi/builds.txt.sample
+tool-data/shared/rviewer/rviewer_build_sites.txt.sample
 tool-data/shared/ucsc/builds.txt.sample
-migrated_tools_conf.xml.sample
+tool-data/shared/ucsc/publicbuilds.txt.sample
 "
 
 DIRS="

File contrib/multiproccess.sh

-#!/bin/bash
-
-# copy this script to the top level galaxy directory and modify the following
-# for your environment
-
-web_server_names=(web{0..2}) # server names: web0 web1 web2
-runner_server_names=(runner0) # server name: runner0
-
-web_config='universe_wsgi.webapp.ini'
-runner_config='universe_wsgi.runner.ini'
-
-# actually do the requested action
-
-if [ -z "$1" ]; then
-    echo "usage: multiprocess.sh <--daemon|--stop-daemon>"
-    exit 1
-fi
-
-for server_name in ${web_server_names[@]}; do
-    echo "[$server_name]"
-    python ./scripts/paster.py serve $web_config --server-name=$server_name --pid-file=$server_name.pid --log-file=$server_name.log $@
-done
-for server_name in ${runner_server_names[@]}; do
-    echo "[$server_name]"
-    python ./scripts/paster.py serve $runner_config --server-name=$server_name --pid-file=$server_name.pid --log-file=$server_name.log $@
-done

File cron/get_ncbi.py

+import urllib, pkg_resources, os
+pkg_resources.require( 'elementtree' )
+from elementtree import ElementTree, ElementInclude
+from xml.parsers.expat import ExpatError as XMLParseErrorThing
+import sys
+
+import pkg_resources
+
+class GetListing:
+    def __init__( self, data ):
+        self.tree = ElementTree.parse( data )
+        self.root = self.tree.getroot()
+        ElementInclude.include(self.root)
+        
+    def xml_text(self, name=None):
+        """Returns the text inside an element"""
+        root = self.root
+        if name is not None:
+            # Try attribute first
+            val = root.get(name)
+            if val:
+                return val
+            # Then try as element
+            elem = root.find(name)
+        else:
+            elem = root
+        if elem is not None and elem.text:
+            text = ''.join(elem.text.splitlines())
+            return text.strip()
+        # No luck, return empty string
+        return ''
+
+def dlcachefile( webenv, querykey, i, results ):
+    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nuccore&usehistory=y&term=nuccore_assembly[filter]%20AND%20refseq[filter]'
+    fp = urllib.urlopen( url )
+    search = GetListing( fp )
+    fp.close()
+    webenv = search.xml_text( 'WebEnv' )
+    querykey = search.xml_text( 'QueryKey' )
+    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=nuccore&WebEnv=%s&query_key=%s&retstart=%d&retmax=%d' % ( webenv, querykey, i, results )
+    fp = urllib.urlopen( url )
+    cachefile = os.tmpfile()
+    for line in fp:
+        cachefile.write( line )
+    fp.close()
+    cachefile.flush()
+    cachefile.seek(0)
+    return cachefile
+    
+
+url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nuccore&usehistory=y&term=nuccore_assembly[filter]%20AND%20refseq[filter]'
+fp = urllib.urlopen( url )
+results = GetListing( fp )
+fp.close()
+
+webenv = results.xml_text( 'WebEnv' )
+querykey = results.xml_text( 'QueryKey' )
+counts = int( results.xml_text( 'Count' ) )
+results = 10000
+found = 0
+
+for i in range(0, counts + results, results):
+    rets = dict()
+    cache = dlcachefile( webenv, querykey, i, results )
+    try:
+        xmldoc = GetListing( cache )
+    except (IOError, XMLParseErrorThing):
+        cache = dlcachefile( webenv, querykey, i, results )
+        try:
+            xmldoc = GetListing( cache )
+        except (IOError, XMLParseErrorThing):
+            cache.close()
+            exit()
+        pass
+    finally:
+        cache.close()
+    entries = xmldoc.root.findall( 'DocSum' )
+    for entry in entries:
+        dbkey = None
+        children = entry.findall('Item')
+        for item in children:
+            rets[ item.get('Name') ] = item.text
+        if not rets['Caption'].startswith('NC_'):
+            continue
+            
+        for ret in rets['Extra'].split('|'):
+            if not ret.startswith('NC_'):
+                continue
+            else:
+                dbkey = ret
+                break
+        if dbkey is not None:
+            print '\t'.join( [ dbkey, rets['Title'] ] )

File cron/updatencbi.sh.sample

+#!/bin/sh 
+#
+# Script to update NCBI shared data tables.  The idea is to update, but if
+# the update fails, not replace current data/tables with error
+# messages.
+
+# Edit this line to refer to galaxy's path:
+GALAXY=/path/to/galaxy
+PYTHONPATH=${GALAXY}/lib
+export PYTHONPATH
+
+# setup directories
+echo "Creating required directories."
+DIRS="
+${GALAXY}/tool-data/shared/ncbi
+${GALAXY}/tool-data/shared/ncbi/new
+"
+for dir in $DIRS; do
+    if [ ! -d $dir ]; then
+        echo "Creating $dir"
+        mkdir $dir
+    else
+        echo "$dir already exists, continuing."
+    fi
+done
+
+date
+echo "Updating NCBI shared data tables."
+
+# Try to build "builds.txt"
+echo "Updating builds.txt"
+python ${GALAXY}/cron/get_ncbi.py > ${GALAXY}/tool-data/shared/ncbi/new/builds.txt
+if [ $? -eq 0 ]
+then
+    diff ${GALAXY}/tool-data/shared/ncbi/new/builds.txt ${GALAXY}/tool-data/shared/ncbi/builds.txt > /dev/null 2>&1
+    if [ $? -ne 0 ]
+    then
+        cp -f ${GALAXY}/tool-data/shared/ncbi/new/builds.txt ${GALAXY}/tool-data/shared/ncbi/builds.txt
+    fi
+else
+    echo "Failed to update builds.txt" >&2
+fi
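
The .sample suffix and the hard-coded GALAXY path suggest the script is meant to be copied, edited for the local install, and then run periodically. A hypothetical crontab entry (schedule, paths, and log location are placeholders):

    # Refresh tool-data/shared/ncbi/builds.txt weekly
    30 3 * * 0  /path/to/galaxy/cron/updatencbi.sh >> /path/to/galaxy/cron/updatencbi.log 2>&1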

File datatypes_conf.xml.sample

     <datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true"/>
     <datatype extension="afg" type="galaxy.datatypes.assembly:Amos" display_in_upload="false"/>
     <datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true"/>
+    <datatype extension="fli" type="galaxy.datatypes.tabular:FeatureLocationIndex" display_in_upload="false"/>
     <datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream" display_in_upload="true">
       <converter file="bam_to_bai.xml" target_datatype="bai"/>
       <converter file="bam_to_summary_tree_converter.xml" target_datatype="summary_tree" depends_on="bai"/>
       <converter file="bed_to_bgzip_converter.xml" target_datatype="bgzip"/>
       <converter file="bed_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/>
       <converter file="bed_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
+      <converter file="bed_to_fli_converter.xml" target_datatype="fli"/>
       <!-- <display file="ucsc/interval_as_bed.xml" /> -->
       <display file="genetrack.xml" />
       <display file="igb/bed.xml" />
     </datatype>
-    <datatype extension="bedgraph" type="galaxy.datatypes.interval:BedGraph" display_in_upload="true" />
+    <datatype extension="bedgraph" type="galaxy.datatypes.interval:BedGraph" display_in_upload="true">
+      <converter file="bedgraph_to_bigwig_converter.xml" target_datatype="bigwig"/>
+    </datatype>
     <datatype extension="bedstrict" type="galaxy.datatypes.interval:BedStrict" />
     <datatype extension="bed6" type="galaxy.datatypes.interval:Bed6">
       <converter file="bed_to_genetrack_converter.xml" target_datatype="genetrack"/>
       <converter file="gff_to_bed_converter.xml" target_datatype="bed"/>
       <converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/>
       <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/>
+      <converter file="gff_to_fli_converter.xml" target_datatype="fli"/>
       <display file="ensembl/ensembl_gff.xml" inherit="True"/>
       <!-- <display file="gbrowse/gbrowse_gff.xml" inherit="True" /> -->
     </datatype>
     <datatype extension="linecount" type="galaxy.datatypes.data:LineCount" display_in_upload="false"/>
     <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/>
     <datatype extension="cisml" type="galaxy.datatypes.xml:CisML" mimetype="application/xml" display_in_upload="true"/>
-    <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" mimetype="application/xml" display_in_upload="true"/>
     <datatype extension="xml" type="galaxy.datatypes.xml:GenericXml" mimetype="application/xml" display_in_upload="true"/>
     <datatype extension="vcf" type="galaxy.datatypes.tabular:Vcf" display_in_upload="true">
       <converter file="vcf_to_bgzip_converter.xml" target_datatype="bgzip"/>
     <sniffer type="galaxy.datatypes.binary:TwoBit"/>
     <sniffer type="galaxy.datatypes.binary:Bam"/>
     <sniffer type="galaxy.datatypes.binary:Sff"/>
-    <sniffer type="galaxy.datatypes.xml:BlastXml"/>
     <sniffer type="galaxy.datatypes.xml:GenericXml"/>
     <sniffer type="galaxy.datatypes.sequence:Maf"/>
     <sniffer type="galaxy.datatypes.sequence:Lav"/>

File display_applications/gbrowse/gbrowse_gff.xml

 <display id="gbrowse_gff" version="1.0.0" name="display at GBrowse">
     <!-- Load links from file: one line to one link -->
-    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="0">
+    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="1">
+        
         <!-- Define parameters by column from file, allow splitting on builds -->
         <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="gbrowse_link" value="1"/>
-        <dynamic_param name="builds" value="2" split="True" separator="," />
+        <dynamic_param name="site_name" value="1"/>
+        <dynamic_param name="site_link" value="2"/>
+        <dynamic_param name="site_dbkeys" value="3" split="True" separator="," />
+        <dynamic_param name="site_organisms" value="4" split="True" separator="," />
+        
         <!-- Filter out some of the links based upon matching site_id to a Galaxy application configuration parameter and by dataset dbkey -->
         <filter>${site_id in $APP.config.gbrowse_display_sites}</filter>
-        <filter>${dataset.dbkey in $builds}</filter>
+        <filter>${dataset.dbkey in $site_dbkeys}</filter>
+        
         <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url>${gbrowse_link}/?${position}eurl=${gff_file.qp}</url>
+        <url>${site_link}${site_organism}/?${position}eurl=${gff_file.qp}</url>
         <param type="data" name="gff_file" url="galaxy_${DATASET_HASH}.gff" />
+        <param type="template" name="site_organism" strip="True" >
+            $site_organisms[ $site_dbkeys.index( $gff_file.dbkey ) ]
+        </param>
         <param type="template" name="position" strip="True" >
 #set chrom, start, end = $gff_file.datatype.get_estimated_display_viewport( $gff_file )
 #if $chrom is not None:
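
The dbkey-to-organism mapping used above implies a wider tool-data/shared/gbrowse/gbrowse_build_sites.txt: each non-comment line now carries five tab-separated columns (site_id, site_name, site_link, a comma-separated dbkey list, and a parallel list of organism path components), and the same layout is read by the two GBrowse display XMLs that follow. A purely illustrative row, not taken from the sample file in this commit (spacing widened here for readability; real lines are tab-separated):

    wormbase    WormBase    http://www.wormbase.org/db/gb2/gbrowse/    ce6,ce4    c_elegans,c_elegans

The site_organism template picks the organism whose position in the last column matches the dataset dbkey's position in the dbkeys column, and the organism name is appended directly to site_link when the display URL is assembled.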

File display_applications/gbrowse/gbrowse_interval_as_bed.xml

 <display id="gbrowse_interval_as_bed" version="1.0.0" name="display at GBrowse">
     <!-- Load links from file: one line to one link -->
-    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="0">
+    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="1">
+        
         <!-- Define parameters by column from file, allow splitting on builds -->
         <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="gbrowse_link" value="1"/>
-        <dynamic_param name="builds" value="2" split="True" separator="," />
+        <dynamic_param name="site_name" value="1"/>
+        <dynamic_param name="site_link" value="2"/>
+        <dynamic_param name="site_dbkeys" value="3" split="True" separator="," />
+        <dynamic_param name="site_organisms" value="4" split="True" separator="," />
+        
         <!-- Filter out some of the links based upon matching site_id to a Galaxy application configuration parameter and by dataset dbkey -->
         <filter>${site_id in $APP.config.gbrowse_display_sites}</filter>
-        <filter>${dataset.dbkey in $builds}</filter>
+        <filter>${dataset.dbkey in $site_dbkeys}</filter>
+        
         <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url>${gbrowse_link}/?${position}eurl=${bed_file.qp}</url>
+        <url>${site_link}${site_organism}/?${position}eurl=${bed_file.qp}</url>
         <param type="data" name="bed_file" url="galaxy_${DATASET_HASH}.bed" format="bedstrict"/> <!-- Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, gbrowse does not(?): force use of converter which will make strict BED6+ file -->
+        <param type="template" name="site_organism" strip="True" >
+            $site_organisms[ $site_dbkeys.index( $bed_file.dbkey ) ]
+        </param>
         <param type="template" name="position" strip="True" >
 #set chrom, start, end = $bed_file.datatype.get_estimated_display_viewport( $bed_file )
 #if $chrom is not None:

File display_applications/gbrowse/gbrowse_wig.xml

 <display id="gbrowse_wig" version="1.0.0" name="display at GBrowse">
     <!-- Load links from file: one line to one link -->
-    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="0">
+    <dynamic_links from_file="tool-data/shared/gbrowse/gbrowse_build_sites.txt" skip_startswith="#" id="0" name="1">
+        
         <!-- Define parameters by column from file, allow splitting on builds -->
         <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="gbrowse_link" value="1"/>
-        <dynamic_param name="builds" value="2" split="True" separator="," />
+        <dynamic_param name="site_name" value="1"/>
+        <dynamic_param name="site_link" value="2"/>
+        <dynamic_param name="site_dbkeys" value="3" split="True" separator="," />
+        <dynamic_param name="site_organisms" value="4" split="True" separator="," />
+        
         <!-- Filter out some of the links based upon matching site_id to a Galaxy application configuration parameter and by dataset dbkey -->
         <filter>${site_id in $APP.config.gbrowse_display_sites}</filter>
-        <filter>${dataset.dbkey in $builds}</filter>
+        <filter>${dataset.dbkey in $site_dbkeys}</filter>
+        
         <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url>${gbrowse_link}/?${position}eurl=${wig_file.qp}</url>
+        <url>${site_link}${site_organism}/?${position}eurl=${wig_file.qp}</url>
         <param type="data" name="wig_file" url="galaxy_${DATASET_HASH}.wig" format="wig"/>
+        <param type="template" name="site_organism" strip="True" >
+            $site_organisms[ $site_dbkeys.index( $wig_file.dbkey ) ]
+        </param>
         <param type="template" name="position" strip="True" >
 #set chrom, start, end = $wig_file.datatype.get_estimated_display_viewport( $wig_file )
 #if $chrom is not None:

File eggs.ini

 ctypes = 1.0.2
 DRMAA_python = 0.2
 MarkupSafe = 0.12
-mercurial = 2.1.2
+mercurial = 2.2.3
 MySQL_python = 1.2.3c1
 numpy = 1.6.0
 pbs_python = 4.1.0

File lib/galaxy/config.py

         self.log_events = string_as_bool( kwargs.get( 'log_events', 'False' ) )
         self.sanitize_all_html = string_as_bool( kwargs.get( 'sanitize_all_html', True ) )
         self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",")
-        self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "wormbase,tair,modencode_worm,modencode_fly,sgd_yeast" ).lower().split(",")
+        self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",")
         self.genetrack_display_sites = kwargs.get( 'genetrack_display_sites', "main,test" ).lower().split(",")
         self.brand = kwargs.get( 'brand', None )
         self.support_url = kwargs.get( 'support_url', 'http://wiki.g2.bx.psu.edu/Support' )
         if self.nginx_upload_store:
             self.nginx_upload_store = os.path.abspath( self.nginx_upload_store )
         self.object_store = kwargs.get( 'object_store', 'disk' )
-        self.aws_access_key = kwargs.get( 'aws_access_key', None )
-        self.aws_secret_key = kwargs.get( 'aws_secret_key', None )
-        self.s3_bucket = kwargs.get( 's3_bucket', None)
-        self.use_reduced_redundancy = kwargs.get( 'use_reduced_redundancy', False )
+        # Handle AWS-specific config options for backward compatibility
+        if kwargs.get( 'aws_access_key', None) is not None:
+            self.os_access_key= kwargs.get( 'aws_access_key', None )
+            self.os_secret_key= kwargs.get( 'aws_secret_key', None )
+            self.os_bucket_name= kwargs.get( 's3_bucket', None )
+            self.os_use_reduced_redundancy = kwargs.get( 'use_reduced_redundancy', False )
+        else:
+            self.os_access_key = kwargs.get( 'os_access_key', None )
+            self.os_secret_key = kwargs.get( 'os_secret_key', None )
+            self.os_bucket_name = kwargs.get( 'os_bucket_name', None )
+            self.os_use_reduced_redundancy = kwargs.get( 'os_use_reduced_redundancy', False )
+        self.os_host = kwargs.get( 'os_host', None )
+        self.os_port = kwargs.get( 'os_port', None )
+        self.os_is_secure = string_as_bool( kwargs.get( 'os_is_secure', True ) )
+        self.os_conn_path = kwargs.get( 'os_conn_path', '/' )
         self.object_store_cache_size = float(kwargs.get( 'object_store_cache_size', -1 ))
         self.distributed_object_store_config_file = kwargs.get( 'distributed_object_store_config_file', None )
         # Parse global_conf and save the parser
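
For deployers, the practical effect is a rename of the S3-specific object store settings in universe_wsgi.ini to generic os_* keys, with the old names still honored by the backward-compatibility branch above. A hedged sketch with placeholder values (only the option names come from the kwargs in this hunk):

    # Generic object store credentials; the legacy aws_access_key,
    # aws_secret_key, s3_bucket and use_reduced_redundancy names are
    # still accepted.
    os_access_key = <access key>
    os_secret_key = <secret key>
    os_bucket_name = my-galaxy-bucket
    os_use_reduced_redundancy = False
    # New connection options (host/port are placeholders; os_is_secure and
    # os_conn_path show their config.py defaults)
    os_host = s3.amazonaws.com
    os_port = 443
    os_is_secure = True
    os_conn_path = /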

File lib/galaxy/datatypes/binary.py

 log = logging.getLogger(__name__)
 
 # Currently these supported binary data types must be manually set on upload
-unsniffable_binary_formats = [ 'ab1', 'scf', 'h5' ]
 
 class Binary( data.Data ):
     """Binary data"""
+    sniffable_binary_formats = []
+    unsniffable_binary_formats = []
+
+    @staticmethod
+    def register_sniffable_binary_format(data_type, ext, type_class):
+        Binary.sniffable_binary_formats.append({"type": data_type, "ext": ext, "class": type_class})
+
+    @staticmethod
+    def register_unsniffable_binary_ext(ext):
+        Binary.unsniffable_binary_formats.append(ext)
+
+    @staticmethod
+    def is_sniffable_binary(filename):
+        for format in Binary.sniffable_binary_formats:
+            if format["class"]().sniff(filename):
+                return (format["type"], format["ext"])
+        return None
+
+    @staticmethod
+    def is_ext_unsniffable(ext):
+        return ext in Binary.unsniffable_binary_formats
+
     def set_peek( self, dataset, is_multi_byte=False ):
         """Set the peek and blurb text"""
         if not dataset.dataset.purged:
         except:
             return "Binary ab1 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) )
 
+Binary.register_unsniffable_binary_ext("ab1")
+
 class Bam( Binary ):
     """Class describing a BAM binary file"""
     file_ext = "bam"
     def get_track_type( self ):
         return "ReadTrack", {"data": "bai", "index": "summary_tree"}
 
+Binary.register_sniffable_binary_format("bam", "bam", Bam)
+
 class H5( Binary ):
     """Class describing an HDF5 file"""
     file_ext = "h5"
         except:
             return "Binary h5 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) )
 
+Binary.register_unsniffable_binary_ext("h5")
+
 class Scf( Binary ):
     """Class describing an scf binary sequence file"""
     file_ext = "scf"
         except:
             return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) )
 
+Binary.register_unsniffable_binary_ext("scf")
+
 class Sff( Binary ):
     """ Standard Flowgram Format (SFF) """
     file_ext = "sff"
         except:
             return "Binary sff file (%s)" % ( data.nice_size( dataset.get_size() ) )
 
+Binary.register_sniffable_binary_format("sff", "sff", Sff)
+
 class BigWig(Binary):
     """
     Accessing binary BigWig files from UCSC.
     def get_track_type( self ):
         return "LineTrack", {"data_standalone": "bigwig"}
 
+Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig)
+
 class BigBed(BigWig):
     """BigBed support from UCSC."""
     def __init__( self, **kwd ):
     def get_track_type( self ):
         return "LineTrack", {"data_standalone": "bigbed"}
 
+Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed)
+
 class TwoBit (Binary):
     """Class describing a TwoBit format nucleotide file"""
     
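The module-level unsniffable_binary_formats list is replaced by registration hooks on Binary, so any datatype module can declare its own binary formats (images.py registers Pdf this way further down). A minimal sketch using a made-up datatype and extensions, not part of this commit:

    from galaxy.datatypes.binary import Binary

    class ExampleBinary( Binary ):
        '''Hypothetical binary datatype, shown only to illustrate the hooks.'''
        file_ext = "exbin"
        def sniff( self, filename ):
            # A real sniffer would inspect the file's magic bytes; always
            # returning False keeps this illustration inert.
            return False

    # Sniffable formats are tried in registration order by
    # Binary.is_sniffable_binary( filename ).
    Binary.register_sniffable_binary_format( "exbin", "exbin", ExampleBinary )

    # Formats with no reliable signature are declared by extension instead,
    # so Binary.is_ext_unsniffable( ext ) can accept them on upload.
    Binary.register_unsniffable_binary_ext( "exbin2" )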

File lib/galaxy/datatypes/converters/bed_to_fli_converter.xml

+<tool id="CONVERTER_bed_to_fli_0" name="Convert BED to Feature Location Index">
+  <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+  <!-- Used on the metadata edit page. -->
+  <command interpreter="python">interval_to_fli.py -B $input1 $output1</command>
+  <inputs>
+    <param format="bed" name="input1" type="data" label="Choose BED file"/>
+  </inputs>
+  <outputs>
+    <data format="fli" name="output1"/>
+  </outputs>
+  <help>
+  </help>
+</tool>

File lib/galaxy/datatypes/converters/bedgraph_to_bigwig_converter.xml

+<tool id="CONVERTER_bedgraph_to_bigwig" name="Convert BedGraph to BigWig" hidden="true">
+  <!-- Used internally to generate track indexes -->
+  <command>grep -v "^track" $input | wigToBigWig -clip stdin $chromInfo $output</command>
+  <inputs>
+    <page>
+      <param format="bedgraph" name="input" type="data" label="Choose wiggle"/>
+    </page>
+   </inputs>
+   <outputs>
+      <data format="bigwig" name="output"/>
+   </outputs>
+  <help>
+  </help>
+</tool>

File lib/galaxy/datatypes/converters/gff_to_fli_converter.xml

+<tool id="CONVERTER_gff_to_fli_0" name="Convert GFF to Feature Location Index">
+  <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+  <!-- Used on the metadata edit page. -->
+  <command interpreter="python">interval_to_fli.py -G $input1 $output1</command>
+  <inputs>
+    <param format="gff" name="input1" type="data" label="Choose GFF file"/>
+  </inputs>
+  <outputs>
+    <data format="fli" name="output1"/>
+  </outputs>
+  <help>
+  </help>
+</tool>

File lib/galaxy/datatypes/converters/interval_to_fli.py

+'''
+Creates a feature location index (FLI) for a given BED/GFF file.
+FLI index has the form:
+    [line_length]
+    <symbol1_in_lowercase><tab><symbol1><tab><location>
+    <symbol2_in_lowercase><tab><symbol2><tab><location>
+    ...
+where location is formatted as:
+    contig:start-end
+and symbols are sorted in lexigraphical order.
+'''
+
+import sys, optparse
+from galaxy import eggs
+from galaxy.datatypes.util.gff_util import read_unordered_gtf, convert_gff_coords_to_bed
+
+def main():
+    # Process arguments.
+    parser = optparse.OptionParser()
+    parser.add_option( '-B', '--bed', action="store_true", dest="bed_input" )
+    parser.add_option( '-G', '--gff', action="store_true", dest="gff_input" )
+    (options, args) = parser.parse_args()
+    in_fname, out_fname = args
+
+
+    # Create dict of name-location pairings.
+    name_loc_dict = {}
+    if options.gff_input:
+        # GFF format
+        for feature in read_unordered_gtf( open( in_fname, 'r' ) ):
+            for name in feature.attributes:
+                val = feature.attributes[ name ]
+                try:
+                    float( val )
+                    continue
+                except:
+                    convert_gff_coords_to_bed( feature )
+                    # Value is not a number, so it can be indexed.
+                    if val not in name_loc_dict:
+                        # Value is not in dictionary.
+                        name_loc_dict[ val ] = {
+                            'contig': feature.chrom,
+                            'start': feature.start,
+                            'end': feature.end
+                        }
+                    else:
+                        # Value already in dictionary, so update dictionary.
+                        loc = name_loc_dict[ val ]
+                        if feature.start < loc[ 'start' ]:
+                            loc[ 'start' ] = feature.start
+                        if feature.end > loc[ 'end' ]:
+                            loc[ 'end' ] = feature.end
+    else:
+        # BED format.
+        for line in open( in_fname, 'r' ):
+            fields = line.split()
+            name_loc_dict[ fields[3] ] = {
+                'contig': fields[0],
+                'start': int( fields[1] ),
+                'end': int ( fields[2] )
+            }
+        
+    # Create sorted list of entries.
+    out = open( out_fname, 'w' )
+    max_len = 0
+    entries = []
+    for name in sorted( name_loc_dict.iterkeys() ):
+        loc = name_loc_dict[ name ]
+        entry = '%s\t%s\t%s' % ( name.lower(), name, '%s:%i-%i' % ( loc[ 'contig' ], loc[ 'start' ], loc[ 'end' ] ) )
+        if len( entry ) > max_len:
+            max_len = len( entry )
+        entries.append( entry )
+    
+    # Write padded entries.
+    out.write( str( max_len + 1 ).ljust( max_len ) + '\n' )
+    for entry in entries:
+        out.write( entry.ljust( max_len ) + '\n' )
+    out.close()
+
+if __name__ == '__main__':
+    main()
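
Because every record is padded to a common width, and the total bytes per line (content plus newline) are written on the first line, a consumer can seek straight to the i-th record; since records are sorted by symbol, an exact lookup can binary-search the file instead of scanning it. A rough sketch of such a lookup (the function is illustrative, not part of this commit; it ignores the lowercased first column, which is there to support case-insensitive matching):

    def fli_find( fname, symbol ):
        '''Illustrative only: binary-search an FLI file for an exact symbol.'''
        f = open( fname, 'rb' )
        line_len = int( f.readline().strip() )   # bytes per line, incl. newline
        f.seek( 0, 2 )
        n_records = f.tell() // line_len - 1     # subtract the width header line
        lo, hi = 0, n_records - 1
        while lo <= hi:
            mid = ( lo + hi ) // 2
            f.seek( ( mid + 1 ) * line_len )     # +1 skips the header line
            lower, name, loc = f.read( line_len ).rstrip().split( '\t' )
            if name == symbol:
                return loc                       # e.g. 'chrI:1000-2500'
            elif name < symbol:
                lo = mid + 1
            else:
                hi = mid - 1
        return None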

File lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py.orig

-#!/usr/bin/env python
-
-"""
-Convert from interval file to interval index file. Default input file format is BED (0-based, half-open intervals).
-
-usage: %prog in_file out_file
-    -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval
-"""
-
-from __future__ import division
-
-import sys, fileinput
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from galaxy.visualization.tracks.summary import *
-from bx.cookbook import doc_optparse
-from galaxy.tools.util.gff_util import convert_gff_coords_to_bed
-from bx.interval_index_file import Indexes
-from galaxy.tools.util.gff_util import parse_gff_attributes
-
-def main():
-    
-    # Read options, args.
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        gff_format = bool( options.gff )
-        input_fname, out_fname = args
-    except:
-        doc_optparse.exception()
-    
-    # Do conversion.
-    # TODO: take column numbers from command line.
-    if gff_format:
-        chr_col, start_col, end_col = ( 0, 3, 4 )
-    else:
-        chr_col, start_col, end_col = ( 0, 1, 2 )
-    index = Indexes()
-    offset = 0
-    # Need to keep track of last gene, transcript id for indexing GTF files.
-    last_gene_id = None
-    last_transcript_id = None
-    for line in open(input_fname, "r"):
-        feature = line.strip().split('\t')
-        if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
-            offset += len(line)
-            continue
-        chrom = feature[ chr_col ]
-        chrom_start = int( feature[ start_col ] )
-        chrom_end = int( feature[ end_col ] )
-        if gff_format:
-            chrom_start, chrom_end = convert_gff_coords_to_bed( [chrom_start, chrom_end ] )
-            
-            # Only add feature if gene_id, transcript_id are different from last 
-            # values.
-            if len( feature ) == 9:
-                attributes = parse_gff_attributes( feature[8] )
-                gene_id = attributes.get( 'gene_id', None )
-                transcript_id = attributes.get( 'transcript_id', None )
-                if gene_id and transcript_id and gene_id == last_gene_id and \
-                   transcript_id == last_transcript_id:
-                    # Feature has same gene_id, transcript as last feature, so 
-                    # do not add.
-                    offset += len(line)
-                    continue
-                else:
-                    # gene_id, transcript_id set and are different from last 
-                    # values.
-                    last_gene_id = gene_id
-                    last_transcript_id = transcript_id
-
-        #print "%s %s %s %s %i %i %i" % (feature[2], last_gene_id, last_transcript_id, chrom, chrom_start, chrom_end, offset)
-        index.add( chrom, chrom_start, chrom_end, offset )
-        offset += len(line)
-            
-    index.write( open(out_fname, "w") )
-
-if __name__ == "__main__": 
-    main()
-    

File lib/galaxy/datatypes/converters/wig_to_bigwig_converter.xml

 <tool id="CONVERTER_wig_to_bigwig" name="Convert Wiggle to BigWig" hidden="true">
   <!-- Used internally to generate track indexes -->
-  <command>wigToBigWig $input $chromInfo $output</command>
+  <command>grep -v "^track" $input | wigToBigWig -clip stdin $chromInfo $output</command>
   <inputs>
     <page>
       <param format="wig" name="input" type="data" label="Choose wiggle"/>

File lib/galaxy/datatypes/data.py

     pass
 
 class Newick( Text ):
-    pass
+    """New Hampshire/Newick Format"""
+    file_ext = "nhx"
+
+    MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True )
+
+    def __init__(self, **kwd):
+        """Initialize foobar datatype"""
+        Text.__init__(self, **kwd)
+
+    def init_meta( self, dataset, copy_from=None ):
+        Text.init_meta( self, dataset, copy_from=copy_from )
+
+
+    def sniff( self, filename ):
+        """ Returning false as the newick format is too general and cannot be sniffed."""
+        return False
+
+
+class Nexus( Text ):
+    """Nexus format as used By Paup, Mr Bayes, etc"""
+    file_ext = "nex"
+
+    MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True )
+
+    def __init__(self, **kwd):
+        """Initialize foobar datatype"""
+        Text.__init__(self, **kwd)
+
+    def init_meta( self, dataset, copy_from=None ):
+        Text.init_meta( self, dataset, copy_from=copy_from )
+
+
+    def sniff( self, filename ):
+        """All Nexus Files Simply puts a '#NEXUS' in its first line"""
+        f = open(filename, "r")
+        firstline = f.readline().upper()
+        f.close()
+
+        if "#NEXUS" in firstline:
+            return True
+        else:
+            return False
+
 
 # ------------- Utility methods --------------
 

File lib/galaxy/datatypes/display_applications/application.py

         for line in open( filename ):
             if not skip_startswith or not line.startswith( skip_startswith ):
                 line = line.rstrip( '\n\r' )
+                if not line:
+                    continue
                 fields = line.split( separator )
-                if len( fields ) >= max_col:
+                if len( fields ) > max_col:
                     new_elem = deepcopy( elem )
                     new_elem.set( 'id', fields[id_col] )
                     new_elem.set( 'name', fields[name_col] )
                         dynamic_values[key] = value
                     #now populate
                     rval.append( DisplayApplicationLink.from_elem( new_elem, display_application, other_values = dynamic_values ) )
+                else:
+                    log.warning( 'Invalid dynamic display application link specified in %s: "%s"' % ( filename, line ) )
         self.links = rval
     def __iter__( self ):
         return iter( self.links )

File lib/galaxy/datatypes/images.py

 
 import data
 import logging
+from galaxy.datatypes.binary import Binary
 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes import metadata
 from galaxy.datatypes.sniff import *
         except IndexError:
             return False
 
+Binary.register_sniffable_binary_format("pdf", "pdf", Pdf)
+
 def create_applet_tag_peek( class_name, archive, params ):
     text = """
 <!--[if !IE]>-->

File lib/galaxy/datatypes/interval.py

     file_ext = "bedgraph"
 
     def get_track_type( self ):
-        return "LineTrack", {"data": "array_tree"}
+        return "LineTrack", { "data": "bigwig", "index": "bigwig" }
         
     def as_ucsc_display_file( self, dataset, **kwd ):
         """
         except: return False
     
     def get_track_type( self ):
-        return "FeatureTrack", {"data": "tabix", "index": "summary_tree"}
+        return "FeatureTrack", {"data": "tabix", "index": "summary_tree", "feature_search": "fli"}
 
 class BedStrict( Bed ):
     """Tab delimited data in strict BED format - no non-standard columns allowed"""
     """Add metadata elements"""
     MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False )
     MetadataElement( name="column_types", default=['str','str','str','int','int','int','str','str','str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
+
+    MetadataElement( name="attributes", default=0, desc="Number of attributes", readonly=True, visible=False, no_value=0 )
+    MetadataElement( name="attribute_types", default={}, desc="Attribute types", param=metadata.DictParameter, readonly=True, visible=False, no_value=[] )    
     
     def __init__( self, **kwd ):
         """Initialize datatype, by adding GBrowse display app"""
         Tabular.__init__(self, **kwd)
         self.add_display_app( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' )
         self.add_display_app( 'gbrowse', 'display in Gbrowse', 'as_gbrowse_display_file', 'gbrowse_links' )
+
+    def set_attribute_metadata( self, dataset ):
+        """ 
+        Sets metadata elements for dataset's attributes.
+        """
+
+        # Use first N lines to set metadata for dataset attributes. Attributes 
+        # not found in the first N lines will not have metadata.
+        num_lines = 200
+        attribute_types = {}
+        for i, line in enumerate( file ( dataset.file_name ) ):
+            if line and not line.startswith( '#' ):
+                elems = line.split( '\t' )
+                if len( elems ) == 9:
+                    try:
+                        # Loop through attributes to set types.
+                        for name, value in parse_gff_attributes( elems[8] ).items():
+                            # Default type is string.
+                            value_type = "str"
+                            try:
+                                # Try int.
+                                int( value )
+                                value_type = "int"
+                            except:
+                                try: 
+                                    # Try float.
+                                    float( value )
+                                    value_type = "float"
+                                except:
+                                    pass
+                            attribute_types[ name ] = value_type
+                    except:
+                        pass
+                if i + 1 == num_lines:
+                    break
+        
+        # Set attribute metadata and then set additional metadata.
+        dataset.metadata.attribute_types = attribute_types
+        dataset.metadata.attributes = len( attribute_types )
+
     def set_meta( self, dataset, overwrite = True, **kwd ):
+        self.set_attribute_metadata( dataset )
+
         i = 0
         for i, line in enumerate( file ( dataset.file_name ) ):
             line = line.rstrip('\r\n')
                     except:
                         pass
         Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i )
+
     def display_peek( self, dataset ):
         """Returns formated html of peek"""
         return Tabular.make_html_table( self, dataset, column_names=self.column_names )
             return False
             
     def get_track_type( self ):
-        return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
+        return "FeatureTrack", {"data": "interval_index", "index": "summary_tree", "feature_search": "fli"}
             
 
 class Gff3( Gff ):
         """Initialize datatype, by adding GBrowse display app"""
         Gff.__init__(self, **kwd)
     def set_meta( self, dataset, overwrite = True, **kwd ):
+        self.set_attribute_metadata( dataset )
+
         i = 0
         for i, line in enumerate( file ( dataset.file_name ) ):
             line = line.rstrip('\r\n')
     MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False )
     MetadataElement( name="column_types", default=['str','str','str','int','int','float','str','int','list'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
     
-    MetadataElement( name="attributes", default=0, desc="Number of attributes", readonly=True, visible=False, no_value=0 )
-    MetadataElement( name="attribute_types", default={}, desc="Attribute types", param=metadata.DictParameter, readonly=True, visible=False, no_value=[] )
-    
     def sniff( self, filename ):
         """
         Determines whether the file is in gtf format
             return True
         except:
             return False
-            
-    def set_meta( self, dataset, overwrite = True, **kwd ):        
-        # Use first N lines to set metadata for dataset attributes. Attributes 
-        # not found in the first N lines will not have metadata.
-        num_lines = 200
-        attribute_types = {}
-        for i, line in enumerate( file ( dataset.file_name ) ):
-            if line and not line.startswith( '#' ):
-                elems = line.split( '\t' )
-                if len( elems ) == 9:
-                    try:
-                        # Loop through attributes to set types.
-                        for name, value in parse_gff_attributes( elems[8] ).items():
-                            # Default type is string.
-                            value_type = "str"
-                            try:
-                                # Try int.
-                                int( value )
-                                value_type = "int"
-                            except:
-                                try: 
-                                    # Try float.
-                                    float( value )
-                                    value_type = "float"
-                                except:
-                                    pass
-                            attribute_types[ name ] = value_type
-                    except:
-                        pass
-                if i + 1 == num_lines:
-                    break
-        
-        # Set attribute metadata and then set additional metadata.
-        dataset.metadata.attribute_types = attribute_types
-        dataset.metadata.attributes = len( attribute_types )
-        Gff.set_meta( self, dataset, overwrite = overwrite, skip = i )
-        
-    def get_track_type( self ):
-        return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
-
 
 class Wiggle( Tabular, _RemoteCallMixin ):
     """Tab delimited data in wiggle format"""
         resolution = min( resolution, 100000 )
         resolution = max( resolution, 1 )
         return resolution
+        
     def get_track_type( self ):
-        return "LineTrack", {"data": "bigwig", "index": "bigwig"}
+        return "LineTrack", { "data": "bigwig", "index": "bigwig" }
 
 class CustomTrack ( Tabular ):
     """UCSC CustomTrack"""

File lib/galaxy/datatypes/registry.py

 Provides mapping between extensions and datatypes, mime-types, etc.
 """
 import mgh
-import os, sys, tempfile, threading, logging
+import os, sys, tempfile, threading, logging, imp
 import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex
 import galaxy.util
 from galaxy.util.odict import odict
         being installed.  Since installation is occurring after the datatypes registry has been initialized, its
         contents cannot be overridden by new introduced conflicting data types.
         """
-        def __import_module( full_path, datatype_module ):
-            sys.path.insert( 0, full_path )
-            imported_module = __import__( datatype_module )
-            sys.path.pop( 0 )
+        def __import_module( full_path, datatype_module, datatype_class_name ):
+            open_file_obj, file_name, description = imp.find_module( datatype_module, [ full_path ] )
+            imported_module = imp.load_module( datatype_class_name, open_file_obj, file_name, description )
             return imported_module
         if root_dir and config:
             handling_proprietary_datatypes = False
                             datatype_module = fields[0]
                             datatype_class_name = fields[1]
                             datatype_class = None
-                            if proprietary_path and proprietary_datatype_module:
+                            if proprietary_path and proprietary_datatype_module and datatype_class_name:
                                 # We need to change the value of sys.path, so do it in a way that is thread-safe.
                                 lock = threading.Lock()
                                 lock.acquire( True )
                                 try:
-                                    imported_module = __import_module( proprietary_path, proprietary_datatype_module )
+                                    imported_module = __import_module( proprietary_path, proprietary_datatype_module, datatype_class_name )
                                     if imported_module not in self.imported_modules:
                                         self.imported_modules.append( imported_module )
                                     if hasattr( imported_module, datatype_class_name ):
                 'axt'         : sequence.Axt(),
                 'bam'         : binary.Bam(),
                 'bed'         : interval.Bed(), 
-                'blastxml'    : xml.BlastXml(),
                 'coverage'    : coverage.LastzCoverage(),
                 'customtrack' : interval.CustomTrack(),
                 'csfasta'     : sequence.csFasta(),
                 'axt'         : 'text/plain',
                 'bam'         : 'application/octet-stream',
                 'bed'         : 'text/plain', 
-                'blastxml'    : 'application/xml', 
                 'customtrack' : 'text/plain',
                 'csfasta'     : 'text/plain',
                 'eland'       : 'application/octet-stream',
             self.sniff_order = [
                 binary.Bam(),
                 binary.Sff(),
-                xml.BlastXml(),
                 xml.GenericXml(),
                 sequence.Maf(),
                 sequence.Lav(),

File lib/galaxy/datatypes/sniff.py

 import registry
 from galaxy import util
 from galaxy.datatypes.checkers import *
-from galaxy.datatypes.binary import unsniffable_binary_formats
+from encodings import search_function as encodings_search_function
 
 log = logging.getLogger(__name__)
-        
+
 def get_test_fname(fname):
     """Returns test data filename"""
     path, name = os.path.split(__file__)
     full_path = os.path.join(path, 'test', fname)
     return full_path
 
-def stream_to_open_named_file( stream, fd, filename ):
+def stream_to_open_named_file( stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict' ):
     """Writes a stream to the provided file descriptor, returns the file's name and bool( is_multi_byte ). Closes file descriptor"""
     #signature and behavor is somewhat odd, due to backwards compatibility, but this can/should be done better
     CHUNK_SIZE = 1048576
     is_compressed = False
     is_binary = False
     is_multi_byte = False
+    if not target_encoding or not encodings_search_function( target_encoding ):
+        target_encoding = util.DEFAULT_ENCODING #utf-8
+    if not source_encoding:
+        source_encoding = util.DEFAULT_ENCODING #sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
     while 1:
         chunk = stream.read( CHUNK_SIZE )
         if not chunk:
                 chars = chunk[:100]
                 is_multi_byte = util.is_multi_byte( chars )
                 if not is_multi_byte:
-                    for char in chars:
-                        if ord( char ) > 128:
-                            is_binary = True
-                            break
+                    is_binary = util.is_binary( chunk )
             data_checked = True
         if not is_compressed and not is_binary:
-            os.write( fd, chunk.encode( "utf-8" ) )
+            if not isinstance( chunk, unicode ):
+                chunk = chunk.decode( source_encoding, source_error )
+            os.write( fd, chunk.encode( target_encoding, target_error ) )
         else:
             # Compressed files must be encoded after they are uncompressed in the upload utility,
             # while binary files should not be encoded at all.
     os.close( fd )
     return filename, is_multi_byte
 
-def stream_to_file( stream, suffix='', prefix='', dir=None, text=False ):
+def stream_to_file( stream, suffix='', prefix='', dir=None, text=False, **kwd ):
     """Writes a stream to a temporary file, returns the temporary file's name"""
     fd, temp_name = tempfile.mkstemp( suffix=suffix, prefix=prefix, dir=dir, text=text )
-    return stream_to_open_named_file( stream, fd, temp_name )
+    return stream_to_open_named_file( stream, fd, temp_name, **kwd )
 
 def check_newlines( fname, bytes_to_read=52428800 ):
     """
 
 def convert_newlines( fname, in_place=True ):
     """
-    Converts in place a file from universal line endings 
+    Converts in place a file from universal line endings
     to Posix line endings.
 
     >>> fname = get_test_fname('temp.txt')
     """
     fd, temp_name = tempfile.mkstemp()
     fp = os.fdopen( fd, "wt" )
-    for i, line in enumerate( file( fname, "U" ) ):
+    i = 0
+    for i, line in enumerate( file( fname, "U" ), 1 ):
         fp.write( "%s\n" % line.rstrip( "\r\n" ) )
     fp.close()
     if in_place:
         shutil.move( temp_name, fname )
         # Return number of lines in file.
-        return ( i + 1, None )
+        return ( i, None )
     else:
-        return ( i + 1, temp_name )
+        return ( i, temp_name )
 
 def sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
     regexp = re.compile( patt )
     fd, temp_name = tempfile.mkstemp()
     fp = os.fdopen( fd, "wt" )
-    for i, line in enumerate( file( fname ) ):
+    i = 0
+    for i, line in enumerate( file( fname ), 1):
         line  = line.rstrip( '\r\n' )
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     if in_place:
         shutil.move( temp_name, fname )
         # Return number of lines in file.
-        return ( i + 1, None )
+        return ( i, None )
     else:
-        return ( i + 1, temp_name )
+        return ( i, temp_name )
 
 def convert_newlines_sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
 def get_headers( fname, sep, count=60, is_multi_byte=False ):
     """
     Returns a list with the first 'count' lines split by 'sep'
-    
+
     >>> fname = get_test_fname('complete.bed')
     >>> get_headers(fname,'\\t')
     [['chr7', '127475281', '127491632', 'NM_000230', '0', '+', '127486022', '127488767', '0', '3', '29,172,3225,', '0,10713,13126,'], ['chr7', '127486011', '127488900', 'D49487', '0', '+', '127486022', '127488767', '0', '2', '155,490,', '0,2399']]
         if idx == count:
             break
     return headers
-    
+
 def is_column_based( fname, sep='\t', skip=0, is_multi_byte=False ):
     """
-    Checks whether the file is column based with respect to a separator 
+    Checks whether the file is column based with respect to a separator
     (defaults to tab separator).
-    
+
     >>> fname = get_test_fname('test.gff')
     >>> is_column_based(fname)
     True
 
     >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
     >>> guess_ext(fname)
-    'blastxml'
+    'xml'
     >>> fname = get_test_fname('interval.interval')
     >>> guess_ext(fname)
     'interval'
         """
         Some classes may not have a sniff function, which is ok.  In fact, the
         Tabular and Text classes are 2 examples of classes that should never have
-        a sniff function.  Since these classes are default classes, they contain 
+        a sniff function.  Since these classes are default classes, they contain
         few rules to filter out data of other formats, so they should be called
         from this function after all other datatypes in sniff_order have not been
         successfully discovered.
     else:
         for hdr in headers:
             for char in hdr:
-                if len( char ) > 1:
-                    for c in char:
-                        if ord( c ) > 128:
-                            is_binary = True
-                            break
-                elif ord( char ) > 128:
-                    is_binary = True
-                    break
+                #old behavior had 'char' possibly having length > 1,
+                #need to determine when/if this occurs 
+                is_binary = util.is_binary( char )
                 if is_binary:
                     break
             if is_binary:
         is_compressed = check_compressed_function( filename )
         if is_compressed:
             break #found compression type
-    if is_compressed: 
+    if is_compressed:
         if ext in AUTO_DETECT_EXTENSIONS:
             check_exts = COMPRESSION_DATATYPES[ compressed_type ]
         elif ext in COMPRESSED_EXTENSIONS:
                 keep_compressed = True
                 is_valid = True
                 break
-    
+
     if not is_compressed:
         is_valid = True
     elif not keep_compressed:
 
 def handle_uploaded_dataset_file( filename, datatypes_registry, ext = 'auto', is_multi_byte = False ):
     is_valid, ext = handle_compressed_file( filename, datatypes_registry, ext = ext )
-    
+
     if not is_valid:
         raise InappropriateDatasetContentError, 'The compressed uploaded file contains inappropriate content.'
-    
+
     if ext in AUTO_DETECT_EXTENSIONS:
         ext = guess_ext( filename, sniff_order = datatypes_registry.sniff_order, is_multi_byte=is_multi_byte )
-    
+
     if check_binary( filename ):
-        if ext not in unsniffable_binary_formats and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
+        if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
             raise InappropriateDatasetContentError, 'The binary uploaded file contains inappropriate content.'
     elif check_html( filename ):
         raise InappropriateDatasetContentError, 'The uploaded file contains inappropriate HTML content.'
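
The new encoding parameters flow from stream_to_file through **kwd into stream_to_open_named_file, so callers can state how uploaded bytes should be decoded and re-encoded instead of relying on the old hard-coded utf-8 write. A hypothetical call, assuming a latin-1 text upload (the filename and prefix are placeholders):

    from galaxy.datatypes import sniff

    fname, is_multi_byte = sniff.stream_to_file(
        open( 'upload.txt', 'rb' ),
        prefix='upload_',
        source_encoding='latin-1',   # how to decode the incoming bytes
        source_error='replace',      # tolerate undecodable sequences
        target_encoding='utf-8' )    # re-encode before writing to disk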

File lib/galaxy/datatypes/tabular.py

 
     def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, chunk=None):
         #TODO Prevent failure when displaying extremely long > 50kb lines.
-        if to_ext:
-            return self._serve_raw(trans, dataset, to_ext)
         if chunk:
             return self.get_chunk(trans, dataset, chunk)
+        if to_ext or not preview:
+            return self._serve_raw(trans, dataset, to_ext)
         else:
             column_names = 'null'
             if dataset.metadata.column_names:
             dataset.metadata.reads = reads.keys()
 
 
+class FeatureLocationIndex( Tabular ):
+    """
+    An index that stores feature locations in tabular format.
+    """
+    file_ext='fli'
+    MetadataElement( name="columns", default=2, desc="Number of columns", readonly=True, visible=False )
+    MetadataElement( name="column_types", default=['str', 'str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False, no_value=[] )
+

File lib/galaxy/datatypes/test/tblastn_four_human_vs_rhodopsin.xml

-<?xml version="1.0"?>
-<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">
-<BlastOutput>
-  <BlastOutput_program>tblastn</BlastOutput_program>
-  <BlastOutput_version>TBLASTN 2.2.25+</BlastOutput_version>
-  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
-  <BlastOutput_db></BlastOutput_db>
-  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
-  <BlastOutput_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>
-  <BlastOutput_query-len>406</BlastOutput_query-len>