Greg Von Kuster avatar Greg Von Kuster committed ab90893

Re-engineer the datatypes registry so that it is initialized once when the Galaxy server is started, but data types can continue to be loaded throughout the Galaxy server's session (hopefully this doesn't break anything).

Add support for a single "imported_module" to be passed to the new load_datatypes() method in the datatypes registry. This provides the ability to load a single class module from an installed tool shed repository along with a datatypes_conf.xml file included in the installed repository and pass them to the new load_datatypes() method. In the future, multiple imported modules may be allowed. The datatypes_conf.xml file included in the repository must conform to a slightly different definition than the same named file that comes with the distribution. This new definition will be documented in the Galaxy tool shed wiki.

We now have the ability to load new data types into the Galaxy server from an installed tool shed repository without restarting the Galaxy server.

Comments (0)

Files changed (12)

lib/galaxy/app.py

         self.config.check()
         config.configure_logging( self.config )
         # Set up datatypes registry
-        self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config )
+        self.datatypes_registry = galaxy.datatypes.registry.Registry()
+        self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config )
         galaxy.model.set_datatypes_registry( self.datatypes_registry )
         # Set up the tool sheds registry
         if os.path.isfile( self.config.tool_sheds_config ):

lib/galaxy/datatypes/registry.py

     pass
 
 class Registry( object ):
-    def __init__( self, root_dir=None, config=None ):
+    def __init__( self ):
         self.log = logging.getLogger(__name__)
         self.log.addHandler( logging.NullHandler() )
         self.datatypes_by_extension = {}
         self.sniff_order = []
         self.upload_file_formats = []
         self.display_applications = odict() #map a display application id to a display application
-        inherit_display_application_by_class = []
+        self.datatype_converters_path = None
+        self.datatype_indexers_path = None
+        self.display_applications_path = None
+    def load_datatypes( self, root_dir=None, config=None, imported_module=None ):
         if root_dir and config:
+            inherit_display_application_by_class = []
             # Parse datatypes_conf.xml
             tree = galaxy.util.parse_xml( config )
             root = tree.getroot()
             # Load datatypes and converters from config
             self.log.debug( 'Loading datatypes from %s' % config )
             registration = root.find( 'registration' )
-            self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) )
-            self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) )
-            self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) )
-            if not os.path.isdir( self.datatype_converters_path ):
-                raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path )
-            if not os.path.isdir( self.datatype_indexers_path ):
-                raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path )                
+            # The following implementation implies that only the first datatypes_conf.xml parsed will
+            # define the various paths.  This is probably ok, since we can justifiably require that the
+            # local datatypes_conf.xml file sets the standard, and all additional datatypes_conf.xml
+            # files installed with repositories from tool sheds must use the same paths.  However, we
+            # may discover at some future time that allowing for multiple paths is more optimal.
+            if not self.datatype_converters_path:
+                self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) )
+                if not os.path.isdir( self.datatype_converters_path ):
+                    raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path )
+            if not self.datatype_indexers_path:
+                self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) )
+                if not os.path.isdir( self.datatype_indexers_path ):
+                    raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path )
+            if not self.display_applications_path:
+                self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) )
             for elem in registration.findall( 'datatype' ):
                 try:
                     extension = elem.get( 'extension', None ) 
                             fields = dtype.split( ':' )
                             datatype_module = fields[0]
                             datatype_class_name = fields[1]
-                            fields = datatype_module.split( '.' )
-                            module = __import__( fields.pop(0) )
-                            for mod in fields:
-                                module = getattr( module, mod )
-                            datatype_class = getattr( module, datatype_class_name )
+                            if imported_module:
+                                datatype_class = getattr( imported_module, datatype_class_name )
+                            else:
+                                fields = datatype_module.split( '.' )
+                                module = __import__( fields.pop(0) )
+                                for mod in fields:
+                                    module = getattr( module, mod )
+                                datatype_class = getattr( module, datatype_class_name )
                         elif type_extension:
                             datatype_class = self.datatypes_by_extension[type_extension].__class__
                         if make_subclass:
                 if not included:
                     self.sniff_order.append(datatype)
         append_to_sniff_order()
-    
     def get_available_tracks(self):
         return self.available_tracks
-        
     def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ):
         """Returns a mimetype based on an extension"""
         try:
             mimetype = default
             self.log.warning('unknown mimetype in data factory %s' % ext)
         return mimetype
-    
     def get_datatype_by_extension(self, ext ):
         """Returns a datatype based on an extension"""
         try:
         except KeyError:
             builder = data.Text()
         return builder
-
     def change_datatype(self, data, ext, set_meta = True ):
         data.extension = ext
         # call init_meta and copy metadata from itself.  The datatype
                 data.set_meta( overwrite = False )
                 data.set_peek()
         return data
-
     def old_change_datatype(self, data, ext):
         """Creates and returns a new datatype based on an existing data and an extension"""
         newdata = factory(ext)(id=data.id)
             setattr(newdata, key, value)
         newdata.ext = ext
         return newdata
-
     def load_datatype_converters( self, toolbox ):
         """Adds datatype converters from self.converters to the calling app's toolbox"""     
         for elem in self.converters:
                 self.log.debug( "Loaded converter: %s", converter.id )
             except:
                 self.log.exception( "error reading converter from path: %s" % converter_path )
-
     def load_external_metadata_tool( self, toolbox ):
         """Adds a tool which is used to set external metadata"""
         #we need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated tool.
         toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
         self.set_external_metadata_tool = set_meta_tool
         self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
-        
     def load_datatype_indexers( self, toolbox ):
         """Adds indexers from self.indexers to the toolbox from app"""
         for elem in self.indexers:
             toolbox.tools_by_id[indexer.id] = indexer
             self.datatype_indexers[datatype] = indexer
             self.log.debug( "Loaded indexer: %s", indexer.id )
-            
     def get_converters_by_datatype(self, ext):
         """Returns available converters by source type"""
         converters = odict()
         if ext in self.datatype_converters.keys():
             converters.update(self.datatype_converters[ext])
         return converters
-
     def get_indexers_by_datatype( self, ext ):
         """Returns indexers based on datatype"""
         class_chain = list()
         ext2type = lambda x: self.get_datatype_by_extension(x)
         class_chain = sorted(class_chain, lambda x,y: issubclass(ext2type(x),ext2type(y)) and -1 or 1)
         return [self.datatype_indexers[x] for x in class_chain]
-    
     def get_converter_by_target_type(self, source_ext, target_ext):
         """Returns a converter based on source and target datatypes"""
         converters = self.get_converters_by_datatype(source_ext)
         if target_ext in converters.keys():
             return converters[target_ext]
         return None
-
     def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ):
         """Returns ( target_ext, existing converted dataset )"""
         for convert_ext in self.get_converters_by_datatype( dataset.ext ):
                     ret_data = None
                 return ( convert_ext, ret_data )
         return ( None, None )
-    
     def get_composite_extensions( self ):
         return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
-
     def get_upload_metadata_params( self, context, group, tool ):
         """Returns dict of case value:inputs for metadata conditional for upload tool"""
         rval = {}
         if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
             rval[ 'auto' ] = rval[ 'txt' ]
         return rval
-

lib/galaxy/datatypes/sniff.py

     """
     if sniff_order is None:
         datatypes_registry = registry.Registry()
+        datatypes_registry.load_datatypes()
         sniff_order = datatypes_registry.sniff_order
     for datatype in sniff_order:
         """

lib/galaxy/model/__init__.py

 
 log = logging.getLogger( __name__ )
 
-datatypes_registry = galaxy.datatypes.registry.Registry() #Default Value Required for unit tests
+datatypes_registry = galaxy.datatypes.registry.Registry()
+# Default Value Required for unit tests
+datatypes_registry.load_datatypes()
 
 class NoConverterException(Exception):
     def __init__(self, value):

lib/galaxy/tools/parameters/basic.py

             if tool is None:
                 #This occurs for things such as unit tests
                 import galaxy.datatypes.registry
-                formats.append( galaxy.datatypes.registry.Registry().get_datatype_by_extension( extension.lower() ).__class__ )
+                datatypes_registry = galaxy.datatypes.registry.Registry()
+                datatypes_registry.load_datatypes()
+                formats.append( datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ )
             else:
                 formats.append( tool.app.datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ )
         self.formats = tuple( formats )

lib/galaxy/util/none_like.py

     def __init__( self, datatypes_registry = None, ext = 'data', dbkey = '?' ):
         self.ext = self.extension = ext
         self.dbkey = dbkey
-        if datatypes_registry is None: datatypes_registry = Registry()
+        if datatypes_registry is None:
+            datatypes_registry = Registry()
+            datatypes_registry.load_datatypes()
         self.datatype = datatypes_registry.get_datatype_by_extension( ext )
         self._metadata = None
         self.metadata = MetadataCollection( self )

lib/galaxy/web/controllers/admin.py

 from galaxy.web.framework.helpers import time_ago, iff, grids
 from galaxy.tools.search import ToolBoxSearch
 from galaxy.tools import ToolSection, json_fix
-from galaxy.util import inflector
+from galaxy.util import parse_xml, inflector
 import logging
 log = logging.getLogger( __name__ )
 
                             tmp_stderr.close()
                             if returncode == 0:
                                 # Load data types required by tools.
-                                # TODO: uncomment the following when we're ready...
-                                #self.__load_datatypes( trans, repo_files_dir )
+                                self.__load_datatypes( trans, repo_files_dir )
                                 # Load tools and tool data files required by them.
                                 sample_files, repository_tools_tups = self.__get_repository_tools_and_sample_files( trans, tool_path, repo_files_dir )
                                 if repository_tools_tups:
                                     persisted_new_tool_section.write( new_tool_section )
                                     persisted_new_tool_section.close()
                                     # Parse the persisted tool panel section
-                                    tree = ElementTree.parse( tmp_name )
+                                    tree = parse_xml( tmp_name )
                                     root = tree.getroot()
-                                    ElementInclude.include( root )
                                     # Load the tools in the section into the tool panel.
                                     trans.app.toolbox.load_section_tag_set( root, trans.app.toolbox.tool_panel, tool_path )
                                     # Remove the temporary file
                         datatypes_config = os.path.abspath( os.path.join( root, name ) )
                         break
         if datatypes_config:
+            imported_module = None
             # Parse datatypes_config.
-            tree = ElementTree.parse( datatypes_config )
-            root = tree.getroot()
-            ElementInclude.include( root )
-            datatype_files = root.find( 'datatype_files' )
+            tree = parse_xml( datatypes_config )
+            datatypes_config_root = tree.getroot()
+            relative_path_to_datatype_file_name = None
+            datatype_files = datatypes_config_root.find( 'datatype_files' )
+            # Currently only a single datatype_file is supported.  For example:
+            # <datatype_files>
+            #    <datatype_file name="gmap.py"/>
+            # </datatype_files>
             for elem in datatype_files.findall( 'datatype_file' ):
                 datatype_file_name = elem.get( 'name', None )
                 if datatype_file_name:
                     # Find the file in the installed repository.
-                    relative_path = None
                     for root, dirs, files in os.walk( repo_files_dir ):
                         if root.find( '.hg' ) < 0:
                             for name in files:
                                 if name == datatype_file_name:
-                                    relative_path = os.path.join( root, name )
+                                    relative_path_to_datatype_file_name = os.path.join( root, name )
                                     break
-                    relative_head, relative_tail = os.path.split( relative_path )
-                    # TODO: get the import_module by parsing the <registration><datatype> tags
-                    if datatype_file_name.find( '.' ) > 0:
-                        import_module = datatype_file_name.split( '.' )[ 0 ]
-                    else:
-                        import_module = datatype_file_name
-                    try:
-                        sys.path.insert( 0, relative_head )
-                        module = __import__( import_module )
-                        sys.path.pop( 0 )
-                    except Exception, e:
-                        log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) )
-                    trans.app.datatypes_registry = galaxy.datatypes.registry.Registry( trans.app.config.root, datatypes_config )
+                    break
+            if relative_path_to_datatype_file_name:
+                relative_head, relative_tail = os.path.split( relative_path_to_datatype_file_name )
+                registration = datatypes_config_root.find( 'registration' )
+                # Get the module by parsing the <datatype> tag.
+                for elem in registration.findall( 'datatype' ):
+                    # A 'type' attribute is currently required.  The attribute
+                    # should be something like: type="gmap:GmapDB".
+                    dtype = elem.get( 'type', None )
+                    if dtype:
+                        fields = dtype.split( ':' )
+                        datatype_module = fields[0]
+                        datatype_class_name = fields[1]
+                        # Since we currently support only a single datatype_file,
+                        #  we have what we need.
+                        break
+                try:
+                    sys.path.insert( 0, relative_head )
+                    imported_module = __import__( datatype_module )
+                    sys.path.pop( 0 )
+                except Exception, e:
+                    log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) )
+            trans.app.datatypes_registry.load_datatypes( root_dir=trans.app.config.root, config=datatypes_config, imported_module=imported_module )
     def __get_repository_tools_and_sample_files( self, trans, tool_path, repo_files_dir ):
         # The sample_files list contains all files whose name ends in .sample
         sample_files = []

lib/galaxy/webapps/community/app.py

         self.config.check()
         config.configure_logging( self.config )
         # Set up datatypes registry
-        self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config )
+        self.datatypes_registry = galaxy.datatypes.registry.Registry()
+        self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config )
         # Determine the database url
         if self.config.database_connection:
             db_url = self.config.database_connection

scripts/functional_tests.py

     else:
         # FIXME: This doesn't work at all now that toolbox requires an 'app' instance
         #        (to get at datatypes, might just pass a datatype registry directly)
-        my_app = bunch.Bunch( datatypes_registry = galaxy.datatypes.registry.Registry() )
+        datatypes_registry = galaxy.datatypes.registry.Registry()
+        datatypes_registry.load_datatypes()
+        my_app = bunch.Bunch( datatypes_registry=datatypes_registry )
         test_toolbox.toolbox = tools.ToolBox( 'tool_conf.xml.test', 'tools', my_app )
 
     # ---- Find tests ---------------------------------------------------------

scripts/set_metadata.py

     # Set up datatypes registry
     config_root = sys.argv.pop( 1 )
     datatypes_config = sys.argv.pop( 1 )
-    galaxy.model.set_datatypes_registry( galaxy.datatypes.registry.Registry( config_root, datatypes_config ) )
+    datatypes_registry = galaxy.datatypes.registry.Registry()
+    datatypes_registry.load_datatypes( root_dir=config_root, config=datatypes_config )
+    galaxy.model.set_datatypes_registry( datatypes_registry )
 
     job_metadata = sys.argv.pop( 1 )
     ext_override = dict()

tools/data_source/data_source.py

         enhanced_handling = True
         json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
     
-    datatypes_registry = Registry( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
+    datatypes_registry = Registry()
+    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
     
     URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
     URL_method = params.get( 'URL_method', None )

tools/data_source/upload.py

     output_paths = parse_outputs( sys.argv[4:] )
     json_file = open( 'galaxy.json', 'w' )
 
-    registry = Registry( sys.argv[1], sys.argv[2] )
+    registry = Registry()
+    registry.load_datatypes( root_dir=sys.argv[1], config=sys.argv[2] )
 
     for line in open( sys.argv[3], 'r' ):
         dataset = from_json_string( line )
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.