Source

galaxy-central / lib / galaxy / tools / parameters / dynamic_options.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
"""
Support for generating the options for a SelectToolParameter dynamically (based
on the values of other parameters or other aspects of the current state)
"""

import operator, sys, os, logging
import basic, validation
from galaxy.util import string_as_bool
import galaxy.tools

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

class Filter(object):
    """
    Base class for option filters: a filter receives the current options list
    and returns a modified one.
    """

    def __init__(self, d_option, elem):
        """Store the owning dynamic-options object and the element it was built from."""
        self.elem = elem
        self.dynamic_option = d_option

    @classmethod
    def from_element(cls, d_option, elem):
        """
        Instantiate the filter class registered in ``filter_types`` under the
        (whitespace-stripped) 'type' attribute of elem.
        """
        filter_type = elem.get('type', None)
        assert filter_type is not None, "Required 'type' attribute missing from filter"
        filter_class = filter_types[filter_type.strip()]
        return filter_class(d_option, elem)

    def get_dependency_name(self):
        """Returns the name of any dependency, otherwise None (the base class has none)."""
        return None

    def filter_options(self, options, trans, other_values):
        """Returns a list of options after the filter is applied; subclasses must override."""
        raise TypeError("Abstract Method")

class StaticValueFilter(Filter):
    """
    Keeps or discards options depending on whether one column equals a fixed value.

    Type: static_value

    Required Attributes:
        value: static value to compare to
        column: column in options to compare with
    Optional Attributes:
        keep: Keep columns matching value (True)
              Discard columns matching value (False)
    """

    def __init__(self, d_option, elem):
        """Read 'value', 'column' and 'keep' from elem; 'value' and 'column' are mandatory."""
        Filter.__init__(self, d_option, elem)
        self.value = elem.get("value", None)
        assert self.value is not None, "Required 'value' attribute missing from filter"
        column_spec = elem.get("column", None)
        assert column_spec is not None, "Required 'column' attribute missing from filter, when loading from file"
        self.column = d_option.column_spec_to_index(column_spec)
        self.keep = string_as_bool(elem.get("keep", 'True'))

    def filter_options(self, options, trans, other_values):
        """Return the rows whose column equals the value (keep) or differs from it (discard)."""
        if self.keep:
            return [fields for fields in options if fields[self.column] == self.value]
        return [fields for fields in options if fields[self.column] != self.value]

class DataMetaFilter(Filter):
    """
    Filters a list of options on a column by a dataset metadata value.

    Type: data_meta

    When no 'from_' source has been specified in the <options> tag, this will populate the options list with (meta_value, meta_value, False).
    Otherwise, options which do not match the metadata value in the column are discarded.

    Required Attributes:
        ref: Name of input dataset
        key: Metadata key to use for comparison
        column: column in options to compare with (not required when not associated with input options)
    Optional Attributes:
        multiple: Option values are multiple, split column by separator (True)
        separator: When multiple split by this (,)
    """

    def __init__(self, d_option, elem):
        """
        Read the filter attributes from elem and flag d_option as depending on
        a dataset. 'column' may only be omitted when d_option has neither file
        fields nor a dataset reference.
        """
        Filter.__init__(self, d_option, elem)
        self.ref_name = elem.get("ref", None)
        assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
        d_option.has_dataset_dependencies = True
        self.key = elem.get("key", None)
        assert self.key is not None, "Required 'key' attribute missing from filter"
        column_spec = elem.get("column", None)
        if column_spec is not None:
            self.column = d_option.column_spec_to_index(column_spec)
        else:
            self.column = None
            assert self.dynamic_option.file_fields is None and self.dynamic_option.dataset_ref_name is None, "Required 'column' attribute missing from filter, when loading from file"
        self.multiple = string_as_bool(elem.get("multiple", "False"))
        self.separator = elem.get("separator", ",")

    def get_dependency_name(self):
        """Return the name of the referenced input dataset."""
        return self.ref_name

    def filter_options(self, options, trans, other_values):
        """
        Filter (or, without a column, extend) options using the metadata value
        stored under self.key on the referenced dataset.

        Returns [] when the reference is not a dataset object. When the
        metadata value is None, every option is returned with its value wrapped
        in basic.UnvalidatedValue.
        """
        def compare_meta_value(file_value, dataset_value):
            # List metadata: either every item must occur in the split column
            # value (multiple), or the column value must be one of the items.
            if isinstance(dataset_value, list):
                if not self.multiple:
                    return file_value in dataset_value
                file_values = file_value.split(self.separator)
                return all(value in file_values for value in dataset_value)
            # Scalar metadata.
            if self.multiple:
                return dataset_value in file_value.split(self.separator)
            return file_value == dataset_value

        assert self.ref_name in other_values or (trans is not None and trans.workflow_building_mode), "Required dependency '%s' not found in incoming values" % self.ref_name
        ref = other_values.get(self.ref_name, None)
        model = self.dynamic_option.tool_param.tool.app.model
        if not (isinstance(ref, model.HistoryDatasetAssociation) or isinstance(ref, galaxy.tools.DatasetFilenameWrapper)):
            return []  # not a valid dataset
        meta_value = ref.metadata.get(self.key, None)
        if meta_value is None:
            return [(disp_name, basic.UnvalidatedValue(optval), selected) for disp_name, optval, selected in options]

        if self.column is None:
            # No column to compare against: add the metadata value(s) as new
            # options. The incoming list is extended in place and returned.
            if not isinstance(meta_value, list):
                meta_value = [meta_value]
            options.extend((value, value, False) for value in meta_value)
            return options
        return [fields for fields in options if compare_meta_value(fields[self.column], meta_value)]

class ParamValueFilter(Filter):
    """
    Filters a list of options on a column by the value of another input.

    Type: param_value

    Required Attributes:
        ref: Name of input value
        column: column in options to compare with
    Optional Attributes:
        keep: Keep columns matching value (True)
              Discard columns matching value (False)
        ref_attribute: Period (.) separated attribute chain of input (ref) to use as value for filter
    """

    def __init__(self, d_option, elem):
        """Read 'ref', 'column', 'keep' and 'ref_attribute' from elem."""
        Filter.__init__(self, d_option, elem)
        self.ref_name = elem.get("ref", None)
        assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
        column_spec = elem.get("column", None)
        assert column_spec is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index(column_spec)
        self.keep = string_as_bool(elem.get("keep", 'True'))
        # Stored as a list of attribute names; empty when no chain was given.
        attribute_chain = elem.get("ref_attribute", None)
        self.ref_attribute = attribute_chain.split('.') if attribute_chain else []

    def get_dependency_name(self):
        """Return the name of the referenced input."""
        return self.ref_name

    def filter_options(self, options, trans, other_values):
        """
        Compare each row's column with str() of the referenced input (after
        following the attribute chain) and keep or discard the matches.
        Returns [] in workflow building mode or when the chain cannot be followed.
        """
        if trans is not None and trans.workflow_building_mode:
            return []
        assert self.ref_name in other_values, "Required dependency '%s' not found in incoming values" % self.ref_name
        ref = other_values.get(self.ref_name, None)
        for attribute_name in self.ref_attribute:
            if not hasattr(ref, attribute_name):
                # ref does not have attribute, so we cannot filter, return empty list
                return []
            ref = getattr(ref, attribute_name)
        ref = str(ref)
        if self.keep:
            return [fields for fields in options if fields[self.column] == ref]
        return [fields for fields in options if fields[self.column] != ref]

class UniqueValueFilter(Filter):
    """
    Filters a list of options to be unique by a column value.

    Type: unique_value

    Required Attributes:
        column: column in options to compare with
    """

    def __init__(self, d_option, elem):
        """Resolve the mandatory 'column' attribute to a column index."""
        Filter.__init__(self, d_option, elem)
        column_spec = elem.get("column", None)
        assert column_spec is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index(column_spec)

    def get_dependency_name(self):
        """Return the dataset reference name of the owning dynamic options (may be None)."""
        return self.dynamic_option.dataset_ref_name

    def filter_options(self, options, trans, other_values):
        """Return the first row seen for each distinct column value, in input order."""
        seen_values = []
        unique_rows = []
        for fields in options:
            column_value = fields[self.column]
            if column_value in seen_values:
                continue
            unique_rows.append(fields)
            seen_values.append(column_value)
        return unique_rows

class MultipleSplitterFilter(Filter):
    """
    Turns a single line of options into multiple lines, by splitting a column and creating a line for each item.

    Type: multiple_splitter

    Required Attributes:
        column: column in options to compare with
    Optional Attributes:
        separator: Split column by this (,)
    """

    def __init__(self, d_option, elem):
        """Read 'separator' and the comma-separated 'column' specification(s) from elem."""
        Filter.__init__(self, d_option, elem)
        self.separator = elem.get("separator", ",")
        column_specs = elem.get("column", None)
        assert column_specs is not None, "Required 'columns' attribute missing from filter"
        self.columns = [d_option.column_spec_to_index(spec) for spec in column_specs.split(",")]

    def filter_options(self, options, trans, other_values):
        """
        For every row and every configured column, emit one copy of the row per
        item obtained by splitting that column, with the column replaced by the item.
        """
        expanded = []
        for fields in options:
            for column in self.columns:
                items = fields[column].split(self.separator)
                before = fields[0:column]
                after = fields[column + 1:]
                for item in items:
                    expanded.append(before + [item] + after)
        return expanded
        
class AttributeValueSplitterFilter(Filter):
    """
    Filters a list of attribute-value pairs to be unique attribute names.

    Type: attribute_value_splitter

    Required Attributes:
        column: column in options to compare with
    Optional Attributes:
        pair_separator: Split column by this (,)
        name_val_separator: Split name-value pair by this ( whitespace )
    """

    def __init__(self, d_option, elem):
        """Read the separators and the comma-separated integer 'column' indexes from elem."""
        Filter.__init__(self, d_option, elem)
        self.pair_separator = elem.get("pair_separator", ",")
        self.name_val_separator = elem.get("name_val_separator", None)
        column_specs = elem.get("column", None)
        assert column_specs is not None, "Required 'columns' attribute missing from filter"
        self.columns = [int(spec) for spec in column_specs.split(",")]

    def filter_options(self, options, trans, other_values):
        """
        Emit one row per attribute name not seen before. The name is inserted
        at the column position, in front of the original column contents.
        Pairs that do not split into exactly two parts are ignored.
        """
        seen_names = []
        rows = []
        for fields in options:
            for column in self.columns:
                for pair in fields[column].split(self.pair_separator):
                    parts = pair.split(self.name_val_separator)
                    if len(parts) != 2:
                        continue
                    name = parts[0]
                    if name in seen_names:
                        continue
                    rows.append(fields[0:column] + [name] + fields[column:])
                    seen_names.append(name)
        return rows


class AdditionalValueFilter(Filter):
    """
    Adds a single static value to an options list.

    Type: add_value

    Required Attributes:
        value: value to appear in select list
    Optional Attributes:
        name: Display name to appear in select list (value)
        index: Index of option list to add value (APPEND)
    """

    def __init__(self, d_option, elem):
        """Read 'value', 'name' (defaults to the value) and the optional integer 'index'."""
        Filter.__init__(self, d_option, elem)
        self.value = elem.get("value", None)
        assert self.value is not None, "Required 'value' attribute missing from filter"
        display_name = elem.get("name", None)
        self.name = self.value if display_name is None else display_name
        position = elem.get("index", None)
        self.index = None if position is None else int(position)

    def filter_options(self, options, trans, other_values):
        """
        Return a copy of options with one extra row: empty strings in every
        column except the 'value' and 'name' columns. The row is inserted at
        self.index, or appended when no index was configured.
        """
        rval = list(options)
        column_map = self.dynamic_option.columns
        new_row = [""] * (self.dynamic_option.largest_index + 1)
        new_row[column_map['value']] = self.value
        new_row[column_map['name']] = self.name
        if self.index is None:
            rval.append(new_row)
        else:
            rval.insert(self.index, new_row)
        return rval

class RemoveValueFilter(Filter):
    """
    Removes a value from an options list.

    Type: remove_value

    Required Attributes:
        value: value to remove from select list
            or
        ref: param to refer to
            or
        meta_ref: dataset to refer to
        key: metadata key to compare to
    Optional Attributes:
        multiple: Option values are multiple, split them by separator (False)
        separator: When multiple split by this (,)
    """

    def __init__(self, d_option, elem):
        """Read the value source ('value', 'ref' or 'meta_ref' + 'key') and split settings."""
        Filter.__init__(self, d_option, elem)
        self.value = elem.get("value", None)
        self.ref_name = elem.get("ref", None)
        self.meta_ref = elem.get("meta_ref", None)
        self.metadata_key = elem.get("key", None)
        # NOTE(review): this check also demands 'key' when only 'ref' is given,
        # although filter_options never reads the key on the 'ref' path --
        # confirm whether that is intended.
        assert self.value is not None or ((self.ref_name is not None or self.meta_ref is not None) and self.metadata_key is not None), ValueError("Required 'value' or 'ref' and 'key' attributes missing from filter")
        self.multiple = string_as_bool(elem.get("multiple", "False"))
        self.separator = elem.get("separator", ",")

    def filter_options(self, options, trans, other_values):
        """
        Return the (name, value, selected) options whose value does not match
        the value to remove.

        The value to remove is the static 'value', else the incoming value of
        'ref', else the metadata stored under 'key' on the 'meta_ref' dataset.
        Options are returned untouched in workflow building mode and when
        'meta_ref' does not resolve to a dataset object.
        """
        if trans is not None and trans.workflow_building_mode:
            return options
        # Workflow building mode has already returned above, so only the three
        # possible value sources need checking here.
        assert self.value is not None or (self.ref_name is not None and self.ref_name in other_values) or (self.meta_ref is not None and self.meta_ref in other_values), Exception("Required dependency '%s' or '%s' not found in incoming values" % (self.ref_name, self.meta_ref))

        def compare_value(option_value, filter_value):
            if isinstance(filter_value, list):
                if self.multiple:
                    # Every item to remove must be present among the option's
                    # own (split) values. The previous code tested each item
                    # against filter_value itself, which could never fail.
                    option_values = option_value.split(self.separator)
                    return all(value in option_values for value in filter_value)
                return option_value in filter_value
            if self.multiple:
                return filter_value in option_value.split(self.separator)
            return option_value == filter_value

        value = self.value
        if value is None:
            if self.ref_name is not None:
                value = other_values.get(self.ref_name)
            else:
                data_ref = other_values.get(self.meta_ref)
                model = self.dynamic_option.tool_param.tool.app.model
                if not isinstance(data_ref, model.HistoryDatasetAssociation) and not isinstance(data_ref, galaxy.tools.DatasetFilenameWrapper):
                    return options  # cannot modify options
                value = data_ref.metadata.get(self.metadata_key, None)
        return [(disp_name, optval, selected) for disp_name, optval, selected in options if not compare_value(optval, value)]

class SortByColumnFilter(Filter):
    """
    Sorts an options list by a column

    Type: sort_by

    Required Attributes:
        column: column to sort by
    """

    def __init__(self, d_option, elem):
        """Resolve the mandatory 'column' attribute to a column index."""
        Filter.__init__(self, d_option, elem)
        column = elem.get("column", None)
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index(column)

    def filter_options(self, options, trans, other_values):
        """
        Return a new list with the rows in ascending order of the sort column.

        The sort is stable, so rows with equal column values keep their input
        order -- the same result the previous hand-written insertion sort gave,
        without its quadratic cost.
        """
        return sorted(options, key=operator.itemgetter(self.column))


# Registry used by Filter.from_element: maps the 'type' attribute of a
# <filter> element to the class implementing that filter.
filter_types = {
    'data_meta': DataMetaFilter,
    'param_value': ParamValueFilter,
    'static_value': StaticValueFilter,
    'unique_value': UniqueValueFilter,
    'multiple_splitter': MultipleSplitterFilter,
    'attribute_value_splitter': AttributeValueSplitterFilter,
    'add_value': AdditionalValueFilter,
    'remove_value': RemoveValueFilter,
    'sort_by': SortByColumnFilter,
}

class DynamicOptions(object):
    """Handles dynamically generated SelectToolParameter options"""

    def __init__(self, elem, tool_param):
        """
        Configure the options from an <options> element.

        The rows come from one of: a data table loaded by the app
        ('from_data_table'), a file under the tool data path ('from_file'), a
        dataset chosen at run time ('from_dataset'), or an attribute of the tool
        parameter ('from_parameter'). <filter> and <validator> children are
        loaded afterwards.
        """
        def load_from_parameter(from_parameter, transform_lines=None):
            # Follow the dotted attribute path starting at the tool parameter.
            obj = self.tool_param
            for field in from_parameter.split('.'):
                obj = getattr(obj, field)
            if transform_lines:
                # SECURITY: 'transform_lines' is evaluated as a Python
                # expression (it can see the local name ``obj``). It is read
                # from the <options> element, so it must only ever come from
                # trusted tool configuration.
                obj = eval(transform_lines)
            return self.parse_file_fields(obj)

        self.tool_param = tool_param
        self.columns = {}
        self.filters = []
        self.file_fields = None
        self.largest_index = 0
        self.dataset_ref_name = None
        # True if the options generation depends on one or more other parameters
        # that are dataset inputs
        self.has_dataset_dependencies = False
        self.validators = []
        self.converter_safe = True

        # Parse the <options> tag
        self.separator = elem.get('separator', '\t')
        self.line_startswith = elem.get('startswith', None)
        data_file = elem.get('from_file', None)
        self.index_file = None
        self.missing_index_file = None
        dataset_file = elem.get('from_dataset', None)
        from_parameter = elem.get('from_parameter', None)
        tool_data_table_name = elem.get('from_data_table', None)
        # Options are defined from a data table loaded by the app
        self.tool_data_table = None
        self.missing_tool_data_table_name = None
        if tool_data_table_name:
            app = tool_param.tool.app
            if tool_data_table_name in app.tool_data_tables:
                self.tool_data_table = app.tool_data_tables[tool_data_table_name]
                # Column definitions are optional, but if provided override those from the table
                if elem.find("column") is not None:
                    self.parse_column_definitions(elem)
                else:
                    self.columns = self.tool_data_table.columns
                # Set self.missing_index_file if the index file to
                # which the tool_data_table refers does not exist.
                if self.tool_data_table.missing_index_file:
                    self.missing_index_file = self.tool_data_table.missing_index_file
            else:
                self.missing_tool_data_table_name = tool_data_table_name
                log.warning("Data table named '%s' is required by tool but not configured" % tool_data_table_name)
        # Options are defined by parsing tabular text data from a data file
        # on disk, a dataset, or the value of another parameter
        elif data_file is not None or dataset_file is not None or from_parameter is not None:
            self.parse_column_definitions(elem)
            if data_file is not None:
                data_file = data_file.strip()
                # NOTE(review): an absolute 'from_file' path is neither loaded
                # nor recorded as missing -- confirm that this is intended.
                if not os.path.isabs(data_file):
                    full_path = os.path.join(self.tool_param.tool.app.config.tool_data_path, data_file)
                    if os.path.exists(full_path):
                        self.index_file = data_file
                        # Close the file once parsed instead of leaking the handle.
                        handle = open(full_path)
                        try:
                            self.file_fields = self.parse_file_fields(handle)
                        finally:
                            handle.close()
                    else:
                        self.missing_index_file = data_file
            elif dataset_file is not None:
                self.dataset_ref_name = dataset_file
                self.has_dataset_dependencies = True
                self.converter_safe = False
            elif from_parameter is not None:
                transform_lines = elem.get('transform_lines', None)
                self.file_fields = list(load_from_parameter(from_parameter, transform_lines))

        # Load filters
        for filter_elem in elem.findall('filter'):
            self.filters.append(Filter.from_element(self, filter_elem))

        # Load Validators
        for validator in elem.findall('validator'):
            self.validators.append(validation.Validator.from_element(self.tool_param, validator))

        if self.dataset_ref_name:
            tool_param.data_ref = self.dataset_ref_name

    def parse_column_definitions(self, elem):
        """
        Record the name -> index mapping of every <column> child of elem and
        track the largest index seen. A 'value' column is required; 'name'
        falls back to the 'value' column when not defined.
        """
        for column_elem in elem.findall('column'):
            name = column_elem.get('name', None)
            assert name is not None, "Required 'name' attribute missing from column def"
            index = column_elem.get('index', None)
            assert index is not None, "Required 'index' attribute missing from column def"
            index = int(index)
            self.columns[name] = index
            if index > self.largest_index:
                self.largest_index = index
        assert 'value' in self.columns, "Required 'value' column missing from column def"
        if 'name' not in self.columns:
            self.columns['name'] = self.columns['value']

    def parse_file_fields(self, reader):
        """
        Split each line yielded by reader on self.separator and return the
        resulting field lists.

        Skipped: lines starting with '#', lines not starting with
        self.line_startswith (when set), empty lines, and lines with too few
        fields to contain the largest configured column index.
        """
        rval = []
        for line in reader:
            if line.startswith('#') or (self.line_startswith and not line.startswith(self.line_startswith)):
                continue
            line = line.rstrip("\n\r")
            if line:
                fields = line.split(self.separator)
                if self.largest_index < len(fields):
                    rval.append(fields)
        return rval

    def get_dependency_names(self):
        """
        Return the names of parameters these options depend on -- both data
        and other param types.
        """
        rval = []
        if self.dataset_ref_name:
            rval.append(self.dataset_ref_name)
        for option_filter in self.filters:
            depend = option_filter.get_dependency_name()
            if depend:
                rval.append(depend)
        return rval

    def get_fields( self, trans, other_values ):
        """
        Return the option rows after all filters have been applied.

        Rows are read from the referenced dataset (at most the first megabyte
        of it), from the tool data table, or from the fields parsed at
        construction time -- in that order of precedence.
        """
        if self.dataset_ref_name:
            dataset = other_values.get(self.dataset_ref_name, None)
            assert dataset is not None, "Required dataset '%s' missing from input" % self.dataset_ref_name
            if not dataset:
                return []  # no valid dataset in history
            # Ensure parsing dynamic options does not consume more than a megabyte worth memory.
            max_bytes = 1048576
            path = dataset.file_name
            file_size = os.path.getsize(path)
            handle = open(path)
            try:
                if file_size < max_bytes:
                    options = self.parse_file_fields(handle)
                else:
                    # Pass just the first megabyte to parse_file_fields.
                    try:
                        from StringIO import StringIO
                    except ImportError:
                        # Interpreters without the StringIO module
                        from io import StringIO
                    log.warning("Attempting to load options from large file, reading just first megabyte")
                    contents = handle.read(max_bytes)
                    options = self.parse_file_fields(StringIO(contents))
            finally:
                # Always release the dataset file handle.
                handle.close()
        elif self.tool_data_table:
            options = self.tool_data_table.get_fields()
        else:
            options = list(self.file_fields)
        for option_filter in self.filters:
            options = option_filter.filter_options(options, trans, other_values)
        return options

    def get_fields_by_value(self, value, trans, other_values):
        """
        Return a list of fields with column 'value' matching provided value.
        """
        val_index = self.columns['value']
        return [fields for fields in self.get_fields(trans, other_values) if fields[val_index] == value]

    def get_field_by_name_for_value(self, field_name, value, trans, other_values):
        """
        Get contents of field by name for specified value.

        field_name may be a column name or an integer column index; value may
        be a single value or a list of values. Returns one entry per matching row.
        """
        rval = []
        if isinstance(field_name, int):
            field_index = field_name
        else:
            assert field_name in self.columns, "Requested '%s' column missing from column def" % field_name
            field_index = self.columns[field_name]
        if not isinstance(value, list):
            value = [value]
        for val in value:
            for fields in self.get_fields_by_value(val, trans, other_values):
                rval.append(fields[field_index])
        return rval

    def get_options(self, trans, other_values):
        """
        Return the options as (name, value, selected) tuples, with selected
        always False. When no row source is configured, the filters alone
        build the list, starting from an empty one.
        """
        rval = []
        if self.file_fields is not None or self.tool_data_table is not None or self.dataset_ref_name is not None:
            name_index = self.columns['name']
            value_index = self.columns['value']
            for fields in self.get_fields(trans, other_values):
                rval.append((fields[name_index], fields[value_index], False))
        else:
            for option_filter in self.filters:
                rval = option_filter.filter_options(rval, trans, other_values)
        return rval

    def column_spec_to_index(self, column_spec):
        """
        Convert a column specification (as read from the config file), to an
        index. A column specification can just be a number, a column name, or
        a column alias.
        """
        # Name?
        if column_spec in self.columns:
            return self.columns[column_spec]
        # Int?
        return int(column_spec)