Source

orange / Orange / feature / selection.py

astaric a7b0563 

Aleš Erjavec 6e9843f 

astaric a7b0563 



Aleš Erjavec 56cf220 
Blaz Zupan 43138a3 

astaric a7b0563 
Aleš Erjavec 56cf220 




astaric a7b0563 
Aleš Erjavec 56cf220 
astaric a7b0563 
Aleš Erjavec 56cf220 

astaric a7b0563 
Blaz Zupan 43138a3 
astaric a7b0563 
Aleš Erjavec 56cf220 
astaric a7b0563 
Janez Demšar 2ab4929 
astaric a7b0563 

Aleš Erjavec 94e1207 


astaric a7b0563 


Aleš Erjavec 94e1207 
Janez Demšar 94f7d4a 
astaric a7b0563 



Blaz Zupan 43138a3 
Janez Demšar 94f7d4a 
Janez Demšar 2ab4929 
astaric a7b0563 
Janez Demšar 2ab4929 

Janez Demšar 94f7d4a 
Janez Demšar 2ab4929 
astaric a7b0563 
Janez Demšar 2ab4929 


astaric a7b0563 
Blaz Zupan 43138a3 
astaric a7b0563 
Blaz Zupan 43138a3 
Blaz Zupan de7d66d 
astaric a7b0563 


Janez Demšar 94f7d4a 
astaric a7b0563 
Blaz Zupan 43138a3 

astaric a7b0563 
Janez Demšar 2ab4929 

Janez Demšar 94f7d4a 
Janez Demšar 2ab4929 
astaric a7b0563 
Janez Demšar 2ab4929 
astaric a7b0563 
Janez Demšar 2ab4929 
astaric a7b0563 







Janez Demšar 94f7d4a 




astaric a7b0563 
Janez Demšar 2ab4929 




astaric a7b0563 




Blaz Zupan 43138a3 
astaric a7b0563 






Blaz Zupan 43138a3 



astaric a7b0563 
Janez Demšar 2ab4929 


astaric a7b0563 
Blaz Zupan 43138a3 
astaric a7b0563 




Matija Polajnar 80400e4 
astaric a7b0563 










Janez Demšar 2ab4929 

astaric a7b0563 

Janez Demšar 2ab4929 
astaric a7b0563 









Blaz Zupan 43138a3 
Janez Demšar 2ab4929 

astaric a7b0563 
Janez Demšar 2ab4929 


astaric a7b0563 







Matija Polajnar 80400e4 
astaric a7b0563 












Blaz Zupan 43138a3 
astaric a7b0563 





Janez Demšar 2ab4929 


astaric a7b0563 
Janez Demšar 2ab4929 


astaric a7b0563 







Matija Polajnar 80400e4 
astaric a7b0563 



















Blaz Zupan 43138a3 


astaric a7b0563 













Matija Polajnar 80400e4 
astaric a7b0563 




















Janez Demšar 94f7d4a 
astaric a7b0563 






__docformat__ = 'restructuredtext'

from operator import itemgetter

import Orange.core as orange

from Orange.feature.scoring import score_all


def top_rated(scores, n, highest_best=True):
    """Return n top-rated features from the list of scores.

    :param list scores:
        A list such as the one returned by :func:`.score_all`
    :param int n: Number of features to select.
    :param bool highest_best:
        If true, the features that are scored higher are preferred.
    :rtype: :obj:`list`

    """
    return [f for f, score in
            sorted(scores, key=itemgetter(1), reverse=highest_best)[:n]]

bestNAtts = top_rated


def above_threshold(scores, threshold=0.0):
    """Return features (without scores) with scores above or
    equal to a specified threshold.

    :param list scores:
        A list such as one returned by :func:`.score_all`
    :param float threshold: Threshold for selection.
    :rtype: :obj:`list`

    """
    return [f for f, score in scores if score >= threshold]


attsAboveThreshold = above_threshold


def select(data, scores, n):
    """Construct and return a new data table that includes a
    class and only the best features from a list scores.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    return data.select(top_rated(scores, n) + [data.domain.classVar.name])

selectBestNAtts = select
select_best_n = select


def select_above_threshold(data, scores, threshold=0.0):
    """Construct and return a new data table that includes a class and
    features from the list returned by
    :obj:`~Orange.feature.scoring.score_all` with higher or equal score
    to a given threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    return data.select(above_threshold(scores, threshold) + \
                       [data.domain.classVar.name])

selectAttsAboveThresh = select_above_threshold


def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Iteratively remove the worst scored feature until no feature
    has a score below the margin. The filter procedure was originally
    designed for measures such as Relief, which are context dependent,
    i.e., removal of features may change the scores of other remaining
    features. The score is thus recomputed in each iteration.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float

    """
    measl = score_all(data, measure)
    while len(data.domain.attributes) > 0 and measl[-1][1] < margin:
        data = select(data, measl, len(data.domain.attributes) - 1)
        measl = score_all(data, measure)
    return data

filterRelieff = select_relief


class FilterAboveThreshold(object):
    """A wrapper around :obj:`select_above_threshold`; the
    constructor stores the parameters of the feature selection
    procedure that are then applied when the the selection
    is called with the actual data.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float
    """

    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, threshold=threshold)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), \
                 threshold=0.0):
        self.measure = measure
        self.threshold = threshold

    def __call__(self, data):
        """Return data table features that have scores above given
        threshold.

        :param data: data table
        :type data: Orange.data.Table

        """
        ma = score_all(data, self.measure)
        return select_above_threshold(data, ma, self.threshold)

FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold


class FilterBestN(object):
    """A wrapper around :obj:`select`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        self.measure = measure
        self.n = n

    def __call__(self, data):
        ma = score_all(data, self.measure)
        self.n = min(self.n, len(data.domain.attributes))
        return select(data, ma, self.n)

FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN


class FilterRelief(object):
    """A class wrapper around :obj:`select_best_n`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, margin=margin)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        self.measure = measure
        self.margin = margin

    def __call__(self, data):
        return select_relief(data, self.measure, self.margin)

FilterRelief_Class = FilterRelief

##############################################################################
# wrapped learner


class FilteredLearner(object):
    """A feature selection wrapper around base learner. When provided data,
     this learner applies a given feature selection method and then calls
     the base learner.

    Here is an example of how to build a wrapper around naive Bayesian learner
    and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(baseLearner, filter=filter, name=name)
            return self(data, weight)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        self.baseLearner = baseLearner
        self.filter = filter
        self.name = name

    def __call__(self, data, weight=0):
        # filter the data and then learn
        fdata = self.filter(data)
        model = self.baseLearner(fdata, weight)
        return FilteredClassifier(classifier=model, domain=model.domain)

FilteredLearner_Class = FilteredLearner


class FilteredClassifier:
    """A classifier returned by FilteredLearner."""
    def __init__(self, **kwds):
        self.__dict__.update(kwds)

    def __call__(self, example, resultType=orange.GetValue):
        return self.classifier(example, resultType)

    def atts(self):
        return self.domain.attributes