# Source: orange-multitarget / _multitarget / binary.py

"""
.. index:: Multi-target Binary Relevance Learner

***************************************
Multi-target Binary Relevance Learner
***************************************


.. index:: Multi-target Binary Relevance Learner
.. autoclass:: Orange.multitarget.binary.BinaryRelevanceLearner
    :members:
    :show-inheritance:

.. index:: Multi-target Binary Relevance Classifier
.. autoclass:: Orange.multitarget.binary.BinaryRelevanceClassifier
    :members:
    :show-inheritance:

"""

import Orange.core as orange
import Orange
import copy
from operator import add

class BinaryRelevanceLearner(orange.Learner):
    """
    Creates a standard single class learner for each class variable. Binary
    relevance assumes independence of class variables.

    :param learner: A single class learner.
    :type learner: :class:`Orange.core.Learner`

    :param callback: a function to be called after every iteration of
            induction of classifier. It is called with a single parameter
            (from 0.0 to 1.0) that provides an estimate
            of completion of the learning progress.

    :param name: learner name.
    :type name: string

    :rtype: :class:`Orange.multitarget.BinaryRelevanceLearner` or
            :class:`Orange.multitarget.BinaryRelevanceClassifier`

    """

    def __new__(cls, data=None, weight=0, **kwargs):
        """
        Create the learner, or train immediately when ``data`` is given.

        With truthy ``data`` the new learner is initialised from ``kwargs``
        and called on ``(data, weight)``; the result of that call is returned
        instead of the learner itself.
        """
        self = Orange.classification.Learner.__new__(cls, **kwargs)
        if data:
            # The object returned below is not an instance of ``cls``, so
            # Python will not run __init__ for us; do it explicitly.
            self.__init__(**kwargs)
            return self(data, weight)
        return self

    def __init__(self, learner=None, name="Binary Relevance", callback=None):
        """
        Store the learner settings.

        :raises TypeError: if ``learner`` is missing (or otherwise falsy).
        """
        self.name = name
        self.callback = callback

        if not learner:
            raise TypeError("Wrong specification, learner not defined")
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Construct learners from the given table of data instances.

        One classifier is induced per class variable in
        ``instances.domain.class_vars``, in that order.

        :param instances: data for learning.
        :type instances: class:`Orange.data.Table`

        :param weight: weight.
        :type weight: int

        :rtype: :class:`Orange.multitarget.BinaryRelevanceClassifier`
        """

        # Work on a fresh table so that pick_class() below is applied to our
        # own copy rather than to the table passed in (bypasses ownership).
        instances = Orange.data.Table(instances.domain, instances)

        # Snapshot the targets before the loop starts calling pick_class().
        class_order = list(instances.domain.class_vars)
        m = len(class_order)

        classifiers = []
        domains = []
        learner = self.learner

        for i, class_var in enumerate(class_order):
            # sets one of the class_vars as class_var
            instances.pick_class(class_var)

            # save domains for classification
            domains.append(Orange.data.Domain([d for d in instances.domain]))

            classifiers.append(learner(instances, weight))

            if self.callback:
                # Fraction of targets finished so far, in (0.0, 1.0].
                self.callback((i + 1.0) / m)

        return BinaryRelevanceClassifier(classifiers=classifiers, domains=domains, name=self.name)


class BinaryRelevanceClassifier(orange.Classifier):
    """
    Uses the classifiers induced by the :obj:`BinaryRelevanceLearner`. Every
    stored classifier is applied to the input instance on its own, and the
    individual results are returned together, one entry per classifier and
    in the order of ``classifiers``.

    :param classifiers: a list of classifiers, one per class variable.
    :type classifiers: list of :class:`Orange.core.Classifier`

    :param domains: the domains used by learners; ``domains[i]`` is the
        domain ``classifiers[i]`` is called with.
    :type domains: list of :class:`Orange.data.Domain`

    :param name: name of the classifier.
    :type name: string

    """

    def __init__(self, classifiers, domains, name):
        self.classifiers = classifiers
        self.name = name
        self.domains = domains

    def __call__(self, instance, result_type=orange.GetValue):
        """
        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
              :class:`Orange.classification.Classifier.GetProbabilities` or
              :class:`Orange.classification.Classifier.GetBoth`

        :rtype: a tuple of :class:`Orange.data.Value`, a tuple of
              :class:`Orange.statistics.Distribution`, or a two-element
              list ``[values, distributions]`` holding both tuples
        """
        # The values of the instance followed by a blank ('?') class value;
        # the same value list is used for every per-target domain.
        blank_class_values = [v for v in instance] + ['?']

        values = []
        probs = []

        # Iterate over the stored classifiers rather than over
        # instance.domain.class_vars: the number of predictions is fixed by
        # the model, not by the domain the incoming instance happens to have.
        for i, classifier in enumerate(self.classifiers):
            inst = Orange.data.Instance(self.domains[i], blank_class_values)

            res = classifier(inst, orange.GetBoth)
            values.append(res[0])
            probs.append(res[1])

        if result_type == orange.GetValue:
            return tuple(values)
        if result_type == orange.GetProbabilities:
            return tuple(probs)
        # Any other result type is treated as GetBoth. A list (not a tuple)
        # is returned here, as this classifier always has.
        return [tuple(values), tuple(probs)]

    def __reduce__(self):
        """Pickle support: rebuild from the constructor arguments, then restore ``__dict__``."""
        return type(self), (self.classifiers, self.domains, self.name), dict(self.__dict__)

if __name__ == '__main__':
    # Ad-hoc comparison script: cross-validates three binary relevance
    # learners and a multi-target tree learner on 'bridges.v2.nm' and prints
    # the mt_average_score (with RMSE) of each, followed by the elapsed time.
    # Each print() call takes one parenthesised string, so the output is
    # identical under the Python 2 print statement.
    import time

    print("STARTED")
    started_at = time.time()

    bridges = Orange.data.Table('bridges.v2.nm')

    learners = [
        BinaryRelevanceLearner(learner=Orange.classification.tree.SimpleTreeLearner),
        BinaryRelevanceLearner(learner=Orange.classification.bayes.NaiveLearner),
        BinaryRelevanceLearner(learner=Orange.classification.majority.MajorityLearner),
        Orange.multitarget.tree.MultiTreeLearner(),
    ]

    results = Orange.evaluation.testing.cross_validation(learners, bridges)

    scores = Orange.evaluation.scoring.mt_average_score(
        results, Orange.evaluation.scoring.RMSE)

    for position, score in enumerate(scores):
        print("%s %s" % (results.classifierNames[position], score))

    print("--DONE %.2f --" % (time.time() - started_at))