Commits

Miran Levar committed 6c0af23

Major changes and additions to addon

  • Parent commits be96e62

Files changed (15)

File _multitarget/__init__.py

 """
-Wrapper for constructing multi-target learners
-==============================================
-
-This module also contains a wrapper, an auxilary learner, that can be used
-to construct simple multi-target learners from standard learners designed
-for data with a single class. The wrapper uses the specified base learner
-to construct independent models for each class.
-
-.. index:: MultitargetLearner
-.. autoclass:: Orange.multitarget.MultitargetLearner
-    :members:
-    :show-inheritance:
-
-.. index:: MultitargetClassifier
-.. autoclass:: Orange.multitarget.MultitargetClassifier
-    :members:
-    :show-inheritance:
-
 Examples
 ========
 
 
 import Orange
 # Other algorithms which also work with multitarget data
-from Orange.regression import pls
 from Orange.regression import earth
 
 # Multi-target algorithms
 import binary
 import neural
 import scoring
-
-class MultitargetLearner(Orange.classification.Learner):
-    """
-    Wrapper for multitarget problems that constructs independent models
-    of a base learner for each class variable.
-
-    .. attribute:: learner
-
-        The base learner used to learn models for each class.
-    """
-
-    def __new__(cls, learner, data=None, weight=0, **kwargs):
-        self = Orange.classification.Learner.__new__(cls, **kwargs)
-        if data:
-            self.__init__(learner, **kwargs)
-            return self.__call__(data, weight)
-        else:
-            return self
-    
-    def __init__(self, learner, **kwargs):
-        """
-
-        :param learner: Base learner used to construct independent
-                        models for each class.
-        """
-
-        self.learner = learner
-        self.__dict__.update(kwargs)
-
-    def __call__(self, data, weight=0):
-        """
-        Learn independent models of the base learner for each class.
-
-        :param data: Multitarget data instances (with more than 1 class).
-        :type data: :class:`Orange.data.Table`
-
-        :param weight: Id of meta attribute with weights of instances
-        :type weight: :obj:`int`
-
-        :rtype: :class:`Orange.multitarget.MultitargetClassifier`
-        """
-
-        if not data.domain.class_vars:
-            raise Exception('No classes defined.')
-        
-        domains = [Orange.data.Domain(data.domain.attributes, y)
-                   for y in data.domain.class_vars]
-        classifiers = [self.learner(Orange.data.Table(dom, data), weight)
-                       for dom in domains]
-        return MultitargetClassifier(classifiers=classifiers, domains=domains)
-        
-    def __reduce__(self):
-        return type(self), (self.learner,), dict(self.__dict__)
-
-
-class MultitargetClassifier(Orange.classification.Classifier):
-    """
-    Multitarget classifier that returns a list of predictions from each
-    of the independent base classifiers.
-
-    .. attribute classifiers
-
-        List of individual classifiers for each class.
-    """
-
-    def __init__(self, classifiers, domains):
-        self.classifiers = classifiers
-        self.domains = domains
-
-    def __call__(self, instance, return_type=Orange.core.GetValue):
-        """
-        :param instance: Instance to be classified.
-        :type instance: :class:`Orange.data.Instance`
-
-        :param return_type: One of
-            :class:`Orange.classification.Classifier.GetValue`,
-            :class:`Orange.classification.Classifier.GetProbabilities` or
-            :class:`Orange.classification.Classifier.GetBoth`
-        """
-
-        predictions = [c(Orange.data.Instance(dom, instance), return_type)
-                       for c, dom in zip(self.classifiers, self.domains)]
-        return zip(*predictions) if return_type == Orange.core.GetBoth \
-               else predictions
-
-    def __reduce__(self):
-        return type(self), (self.classifiers, self.domains), dict(self.__dict__)
-
+import pls
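
With the wrapper classes gone, per-target models are now built through the addon's
submodules. A minimal sketch of the refactored entry point, assuming the
``multitarget-synthetic`` data and the ``BinaryRelevanceLearner`` constructor used
elsewhere in this commit:

    import Orange

    data = Orange.data.Table('multitarget-synthetic')
    # BinaryRelevanceLearner fits an independent copy of the base learner
    # for each class variable, as the removed MultitargetLearner did
    learner = Orange.multitarget.binary.BinaryRelevanceLearner(
        learner=Orange.classification.majority.MajorityLearner())
    classifier = learner(data)
    print 'Predictions:', classifier(data[0])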

File _multitarget/binary.py

     l1 = BinaryRelevanceLearner(learner = Orange.classification.tree.SimpleTreeLearner)
     l2 = BinaryRelevanceLearner(learner = Orange.classification.bayes.NaiveLearner)
     l3 = BinaryRelevanceLearner(learner = Orange.classification.majority.MajorityLearner)
-    l4 = Orange.multitarget.tree.MultiTreeLearner()
 
-    res = Orange.evaluation.testing.cross_validation([l1,l2,l3,l4],data)
+    res = Orange.evaluation.testing.cross_validation([l1,l2,l3],data)
 
-    scores = Orange.evaluation.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
 
     for i in range(len(scores)):
         print res.classifierNames[i], scores[i]

File _multitarget/chain.py

     :param name: learner name.
     :type name: string
 
-    :rtype: :class:`Orange.multitarget.chain.RandomForestClassifier` or 
-            :class:`Orange.multitarget.chain.RandomForestLearner`
+    :rtype: :class:`Orange.multitarget.chain.ClassifierChain` or 
+            :class:`Orange.multitarget.chain.ClassifierChainLearner`
 
     """
 
 
     res = Orange.evaluation.testing.cross_validation([cl1,cl2,cl3,cl4],data)
 
-    scores = Orange.evaluation.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
    
     for i in range(len(scores)):
         print res.classifierNames[i], scores[i]
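
The corrected return types point at the classes this module actually defines. A
minimal sketch of using the chain learner, assuming the ``learner`` keyword shown
in the updated ``multitarget.py`` example later in this commit:

    import Orange

    data = Orange.data.Table('multitarget-synthetic')
    # in a classifier chain, each fitted model's target is appended to the
    # feature set of the next model in the chain
    chain = Orange.multitarget.chain.ClassifierChainLearner(
        learner=Orange.classification.majority.MajorityLearner())
    c_chain = chain(data)
    print 'Chain predictions:', c_chain(data[0])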

File _multitarget/neural.py

 Multi-target Neural Network Learner
 ***************************************
 
-
-.. index:: Multi-target Neural Network Learner
-.. autoclass:: Orange.multitarget.neural.NeuralNetworkLearner
-    :members:
-    :show-inheritance:
-
-.. index:: Multi-target Neural Network Classifier
-.. autoclass:: Orange.multitarget.neural.NeuralNetworkClassifier
-    :members:
-    :show-inheritance:
+Additional use cases for multi-target (MT) data.
 
 """
 
-
-import Orange
-import random
-import numpy as np
-np.seterr('ignore') # set to ignore to disable overflow errors
-import scipy.sparse
-from scipy.optimize import fmin_l_bfgs_b
-
-class _NeuralNetwork:
-    def __init__(self, layers,lambda_=1, callback=None, **fmin_args):
-        self.layers = layers
-        self.lambda_ = lambda_
-        self.callback = callback
-        self.fmin_args = fmin_args
-
-    def unfold_params(self, params):
-        i0, i1, i2 = self.layers
-
-        i = (i0 + 1) * i1
-
-        Theta1 = params[:i].reshape((i1, i0 + 1))
-        Theta2 = params[i:].reshape((i2, i1 + 1))
-
-        return Theta1, Theta2
-
-    def cost_grad(self, params):
-        Theta1, Theta2 = self.unfold_params(params)
-
-        if self.callback:
-            self.Theta1 = Theta1
-            self.Theta2 = Theta2
-            self.callback(self)
-
-        # Feedforward Propagation
-        m, n = self.X.shape
-
-        a1 = self.X
-        z2 = a1.dot(Theta1.T)
-        a2 = np.column_stack((np.ones(m), _sigmoid(z2)))
-        z3 = a2.dot(Theta2.T)
-        a3 = _sigmoid(z3)
-
-        # Cost
-        J = np.sum(-self.y * np.log(a3) - (1 - self.y) * np.log(1 - a3)) / m
-
-        t1 = Theta1.copy()
-        t1[:, 0] = 0
-        t2 = Theta2.copy()
-        t2[:, 0] = 0
-
-        # regularization
-        reg = np.dot(t1.flat, t1.flat)
-        reg += np.dot(t2.flat, t2.flat)
-        J += float(self.lambda_) * reg / (2.0 * m)
-
-        # Grad
-        d3 = a3 - self.y
-        d2 = d3.dot(Theta2)[:, 1:] * _sigmoid_gradient(z2)
-
-        D2 = a2.T.dot(d3).T / m
-        D1 = a1.T.dot(d2).T / m
-
-        # regularization
-        D2 += t2 * (float(self.lambda_) / m)
-        D1 += t1 * (float(self.lambda_) / m)
-
-        return J, np.hstack((D1.flat, D2.flat))
-
-    def fit(self, X, y):
-        i0, i1, i2 = self.layers
-
-        m, n = X.shape
-        n_params = i1 * (i0 + 1) + i2 * (i1 + 1)
-        eps = np.sqrt(6) / np.sqrt(i0 + i2)
-        initial_params = np.random.randn(n_params) * 2 * eps - eps
-
-        self.X = self.append_ones(X)
-        self.y = y
-
-        params, _, _ = fmin_l_bfgs_b(self.cost_grad, initial_params, **self.fmin_args)
-
-        self.Theta1, self.Theta2 = self.unfold_params(params)
-
-    def predict(self, X):
-        m, n = X.shape
-        
-        a2 = _sigmoid(self.append_ones(X).dot(self.Theta1.T))
-        a3 = _sigmoid(np.column_stack((np.ones(m), a2)).dot(self.Theta2.T))
-
-        return a3
-
-    def append_ones(self, X):
-        m, n = X.shape
-        if scipy.sparse.issparse(X):
-            return scipy.sparse.hstack((np.ones((m, 1)), X)).tocsr()
-        else:
-            return np.column_stack((np.ones(m), X))
-
-def _sigmoid(x):
-    return 1.0 / (1.0 + np.exp(-x))
-
-def _sigmoid_gradient(x):
-    sx = _sigmoid(x)
-    return sx * (1 - sx)
-
-
-class NeuralNetworkLearner(Orange.classification.Learner):
-    """
-    NeuralNetworkLearner uses jzbontar's implementation of neural networks and wraps it in
-    an Orange compatible learner. 
-    TODO: explain neural networks
-
-    :param name: learner name.
-    :type name: string
-
-    :param n_mid: Number of nodes in the hidden layer
-    :type n_mid: integer
-
-    :param reg_fact: Regularization factor.
-    :type reg_fact: float
-
-    :param max_iter: Maximum number of iterations.
-    :type max_iter: integer
-
-    :rtype: :class:`Orange.multitarget.neural.neuralNetworkLearner` or 
-            :class:`Orange.multitarget.chain.NeuralNetworkClassifier`
-    """
-
-    def __new__(cls, data=None, weight = 0, **kwargs):
-        self = Orange.classification.Learner.__new__(cls, **kwargs)
-
-        if data is None:   
-            return self
-        else:
-            self.__init__(**kwargs)
-            return self(data,weight)
-
-    def __init__(self, name="NeuralNetwork", n_mid=10, reg_fact=1, max_iter=1000, rand=None):
-        """
-        Current default values are the same as in the original implementation (neural_networks.py)
-        Currently supports only multi-label data.
-        """
-
-        self.name = name
-        self.n_mid = n_mid
-        self.reg_fact = reg_fact
-        self.max_iter = max_iter
-        self.rand = rand
-
-        if not self.rand:
-            self.rand = random.Random(42)
-        np.random.seed(self.rand.randint(0,10000))
-
-    def __call__(self,data,weight=0):
-        """
-        Learn from the given table of data instances.
-        
-        :param instances: data for learning.
-        :type instances: class:`Orange.data.Table`
-
-        :param weight: weight.
-        :type weight: int
-
-        :param class_order: list of descriptors of class variables
-        :type class_order: list of :class:`Orange.feature.Descriptor`
-
-        :rtype: :class:`Orange.multitarget.chain.NeuralNetworkClassifier`
-        """
-
-        #converts attribute data
-        X = data.to_numpy()[0] 
-
-        #converts multitarget classes
-        Y = np.array([[float(c) for c in d.get_classes()] for d in data])
-
-        #initializes neural networks
-        self.nn =  _NeuralNetwork([len(X[0]),self.n_mid,len(Y[0])], lambda_=self.reg_fact,maxfun=self.max_iter, iprint=-1)
-        
-        self.nn.fit(X,Y)
-               
-        return NeuralNetworkClassifier(classifier=self.nn.predict, domain = data.domain)
-
-
-class NeuralNetworkClassifier():
-    """    
-    Uses the classifier induced by the :obj:`NeuralNetworkLearner`.
-  
-    :param name: name of the classifier.
-    :type name: string
-    """
-
-    def __init__(self,**kwargs):
-        self.__dict__.update(**kwargs)
-
-    def __call__(self,example, result_type=Orange.core.GetValue):
-        """
-        :param instance: instance to be classified.
-        :type instance: :class:`Orange.data.Instance`
-        
-        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
-              :class:`Orange.classification.Classifier.GetProbabilities` or
-              :class:`Orange.classification.Classifier.GetBoth`
-        
-        :rtype: :class:`Orange.data.Value`, 
-              :class:`Orange.statistics.Distribution` or a tuple with both
-        """
-
-        # transform example to numpy
-        input = np.array([[float(e) for e in example]])
-        # transform results from numpy
-        results = self.classifier(input).tolist()[0]
-        mt_prob = []
-        mt_value = []
-        
-        for varn in range(len(self.domain.class_vars)):
-            # handle discrete class
-
-            if self.domain.class_vars[varn].var_type == Orange.feature.Discrete.Discrete:
-
-                cprob = Orange.statistics.distribution.Discrete(self.domain.class_vars[varn])
-                cprob[0] = 1.-results[varn]
-                cprob[1] = float(results[varn])
-                mt_prob.append(cprob)
-                mt_value.append(Orange.data.Value(Orange.feature.Continuous(self.domain.class_vars[varn].name),results[varn]))
-            else:
-                raise ValueError("non-discrete classes not supported")
-        
-        if result_type == Orange.classification.Classifier.GetValue: return tuple(mt_value)
-        elif result_type == Orange.classification.Classifier.GetProbabilities: return tuple(mt_prob)
-        else: 
-            return [tuple(mt_value),tuple(mt_prob)]
-
+from Orange.classification.neural import NeuralNetworkLearner, NeuralNetworkClassifier
 
 if __name__ == '__main__':
+    import Orange
     import time
     print "STARTED"
     global_timer = time.time()
 
+    l = Orange.multitarget.neural.NeuralNetworkLearner()
+
     data = Orange.data.Table('multitarget:emotions.tab')
-    
-    l = NeuralNetworkLearner()
-
-    res = Orange.evaluation.testing.cross_validation([l],data)
-
-    scores = Orange.evaluation.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+    res = Orange.evaluation.testing.cross_validation([l],data, 3)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
 
     for i in range(len(scores)):
         print res.classifierNames[i], scores[i]
 
+    data = Orange.data.Table('multitarget:flare.tab')
+
+    res = Orange.evaluation.testing.cross_validation([l],data, 3)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+
+    for i in range(len(scores)):
+        print res.classifierNames[i], scores[i]
+
     print "--DONE %.2f --" % (time.time()-global_timer)

File _multitarget/pls.py

+"""
+.. index:: PLS Classification Learner
+
+
+***************************************
+PLS Classification Learner
+***************************************
+
+
+.. index:: PLS Classification Learner
+.. autoclass:: Orange.multitarget.pls.PLSClassificationLearner
+    :members:
+    :show-inheritance:
+
+.. index:: PLS Classifier
+.. autoclass:: Orange.multitarget.pls.PLSClassifier
+    :members:
+    :show-inheritance:
+
+"""
+
+
+from Orange.regression.pls import PLSRegressionLearner, PLSRegression
+import Orange
+
+class PLSClassificationLearner(Orange.classification.Learner):
+    """
+    Expands and wraps :class:`Orange.regression.PLSRegressionLearner` to support classification. Multi-valued
+    discrete classes are expanded with :class:`Orange.data.continuization.DomainContinuizer`.
+
+    :rtype: :class:`Orange.multitarget.pls.PLSClassifier` or
+            :class:`Orange.multitarget.pls.PLSClassificationLearner`
+
+    """
+    def __new__(cls, data=None, weight = 0, **kwargs):
+        self = Orange.classification.Learner.__new__(cls, **kwargs)
+
+        if data is None:   
+            return self
+        else:
+            self.__init__(**kwargs)
+            return self(data,weight)
+
+    def __call__(self,data,weight=0, **kwargs):
+        """
+        Learn from the given table of data instances.
+        
+        :param instances: data for learning.
+        :type instances: :class:`Orange.data.Table`
+
+        :param weight: weight.
+        :type weight: int
+
+        :rtype: :class:`Orange.multitarget.pls.PLSClassifier`
+        """
+        cont = Orange.data.continuization.DomainContinuizer(multinomial_treatment = Orange.data.continuization.DomainContinuizer.NValues)
+        pls = Orange.regression.pls.PLSRegressionLearner(data, weight, continuizer = cont,  **kwargs)
+
+        cvals = [len(cv.values) if len(cv.values) > 2 else 1 for cv in data.domain.class_vars]
+        cvals = [0] + [sum(cvals[0:i]) for i in xrange(1, len(cvals) + 1)]
+
+        return PLSClassifier(classifier=pls, domain=data.domain, cvals=cvals)
+
+class PLSClassifier():
+    """
+    Uses the classifier induced by :class:`Orange.multitarget.pls.PLSClassificationLearner`.
+
+    """
+    def __init__(self,**kwargs):
+        self.__dict__.update(**kwargs)
+
+    def __call__(self,example, result_type=Orange.core.GetValue):
+        """
+        :param instance: instance to be classified.
+        :type instance: :class:`Orange.data.Instance`
+        
+        :param result_type: :class:`Orange.classification.Classifier.GetValue` or \
+              :class:`Orange.classification.Classifier.GetProbabilities` or
+              :class:`Orange.classification.Classifier.GetBoth`
+        
+        :rtype: :class:`Orange.data.Value`, 
+              :class:`Orange.statistics.Distribution` or a tuple with both
+        """
+        res = self.classifier(example, 1)
+        mt_prob = []
+        mt_value = []
+
+        for cls in xrange(len(self.domain.class_vars)):
+            if self.cvals[cls + 1] - self.cvals[cls] > 2:
+                cprob = Orange.statistics.distribution.Discrete([p.keys()[0] for p in res[self.cvals[cls]:self.cvals[cls+1]]])
+                cprob.normalize()
+            else:
+                r = res[self.cvals[cls]].keys()[0]
+                cprob = Orange.statistics.distribution.Discrete([1.0 - r, r])
+
+            mt_prob.append(cprob)
+            mt_value.append(Orange.data.Value(self.domain.class_vars[cls], cprob.values().index(max(cprob))))
+
+        if result_type == Orange.core.GetValue: return tuple(mt_value)
+        elif result_type == Orange.core.GetProbabilities: return tuple(mt_prob)
+        else: 
+            return [tuple(mt_value),tuple(mt_prob)]
+
+if __name__ == '__main__':
+    import time
+    print "STARTED"
+    global_timer = time.time()
+
+    l = Orange.multitarget.pls.PLSClassificationLearner()
+
+    data = Orange.data.Table('multitarget:emotions.tab')
+    res = Orange.evaluation.testing.cross_validation([l],data, 3)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+
+    for i in range(len(scores)):
+        print res.classifierNames[i], scores[i]
+
+    data = Orange.data.Table('multitarget:bridges.tab')
+    res = Orange.evaluation.testing.cross_validation([l],data, 3)
+    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
+
+    for i in range(len(scores)):
+        print res.classifierNames[i], scores[i]
+
+    print "--DONE %.2f --" % (time.time()-global_timer)
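
A quick sketch of the classifier's three return modes, following the ``__call__``
signature above (``bridges.tab`` as in the self-test):

    import Orange

    data = Orange.data.Table('multitarget:bridges.tab')
    classifier = Orange.multitarget.pls.PLSClassificationLearner(data)
    values = classifier(data[0])                               # Orange.core.GetValue
    probs = classifier(data[0], Orange.core.GetProbabilities)  # distributions only
    both = classifier(data[0], Orange.core.GetBoth)            # [values, probabilities]
    print values
    print probs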

File _multitarget/scoring.py

-"""
-.. index:: Multi-target Scoring
-.. automodule:: Orange.multitarget.scoring
-
-
-***************************************
-Multi-target Scoring
-***************************************
-
-:doc:`Multi-target <Orange.multitarget>` classifiers predict values for
-multiple target classes. They can be used with standard
-:obj:`~Orange.evaluation.testing` procedures (e.g.
-:obj:`~Orange.evaluation.testing.Evaluation.cross_validation`), but require
-special scoring functions to compute a single score from the obtained
-:obj:`~Orange.evaluation.testing.ExperimentResults`.
-Since different targets can vary in importance depending on the experiment,
-some methods have options to indicate this e.g. through weights or customized
-distance functions. These can also be used for normalization in case target
-values do not have the same scales.
-
-.. autofunction:: mt_flattened_score
-.. autofunction:: mt_average_score
-
-The whole procedure of evaluating multi-target methods and computing
-the scores (RMSE errors) is shown in the following example
-(:download:`mt-evaluate.py <code/mt-evaluate.py>`). Because we consider
-the first target to be more important and the last not so much we will
-indicate this using appropriate weights.
-
-.. literalinclude:: code/mt-evaluate.py
-
-Which outputs::
-
-    Weighted RMSE scores:
-        Majority    0.8228
-          MTTree    0.3949
-             PLS    0.3021
-           Earth    0.2880
-
-Two more accuracy measures based on the article by Zaragoza et al.(2011); applicable to discrete classes:
-
-Global accuracy (accuracy per example) over d-dimensional class variable:
-
-.. autofunction:: mt_global_accuracy
-
-Mean accuracy (accuracy per class or per label) over d class variables: 
-
-.. autofunction:: mt_mean_accuracy   
-
-References
-==========
-
-Zaragoza, J.H., Sucar, L.E., Morales, E.F.,Bielza, C., Larranaga, P.  (2011). 'Bayesian Chain Classifiers for Multidimensional Classification', Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011),  pp:2192-2197.
-
+"""
+.. index:: Multi-target Scoring
+
+
+***************************************
+Multi-target Scoring
+***************************************
+
+:doc:`Multi-target <Orange.multitarget>` classifiers predict values for
+multiple target classes. They can be used with standard
+:obj:`~Orange.evaluation.testing` procedures (e.g.
+:obj:`~Orange.evaluation.testing.Evaluation.cross_validation`), but require
+special scoring functions to compute a single score from the obtained
+:obj:`~Orange.evaluation.testing.ExperimentResults`.
+Since different targets can vary in importance depending on the experiment,
+some methods have options to indicate this e.g. through weights or customized
+distance functions. These can also be used for normalization in case target
+values do not have the same scales.
+
+.. autofunction:: mt_flattened_score
+.. autofunction:: mt_average_score
+
+The whole procedure of evaluating multi-target methods and computing
+the scores (RMSE errors) is shown in the following example
+(:download:`mt-evaluate.py <code/mt-evaluate.py>`). Because we consider
+the first target to be more important and the last not so much we will
+indicate this using appropriate weights.
+
+.. literalinclude:: code/mt-evaluate.py
+
+Which outputs::
+
+    Weighted RMSE scores:
+        Majority    0.8228
+      Clust Tree    0.4528
+             PLS    0.3021
+           Earth    0.2880
+
+Two more accuracy measures, based on the article by Zaragoza et al. (2011), are applicable to discrete classes:
+
+Global accuracy (accuracy per example) over d-dimensional class variable:
+
+.. autofunction:: mt_global_accuracy
+
+Mean accuracy (accuracy per class or per label) over d class variables: 
+
+.. autofunction:: mt_mean_accuracy   
+
+References
+==========
+
+Zaragoza, J.H., Sucar, L.E., Morales, E.F., Bielza, C., Larranaga, P. (2011). 'Bayesian Chain Classifiers for Multidimensional Classification', Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011), pp. 2192-2197.
+
 """
 
 import Orange
 from Orange import statc, corn
+from collections import Iterable
 from Orange.utils import deprecated_keywords, deprecated_function_name, \
     deprecation_warning, environ
 
     # save original classes
     clsss = [te.classes for te in res.results]
     aclsss = [te.actual_class for te in res.results]
+    probss = [te.probabilities if te.probabilities else None for te in res.results]
+    cls_vals = res.class_values if res.class_values else None
+
     # compute single target scores
     single_scores = []
     for i in range(n_classes):
-        for te, clss, aclss in zip(res.results, clsss, aclsss):
+        for te, clss, aclss, probs in zip(res.results, clsss, aclsss, probss):
             te.classes = [cls[i] for cls in clss]
             te.actual_class = aclss[i]
+            te.probabilities = [prob[i] for prob in probs] if probs else None
+        res.class_values = cls_vals[i] if cls_vals else None
+
         single_scores.append(score[i](res))
     # restore original classes
-    for te, clss, aclss in zip(res.results, clsss, aclsss):
+    for te, clss, aclss, probs in zip(res.results, clsss, aclsss, probss):
         te.classes = clss
         te.actual_class = aclss
+        te.probabilities = probs
+    res.class_values = cls_vals
+
     return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
         for scores in zip(*single_scores)]
 
     return results
 
 
-
 ################################################################################
 if __name__ == "__main__":
-    avranks = [3.143, 2.000, 2.893, 1.964]
-    names = ["prva", "druga", "tretja", "cetrta" ]
-    cd = compute_CD(avranks, 14)
-    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
-    print cd
-
-    print compute_friedman(avranks, 14)
-
-    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)
+    pass
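
``mt_average_score`` computes each single-target score separately and averages
them; the closing expression above weights each target's contribution by
``weights``. A minimal sketch of a weighted evaluation, assuming the four
continuous targets of ``multitarget-synthetic`` and the ``weights`` keyword
implied by that expression:

    import Orange

    data = Orange.data.Table('multitarget-synthetic')
    learners = [Orange.multitarget.pls.PLSRegressionLearner(name='PLS')]
    res = Orange.evaluation.testing.cross_validation(learners, data, folds=3)
    # weight the first target most heavily and the last the least
    scores = Orange.multitarget.scoring.mt_average_score(
        res, Orange.evaluation.scoring.RMSE, weights=[5, 2, 2, 1])
    print 'Weighted RMSE:', scores[0]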

File _multitarget/tree.py

 """
-.. index:: Multi-target Tree Learner
+
+.. index:: Clustering Tree Learner
 
 ***************************************
-Multi-target Tree Learner
+Clustering Tree Learner
 ***************************************
 
-To use the tree learning algorithm for multi-target data, standard
-orange trees (:class:`Orange.classification.tree.TreeLearner`) can be used.
-Only the :obj:`~Orange.classification.tree.TreeLearner.measure` for feature
-scoring and the :obj:`~Orange.classification.tree.TreeLearner.node_learner`
-components have to be chosen so that they work on multi-target data domains.
+:obj:`ClusteringTreeLearner` is an implementation of classification and regression
+trees, based on :obj:`SimpleTreeLearner`. It is implemented in C++ for speed and low memory usage.
+Split features are selected by finding the clusters that lie furthest apart, as measured
+by the Euclidean distance between cluster prototypes (the means of the clusters).
 
-This module provides one such measure (:class:`MultitargetVariance`) that
-can be used and a helper class :class:`MultiTreeLearner` which extends
-:class:`~Orange.classification.tree.TreeLearner` and is the same in all
-aspects except for different (multi-target) defaults for
-:obj:`~Orange.classification.tree.TreeLearner.measure` and
-:obj:`~Orange.classification.tree.TreeLearner.node_learner`.
+:obj:`ClusteringTreeLearner` was developed to speed up the construction
+of random forests, but it can also be used as a standalone tree learner.
+
+.. class:: ClusteringTreeLearner
+
+    .. attribute:: min_majority
+
+        Minimal proportion of the majority class value that each of the class variables
+        has to reach for induction to stop (used only for classification).
+
+    .. attribute:: min_MSE
+
+        Minimal mean squared error that each of the class variables
+        has to reach for induction to stop (used only for regression).
+
+    .. attribute:: min_instances
+
+        Minimal number of instances in leaves. The instance count is weighted.
+
+    .. attribute:: max_depth
+
+        Maximal depth of the tree.
+
+    .. attribute:: skip_prob
+
+        At every split an attribute will be skipped with probability ``skip_prob``.
+        Useful for building random forests.
+
+    .. attribute:: random_generator
+        
+        Provide your own :obj:`Orange.misc.Random`.
 
 Examples
 ========
 
-The following example demonstrates how to build a prediction model with
-MultitargetTreeLearner and use it to predict (multiple) class values for
-a given instance (:download:`multitarget.py <code/multitarget.py>`):
+:obj:`ClusteringTreeLearner` can be used on its own or in a random forest; examples
+of both uses are shown below.
 
-.. literalinclude:: code/multitarget.py
-    :lines: 1-4, 10-12
 
+.. literalinclude:: code/clustering_tree.py
 
-.. index:: Multi-target Variance 
-.. autoclass:: Orange.multitarget.tree.MultitargetVariance
-    :members:
-    :show-inheritance:
-
-.. index:: Multi-target Tree Learner
-.. autoclass:: Orange.multitarget.tree.MultiTreeLearner
-    :members:
-    :show-inheritance:
-
-.. index:: Multi-target Tree Classifier
-.. autoclass:: Orange.multitarget.tree.MultiTree
-    :members:
-    :show-inheritance:
 
 """
 
-from operator import itemgetter
 
-import Orange
-import numpy as np
 
+from Orange.core import ClusteringTreeLearner, ClusteringTreeClassifier
 
-def weighted_variance(X, weights=None):
-    """Computes the variance using a weighted distance to the centroid."""
-    if not weights:
-        weights = [1] * len(X[0])
-    X = X * np.array(weights)
-    return np.sum(np.sum((X - np.mean(X, 0))**2, 1))
-
-class MultitargetVariance(Orange.feature.scoring.Score):
-    """
-    A multi-target score that ranks features based on the average class
-    variance of the subsets.
-
-    To compute it, a prototype has to be defined for each subset. Here, it
-    is just the mean vector of class variables. Then the sum of squared
-    distances to the prototypes is computed in each subset. The final score
-    is obtained as the average of subset variances (weighted, to account for
-    subset sizes).
-    
-    Weights can be passed to the constructor to normalize classes with values
-    of different magnitudes or to increase the importance of some classes. In
-    this case, class values are first scaled according to the given weights.
-    """
-
-    def __init__(self, weights=None):
-        """
-        :param weights: Weights of the class variables used when computing
-                        distances. If None, all weights are set to 1.
-        :type weigts: list
-        """
-
-        # Types of classes allowed
-        self.handles_discrete = True
-        ## TODO: for discrete classes with >2 values entropy should be used
-        ## instead of variance
-        self.handles_continuous = True
-        # Can handle continuous features
-        self.computes_thresholds = True
-        # Needs instances
-        self.needs = Orange.feature.scoring.Score.Generator
-
-        self.weights = weights
-
-
-    def threshold_function(self, feature, data, cont_distrib=None, weights=0):
-        """
-        Evaluates possible splits of a continuous feature into a binary one
-        and scores them.
-        
-        :param feature: Continuous feature to be split.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set to be split using the given continuous
-                     feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`list` of :obj:`tuples <tuple>`
-                 [(threshold, score, None),]
-        """
-
-        f = data.domain[feature]
-        values = sorted(set(ins[f].value for ins in data))
-        ts = []
-
-        if values[-1]=='?':
-            values = values[:-1]
-
-        ts = [(v1 + v2) / 2. for v1, v2 in zip(values, values[1:])]
-        if len(ts) > 40:
-            ts = ts[::len(ts)/20]
-        scores = []
-        for t in ts:
-            bf = Orange.feature.discretization.IntervalDiscretizer(
-                points=[t]).construct_variable(f)
-            dom2 = Orange.data.Domain([bf], class_vars=data.domain.class_vars)
-            data2 = Orange.data.Table(dom2, data)
-            scores.append((t, self.__call__(bf, data2)))
-        return scores
-
-    def best_threshold(self, feature, data):
-        """
-        Computes the best threshold for a split of a continuous feature.
-
-        :param feature: Continuous feature to be split.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set to be split using the given continuous
-                     feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`tuple` (threshold, score, None)
-        """
-
-        scores = self.threshold_function(feature, data)
-        threshold, score = max(scores, key=itemgetter(1))
-        return (threshold, score, None)
-
-    def __call__(self, feature, data, apriori_class_distribution=None,
-                 weights=0):
-        """
-        :param feature: The feature to be scored.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set on which to score the feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`float`
-        """
-
-        split = dict((ins[feature].value, []) for ins in data)
-        for ins in data:
-            split[ins[feature].value].append(ins.get_classes())
-        score = -sum(weighted_variance(x, self.weights) * len(x)
-                     for x in split.values())
-        return score
-
-
-class MultiTreeLearner(Orange.classification.tree.TreeLearner):
-    """
-    MultiTreeLearner is a multi-target version of a tree learner. It is the
-    same as :class:`~Orange.classification.tree.TreeLearner`, except for the
-    default values of two parameters:
-    
-    .. attribute:: measure
-        
-        A multi-target score is used by default: :class:`MultitargetVariance`.
-
-    .. attribute:: node_learner
-        
-        Standard trees use
-        :class:`~Orange.classification.majority.MajorityLearner`
-        to construct prediction models in the leaves of the tree.
-        MultiTreeLearner uses the multi-target equivalent which can be 
-        obtained simply by wrapping the majority learner:
-
-        :class:`Orange.multitarget.MultitargetLearner`
-        (:class:`Orange.classification.majority.MajorityLearner()`).
-
-    """
-
-    def __init__(self, **kwargs):
-        """
-        The constructor passes all arguments to
-        :class:`~Orange.classification.tree.TreeLearner`'s constructor
-        :obj:`Orange.classification.tree.TreeLearner.__init__`.
-        """
-        
-        if 'measure' not in kwargs:
-            kwargs['measure'] = MultitargetVariance()
-        if 'node_learner' not in kwargs:
-            kwargs['node_learner'] = Orange.multitarget.MultitargetLearner(
-                Orange.classification.majority.MajorityLearner())
-        Orange.classification.tree.TreeLearner.__init__(self, **kwargs)
-
-    def __call__(self, data, weight=0):
-        """
-        :param data: Data instances to learn from.
-        :type data: :class:`Orange.data.Table`
-
-        :param weight: Id of meta attribute with weights of instances.
-        :type weight: :obj:`int`
-        """
-        
-        # Use the class, if data does not have class_vars
-        if not data.domain.class_vars and data.domain.class_var:
-            dom = Orange.data.Domain(data.domain.features,
-                data.domain.class_var, class_vars=[data.domain.class_var])
-            data = Orange.data.Table(dom, data)
-
-        # Check for missing class values in data
-        for ins in data:
-            for cval in ins.get_classes():
-                if cval.is_special():
-                    raise ValueError('Data has missing class values.')
-
-        # TreeLearner does not work on class-less domains,
-        # so we set the class if necessary
-        if not data.domain.class_var and data.domain.class_vars:
-            dom = Orange.data.Domain(data.domain.features,
-                data.domain.class_vars[0], class_vars=data.domain.class_vars)
-            data = Orange.data.Table(dom, data)
-
-        tree = Orange.classification.tree.TreeLearner.__call__(
-            self, data, weight)
-        return MultiTree(base_classifier=tree)
-
-class MultiTree(Orange.classification.tree.TreeClassifier):
-    """
-    MultiTree classifier is almost the same as the base class it extends
-    (:class:`~Orange.classification.tree.TreeClassifier`). Only the
-    :obj:`__call__` method is modified so it works with multi-target data.
-    """
-
-    def __call__(self, instance, return_type=Orange.core.GetValue):
-        """
-        :param instance: Instance to be classified.
-        :type instance: :class:`Orange.data.Instance`
-
-        :param return_type: One of
-            :class:`Orange.classification.Classifier.GetValue`,
-            :class:`Orange.classification.Classifier.GetProbabilities` or
-            :class:`Orange.classification.Classifier.GetBoth`
-        """
-
-        node = self.descender(self.tree, instance)[0]
-        return node.node_classifier(instance, return_type)
-
-    def __str__(self):
-        # Orange.classification.tree.TreeClassifier.__str__ raises an
-        # exception because our domain does not have a class_var
-        # so we bypass it (at least for now).
-        return Orange.classification.Classifier.__str__(self)
# distance measuring methods, exposed for easier access
+inter_distance = 0
+intra_distance = 1
+silhouette = 2
+gini_index = 3
 
 
 if __name__ == '__main__':
+    import Orange
+
     data = Orange.data.Table('multitarget-synthetic')
     print 'Actual classes:\n', data[0].get_classes()
     
     majority = Orange.classification.majority.MajorityLearner()
-    mt_majority = Orange.multitarget.MultitargetLearner(majority)
+    mt_majority = Orange.multitarget.binary.BinaryRelevanceLearner(learner=majority)
     c_mtm = mt_majority(data)
     print 'Majority predictions:\n', c_mtm(data[0])
 
-    mt_tree = MultiTreeLearner(max_depth=3)
+    mt_tree = ClusteringTreeLearner()
     c_mtt = mt_tree(data)
     print 'Multi-target Tree predictions:\n', c_mtt(data[0])

File docs/rst/Orange.multitarget.pls.rst

+
+
+.. automodule:: Orange.multitarget.pls
+
+***************************************
+PLS Regression Learner
+***************************************
+
+.. automodule:: Orange.regression.pls

File docs/rst/Orange.multitarget.rst

    Orange.multitarget.binary
    Orange.multitarget.chain
    Orange.multitarget.neural
-   Orange.regression.pls
+   Orange.multitarget.pls
    Orange.regression.earth
    Orange.multitarget.scoring
 
 * :doc:`Orange.multitarget.binary`
 * :doc:`Orange.multitarget.chain`
 * :doc:`Orange.multitarget.neural`
-* :doc:`Orange.regression.pls`
+* :doc:`Orange.multitarget.pls`
 * :doc:`Orange.regression.earth`
 
 For evaluation of multi-target methods, see the corresponding section in 
 :doc:`Orange.multitarget.scoring`.
 
 
+The addon also includes three sample datasets:
+
+* **bridges.tab** - dataset with 5 multi-class class variables
+* **flare.tab** - dataset with 3 multi-class class variables
+* **emotions.tab** - dataset with 6 binary class variables (a multi-label dataset)
+
+Example of loading an included dataset:
+
+.. literalinclude:: code/multitarget.py
+    :lines: 1-2
+
+
 .. automodule:: Orange.multitarget

File docs/rst/Orange.regression.pls.rst

-.. automodule:: Orange.regression.pls

File docs/rst/code/clustering_tree.py

+import Orange
+data = Orange.data.Table('multitarget:bridges.tab')
+
+majority = Orange.multitarget.binary.BinaryRelevanceLearner(
+    learner = Orange.classification.majority.MajorityLearner, name = "Majority")
+
+clust_tree = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_majority = 0.6, min_instances = 5,
+    method = Orange.multitarget.tree.inter_distance, name = "CT inter dist")
+
+# we can use different distance measuring methods
+ct2 = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_majority = 0.6, min_instances = 5,
+    method = Orange.multitarget.tree.intra_distance, name = "CT intra dist")
+
+ct3 = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_majority = 0.6, min_instances = 5,
+    method = Orange.multitarget.tree.silhouette, name = "CT silhouette")
+
+ct4 = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_majority = 0.6, min_instances = 5,
+    method = Orange.multitarget.tree.gini_index, name = "CT gini index")
+
+
+# forests work better if the trees are pruned less
+forest_tree = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_majority = 1.0, min_instances = 3)
+clust_forest = Orange.ensemble.forest.RandomForestLearner(
+    base_learner = forest_tree, trees = 50, name = "Clustering Forest")
+
+learners = [ majority, clust_tree, ct2, ct3, ct4, clust_forest ]
+
+results = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
+
+print "Classification - bridges.tab"
+print "%17s  %6s  %8s  %8s" % ("Learner", "RMSE", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%17s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.RMSE)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])
+
+# regression uses a different parameter for pruning - min_MSE instead of min_majority
+clust_tree = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_MSE = 0.05, min_instances = 5, name = "Clustering Tree")
+
+forest_tree = Orange.multitarget.tree.ClusteringTreeLearner(
+    max_depth = 50, min_MSE = 0.06, min_instances = 3)
+clust_forest = Orange.ensemble.forest.RandomForestLearner(
+    base_learner = forest_tree, trees = 50, name = "Clustering Forest")
+
+learners = [ majority, clust_tree, clust_forest ]
+
+data = Orange.data.Table('multitarget-synthetic.tab')
+results = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
+
+print "Regression - multitarget-synthetic.tab"
+print "%17s  %6s " % ("Learner", "RMSE")
+for i in range(len(learners)):
+    print "%17s  %1.4f  " % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.RMSE)[i])
+

File docs/rst/code/mt-evaluate.py

 
 data = Orange.data.Table('multitarget-synthetic')
 
-majority = Orange.multitarget.MultitargetLearner(
-    Orange.classification.majority.MajorityLearner(), name='Majority')
-tree = Orange.multitarget.tree.MultiTreeLearner(max_depth=3, name='MT Tree')
+majority = Orange.multitarget.binary.BinaryRelevanceLearner(learner=Orange.classification.majority.MajorityLearner(), name='Majority')
+tree = Orange.multitarget.tree.ClusteringTreeLearner(min_mse=1e-10, min_instances=3, name='Clust Tree')
 pls = Orange.multitarget.pls.PLSRegressionLearner(name='PLS')
 earth = Orange.multitarget.earth.EarthLearner(name='Earth')
 

File docs/rst/code/multitarget.py

 print 'Actual classes:', data[0].get_classes()
 
 majority = Orange.classification.majority.MajorityLearner()
-mt_majority = Orange.multitarget.MultitargetLearner(majority)
+mt_majority = Orange.multitarget.binary.BinaryRelevanceLearner(learner = majority)
 c_majority = mt_majority(data)
 print 'Majority predictions:\n', c_majority(data[0])
 
+mt_chain = Orange.multitarget.chain.ClassifierChainLearner(learner = majority)
+c_chain = mt_chain(data)
+print 'Chain Majority predictions:\n', c_chain(data[0])
+
 pls = Orange.multitarget.pls.PLSRegressionLearner()
 c_pls = pls(data)
 print 'PLS predictions:\n', c_pls(data[0])
 mt_tree = Orange.multitarget.tree.MultiTreeLearner(max_depth=3)
 c_tree = mt_tree(data)
 print 'Multi-target Tree predictions:\n', c_tree(data[0])
+
+clust_tree = Orange.multitarget.tree.ClusteringTreeLearner()
+c_clust_tree = clust_tree(data)
+print 'Clustering Tree predictions: \n', c_clust_tree(data[0])

File docs/rst/code/pls-example.py

+# Description: Partial least squares regression
+# Category:    regression
+# Uses:        multitarget-synthetic
+# Referenced:  Orange.regression.pls
+# Classes:     Orange.regression.pls.PLSRegressionLearner, Orange.regression.pls.PLSRegression
+
+import Orange
+
+data = Orange.data.Table("multitarget-synthetic.tab")
+print "Input variables:    ", data.domain.features
+print "Response variables: ", data.domain.class_vars
+    
+learner = Orange.multitarget.pls.PLSRegressionLearner()
+classifier = learner(data)
+
+print "Prediction for the first 2 data instances: \n" 
+for d in data[:2]:
+    print "Actual    ", d.get_classes()
+    print "Predicted ", classifier(d)
+    print 
+
+print 'Regression coefficients:\n', classifier    

File docs/rst/index.rst

 
 Source code is available on Bitbucket_. For issues and wiki we use Trac_.
 
-.. _Bitbucket: https://bitbucket.org/mlevar/orange-multitarget
+.. _Bitbucket: https://bitbucket.org/biolab/orange-multitarget
 .. _Trac: http://orange.biolab.si/trac/
 
 Indices and tables