Commits

Miran Levar committed 14d5bd1

Multiple fixes and additions

  • Parent commits 3bad5a9


Files changed (9)

File _multitarget/__init__.py

 import chain
 import binary
 import neural
+import scoring
 
 class MultitargetLearner(Orange.classification.Learner):
     """
     def __reduce__(self):
         return type(self), (self.classifiers, self.domains), dict(self.__dict__)
 
-if __name__ == '__main__':
-	a = resource_filename(__name__, 'datasets')
-	a = datasets()
-	print "done", a

File _multitarget/binary.py

         else:
             return self
 
-    def __init__(self, learner=None, name="Binary Relevance", callback=None):
+    def __init__(self, learner=None, name="Binary Relevance", callback=None, **kwargs):
         self.name = name
         self.callback = callback
 
         if not learner:
             raise TypeError("Wrong specification, learner not defined")
         else:
-            self.learner = learner           
+            self.learner = learner
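+        # store any extra keyword arguments as instance attributes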
+        self.__dict__.update(kwargs)
 
     def __call__(self, instances, weight=0):
         """
         :rtype: :class:`Orange.multitarget.BinaryRelevanceClassifier`
         """
 
-        instances = Orange.data.Table(instances.domain, instances) # bypasses ownership
+        if not instances.domain.class_vars:
+            raise ValueError('No class variables defined.')
 
-        n = len(instances)
         m = len(instances.domain.class_vars)
-        progress = 0.0
 
         classifiers = [None for _ in xrange(m)]
-        domains = [None for _ in xrange(m)]
-        orig_domain = copy.copy(instances.domain)
-
-        class_order = [cv for cv in instances.domain.class_vars]
-
-        learner = self.learner
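+        # one single-target domain per class variable, all sharing the original attributes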
+        domains = [Orange.data.Domain(instances.domain.attributes, cv) \
+                   for cv in instances.domain.class_vars]
 
         for i in range(m):
-            # sets one of the class_vars as class_var
-            instances.pick_class(class_order[i])            
-
-            # save domains for classification
-            domains[i] = Orange.data.Domain([d for d in instances.domain])
-
-            classifiers[i] = learner(instances, weight)
-
+            classifiers[i] = self.learner(Orange.data.Table(domains[i], instances), weight)
             if self.callback:
-                progress+=1
-                self.callback(progress / m)
+                self.callback((i + 1.0) / m)
 
         return BinaryRelevanceClassifier(classifiers=classifiers, domains=domains, name=self.name)
 
 
     def __init__(self, classifiers, domains, name):
         self.classifiers = classifiers
-        self.name = name
         self.domains = domains
+        self.name = name
 
     def __call__(self, instance, result_type = orange.GetValue):
         """
         :rtype: :class:`Orange.data.Value`, 
               :class:`Orange.statistics.Distribution` or a tuple with both
         """
-        m = len(instance.domain.class_vars)
-        values = [None for _ in range(m)] 
-        probs = [None for _ in range(m)] 
 
-        for i in range(m):
-            #add blank class for classification
-            inst = Orange.data.Instance(self.domains[i], [v for v in instance]+['?'])
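+        # convert the instance into each single-target domain and query its classifier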
+        predictions = [c(Orange.data.Instance(dom, instance), result_type) \
+                       for c, dom in zip(self.classifiers, self.domains)]
 
-            res = self.classifiers[i](inst, orange.GetBoth)
-            values[i] = res[0]
-            probs[i] = res[1]
-
-        if result_type == orange.GetValue: return tuple(values)
-        elif result_type == orange.GetProbabilities: return tuple(probs)
-        else: 
-            return [tuple(values),tuple(probs)]
+        return zip(*predictions) if result_type == Orange.core.GetBoth \
+               else predictions
 
     def __reduce__(self):
         return type(self), (self.classifiers, self.domains, self.name), dict(self.__dict__)
     print "STARTED"
     global_timer = time.time()
 
-    data = Orange.data.Table('bridges.v2.nm')
+    data = Orange.data.Table('multitarget:bridges.tab')
     
     l1 = BinaryRelevanceLearner(learner = Orange.classification.tree.SimpleTreeLearner)
     l2 = BinaryRelevanceLearner(learner = Orange.classification.bayes.NaiveLearner)

File _multitarget/chain.py

         else:
             return self
 
-    def __init__(self, learner=None, name="Classifier Chain", rand=None, callback=None, class_order=None):
+    def __init__(self, learner=None, name="Classifier Chain", rand=None, callback=None, class_order=None, **kwargs):
         self.name = name
         self.rand = rand
         self.callback = callback
 
         if not self.rand:
             self.rand = random.Random(42)
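+        # store any extra keyword arguments as instance attributes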
+        self.__dict__.update(kwargs)
 
         self.randstate = self.rand.getstate()
            
-
     def __call__(self, instances, weight=0, class_order=None):
         """
         Learn from the given table of data instances.
         self.rand.setstate(self.randstate) 
         n = len(instances)
         m = len(instances.domain.class_vars)
-        progress = 0.0
 
         classifiers = [None for _ in xrange(m)]
         domains = [None for _ in xrange(m)]
                 class_vars=instances.domain.class_vars))
 
             if self.callback:
-                progress+=1
-                self.callback(progress / m)
+                self.callback((i + 1.0) / m)
 
         return ClassifierChain(classifiers=classifiers, class_order=class_order, domains=domains, name=self.name, orig_domain=orig_domain)
 
         values = {cv:None for cv in instance.domain.class_vars}
         probs = {cv:None for cv in instance.domain.class_vars}
 
-
-
         for i in range(len(self.class_order)):
             # add blank class for classification
 
     print "STARTED"
     import time
     global_timer = time.time()
-    data = Orange.data.Table('bridges.v2.nm')
+    data = Orange.data.Table('multitarget:bridges.tab')
 
     cl1 = EnsembleClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, n_chains=50, sample_size=0.25, name="ECC T", rand = random.seed(time.time()))
     cl2 = EnsembleClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, n_chains=50, sample_size=0.25, name="ECC M", rand = random.seed(time.time()))

File _multitarget/neural.py

 
 
 import Orange
+import random
 import numpy as np
 np.seterr('ignore') # set to ignore to disable overflow errors
-np.random.seed(42) # TODO: check with Jure
 import scipy.sparse
 from scipy.optimize import fmin_l_bfgs_b
 
             self.__init__(**kwargs)
             return self(data,weight)
 
-    def __init__(self, name="NeuralNetwork", n_mid=10, reg_fact=1, max_iter=1000):
+    def __init__(self, name="NeuralNetwork", n_mid=10, reg_fact=1, max_iter=1000, rand=None):
         """
         Current default values are the same as in the original implementation (neural_networks.py)
         Currently supports only multi-label data.
         self.n_mid = n_mid
         self.reg_fact = reg_fact
         self.max_iter = max_iter
+        self.rand = rand
+
+        if not self.rand:
+            self.rand = random.Random(42)
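+        # seed numpy's global RNG from the local generator so results are reproducible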
+        np.random.seed(self.rand.randint(0,10000))
 
     def __call__(self,data,weight=0):
         """
     print "STARTED"
     global_timer = time.time()
 
-    data = Orange.data.Table('scene')
+    data = Orange.data.Table('multitarget:emotions.tab')
     
     l = NeuralNetworkLearner()
 

File _multitarget/scoring.py

+"""
+.. index:: Multi-target Scoring
+.. automodule:: Orange.multitarget.scoring
+
+
+***************************************
+Multi-target Scoring
+***************************************
+
+:doc:`Multi-target <Orange.multitarget>` classifiers predict values for
+multiple target classes. They can be used with standard
+:obj:`~Orange.evaluation.testing` procedures (e.g.
+:obj:`~Orange.evaluation.testing.Evaluation.cross_validation`), but require
+special scoring functions to compute a single score from the obtained
+:obj:`~Orange.evaluation.testing.ExperimentResults`.
+Since different targets can vary in importance depending on the experiment,
+some methods have options to indicate this, e.g. through weights or
+customized distance functions. These can also be used for normalization
+when target values are not on the same scale.
+
+.. autofunction:: mt_flattened_score
+.. autofunction:: mt_average_score
+
+The whole procedure of evaluating multi-target methods and computing
+the scores (RMSE errors) is shown in the following example
+(:download:`mt-evaluate.py <code/mt-evaluate.py>`). Because we consider
+the first target the most important and the last one the least, we
+indicate this with appropriate weights.
+
+.. literalinclude:: code/mt-evaluate.py
+
+Which outputs::
+
+    Weighted RMSE scores:
+        Majority    0.8228
+          MTTree    0.3949
+             PLS    0.3021
+           Earth    0.2880
+
+Two more accuracy measures, based on the article by Zaragoza et al. (2011),
+are applicable to discrete classes:
+
+Global accuracy (accuracy per example) over the d-dimensional class variable:
+
+.. autofunction:: mt_global_accuracy
+
+Mean accuracy (accuracy per class or per label) over d class variables:
+
+.. autofunction:: mt_mean_accuracy
+
+References
+==========
+
+Zaragoza, J. H., Sucar, L. E., Morales, E. F., Bielza, C., Larranaga, P. (2011). Bayesian Chain Classifiers for Multidimensional Classification. Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI 2011), pp. 2192-2197.
+
+"""
+
+import Orange
+from collections import Iterable
+from Orange import statc, corn
+from Orange.utils import deprecated_keywords, deprecated_function_name, \
+    deprecation_warning, environ
+
+def mt_average_score(res, score, weights=None):
+    """
+    Compute individual scores for each target and return the (weighted) average.
+
+    One method can be used to compute scores for all targets or a list of
+    scoring methods can be passed to use different methods for different
+    targets. In the latter case, care has to be taken if the ranges of scoring
+    methods differ.
+    For example, when the first target is scored from -1 to 1 (1 best) and the
+    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
+    to a span of 1, and invert the second so that higher scores are better.
+
+    :param score: Single-target scoring method or a list of such methods
+                  (one for each target).
+    :param weights: List of real weights, one for each target,
+                    for a weighted average.
+
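+    A minimal usage sketch, assuming ``res`` holds multi-target
+    cross-validation results with two targets::
+
+        rmse = Orange.evaluation.scoring.RMSE
+        scores = mt_average_score(res, rmse, weights=[2, 1])
+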
+    """
+    if not len(res.results):
+        raise ValueError, "Cannot compute the score: no examples."
+    if res.number_of_learners < 1:
+        return []
+    n_classes = len(res.results[0].actual_class)
+    if weights is None:
+        weights = [1.] * n_classes
+    if not isinstance(score, Iterable):
+        score = [score] * n_classes
+    elif len(score) != n_classes:
+        raise ValueError, "Number of scoring methods and targets do not match."
+    # save original classes
+    clsss = [te.classes for te in res.results]
+    aclsss = [te.actual_class for te in res.results]
+    # compute single target scores
+    single_scores = []
+    for i in range(n_classes):
+        for te, clss, aclss in zip(res.results, clsss, aclsss):
+            te.classes = [cls[i] for cls in clss]
+            te.actual_class = aclss[i]
+        single_scores.append(score[i](res))
+    # restore original classes
+    for te, clss, aclss in zip(res.results, clsss, aclsss):
+        te.classes = clss
+        te.actual_class = aclss
+    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
+        for scores in zip(*single_scores)]
+
+def mt_flattened_score(res, score):
+    """
+    Flatten (concatenate into a single list) the predictions of multiple
+    targets and compute a single-target score.
+    
+    :param score: Single-target scoring method.
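+
+    A minimal usage sketch, assuming ``res`` holds multi-target
+    evaluation results::
+
+        flat_rmse = mt_flattened_score(res, Orange.evaluation.scoring.RMSE)
+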
+    """
+    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
+        res.classifier_names, class_values=res.class_values,
+        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
+        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
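+    # flatten: copy each tested example once per target, with single-target results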
+    for te in res.results:
+        for i, ac in enumerate(te.actual_class):
+            te2 = Orange.evaluation.testing.TestedExample(
+                iteration_number=te.iteration_number, actual_class=ac)
+            for c, p in zip(te.classes, te.probabilities):
+                te2.add_result(c[i], p[i])
+            res2.results.append(te2)
+    return score(res2)
+
+def mt_global_accuracy(res):
+    """
+    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}})`
+
+    :math:`\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) = \\begin{cases}1 & \\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0 & \\text{otherwise}\\end{cases}`
+    """
+    results = []
+    for l in xrange(res.number_of_learners):
+        n_results = len(res.results)
+        n_correct = 0.
+
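+        # an example counts as correct only if the entire class vector matches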
+        for r in res.results:
+            if list(r.classes[l]) == r.actual_class:
+                n_correct+=1
+
+        results.append(n_correct/n_results)
+    return results
+
+
+def mt_mean_accuracy(res):
+    """
+    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d}\\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij})`
+
+    :math:`\\delta(c_{ij}',c_{ij}) = \\begin{cases}1 & c_{ij}'=c_{ij}\\\\ 0 & \\text{otherwise}\\end{cases}`
+    """
+    results = []
+    for l in xrange(res.number_of_learners):
+        n_classes = len(res.results[0].actual_class)
+        n_results = len(res.results)
+        n_correct = 0.
+
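+        # count correct predictions label by label across all examples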
+        for r in res.results:
+            for i in xrange(n_classes):
+                if r.classes[l][i] == r.actual_class[i]:
+                    n_correct+=1
+        results.append(n_correct/n_classes/n_results)
+    return results
+
+
+
+################################################################################
+if __name__ == "__main__":
+    # Minimal self-test, mirroring docs/rst/code/mt-evaluate.py:
+    # cross-validate a multi-target majority learner and report the
+    # RMSE averaged over the four synthetic targets with given weights.
+    majority = Orange.multitarget.MultitargetLearner(
+        Orange.classification.majority.MajorityLearner(), name='Majority')
+    data = Orange.data.Table('multitarget-synthetic')
+    res = Orange.evaluation.testing.cross_validation([majority], data)
+    scores = mt_average_score(res, Orange.evaluation.scoring.RMSE,
+                              weights=[5, 2, 2, 1])
+    print '\n'.join('%12s\t%.4f' % r for r in zip(res.classifier_names, scores))

File docs/rst/Orange.multitarget.rst

 Multi-target prediction (``multitarget``)
 ###########################################
 
-Multi-target prediction tries to achieve better prediction accuracy or speed
-through prediction of multiple dependent variables at once. It works on
-:ref:`multi-target data <multiple-classes>`, which is also supported by
-Orange's tab file format using :ref:`multiclass directive <tab-delimited>`.
-
 .. toctree::
    :maxdepth: 1
+   :hidden:
 
    Orange.multitarget.tree
    Orange.multitarget.binary
    Orange.multitarget.neural
    Orange.regression.pls
    Orange.regression.earth
+   Orange.multitarget.scoring
+
+
+Multi-target prediction tries to achieve better prediction accuracy or speed
+through prediction of multiple dependent variables at once. It works on
+:ref:`multi-target data <multiple-classes>`, which is also supported by
+Orange's tab file format using :ref:`multiclass directive <tab-delimited>`.
+
+List of supported learners:
+
+* :doc:`Orange.multitarget.tree`
+* :doc:`Orange.multitarget.binary`
+* :doc:`Orange.multitarget.chain`
+* :doc:`Orange.multitarget.neural`
+* :doc:`Orange.regression.pls`
+* :doc:`Orange.regression.earth`
 
 For evaluation of multi-target methods, see the corresponding section in 
-:ref:`Orange.evaluation.scoring <mt-scoring>`.
+:doc:`Orange.multitarget.scoring`.
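+
+A minimal usage sketch (``multitarget-synthetic`` is the data set used in
+the example below; the binary relevance learner comes from this add-on)::
+
+    import Orange
+
+    data = Orange.data.Table('multitarget-synthetic')
+    learner = Orange.multitarget.binary.BinaryRelevanceLearner(
+        learner=Orange.classification.majority.MajorityLearner)
+    classifier = learner(data)
+    print classifier(data[0])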
 
 
 .. automodule:: Orange.multitarget

File docs/rst/Orange.multitarget.scoring.rst

+.. automodule:: Orange.multitarget.scoring

File docs/rst/code/mt-evaluate.py

+import Orange
+
+data = Orange.data.Table('multitarget-synthetic')
+
+majority = Orange.multitarget.MultitargetLearner(
+    Orange.classification.majority.MajorityLearner(), name='Majority')
+tree = Orange.multitarget.tree.MultiTreeLearner(max_depth=3, name='MT Tree')
+pls = Orange.multitarget.pls.PLSRegressionLearner(name='PLS')
+earth = Orange.multitarget.earth.EarthLearner(name='Earth')
+
+learners = [majority, tree, pls, earth]
+res = Orange.evaluation.testing.cross_validation(learners, data)
+rmse = Orange.evaluation.scoring.RMSE
+scores = Orange.evaluation.scoring.mt_average_score(
+            res, rmse, weights=[5,2,2,1])
+print 'Weighted RMSE scores:'
+print '\n'.join('%12s\t%.4f' % r for r in zip(res.classifier_names, scores))

File setup.py

AUTHOR = 'Bioinformatics Laboratory, FRI UL'
 AUTHOR_EMAIL = 'contact@orange.biolab.si'
 URL = 'http://orange.biolab.si/addons/'
-DOWNLOAD_URL = 'https://bitbucket.org/mlevar/orange-multitarget/downloads'
+DOWNLOAD_URL = 'https://bitbucket.org/biolab/orange-multitarget/downloads'
 LICENSE = 'GPLv3'
 
 KEYWORDS = (