Commits

Miran Levar  committed 98c1806

Scoring additions, preparation for Clustering Trees and multi-target addon

  • Parent commits 7ea3b3f

Files changed (5)

File Orange/ensemble/forest.py

 import random
 import copy
 from Orange.utils import deprecated_keywords
+from operator import add
 
 def _default_small_learner(attributes=None, rand=None, base=None):
     # tree learner assembled as suggested by Breiman (2001)
     return _RandomForestTreeLearner(base=base, rand=rand)
 
 def _wrap_learner(base, rand, randorange):
-    if base == None or isinstance(base, Orange.classification.tree.SimpleTreeLearner):
+    if base == None or isinstance(base, (Orange.classification.tree.SimpleTreeLearner, Orange.core.ClusteringTreeLearner)):
         return _default_simple_learner(base, randorange)
     elif isinstance(base, Orange.classification.tree.TreeLearner):
         return _default_small_learner(None, rand, base)
 
 _RandomForestSimpleTreeLearner = Orange.utils.deprecated_members({"weightID":"weight_id", "examples":"instances"})(_RandomForestSimpleTreeLearner)
 
-
 class _RandomForestTreeLearner(Orange.core.Learner):
     """ A learner which wraps an ordinary TreeLearner with
     a new split constructor.
     """
 
     __new__ = Orange.utils._orange__new__(Orange.core.Learner)
-
+    
     def __init__(self, trees=100, attributes=None,\
                     name='Random Forest', rand=None, callback=None, base_learner=None, learner=None):
         self.trees = trees
         return RandomForestClassifier(classifiers = classifiers, name=self.name,\
                     domain=instances.domain, class_var=instances.domain.class_var, \
                     class_vars=instances.domain.class_vars)
-           
+
+
 RandomForestLearner = Orange.utils.deprecated_members({"examples":"instances"})(RandomForestLearner)
 
 class RandomForestClassifier(orange.Classifier):
         :rtype: :class:`Orange.data.Value`, 
               :class:`Orange.statistics.Distribution` or a tuple with both
         """
-        from operator import add
 
-        instance = Orange.data.Instance(self.domain, instance)
         # get results to avoid multiple calls
         res_both = [c(instance, orange.GetBoth) for c in self.classifiers]
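
The dispatch above is what routes a user-supplied base learner when a forest is built. A minimal usage sketch (assuming the Orange 2.x API; Table, SimpleTreeLearner and the "iris" dataset are illustrative assumptions, not part of this commit):

    import Orange

    data = Orange.data.Table("iris")
    # A SimpleTreeLearner base goes through _default_simple_learner; after
    # this commit a ClusteringTreeLearner base takes the same path.
    base = Orange.classification.tree.SimpleTreeLearner()
    forest = Orange.ensemble.forest.RandomForestLearner(trees=50,
        base_learner=base)
    classifier = forest(data)
    print classifier(data[0])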
 

File Orange/evaluation/scoring.py

 #def mlc_hierarchical_loss(res):
 #    pass
 
-
-def mt_average_score(res, score, weights=None):
+def logloss(res):
     """
-    Compute individual scores for each target and return the (weighted) average.
-
-    One method can be used to compute scores for all targets or a list of
-    scoring methods can be passed to use different methods for different
-    targets. In the latter case, care has to be taken if the ranges of scoring
-    methods differ.
-    For example, when the first target is scored from -1 to 1 (1 best) and the
-    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
-    to a span of 1, and invert the second so that higher scores are better.
-
-    :param score: Single-target scoring method or a list of such methods
-                  (one for each target).
-    :param weights: List of real weights, one for each target,
-                    for a weighted average.
-
-    """
-    if not len(res.results):
-        raise ValueError, "Cannot compute the score: no examples."
-    if res.number_of_learners < 1:
-        return []
-    n_classes = len(res.results[0].actual_class)
-    if weights is None:
-        weights = [1.] * n_classes
-    if not isinstance(score, Iterable):
-        score = [score] * n_classes
-    elif len(score) != n_classes:
-        raise ValueError, "Number of scoring methods and targets do not match."
-    # save original classes
-    clsss = [te.classes for te in res.results]
-    aclsss = [te.actual_class for te in res.results]
-    # compute single target scores
-    single_scores = []
-    for i in range(n_classes):
-        for te, clss, aclss in zip(res.results, clsss, aclsss):
-            te.classes = [cls[i] for cls in clss]
-            te.actual_class = aclss[i]
-        single_scores.append(score[i](res))
-    # restore original classes
-    for te, clss, aclss in zip(res.results, clsss, aclsss):
-        te.classes = clss
-        te.actual_class = aclss
-    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
-        for scores in zip(*single_scores)]
-
-def mt_flattened_score(res, score):
-    """
-    Flatten (concatenate into a single list) the predictions of multiple
-    targets and compute a single-target score.
-    
-    :param score: Single-target scoring method.
-    """
-    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
-        res.classifier_names, class_values=res.class_values,
-        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
-        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
-    for te in res.results:
-        for i, ac in enumerate(te.actual_class):
-            te2 = Orange.evaluation.testing.TestedExample(
-                iteration_number=te.iteration_number, actual_class=ac)
-            for c, p in zip(te.classes, te.probabilities):
-                te2.add_result(c[i], p[i])
-            res2.results.append(te2)
-    return score(res2)
-
-def mt_global_accuracy(res):
-    """
-    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline`
-	
-    :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.`
+    Compute the logarithmic loss (LogLoss); n is the number of test results
+    and :math:`p_{i}` is the probability with which the classifier predicted
+    the actual class.
+    :math:`LogLoss = \\frac{1}{n}\\sum_{i=1}^{n} -\\max(\\log(p_{i}), \\log\\frac{1}{n}) \\newline`
     """
     results = []
-    for l in xrange(res.number_of_learners):
-        n_results = len(res.results)
-        n_correct = 0.
-
+    n_results = len(res.results)
+    min_log = math.log(1.0/n_results)
+    for l in xrange(res.number_of_learners):
+        temp = 0.0
         for r in res.results:
-            if list(r.classes[l]) == r.actual_class:
-                n_correct+=1
-
-        results.append(n_correct/n_results)
+            if not r.probabilities[l]:
+                raise ValueError, "Probabilities are needed to compute logloss"
+            temp -= max(math.log(max(r.probabilities[l][int(r.actual_class)], 1e-20)), min_log)
+
+        results.append(temp/n_results)
     return results
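
A quick way to exercise the new score, as a hedged sketch (cross_validation, NaiveLearner and the "voting" dataset are assumed from the Orange 2.x distribution and are not part of this diff):

    import Orange

    data = Orange.data.Table("voting")
    learners = [Orange.classification.bayes.NaiveLearner()]
    res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
    # One score per learner; lower is better. Each per-example term is
    # clamped at log(1/n), matching min_log above.
    print Orange.evaluation.scoring.logloss(res)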
 
 
-def mt_mean_accuracy(res):
+def mlc_F1_micro(res):
     """
-    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline`
-	
-    :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.`
+    :math:`F1_{micro} = 2 \\cdot \\frac{\\overline{precision} \\cdot \\overline{recall}}{\\overline{precision} + \\overline{recall}}`
     """
+
+    precision = mlc_precision(res)
+    recall = mlc_recall(res)
+    return [2 * p * r / (p + r) if p + r else 0.0
+            for p, r in zip(precision, recall)]
+
+
+def mlc_F1_macro(res):
+    """
+    :math:`F1_{macro} = \\frac{1}{d}\\sum_{j=1}^{d} 2 \\cdot \\frac{precision_j \\cdot recall_j}{precision_j + recall_j}`
+    """
+
     results = []
-    for l in xrange(res.number_of_learners):
-        n_classes = len(res.results[0].actual_class)
-        n_results = len(res.results)
-        n_correct = 0.
-
+    n_classes = len(res.results[0].actual_class)
+
+    for l in xrange(res.number_of_learners):
+        true_positive = [0.0] * n_classes
+        sum_fptp = [0.0] * n_classes
+        sum_fntp = [0.0] * n_classes
         for r in res.results:
-            for i in xrange(n_classes):
-                if r.classes[l][i] == r.actual_class[i]:
-                    n_correct+=1
-        results.append(n_correct/n_classes/n_results)
+            aclass = r.actual_class
+            for i, cls_val in enumerate(r.classes[l]):
+                if aclass[i] and cls_val:
+                    true_positive[i] += 1
+                if cls_val:
+                    sum_fptp[i] += 1
+                if aclass[i]:
+                    sum_fntp[i] += 1
+
+        results.append(sum(2 * (tp / fptp * tp / fntp) / (tp / fptp + tp / fntp)
+            if tp else 0.0 for tp, fptp, fntp in
+            zip(true_positive, sum_fptp, sum_fntp)) / n_classes)
     return results
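
The micro variant reuses the example-based mlc_precision and mlc_recall already in this module, while the macro variant averages per-label F1 scores. A hedged sketch of scoring a multi-label learner (BinaryRelevanceLearner and the "emotions" dataset are assumptions based on the Orange 2.x multi-label tools, not part of this diff):

    import Orange

    data = Orange.data.Table("emotions")
    learners = [Orange.multilabel.BinaryRelevanceLearner(
        base_learner=Orange.classification.bayes.NaiveLearner)]
    res = Orange.evaluation.testing.cross_validation(learners, data)
    print Orange.evaluation.scoring.mlc_F1_micro(res)
    print Orange.evaluation.scoring.mlc_F1_macro(res)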
 
 

File Orange/evaluation/testing.py

                 else:
                     self.converter = float
             elif test_type in (TEST_TYPE_MLC, TEST_TYPE_MULTITARGET):
+                self.class_values = [list(cv.values) if cv.var_type == cv.Discrete else None for cv in domain.class_vars]
                 self.labels = [var.name for var in domain.class_vars]
                 self.converter = mt_vals
 

File Orange/testing/unit/tests/test_display_name_mapping.py

 import orange, Orange
 
 class TestNameMapping(unittest.TestCase):
+
+    exempt = ["Orange.multitarget.tree",
+        ]
+
     def test_qualified_names(self):
         """ Test that qualified names of core C++ objects 
         map to the correct name in the Orange.* hierarchy.
         """
         for cls in orange.__dict__.values():
             if type(cls) == type:
+                if cls.__module__ in self.exempt:
+                    continue
                 try:
                     cls2 = eval(cls.__module__ + "." + cls.__name__)
                 except AttributeError as err:

File docs/reference/rst/Orange.evaluation.scoring.rst

 
 .. autofunction:: split_by_iterations
 
-
-.. _mt-scoring:
-
-============
-Multi-target
-============
-
-:doc:`Multi-target <Orange.multitarget>` classifiers predict values for
-multiple target classes. They can be used with standard
-:obj:`~Orange.evaluation.testing` procedures (e.g.
-:obj:`~Orange.evaluation.testing.Evaluation.cross_validation`), but require
-special scoring functions to compute a single score from the obtained
-:obj:`~Orange.evaluation.testing.ExperimentResults`.
-Since different targets can vary in importance depending on the experiment,
-some methods have options to indicate this e.g. through weights or customized
-distance functions. These can also be used for normalization in case target
-values do not have the same scales.
-
-.. autofunction:: mt_flattened_score
-.. autofunction:: mt_average_score
-
-The whole procedure of evaluating multi-target methods and computing
-the scores (RMSE errors) is shown in the following example
-(:download:`mt-evaluate.py <code/mt-evaluate.py>`). Because we consider
-the first target to be more important and the last not so much we will
-indicate this using appropriate weights.
-
-.. literalinclude:: code/mt-evaluate.py
-
-Which outputs::
-
-    Weighted RMSE scores:
-        Majority    0.8228
-          MTTree    0.3949
-             PLS    0.3021
-           Earth    0.2880
-
-Two more accuracy measures based on the article by Zaragoza et al.(2011); applicable to discrete classes:
-
-Global accuracy (accuracy per example) over d-dimensional class variable:
-
-.. autofunction:: mt_global_accuracy
-
-Mean accuracy (accuracy per class or per label) over d class variables: 
-
-.. autofunction:: mt_mean_accuracy   
-
-References
-==========
-
-Zaragoza, J.H., Sucar, L.E., Morales, E.F.,Bielza, C., Larranaga, P.  (2011). 'Bayesian Chain Classifiers for Multidimensional Classification', Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011),  pp:2192-2197.
-
 ==========================
 Multi-label classification
 ==========================