Commits

Miran Levar committed 935cd55

Removed lingering multitarget files, made some fixes to scoring.

  • Parent commits 7c290ba

Files changed (4)

Orange/classification/neural.py

 .. index:: Neural Network Learner
 
 ***************************************
-Neural Network Learner
+Neural Network Learner  (``neural``)
 ***************************************
 
 

Orange/evaluation/scoring.py

                     actual += 1
             if actual:
                 recalls[i] += intersection / actual
-
     return [x / example_num for x in recalls]
 
 #def mlc_ranking_loss(res):
 
     precision = mlc_precision(res)
     recall = mlc_recall(res)
-    return [2 * p * r / (p + r) for p,r in zip(precision, recall)]
+    return [0.0 if p == 0 and r == 0 else 2 * p * r / (p + r) for p,r in zip(precision, recall)]
 
 
 def mlc_F1_macro(res):
     n_classes =  len(res.results[0].actual_class)
 
     for l in xrange(res.number_of_learners): 
-        true_positive = [0.0] * n_classes
-        sum_fptp = [0.0] * n_classes
-        sum_fntp = [0.0] * n_classes
+        true_positive = [0.0000001] * n_classes
+        sum_fptp = [0.0000001] * n_classes
+        sum_fntp = [0.0000001] * n_classes
         for r in res.results:
             aclass = r.actual_class
+
             for i, cls_val in enumerate(r.classes[l]):
-                if aclass[i] and cls_val:
+                if aclass[i]==1 and int(cls_val)==1:
                     true_positive[i] += 1
-                if cls_val:
+                if int(cls_val)==1:
                     sum_fptp[i] += 1
-                if aclass[i]:
+                if aclass[i]==1:
                     sum_fntp[i] += 1
 
         results.append(sum([ 2*(tp/fptp * tp/fntp)/(tp/fptp + tp/fntp) for tp, fptp, fntp in \

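The two scoring changes above guard the multi-label F1 computations against division by zero: the micro-averaged F1 returns 0 when both precision and recall are 0, and the macro-averaged F1 starts its per-class counters at a tiny epsilon. A minimal self-contained sketch of both guards (the names safe_f1 and macro_f1 are illustrative only and are not part of Orange.evaluation.scoring):

    # Illustrative sketch, not the Orange API.
    def safe_f1(p, r):
        # F1 is taken to be 0 when both precision and recall are 0,
        # instead of raising ZeroDivisionError.
        if p == 0 and r == 0:
            return 0.0
        return 2.0 * p * r / (p + r)

    def macro_f1(true_positive, predicted, actual, eps=1e-7):
        # Per-class precision/recall with a small epsilon so classes that
        # are never predicted (or never occur) do not divide by zero;
        # the per-class F1 scores are then averaged.
        f1s = []
        for tp, pred, act in zip(true_positive, predicted, actual):
            p = (tp + eps) / (pred + eps)
            r = (tp + eps) / (act + eps)
            f1s.append(2.0 * p * r / (p + r))
        return sum(f1s) / len(f1s)

    print safe_f1(0.0, 0.0)                 # 0.0 instead of a crash
    print macro_f1([3, 0], [4, 0], [5, 2])  # second class is never predicted
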
Orange/multitarget/__init__.py

-"""
-Wrapper for constructing multi-target learners
-==============================================
-
-This module also contains a wrapper, an auxiliary learner, that can be used
-to construct simple multi-target learners from standard learners designed
-for data with a single class. The wrapper uses the specified base learner
-to construct independent models for each class.
-
-.. index:: MultitargetLearner
-.. autoclass:: Orange.multitarget.MultitargetLearner
-    :members:
-    :show-inheritance:
-
-.. index:: MultitargetClassifier
-.. autoclass:: Orange.multitarget.MultitargetClassifier
-    :members:
-    :show-inheritance:
-
-Examples
-========
-
-The following example uses a simple multi-target data set (generated with
-:download:`generate_multitarget.py <code/generate_multitarget.py>`) to show
-some basic functionalities (part of
-:download:`multitarget.py <code/multitarget.py>`).
-
-.. literalinclude:: code/multitarget.py
-    :lines: 1-6
-
-Multi-target learners can build prediction models (classifiers)
-which then predict (multiple) class values for a new instance (continuation of
-:download:`multitarget.py <code/multitarget.py>`):
-
-.. literalinclude:: code/multitarget.py
-    :lines: 8-
-
-"""
-
-import Orange
-
-# Other algorithms which also work with multitarget data
-from Orange.regression import pls
-from Orange.regression import earth
-
-
-class MultitargetLearner(Orange.classification.Learner):
-    """
-    Wrapper for multitarget problems that constructs independent models
-    of a base learner for each class variable.
-
-    .. attribute:: learner
-
-        The base learner used to learn models for each class.
-    """
-
-    def __new__(cls, learner, data=None, weight=0, **kwargs):
-        self = Orange.classification.Learner.__new__(cls, **kwargs)
-        if data:
-            self.__init__(learner, **kwargs)
-            return self.__call__(data, weight)
-        else:
-            return self
-    
-    def __init__(self, learner, **kwargs):
-        """
-
-        :param learner: Base learner used to construct independent
-                        models for each class.
-        """
-
-        self.learner = learner
-        self.__dict__.update(kwargs)
-
-    def __call__(self, data, weight=0):
-        """
-        Learn independent models of the base learner for each class.
-
-        :param data: Multitarget data instances (with more than 1 class).
-        :type data: :class:`Orange.data.Table`
-
-        :param weight: Id of meta attribute with weights of instances
-        :type weight: :obj:`int`
-
-        :rtype: :class:`Orange.multitarget.MultitargetClassifier`
-        """
-
-        if not data.domain.class_vars:
-            raise Exception('No classes defined.')
-        
-        domains = [Orange.data.Domain(data.domain.attributes, y)
-                   for y in data.domain.class_vars]
-        classifiers = [self.learner(Orange.data.Table(dom, data), weight)
-                       for dom in domains]
-        return MultitargetClassifier(classifiers=classifiers, domains=domains)
-        
-    def __reduce__(self):
-        return type(self), (self.learner,), dict(self.__dict__)
-
-
-class MultitargetClassifier(Orange.classification.Classifier):
-    """
-    Multitarget classifier that returns a list of predictions from each
-    of the independent base classifiers.
-
-    .. attribute:: classifiers
-
-        List of individual classifiers for each class.
-    """
-
-    def __init__(self, classifiers, domains):
-        self.classifiers = classifiers
-        self.domains = domains
-
-    def __call__(self, instance, return_type=Orange.core.GetValue):
-        """
-        :param instance: Instance to be classified.
-        :type instance: :class:`Orange.data.Instance`
-
-        :param return_type: One of
-            :class:`Orange.classification.Classifier.GetValue`,
-            :class:`Orange.classification.Classifier.GetProbabilities` or
-            :class:`Orange.classification.Classifier.GetBoth`
-        """
-
-        predictions = [c(Orange.data.Instance(dom, instance), return_type)
-                       for c, dom in zip(self.classifiers, self.domains)]
-        return zip(*predictions) if return_type == Orange.core.GetBoth \
-               else predictions
-
-    def __reduce__(self):
-        return type(self), (self.classifiers, self.domains), dict(self.__dict__)
-

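The removed docstring above documents the wrapper: MultitargetLearner builds one model of the base learner per class variable, and MultitargetClassifier returns one prediction per class. For reference, a minimal usage sketch as it would have looked before this commit, mirroring the __main__ block of tree.py below; it assumes the 'multitarget-synthetic' data set, and whether Orange.multitarget still exposes the wrapper after this removal depends on where the code now lives:

    import Orange

    data = Orange.data.Table('multitarget-synthetic')
    mt_majority = Orange.multitarget.MultitargetLearner(
        Orange.classification.majority.MajorityLearner())
    c = mt_majority(data)    # one MajorityLearner model per class variable
    print c(data[0])         # a list with one predicted value per class
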
Orange/multitarget/tree.py

-"""
-.. index:: Multi-target Tree Learner
-
-***************************************
-Multi-target Tree Learner
-***************************************
-
-To use the tree learning algorithm for multi-target data, standard
-orange trees (:class:`Orange.classification.tree.TreeLearner`) can be used.
-Only the :obj:`~Orange.classification.tree.TreeLearner.measure` for feature
-scoring and the :obj:`~Orange.classification.tree.TreeLearner.node_learner`
-components have to be chosen so that they work on multi-target data domains.
-
-This module provides one such measure (:class:`MultitargetVariance`) that
-can be used and a helper class :class:`MultiTreeLearner` which extends
-:class:`~Orange.classification.tree.TreeLearner` and is the same in all
-aspects except for different (multi-target) defaults for
-:obj:`~Orange.classification.tree.TreeLearner.measure` and
-:obj:`~Orange.classification.tree.TreeLearner.node_learner`.
-
-Examples
-========
-
-The following example demonstrates how to build a prediction model with
-MultitargetTreeLearner and use it to predict (multiple) class values for
-a given instance (:download:`multitarget.py <code/multitarget.py>`):
-
-.. literalinclude:: code/multitarget.py
-    :lines: 1-4, 10-12
-
-
-.. index:: Multi-target Variance 
-.. autoclass:: Orange.multitarget.tree.MultitargetVariance
-    :members:
-    :show-inheritance:
-
-.. index:: Multi-target Tree Learner
-.. autoclass:: Orange.multitarget.tree.MultiTreeLearner
-    :members:
-    :show-inheritance:
-
-.. index:: Multi-target Tree Classifier
-.. autoclass:: Orange.multitarget.tree.MultiTree
-    :members:
-    :show-inheritance:
-
-"""
-
-from operator import itemgetter
-
-import Orange
-import numpy as np
-
-
-def weighted_variance(X, weights=None):
-    """Computes the variance using a weighted distance to the centroid."""
-    if not weights:
-        weights = [1] * len(X[0])
-    X = X * np.array(weights)
-    return np.sum(np.sum((X - np.mean(X, 0))**2, 1))
-
-class MultitargetVariance(Orange.feature.scoring.Score):
-    """
-    A multi-target score that ranks features based on the average class
-    variance of the subsets.
-
-    To compute it, a prototype has to be defined for each subset. Here, it
-    is just the mean vector of class variables. Then the sum of squared
-    distances to the prototypes is computed in each subset. The final score
-    is obtained as the average of subset variances (weighted, to account for
-    subset sizes).
-    
-    Weights can be passed to the constructor to normalize classes with values
-    of different magnitudes or to increase the importance of some classes. In
-    this case, class values are first scaled according to the given weights.
-    """
-
-    def __init__(self, weights=None):
-        """
-        :param weights: Weights of the class variables used when computing
-                        distances. If None, all weights are set to 1.
-        :type weights: list
-        """
-
-        # Types of classes allowed
-        self.handles_discrete = True
-        ## TODO: for discrete classes with >2 values entropy should be used
-        ## instead of variance
-        self.handles_continuous = True
-        # Can handle continuous features
-        self.computes_thresholds = True
-        # Needs instances
-        self.needs = Orange.feature.scoring.Score.Generator
-
-        self.weights = weights
-
-
-    def threshold_function(self, feature, data, cont_distrib=None, weights=0):
-        """
-        Evaluates possible splits of a continuous feature into a binary one
-        and scores them.
-        
-        :param feature: Continuous feature to be split.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set to be split using the given continuous
-                     feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`list` of :obj:`tuples <tuple>`
-                 [(threshold, score, None),]
-        """
-
-        f = data.domain[feature]
-        values = sorted(set(ins[f].value for ins in data))
-        ts = [(v1 + v2) / 2. for v1, v2 in zip(values, values[1:])]
-        if len(ts) > 40:
-            ts = ts[::len(ts)/20]
-        scores = []
-        for t in ts:
-            bf = Orange.feature.discretization.IntervalDiscretizer(
-                points=[t]).construct_variable(f)
-            dom2 = Orange.data.Domain([bf], class_vars=data.domain.class_vars)
-            data2 = Orange.data.Table(dom2, data)
-            scores.append((t, self.__call__(bf, data2)))
-        return scores
-
-    def best_threshold(self, feature, data):
-        """
-        Computes the best threshold for a split of a continuous feature.
-
-        :param feature: Continuous feature to be split.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set to be split using the given continuous
-                     feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`tuple` (threshold, score, None)
-        """
-
-        scores = self.threshold_function(feature, data)
-        threshold, score = max(scores, key=itemgetter(1))
-        return (threshold, score, None)
-
-    def __call__(self, feature, data, apriori_class_distribution=None,
-                 weights=0):
-        """
-        :param feature: The feature to be scored.
-        :type feature: :class:`Orange.feature.Descriptor`
-
-        :param data: The data set on which to score the feature.
-        :type data: :class:`Orange.data.Table`
-
-        :return: :obj:`float`
-        """
-
-        split = dict((ins[feature].value, []) for ins in data)
-        for ins in data:
-            split[ins[feature].value].append(ins.get_classes())
-        score = -sum(weighted_variance(x, self.weights) * len(x)
-                     for x in split.values())
-        return score
-
-
-class MultiTreeLearner(Orange.classification.tree.TreeLearner):
-    """
-    MultiTreeLearner is a multi-target version of a tree learner. It is the
-    same as :class:`~Orange.classification.tree.TreeLearner`, except for the
-    default values of two parameters:
-    
-    .. attribute:: measure
-        
-        A multi-target score is used by default: :class:`MultitargetVariance`.
-
-    .. attribute:: node_learner
-        
-        Standard trees use
-        :class:`~Orange.classification.majority.MajorityLearner`
-        to construct prediction models in the leaves of the tree.
-        MultiTreeLearner uses the multi-target equivalent which can be 
-        obtained simply by wrapping the majority learner:
-
-        :class:`Orange.multitarget.MultitargetLearner`
-        (:class:`Orange.classification.majority.MajorityLearner()`).
-
-    """
-
-    def __init__(self, **kwargs):
-        """
-        The constructor passes all arguments to
-        :class:`~Orange.classification.tree.TreeLearner`'s constructor
-        :obj:`Orange.classification.tree.TreeLearner.__init__`.
-        """
-        
-        if 'measure' not in kwargs:
-            kwargs['measure'] = MultitargetVariance()
-        if 'node_learner' not in kwargs:
-            kwargs['node_learner'] = Orange.multitarget.MultitargetLearner(
-                Orange.classification.majority.MajorityLearner())
-        Orange.classification.tree.TreeLearner.__init__(self, **kwargs)
-
-    def __call__(self, data, weight=0):
-        """
-        :param data: Data instances to learn from.
-        :type data: :class:`Orange.data.Table`
-
-        :param weight: Id of meta attribute with weights of instances.
-        :type weight: :obj:`int`
-        """
-        
-        # Use the class, if data does not have class_vars
-        if not data.domain.class_vars and data.domain.class_var:
-            dom = Orange.data.Domain(data.domain.features,
-                data.domain.class_var, class_vars=[data.domain.class_var])
-            data = Orange.data.Table(dom, data)
-
-        # Check for missing class values in data
-        for ins in data:
-            for cval in ins.get_classes():
-                if cval.is_special():
-                    raise ValueError('Data has missing class values.')
-
-        # TreeLearner does not work on class-less domains,
-        # so we set the class if necessary
-        if not data.domain.class_var and data.domain.class_vars:
-            dom = Orange.data.Domain(data.domain.features,
-                data.domain.class_vars[0], class_vars=data.domain.class_vars)
-            data = Orange.data.Table(dom, data)
-
-        tree = Orange.classification.tree.TreeLearner.__call__(
-            self, data, weight)
-        return MultiTree(base_classifier=tree)
-
-class MultiTree(Orange.classification.tree.TreeClassifier):
-    """
-    MultiTree classifier is almost the same as the base class it extends
-    (:class:`~Orange.classification.tree.TreeClassifier`). Only the
-    :obj:`__call__` method is modified so it works with multi-target data.
-    """
-
-    def __call__(self, instance, return_type=Orange.core.GetValue):
-        """
-        :param instance: Instance to be classified.
-        :type instance: :class:`Orange.data.Instance`
-
-        :param return_type: One of
-            :class:`Orange.classification.Classifier.GetValue`,
-            :class:`Orange.classification.Classifier.GetProbabilities` or
-            :class:`Orange.classification.Classifier.GetBoth`
-        """
-
-        node = self.descender(self.tree, instance)[0]
-        return node.node_classifier(instance, return_type)
-
-
-if __name__ == '__main__':
-    data = Orange.data.Table('multitarget-synthetic')
-    print 'Actual classes:\n', data[0].get_classes()
-    
-    majority = Orange.classification.majority.MajorityLearner()
-    mt_majority = Orange.multitarget.MultitargetLearner(majority)
-    c_mtm = mt_majority(data)
-    print 'Majority predictions:\n', c_mtm(data[0])
-
-    mt_tree = MultiTreeLearner(max_depth=3)
-    c_mtt = mt_tree(data)
-    print 'Multi-target Tree predictions:\n', c_mtt(data[0])
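
The removed MultitargetVariance score, documented above, rates a feature as the negative size-weighted sum of within-subset variances of the class vectors, where each subset's variance is the sum of squared distances to its mean (optionally after per-class weighting). A small numeric sketch of that computation, using the same formula as the removed weighted_variance helper; the example subsets are made up:

    import numpy as np

    def weighted_variance(X, weights=None):
        # Sum of squared distances of (optionally weighted) class vectors
        # to their mean -- same formula as the removed helper above.
        X = np.asarray(X, dtype=float)
        if weights:
            X = X * np.array(weights)
        return np.sum(np.sum((X - np.mean(X, 0)) ** 2, 1))

    # Two made-up subsets induced by a binary split; rows are class vectors.
    left  = [[1.0, 0.0], [1.1, 0.1]]
    right = [[0.0, 1.0], [0.2, 0.9], [0.1, 1.1]]
    score = -sum(weighted_variance(s) * len(s) for s in (left, right))
    print score   # closer to 0 means purer subsets, i.e. a better split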