Commits

Miran Levar committed e2c9a61

Expanded documentation, made some fixes.

Files changed (20)

_multitarget/binary.py

 .. index:: Binary Relevance Learner
 
 ***************************************
-Binary Relevance Learner
+Binary Relevance Learner (``binary``)
 ***************************************
 
 

_multitarget/chain.py

 .. index:: Classifier Chain Learner
 
 ***************************************
-Classifier Chain Learner
+Classifier Chain Learner (``chain``)
 ***************************************
 
 
     :members:
     :show-inheritance:
 
-
+***************************************
 Ensemble Classifier Chain Learner
-###################################
+***************************************
 
 
 .. index:: Multi-target Ensemble Classifier Chain Learner
         else:
             return self
 
-    def __init__(self, learner=None, name="Classifier Chain", rand=None, callback=None, class_order=None, **kwargs):
+    def __init__(self, learner=None, name="Classifier Chain", rand=None, callback=None, class_order=None, actual_values=True, **kwargs):
         self.name = name
         self.rand = rand
         self.callback = callback
         self.class_order = class_order
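+        # when actual_values is False, classifiers later in the chain are
+        # trained on the predicted values of the preceding targets rather
+        # than on their actual class values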
+        self.actual_values = actual_values
 
         if not learner:
             raise TypeError("Wrong specification, learner not defined")
             data = Orange.data.Table(domains[i], instances)
 
             classifiers[i] = learner(data, weight)
-            
+
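+            # replace the actual value of the class variable just modelled
+            # with this classifier's prediction, so subsequent classifiers
+            # in the chain are trained on predicted rather than actual values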
+            if not self.actual_values:
+                for j in xrange(len(instances)):
+                    instances[j][-1] = classifiers[i](data[j])
+
             # updates domain to include class_var in features
             instances.change_domain(Orange.data.Domain(instances.domain, False, \
                 class_vars=instances.domain.class_vars))
         else:
             return self
 
-    def __init__(self, n_chains=50, sample_size=0.25, learner=None, name="Ensemble CChain", rand=None, callback=None):
+    def __init__(self, n_chains=50, sample_size=0.25, learner=None, actual_values=True, name="Ensemble CChain", rand=None, callback=None):
         self.n_chains = n_chains
         self.sample_size = sample_size
         self.name = name
         self.rand = rand
         self.callback = callback
+        self.actual_values = actual_values
 
         if not learner:
             raise TypeError("Wrong specification, learner not defined")
             self.rand.shuffle(indices)
             data = instances.select_ref(indices,1)
 
-            learner = ClassifierChainLearner(learner = self.learner, rand=self.rand) # TODO might work in one step
+            learner = ClassifierChainLearner(learner=self.learner, actual_values=self.actual_values, rand=self.rand) # TODO might work in one step
 
             classifiers.append(learner(data, weight, copy.copy(class_order)))
 
     global_timer = time.time()
     data = Orange.data.Table('multitarget:bridges.tab')
 
-    cl1 = EnsembleClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, n_chains=50, sample_size=0.25, name="ECC T", rand = random.seed(time.time()))
-    cl2 = EnsembleClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, n_chains=50, sample_size=0.25, name="ECC M", rand = random.seed(time.time()))
-    cl3 = ClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, name="CC T")
-    cl4 = ClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, name="CC M")
+    cl1 = ClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, name="CChain - Tree")
+    cl2 = ClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, name="CChain - Maj")
+    cl3 = EnsembleClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, n_chains=50, sample_size=0.25, name="Ensemble CC - Tree")
+    cl4 = EnsembleClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, n_chains=50, sample_size=0.25, name="Ensemble CC - Maj")
+    
+    learners = [cl1,cl2,cl3,cl4]
 
-    res = Orange.evaluation.testing.cross_validation([cl1,cl2,cl3,cl4],data)
+    results = Orange.evaluation.testing.cross_validation(learners, data)
 
-    scores = Orange.multitarget.scoring.mt_average_score(res,Orange.evaluation.scoring.RMSE)
-   
-    for i in range(len(scores)):
-        print res.classifierNames[i], scores[i]
+
+    print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+    for i in range(len(learners)):
+        print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+        Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+        Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+        Orange.multitarget.scoring.mt_global_accuracy(results)[i])
+
+
 
     print "--DONE %.2f --" % (time.time()-global_timer)

_multitarget/neural.py

 """
 .. index:: Multi-target Neural Network Learner
 
-***************************************
-Multi-target Neural Network Learner
-***************************************
+********************************************************
+Multi-target Neural Network Learner (``neural``)
+********************************************************
 
-Example of multi-target  usage:
 
-.. literalinclude:: ../../_multitarget/neural.py
-    :lines: 17-
 """
 
 from Orange.classification.neural import NeuralNetworkLearner, NeuralNetworkClassifier
     print "STARTED"
     global_timer = time.time()
 
-    l = Orange.multitarget.neural.NeuralNetworkLearner()
+    l = Orange.multitarget.neural.NeuralNetworkLearner(n_mid = 20, reg_fact = 0.1, max_iter=100)
 
     data = Orange.data.Table('multitarget:emotions.tab')
     res = Orange.evaluation.testing.cross_validation([l],data, 3)

_multitarget/pls.py

 
 
 ***************************************
-PLS Classification Learner
+PLS Classification Learner (``pls``)
 ***************************************
 
 

_multitarget/scoring.py

 
 
 ***************************************
-Multi-target Scoring
+Multi-target Scoring (``scoring``)
 ***************************************
 
 :doc:`Multi-target <Orange.multitarget>` classifiers predict values for
              PLS    0.3021
            Earth    0.2880
 
-Two more accuracy measures based on the article by Zaragoza et al.(2011); applicable to discrete classes:
+Two more accuracy measures, based on the article by Zaragoza et al. [1]_, are applicable to discrete classes:
 
 Global accuracy (accuracy per example) over d-dimensional class variable:
 
 .. autofunction:: mt_mean_accuracy   
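+
+A minimal illustration of the difference between the two measures, using
+hypothetical predictions rather than the output of an actual learner::
+
+    # two examples, three class variables each
+    true      = [['a', 'x', '1'], ['b', 'y', '2']]
+    predicted = [['a', 'x', '2'], ['b', 'z', '2']]
+
+    # mean accuracy averages the per-variable accuracies:
+    #   (2/2 + 1/2 + 1/2) / 3 = 0.67
+    # global accuracy is the share of examples with all values correct:
+    #   0/2 = 0.00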
 
 References
-==========
+============
+.. [1] Zaragoza, J.H., Sucar, L.E., Morales, E.F., Bielza, C., Larranaga, P. (2011). 'Bayesian Chain Classifiers for Multidimensional Classification',
+         Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011), pp. 2192-2197.
 
-Zaragoza, J.H., Sucar, L.E., Morales, E.F.,Bielza, C., Larranaga, P.  (2011). 'Bayesian Chain Classifiers for Multidimensional Classification', Proc. of the International Joint Conference on Artificial Intelligence (IJCAI-2011),  pp:2192-2197.
+
+
 
 """
 

_multitarget/tree.py

 .. index:: Clustering Tree Learner
 
 ***************************************
-Clustering Tree Learner
+Clustering Tree Learner (``tree``)
 ***************************************
 
 :obj:`ClusteringTreeLearner` is an implementation of classification and regression
 trees, based on the :obj:`SimpleTreeLearner`. It is implemented in C++ for speed and low memory usage.
 Clustering trees work by splitting the data into clusters based on attributes. The attribute provides the optimal split based on a measure, 
-the default used in this implementation is the Euclidean distance between the centroids of clusters, which we try to maximize.
+the default used in this implementation being the Euclidean distance between the centroids of the resulting clusters, which the learner tries to maximize. Additional measures
+are implemented; more information on them can be found in the parameter descriptions.
+
+The implementation is based on the article by Blockeel et al. [1]_
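+
+A minimal sketch of the default split measure, assuming continuous class variables
+and a candidate split that has already divided the instances into two groups; the
+helpers below are purely illustrative and not part of the module::
+
+    import math
+
+    def centroid(rows):
+        # mean of each class variable over the instances in one cluster
+        return [sum(col) / float(len(rows)) for col in zip(*rows)]
+
+    def split_score(left, right):
+        # Euclidean distance between the centroids of the two clusters;
+        # the attribute whose split maximizes this score is chosen
+        c1, c2 = centroid(left), centroid(right)
+        return math.sqrt(sum((a - b) ** 2 for a, b in zip(c1, c2)))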
 
 :obj:`ClusteringTreeLearner` was developed for speeding up the construction
 of random forests, but can also be used as a standalone tree learner.
 .. literalinclude:: code/clustering_tree.py
 
 
+
+References
+============
+.. [1] H. Blockeel, L. De Raedt, and J. Ramon, "Top-Down Induction of Clustering Trees", 
+        In Proceedings of the Fifteenth International Conference on Machine Learning (ICML '98), 55-63, 1998.
+
 """
 
 

docs/rst/Orange.multitarget.binary.rst

-.. automodule:: Orange.multitarget.binary
+.. automodule:: Orange.multitarget.binary
+
+*************
+Examples
+*************
+.. literalinclude:: code/binary.py

docs/rst/Orange.multitarget.chain.rst

 .. automodule:: Orange.multitarget.chain
+
+*************
+Examples
+*************
+.. literalinclude:: code/chain.py

docs/rst/Orange.multitarget.neural.rst

 
 
 
-.. automodule:: Orange.multitarget.neural
+.. automodule:: Orange.multitarget.neural
+
+
+Example of multi-target usage:
+
+.. literalinclude:: code/neural.py

docs/rst/Orange.multitarget.pls.rst

 ***************************************
 
 .. automodule:: Orange.regression.pls
+
+*************
+Examples
+*************
+.. literalinclude:: code/pls.py

docs/rst/Orange.multitarget.scoring.rst

-.. automodule:: Orange.multitarget.scoring
+.. automodule:: Orange.multitarget.scoring
+
+*************
+Examples
+*************
+.. literalinclude:: code/scoring.py

docs/rst/Orange.regression.earth.rst

-.. automodule:: Orange.regression.earth
+.. automodule:: Orange.regression.earth
+
+*************
+Examples
+*************
+.. literalinclude:: code/earth.py

docs/rst/code/binary.py

+import Orange
+
+data = Orange.data.Table('multitarget:bridges.tab')
+
+cl1 = Orange.multitarget.binary.BinaryRelevanceLearner(learner = Orange.classification.majority.MajorityLearner, name="Binary - Maj")
+cl2 = Orange.multitarget.binary.BinaryRelevanceLearner(learner = Orange.classification.tree.SimpleTreeLearner, name="Binary - Tree")
+
+learners = [cl1,cl2]
+
+results = Orange.evaluation.testing.cross_validation(learners, data)
+
+print "Classification - bridges.tab"
+print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])

docs/rst/code/chain.py

+import Orange
+
+data = Orange.data.Table('multitarget:bridges.tab')
+
+cl1 = Orange.multitarget.chain.ClassifierChainLearner(learner = Orange.classification.majority.MajorityLearner, name="CChain - Maj")
+cl2 = Orange.multitarget.chain.ClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, name="CChain - Tree")
+cl3 = Orange.multitarget.chain.EnsembleClassifierChainLearner(learner = Orange.classification.tree.SimpleTreeLearner, n_chains=50, sample_size=0.25, name="Ensemble CC - Tree")
+
+learners = [cl1,cl2,cl3]
+
+results = Orange.evaluation.testing.cross_validation(learners, data)
+
+print "Classification - bridges.tab"
+print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])

docs/rst/code/clustering_tree.py

 	max_depth = 50, min_majority = 0.6, min_instances = 5, 
 	method = Orange.multitarget.tree.silhouette, name = "CT silhouette")
 
+# Gini index should be used when working with nominal class variables
 ct4 = Orange.multitarget.tree.ClusteringTreeLearner(
-	max_depth = 50, min_majority = 0.6, min_instances = 5, 
+	max_depth = 50, min_majority = 0.4, min_instances = 5, 
 	method = Orange.multitarget.tree.gini_index, name = "CT gini index")
 
 
 results = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
 
 print "Classification - bridges.tab"
-print "%17s  %6s  %8s  %8s" % ("Learner", "RMSE", "Mean Acc", "Glob Acc")
+print "%17s  %6s  %8s  %8s" % ("Learner", "LogLoss", "Mean Acc", "Glob Acc")
 for i in range(len(learners)):
     print "%17s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
-    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.RMSE)[i],
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
     Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
     Orange.multitarget.scoring.mt_global_accuracy(results)[i])
 

docs/rst/code/earth.py

+import Orange
+
+l1 = Orange.multitarget.earth.EarthLearner(name="earth")
+l2 = Orange.multitarget.binary.BinaryRelevanceLearner(
+	learner = Orange.regression.mean.MeanLearner, name = "Majority")
+learners = [l1, l2]
+# multitarget-synthetic is a small regression data set with several continuous class variables
+data = Orange.data.Table('multitarget-synthetic')
+
+results = Orange.evaluation.testing.cross_validation(learners, data, 3)
+
+print "Regression - multitarget-synthetic.tab"
+print "%18s  %6s" % ("Learner    ", "RMSE")
+for i in range(len(learners)):
+    print "%18s  %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.RMSE)[i])
+    

docs/rst/code/mt-evaluate.py

 data = Orange.data.Table('multitarget-synthetic')
 
 majority = Orange.multitarget.binary.BinaryRelevanceLearner(learner=Orange.classification.majority.MajorityLearner(), name='Majority')
-tree = Orange.multitarget.tree.ClusteringTreeLearner(min_mse=1e-10, min_instances=3, name='Clust Tree')
+tree = Orange.multitarget.tree.ClusteringTreeLearner(min_MSE=1e-10, min_instances=3, name='Clust Tree')
 pls = Orange.multitarget.pls.PLSRegressionLearner(name='PLS')
 earth = Orange.multitarget.earth.EarthLearner(name='Earth')
 
 learners = [majority, tree, pls, earth]
 res = Orange.evaluation.testing.cross_validation(learners, data)
 rmse = Orange.evaluation.scoring.RMSE
-scores = Orange.evaluation.scoring.mt_average_score(
+scores = Orange.multitarget.scoring.mt_average_score(
             res, rmse, weights=[5,2,2,1])
 print 'Weighted RMSE scores:'
 print '\n'.join('%12s\t%.4f' % r for r in zip(res.classifier_names, scores))

docs/rst/code/neural.py

+import Orange
+
+l1 = Orange.multitarget.neural.NeuralNetworkLearner(n_mid=15, reg_fact=0.1, max_iter=100, name="Neural Network")
+l2 = Orange.multitarget.binary.BinaryRelevanceLearner(
+	learner = Orange.classification.majority.MajorityLearner, name = "Majority")
+learners = [l1, l2]
+
+data = Orange.data.Table('multitarget:flare.tab')
+
+results = Orange.evaluation.testing.cross_validation(learners, data, 3)
+
+print "Classification - flare.tab"
+print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])
+
+
+# Neural networks do not work with missing values, so the missing values are imputed first
+data = Orange.data.Table('multitarget:bridges.tab')
+imputer = Orange.feature.imputation.AverageConstructor()
+imputer = imputer(data)
+imp_data = imputer(data)
+
+results = Orange.evaluation.testing.cross_validation(learners, imp_data, 3)
+
+print "Classification - imputed bridges.tab"
+print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])

docs/rst/code/pls.py

+import Orange
+
+l1 = Orange.multitarget.pls.PLSClassificationLearner(name="PLS")
+l2 = Orange.multitarget.binary.BinaryRelevanceLearner(
+	learner = Orange.classification.majority.MajorityLearner, name = "Majority")
+learners = [l1, l2]
+
+data = Orange.data.Table('multitarget:flare.tab')
+
+results = Orange.evaluation.testing.cross_validation(learners, data, 3)
+
+print "Classification - flare.tab"
+print "%18s  %6s  %8s  %8s" % ("Learner    ", "LogLoss", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s  %1.4f    %1.4f    %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])
+
+# REGRESSION
+l1 = Orange.multitarget.pls.PLSRegressionLearner(name="PLS")
+l2 = Orange.multitarget.binary.BinaryRelevanceLearner(
+	learner = Orange.regression.mean.MeanLearner, name = "Majority")
+learners = [l1, l2]
+# PLS does not work with missing values, so a data set without them is used here
+data = Orange.data.Table('multitarget-synthetic')
+
+results = Orange.evaluation.testing.cross_validation(learners, data, 3)
+
+print "Regression - multitarget-synthetic.tab"
+print "%18s  %6s" % ("Learner    ", "RMSE")
+for i in range(len(learners)):
+    print "%18s  %1.4f" % (learners[i].name,
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.RMSE)[i])
+    

docs/rst/code/scoring.py

+import Orange
+
+data = Orange.data.Table('multitarget:bridges.tab')
+
+cl1 = Orange.multitarget.binary.BinaryRelevanceLearner(learner = Orange.classification.majority.MajorityLearner, name="Majority")
+cl2 = Orange.multitarget.tree.ClusteringTreeLearner(name="CTree")
+
+learners = [cl1,cl2]
+
+results = Orange.evaluation.testing.cross_validation(learners, data)
+
+print "%18s  %7s    %6s  %10s   %8s  %8s" % ("Learner    ", "LogLoss", "Brier", "Inf. Score", "Mean Acc", "Glob Acc")
+for i in range(len(learners)):
+    print "%18s   %1.4f    %1.4f     %+2.4f     %1.4f    %1.4f" % (learners[i].name,
+
+    # Calculate average logloss
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.logloss)[i],
+    # Calculate average Brier score
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.Brier_score)[i],
+    # Calculate average Information Score
+    Orange.multitarget.scoring.mt_average_score(results, Orange.evaluation.scoring.IS)[i],
+    # Calculate mean accuracy
+    Orange.multitarget.scoring.mt_mean_accuracy(results)[i],
+    # Calculate global accuracy
+    Orange.multitarget.scoring.mt_global_accuracy(results)[i])