Commits

Miha Stajdohar committed ff9ed22

Made a script.

Comments (0)

Files changed (1)

examples/modelmix/mixzoo.py

 """
 .. index:: model map
 
-***************
-Case Study: Zoo
-***************
+========================
+Model map on Zoo Dataset
+========================
 
-TODO
+Script :download:`mixzoo.py <../../examples/modelmix/mixzoo.py>` builds a model map on 7 kinds of models. 3 are classic classification models:
+
+* Naive Bayes
+* k-Nearest Neighbour
+* Classification Tree
+
+where Classification Trees are taken from a Random Forest as proposed by Breiman.
+We also include 4 projections in 2-dimensional plane:
+
+* Supervised PCA
+* Radviz
+* Polyviz
+* Scatter plot
+
+Projections are wrapped into k-NN classifiers that predict on projected points. The reasoning is that good
+projections are those that separate points well (Leban et al. 2005 and 2006).
+
+Run the script with::
+
+  python mixzoo.py -n 500 .
+
+This should create a model map in the current folder.
 
 """
 import argparse
-import collections
 import os
 import sys
 
 import Orange
 import Orange.orng.orngVizRank as vr
 import orangecontrib.modelmaps as mm
-import orangecontrib.network as network
 
-parser = argparse.ArgumentParser(description='NGS reads demultiplexer.')
 
-parser.add_argument('output_dir', help='output directory')
-parser.add_argument('-n', type=int, default=500, help='maximum number of models of one model type')
+# Run the pipeline only when this file is executed as a script; a script's
+# ``__name__`` is '__main__', so the guard must compare against that value.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Build a model map on the Zoo dataset.')

-args = parser.parse_args()
+    parser.add_argument('output_dir', help='output directory')
+    parser.add_argument('-n', type=int, default=500, help='maximum number of models of one model type')

-build_map = mm.BuildModelMap('zoo', folds=10, model_limit=args.n, seed=42)
-data = build_map.data()
+    args = parser.parse_args()

-features = mm.get_feature_subsets(data.domain, args.n, seed=42)
+    build_map = mm.BuildModelMap('zoo', folds=10, model_limit=args.n, seed=42)
+    data = build_map.data()

-nfeatures = len(data.domain.features)
-max_scatterplots = (nfeatures ** 2 - nfeatures) / 2
-features_scatterplot = mm.get_feature_subsets_scatterplot(data.domain, max_scatterplots)
-final_models = []
+    features = mm.get_feature_subsets(data.domain, args.n, seed=42)
 
+    nfeatures = len(data.domain.features)
+    # Number of unordered feature pairs, n * (n - 1) / 2; floor division keeps
+    # the count an integer regardless of the active division semantics.
+    max_scatterplots = (nfeatures ** 2 - nfeatures) // 2
+    features_scatterplot = mm.get_feature_subsets_scatterplot(data.domain, max_scatterplots)
+    final_models = []
 
-def select_representatives(models):
-    print models[0].type
-    smx = build_map.build_model_matrix(models, mm.distance_euclidean)
-    nc, knn = 2, 1
-    while nc > 1:
-        net = mm.model_network(smx, knn=knn)
-        nc = len(network.nx.algorithms.components.connected_components(net))
-        print "  knn: {}, components: {}".format(knn, nc)
-        knn += 1
 
-    clusters = collections.defaultdict(list)
-    for node, cluster in network.community.label_propagation(net, seed=42).iteritems():
-        clusters[cluster].append(node)
+    def add(model_builder, feature_sets):
+        """Build one model per feature set and append the selected representatives to ``final_models``."""
+        # build models
+        models = [model_builder(features) for features in feature_sets]
+        # select representative models from graph clusters
+        representatives = build_map.select_representatives(models, mm.distance_euclidean)
+        final_models.extend(representatives)
 
-    print "  representatives: {}".format(len(clusters))
 
-    representatives = []
-    for nodes in clusters.values():
-        cmatrix = smx.getitems(nodes)
-        cdsts  = zip([sum(i) for i in cmatrix], nodes)
-        cmedian = min(cdsts)[1]
-        representatives.append(models[cmedian])
+    add(lambda f: build_map.build_projection_model(f, vr.LINEAR_PROJECTION), features)
+    add(lambda f: build_map.build_projection_model(f, vr.RADVIZ), features)
+    add(lambda f: build_map.build_projection_model(f, vr.POLYVIZ), features)
+    add(lambda f: build_map.build_projection_model(f, vr.SCATTERPLOT), features_scatterplot)
 
-    return models
+    learner = Orange.classification.bayes.NaiveLearner()
+    add(lambda f: build_map.build_model(f, learner), features)
 
+    learner = Orange.classification.knn.kNNLearner()
+    add(lambda f: build_map.build_model(f, learner), features)
 
-def add(model_builder, feature_sets):
-    # build models
-    models = [model_builder(features) for features in feature_sets]
-    # select representative models from graph clusters
-    representatives = select_representatives(models)
+    # Random forest models come from a single builder call rather than one
+    # call per feature subset, so they do not go through add().
+    models = build_map.build_rf_models(trees=args.n, max_depth=4, min_instances=5)
+    representatives = build_map.select_representatives(models, mm.distance_euclidean)
     final_models.extend(representatives)
 
+    table = build_map.build_model_data(final_models)
+    smx = build_map.build_model_matrix(final_models, mm.distance_euclidean)
 
-# add(lambda f: build_map.build_projection_model(f, vr.LINEAR_PROJECTION), features)
-# add(lambda f: build_map.build_projection_model(f, vr.RADVIZ), features)
-add(lambda f: build_map.build_projection_model(f, vr.POLYVIZ), features)
-# add(lambda f: build_map.build_projection_model(f, vr.SCATTERPLOT), features_scatterplot)
-
-# learner = Orange.classification.bayes.NaiveLearner()
-# add(lambda f: build_map.build_model(f, learner), features)
-#
-# learner = Orange.classification.knn.kNNLearner()
-# add(lambda f: build_map.build_model(f, learner), features)
-#
-# models = build_map.build_rf_models(trees=args.n, max_depth=4, min_instances=5)
-# representatives = select_representatives(models)
-# final_models.extend(representatives)
-
-table = build_map.build_model_data(final_models)
-smx = build_map.build_model_matrix(final_models, mm.distance_euclidean)
-
-mm.save(os.path.join(args.output_dir, "zoo_{}_{}".format(smx.dim, sys.platform)), smx, table, data)
-
-# graph = graph.subgraph(component)
-# smx = smx.getitems(component)
+    # The output base name encodes the model-matrix dimension and the platform.
+    mm.save(os.path.join(args.output_dir, "zoo_{}_{}".format(smx.dim, sys.platform)), smx, table, data)