# 1. Miha Stajdohar
#   2. orange-modelmaps
#
# Source
#
# orange-modelmaps / examples / modelmix / mixzoo.py

"""
.. index:: model map

***************
Case Study: Zoo
***************

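This script builds a model map that mixes several model types on the *zoo*
data set: linear projection, RadViz, PolyViz and scatter plot projection
models, naive Bayes and k-nearest neighbours classifiers, and random forest
models. A model data table and a model matrix (using
``mm.distance_euclidean``) are built from all models and saved to the output
directory given on the command line.

TODO: expand this case study.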

"""
import argparse
import os
import sys

import Orange
import Orange.orng.orngVizRank as vr
import orangecontrib.modelmaps as mm


# Command-line interface: the directory to write results to and the value
# passed on as the per-model-type limit (-n).
parser = argparse.ArgumentParser(
    description='Build a mixed model map for the zoo data set.')

parser.add_argument('output_dir', help='output directory')
parser.add_argument('-n', type=int, default=500, help='maximum number of models of one model type')

args = parser.parse_args()

# Model builder for the 'zoo' data set; the fixed seed keeps runs repeatable
# as far as the builder honours it.
build_map = mm.BuildModelMap('zoo', folds=10, model_limit=args.n, seed=42)
data = build_map.data()

# Feature subsets requested with args.n as the limit
# (presumably the maximum number of subsets -- TODO confirm).
features = mm.get_feature_subsets(data.domain, args.n, seed=42)

# Number of unordered feature pairs, n * (n - 1) / 2.  The numerator is
# always even, so floor division is exact and keeps the count an integer
# under both Python 2 and Python 3 (true division would yield a float).
nfeatures = len(data.domain.features)
max_scatterplots = (nfeatures ** 2 - nfeatures) // 2
features_scatterplot = mm.get_feature_subsets_scatterplot(data.domain, max_scatterplots)

# Collects the selected models of every type, in the order they are built.
final_models = []


def select_representatives(models):
    """Return the representative models chosen from ``models``.

    Currently a pass-through: the very same object that was passed in is
    returned unchanged, so every model counts as a representative.
    """
    chosen = models
    return chosen


def add(model_builder, feature_sets):
    """Build one model per feature set and record the selected ones.

    ``model_builder`` is called once with each item of ``feature_sets``, in
    order.  The resulting models are passed through
    ``select_representatives`` and the outcome is appended to the
    module-level ``final_models`` list.  Nothing is returned.
    """
    built = []
    for subset in feature_sets:
        built.append(model_builder(subset))

    # Keep only the representatives of the freshly built models.
    final_models.extend(select_representatives(built))


# Projection models built on the general feature subsets.  Each lambda is
# invoked inside add() before the loop advances, so it sees the current
# projection type.
for projection in (vr.LINEAR_PROJECTION, vr.RADVIZ, vr.POLYVIZ):
    add(lambda f: build_map.build_projection_model(f, projection), features)

# Scatter plot projections use their own feature subsets.
add(lambda f: build_map.build_projection_model(f, vr.SCATTERPLOT), features_scatterplot)

# Classifier models: naive Bayes first, then k-nearest neighbours.  Each
# learner is instantiated only when its turn comes.
for learner_class in (Orange.classification.bayes.NaiveLearner,
                      Orange.classification.knn.kNNLearner):
    learner = learner_class()
    add(lambda f: build_map.build_model(f, learner), features)

# Random forest models are built in one call rather than per feature subset.
models = build_map.build_rf_models(trees=args.n, max_depth=4, min_instances=5)
representatives = select_representatives(models)
final_models.extend(representatives)

# Describe all collected models and build the model matrix with the
# Euclidean distance function.
table = build_map.build_model_data(final_models)
smx = build_map.build_model_matrix(final_models, mm.distance_euclidean)

# The output file name encodes smx.dim and the platform identifier.
output_name = "zoo_{}_{}".format(smx.dim, sys.platform)
mm.save(os.path.join(args.output_dir, output_name), smx, table, data)