# Source: orange-modelmaps / archive / build_ensemble_map.py (full commit)

import Orange

from tools import *
from build_model_map import save_models, models2matrix

# Disabled setting, kept for reference only.
#FOLDS = 10
# Used as the tree count (`trees=`) for the random forest in build_rf_models().
MODEL_LIMIT = 5000

# Alternative data set, currently disabled.
#data_c = getData(ROOT + "tab/zoo-c.tab")
# Data set the ensemble is built from. `getData` and `ROOT` are not defined in
# this file -- presumably they arrive via `from tools import *` (TODO confirm).
data_d = getData(ROOT + "tab/dermatology.tab")


def get_attributes(node):
    """Return the split-attribute names found in the subtree rooted at `node`.

    A node contributes `node.branchSelector.classVar.name` when it has a
    truthy `branchSelector`; its non-empty branches are then visited in
    order. Names are listed parent first, then each branch's names, and
    duplicates are kept. A node without a branch selector yields an
    empty list.
    """
    selector = node.branchSelector
    if not selector:
        return []

    names = [selector.classVar.name]
    for child in node.branches:
        # Empty (falsy) branch slots are skipped.
        if child:
            names += get_attributes(child)
    return names

def build_rf_models(data, trees=None):
    """Train a random forest on `data` and describe every tree as a model dict.

    Parameters:
        data: Orange data table used both for training the forest and for
            collecting each tree's per-instance predictions.
        trees: number of trees passed to RandomForestLearner. Defaults to
            the module-level MODEL_LIMIT when None.

    Returns:
        A list with one dict per tree of the forest, with the keys
        'method' (always 'TREE'), 'classifier', 'probabilities' (one numpy
        array per instance), 'YAnchors' and 'XAnchors' (both None),
        'attributes' (unique split-attribute names from get_attributes),
        'instance_predictions' and 'instance_classes'.

    Raises:
        ValueError: if a tree predicts an unknown (special) value.
    """
    if trees is None:
        trees = MODEL_LIMIT

    tree = Orange.classification.tree.TreeLearner(storeNodeClassifier=1,
                   storeContingencies=0, storeDistributions=1, minExamples=5,
                   storeExamples=1).instance()
    # Use Gini as the split measure for both discrete and continuous features.
    gini = Orange.feature.scoring.Gini()
    tree.split.discreteSplitConstructor.measure = tree.split.continuousSplitConstructor.measure = gini
    tree.maxDepth = 5
    # Wrap the split constructor in the forest's attribute-subset variant;
    # the 3 is presumably the number of attributes tried per split
    # (TODO confirm against the Orange documentation).
    tree.split = Orange.ensemble.forest.SplitConstructor_AttributeSubset(tree.split, 3)
    forestLearner = Orange.ensemble.forest.RandomForestLearner(learner=tree, trees=trees)
    forestClassifier = forestLearner(data)

    models = []
    for classifier in forestClassifier.classifiers:
        probabilities, instance_predictions, instance_classes = [], [], []
        for instance in data:
            # Classify a copy with the class set to "?" (presumably Orange's
            # unknown marker) so the original instance stays untouched.
            ex = Orange.data.Instance(instance)
            ex.setclass("?")
            cr = classifier(ex, Orange.core.GetBoth)
            if cr[0].isSpecial():
                # Raising a plain string is not a valid exception; use a
                # real exception type so the message reaches the caller.
                raise ValueError("Classifier %s returned unknown value" % (classifier.name))

            probabilities.append(numpy.array(list(cr[1])))
            instance_predictions.append(cr[0])
            instance_classes.append(instance.get_class())

        models.append({'method': 'TREE',
                       'classifier': classifier,
                       'probabilities': probabilities,
                       'YAnchors': None,
                       'XAnchors': None,
                       'attributes': list(set(get_attributes(classifier.tree))),
                       'instance_predictions': instance_predictions,
                       'instance_classes': instance_classes})
    return models

# Script driver (runs at import time): build one model per forest tree from
# data_d, turn the models into a matrix with models2matrix, and save both.
models = build_rf_models(data_d)
smx_rank = models2matrix(models)
# The output name is OUT_FILE followed by "-" and len(smx_rank). OUT_FILE is
# not defined in this file -- presumably it comes from `from tools import *`
# (TODO confirm).
save_models(models, smx_rank, '%s-%d' % (OUT_FILE, len(smx_rank)))