Source

orange-modelmaps / archive / classifier2matrix.py

import time
import uuid
import pickle
import numpy

import orange
import orngTree
import orngEnsemble
import orngTest
import orngStat

from tools import *

def getForestAttributes(node):
    """Collect the split-attribute names used in the subtree rooted at ``node``.

    The tree is walked depth-first in pre-order: for a node that has a
    ``branchSelector``, the name of the selector's ``classVar`` is recorded
    first, then every non-empty branch is visited in order. Names are not
    de-duplicated, so an attribute used by several splits appears once per
    split. A node without a ``branchSelector`` contributes nothing and its
    branches are not inspected.

    Returns a list of attribute names (possibly empty).
    """
    selector = node.branchSelector
    if not selector:
        # No selector on this node: nothing to record, nothing to descend into.
        return []

    names = [selector.classVar.name]
    for child in node.branches:
        # Branches may contain empty slots; skip them.
        if child:
            names += getForestAttributes(child)
    return names

def getAttributes(classifier):
    """Return the names of the attributes associated with ``classifier``.

    If the classifier's type is named exactly ``"TreeClassifier"``, the names
    are gathered from its ``tree`` via ``getForestAttributes`` (one entry per
    split, duplicates included). For any other classifier the names of all
    attributes in ``classifier.domain.attributes`` are returned.
    """
    is_tree = type(classifier).__name__ == "TreeClassifier"
    if not is_tree:
        return [attribute.name for attribute in classifier.domain.attributes]
    return getForestAttributes(classifier.tree)

def classifier2matrix(data, method, classifiers, fn=None, labels=None):
    """Describe a set of classifiers and compute their pairwise distances.

    Every classifier is tested on ``data`` with ``orngTest.testOnData``. One
    row per classifier (uuid, model, attributes, number of attributes, CA,
    AUC, CA by class, label) is appended to the table returned by
    ``getModelsExampleTable()``. The distance between two classifiers is the
    sum, over all tested examples, of the squared differences between their
    predicted class-probability vectors.

    Parameters:
        data: examples to test on. ``data.domain.classVar`` supplies the
            class name and values used for the per-class CA.
        method: key into ``MODEL_LIST``; also recorded for every classifier
            in the returned tuple.
        classifiers: sequence of classifiers to evaluate. It is not modified.
        fn: optional file-name prefix. When given, the distance matrix is
            saved with ``saveSymMatrix`` as ``<fn>-<dim>``, the model table
            as ``<fn>-<dim>.tab`` and a pickled per-uuid dictionary as
            ``<fn>-<dim>.res`` (``dim`` is the number of classifiers).
        labels: optional sequence of row labels, one per classifier; when
            empty or None, ``MODEL_LIST[method]`` is used as the label.

    Returns:
        ``(smx_probs, RV)`` where ``smx_probs`` is an ``orange.SymMatrix``
        whose ``items`` is the model table, and ``RV`` is a tuple of six
        parallel sequences: methods, uuids, test results, ``None``
        placeholders, the ``classifiers`` argument itself, and the attribute
        name lists.
    """
    results = [orngTest.testOnData([c], data) for c in classifiers]

    cv = data.domain.classVar.name
    resultsByClass = [[orngTest.testOnData([c], data.filter({cv: val}))
                       for val in data.domain.classVar.values]
                      for c in classifiers]

    out = getModelsExampleTable()

    model_predictprobs = []
    for i, result in enumerate(results):
        # Stack the per-example probability vectors into one array per model
        # (rows: tested examples, columns: class values) so that the pairwise
        # distance below is a single vectorised expression.
        model_predictprobs.append(numpy.array(
            [numpy.array(res.probabilities[0]) for res in result.results]))

        attributes = list(set(getAttributes(classifiers[i])))
        ex = orange.Example(out.domain)
        ex['uuid'] = uuid.uuid4().hex
        ex['model'] = MODEL_LIST[method]
        ex['attributes'] = ", ".join(sorted(attributes))
        ex['number of attributes'] = len(attributes)
        ex['CA'] = orngStat.CA(result)[0]
        ex['AUC'] = orngStat.AUC(result)[0]
        ex['CA by class'] = ", ".join([str(orngStat.CA(res)[0]) for res in resultsByClass[i]])
        ex['label'] = labels[i] if labels else MODEL_LIST[method]
        out.append(ex)

    ##########################################################################
    ## calculate projection distance matrix
    dim = len(results)
    total_pairs = dim * (dim - 1) // 2
    print('calculating model distance matrices, %d models' % dim)
    smx_probs = orange.SymMatrix(dim)

    counter = 0
    time_start = time.time()

    for i in range(dim):
        for j in range(i + 1, dim):
            # sum(sum_i(pi_1^i - pi_2^i)^2) - predictions probability squared error
            diff = model_predictprobs[i] - model_predictprobs[j]
            smx_probs[i, j] = float(numpy.sum(diff * diff))

            counter += 1
            if counter % 5000 == 0:
                # Linear extrapolation of the remaining time from the pairs
                # processed so far.
                time_elapsed = time.time() - time_start
                time_total = time_elapsed / counter * total_pairs
                time_remaining = int(time_total - time_elapsed)
                print('%d / %d | remaining: %d : %d : %d' % (
                    counter, total_pairs,
                    time_remaining // 60 // 60,
                    time_remaining // 60 % 60,
                    time_remaining % 60))

    # Parallel per-model columns, shared by the return value and the pickle.
    methods = [method for ex in out]
    uuids = [ex["uuid"].value for ex in out]
    result_lists = [r.results for r in results]
    placeholders = [None for ex in out]
    attribute_lists = [ex["attributes"].value.split(', ') for ex in out]

    RV = (methods, uuids, result_lists, placeholders, classifiers, attribute_lists)

    if fn:
        saveSymMatrix(smx_probs, '%s-%d' % (fn, dim), out)
        out.save('%s-%d.tab' % (fn, dim))

        # SVM classifiers are stored as None in the pickle (presumably they
        # cannot be pickled - confirm). A copy is blanked so that neither the
        # caller's list nor the returned RV loses its classifiers.
        picklable = [None if str(ex["model"].value) == "SVM" else c
                     for ex, c in zip(out, classifiers)]

        records = zip(methods, result_lists, placeholders, picklable, attribute_lists)
        # Context manager guarantees the .res file is flushed and closed.
        with open('%s-%d.res' % (fn, dim), 'wb') as res_file:
            pickle.dump(dict(zip(uuids, records)), res_file)

    smx_probs.items = out
    return smx_probs, RV