# Source
#
# orange-modelmaps / archive / matrix2network.py
#
# Full commit
import orange
import orngNetwork
import OWDistanceFile

from tools import *

def matrix2network(name, ratio, kNN):
    """Build a network from a distance matrix.

    ``name`` is either a file-name prefix (a string) or an already loaded
    distance matrix.

    * String: the matrix is read from ``<name>-knnpredict.dst``; once the
      network is built, its ``items`` are loaded from ``<name>.tab`` and the
      network is saved under ``<name>-knnpredict``.
    * Anything else: ``name`` itself is used as the matrix; nothing is read
      from or written to disk and ``items`` is left untouched.

    ``ratio`` is passed to ``getDistanceMatrixThreshold``; only the upper
    threshold it returns is used, and it is handed to ``fromDistanceMatrix``
    together with ``kNN``.

    Returns the constructed network.
    """
    from_files = isinstance(name, str)
    if from_files:
        smx, _labels, _data = OWDistanceFile.readMatrix(name + "-knnpredict.dst")
    else:
        smx = name

    # The construction steps are the same for both kinds of input.
    net = orngNetwork.Network(smx.dim, 0)
    _lower, upper = net.getDistanceMatrixThreshold(smx, ratio)
    net.fromDistanceMatrix(smx, 0, upper, kNN, 0)

    if from_files:
        net.items = orange.ExampleTable(name + ".tab")
        net.save(name + "-knnpredict")
        #net.items = smx.items.getitems()  (disabled in the matrix-input case)

    return net

#net_linproj     = matrix2network("primary-linproj-494"    , 0.01, 1)
#net_polyviz     = matrix2network("primary-polyviz-494"    , 0.01, 1)
#net_radviz      = matrix2network("primary-radviz-494"     , 0.01, 1)
#net_scatterplot = matrix2network("primary-scatterplot-253", 0.00, 1)

#smx, labels, data = OWDistanceFile.readMatrix(dstroot + "primary-scatterplot-253-knnpredict.dst")
#net = orngNetwork.Network(smx.dim, 0)
#lower, upper = net.getDistanceMatrixThreshold(smx, 0.05)
#print upper

#############################################################################
# best in cluster 2 network

def cluster2matrix(net, name):
    """Reduce a distance matrix to one representative item per cluster.

    Clusters are taken from ``net.clustering.labelPropagation()``. Within
    each cluster the representative is the member whose row in the cluster's
    sub-matrix has the smallest sum; ties go to the lowest index.

    Args:
        net: object providing ``clustering.labelPropagation()`` (one label
            per item) and ``items`` whose rows expose ``['CA'].value``.
        name: either a file-name prefix (string), in which case the matrix
            is read from ``<name>-knnpredict.dst`` and the reduced matrix is
            written with ``saveSymMatrix``, or an already loaded distance
            matrix, in which case nothing is read or saved.

    Returns:
        ``(medianmatrix, medians, csizes, bests)``: the sub-matrix over the
        representatives (its ``items`` set to the matching rows of
        ``net.items``), the representative indices in ascending order and,
        aligned with them, each cluster's size and its highest ``'CA'``
        value. With no labels at all the three lists are empty.
    """
    from_file = isinstance(name, str)
    if from_file:
        smx, _labels, _data = OWDistanceFile.readMatrix(name + "-knnpredict.dst")
    else:
        smx = name

    lbls = net.clustering.labelPropagation()
    summaries = []
    for c in set(lbls):
        cndxs = [i for i, ci in enumerate(lbls) if ci == c]
        cmatrix = smx.getitems(cndxs)
        # Row k of the sub-matrix belongs to item cndxs[k]; pairing the row
        # sum with the index makes min() break ties by the lowest index.
        _, cmedian = min((sum(row), ndx) for row, ndx in zip(cmatrix, cndxs))
        max_score = max(net.items[i]['CA'].value for i in cndxs)
        summaries.append((cmedian, max_score, len(cndxs)))

    # Every index belongs to exactly one cluster, so this orders the
    # clusters by their representative's index.
    summaries.sort()
    # Split column-wise without zip(*...), which cannot be unpacked when
    # there are no clusters.
    medians = [median for median, _, _ in summaries]
    bests = [best for _, best, _ in summaries]
    csizes = [size for _, _, size in summaries]

    medianmatrix = smx.getitems(medians)
    medianmatrix.items = net.items.getitems(medians)
    if from_file:
        outFile = dstroot + "medians-" + name + "-" + str(medianmatrix.dim) + "-knnpredict"
        saveSymMatrix(medianmatrix, outFile, None, True)
    return medianmatrix, medians, csizes, bests

def cluster2matrix2(net, smx):
    """Reduce ``smx`` to one representative item per cluster and annotate it.

    Clusters are taken from ``net.clustering.labelPropagation()``. Within
    each cluster the representative is the member whose row in the cluster's
    sub-matrix has the smallest sum; ties go to the lowest index.

    Args:
        net: object providing ``clustering.labelPropagation()`` (one label
            per item) and ``items`` whose rows expose ``['CA'].value`` and
            ``['uuid'].value``.
        smx: distance matrix providing ``getitems(indices)`` and a
            ``results`` mapping keyed by uuid.

    Returns:
        The sub-matrix over the representatives, ordered by ascending
        index. Its ``items`` are the matching rows of ``net.items``, each
        given a ``"cluster size"`` and a ``"cluster CA"`` (highest ``'CA'``
        in the cluster) entry; its ``results`` maps each representative's
        uuid to the entry from ``smx.results``. With no labels at all the
        result is built from an empty index list.
    """
    lbls = net.clustering.labelPropagation()
    summaries = []
    for c in set(lbls):
        cndxs = [i for i, ci in enumerate(lbls) if ci == c]
        cmatrix = smx.getitems(cndxs)
        # Row k of the sub-matrix belongs to item cndxs[k]; pairing the row
        # sum with the index makes min() break ties by the lowest index.
        _, cmedian = min((sum(row), ndx) for row, ndx in zip(cmatrix, cndxs))
        max_score = max(net.items[i]['CA'].value for i in cndxs)
        summaries.append((cmedian, max_score, len(cndxs)))

    # Every index belongs to exactly one cluster, so this orders the
    # clusters by their representative's index.
    summaries.sort()
    # Read the columns directly instead of zip(*...), which cannot be
    # unpacked when there are no clusters.
    medians = [median for median, _, _ in summaries]

    medianmatrix = smx.getitems(medians)
    medianmatrix.items = net.items.getitems(medians)
    medianmatrix.results = {}

    for i, (_, best, csize) in enumerate(summaries):
        item = medianmatrix.items[i]
        item["cluster size"] = csize
        item["cluster CA"] = best
        uuid = item["uuid"].value
        medianmatrix.results[uuid] = smx.results[uuid]

    return medianmatrix

#cluster2matrix(net_linproj,     "primary-linproj-494")
#cluster2matrix(net_polyviz,     "primary-polyviz-494")
#cluster2matrix(net_radviz,      "primary-radviz-494")
#cluster2matrix(net_scatterplot, "primary-scatterplot-253")