# Source: orange-modelmaps / archive / bestincluster.py (full commit)

import orange
import OWDistanceFile
import orngClustering

# Input files: the distance matrix and the example table that goes with it.
distance_file = r'c:\Users\miha\Projects\res\metamining\dst\zoo-projections-500-tau.dst'
table_file = r'c:\Users\miha\Projects\res\metamining\dst\zoo-projections-500-tau.tab'

# NOTE: the `data` returned by readMatrix is replaced right away by the
# table loaded from the .tab file.
smx, lbl, data = OWDistanceFile.readMatrix(distance_file)
data = orange.ExampleTable(table_file)

# Clamp negative entries (cells with col < row) to zero before clustering.
for row in range(1, smx.dim):
    for col in range(row):
        if smx[row, col] < 0:
            smx[row, col] = 0

# Complete-linkage hierarchical clustering over the cleaned matrix.
root = orange.HierarchicalClustering(smx, linkage=orange.HierarchicalClustering.Complete)

def printClustering2(cluster):
    """Return a nested, parenthesised string describing `cluster`.

    A cluster without branches is rendered as the string form of the tuple
    of its elements; a cluster with branches is rendered as the renderings
    of its left and right sub-clusters wrapped in one pair of parentheses.
    """
    # Leaf: nothing below it, so just list its elements.
    if not cluster.branches:
        return str(tuple(cluster))

    left_text = printClustering2(cluster.left)
    right_text = printClustering2(cluster.right)
    return "(" + left_text + right_text + ")"

def prune(cluster, togo):
    """Cut off, in place, the sub-trees of `cluster` once `togo` is used up.

    `togo` is the remaining budget: every level descended subtracts the
    height of the cluster being left.  A cluster reached with a negative
    budget has its `branches` set to None; clusters without branches are
    left untouched.  Returns None.
    """
    if not cluster.branches:
        return

    if togo < 0:
        # Budget exhausted: drop everything below this cluster.
        cluster.branches = None
        return

    for child in cluster.branches:
        prune(child, togo - cluster.height)

# Optional debugging aids, left disabled:
#prune(root, 2)
#printClustering2(root)

# Ask for the membership of the top `nclusters` clusters of the dendrogram.
# NOTE(review): presumably one cluster id per item, in item order -- confirm.
nclusters = 20
clustered = orngClustering.hierarhicalClustering_topClustersMembership(root, nclusters)

# Group positions in `clustered` by their cluster id:
# l maps cluster id -> list of positions, in increasing order.
l = {}
for item_ndx, cluster_id in enumerate(clustered):
    l.setdefault(cluster_id, []).append(item_ndx)

# For every sufficiently large cluster, keep the index of the member with the
# highest 'vizrank' value.
min_cluster_size = 5  # clusters with fewer members are skipped

bestincluster = []
for cluster in l.values():
    if len(cluster) < min_cluster_size:
        continue

    # max() always yields a real member of the cluster.  The previous
    # "best_val = 0 / best_ndx = -1" sentinel appended -1 whenever no score
    # in the cluster was > 0, which then silently addressed the last row of
    # `smx` and `data`.  Ties still go to the first member with the top
    # score, as before.
    best_ndx = max(cluster, key=lambda ndx: float(data[ndx]['vizrank']))
    bestincluster.append(best_ndx)

# Build the reduced matrix over the selected representatives.  Only cells
# with col < row are copied; the diagonal keeps whatever SymMatrix(dim)
# initialised it to.
dim = len(bestincluster)
newsmx = orange.SymMatrix(dim)
for row, src_row in enumerate(bestincluster):
    for col in range(row):
        newsmx[row, col] = smx[src_row, bestincluster[col]]

# Attach the examples of the selected items to the new matrix.
newsmx.items = data.getitems(bestincluster)

def saveSymMatrix(matrix, file):
    """Write `matrix` to `file`.dst and its items to `file`.tab.

    The .dst file starts with a "<dim> labeled" header line, followed by one
    line per row: the row item's 'label' value and then the row's entries up
    to and including the diagonal, tab-separated, with six decimals each.

    matrix -- object providing `dim`, `matrix[i, j]` access and `items`
              (indexable by row, with a `save(path)` method)
    file   -- output path without extension
    """
    fn = open(file + ".dst", 'w')
    # try/finally (rather than `with`) keeps this block free of syntax newer
    # than what the rest of the script uses, while still guaranteeing that
    # the handle is closed if formatting or writing a row fails.
    try:
        fn.write("%d labeled\n" % matrix.dim)

        for i in range(matrix.dim):
            cells = ["%s" % matrix.items[i]['label']]
            cells.extend("%.6f" % matrix[i, j] for j in range(i + 1))
            fn.write("\t".join(cells) + "\n")
    finally:
        fn.close()

    matrix.items.save(file + ".tab")

# Base path (no extension) for the reduced matrix; saveSymMatrix appends
# ".dst" for the matrix and ".tab" for its items.
output_base = r'c:\Users\miha\Projects\res\metamining\dst\zoo-projections-bestinclust-20-tau'
saveSymMatrix(newsmx, output_base)