# Source
#
# orange-modelmaps / archive / projection_dss.py
#
# Full commit
import orange
import orngClustering
import OWDistanceFile

# Limits read by findProjections() while it walks the cluster tree.
depth = 10
min_projections = 20

# Accumulator that findProjections() appends the selected clusters to.
clusters = []

# Input locations (absolute Windows paths from the original author's machine).
root = "c:\\Users\\miha\\Projects\\res\\metamining\\"
in_file = root + "dst\\zoo-projections-500-abs.dst"

# Read the matrix file; the third returned value is rebound immediately by
# the table loaded on the next line.
smx, labels, data = OWDistanceFile.readMatrix(in_file)
data = orange.ExampleTable(root + 'tab\\zoo-projections-500.tab')

# Per the original author's notes: rescale to the interval [0, 1],
# then invert the matrix as 1 - X.
smx.normalize(0)
smx.invert(1)

# Average-linkage hierarchical clustering over the prepared matrix.
c = orange.HierarchicalClustering(
    smx,
    linkage=orange.HierarchicalClustering.Average,
)
def findProjections(cluster, l):
    """Collect the clusters at which the descent from *cluster* stops.

    The descent stops at a node, and that node is appended to the
    module-level ``clusters`` list, when the node has no branches, when its
    level (``l + 1``) exceeds the module-level ``depth``, or when it holds no
    more than ``min_projections`` elements. Otherwise the left child is
    processed first, then the right child.

    Arguments:
        cluster: node exposing ``branches``, ``left``, ``right`` and ``len()``.
        l: level of the parent call; the top-level call passes 0.
    """
    next_level = l + 1
    if not cluster.branches or next_level > depth or len(cluster) <= min_projections:
        clusters.append(cluster)
        return

    for child in (cluster.left, cluster.right):
        findProjections(child, next_level)

# Walk the cluster tree from its root; results land in the module-level
# `clusters` list.
findProjections(c, 0)

# From every collected cluster keep one member index: the one whose
# (vizrank, number of attributes, index) tuple compares greatest.
include = []
for cluster in clusters:
    # The comprehension variable is `idx`, not `c`: under Python 2 a list
    # comprehension variable leaks into the enclosing scope and would rebind
    # the module-level clustering root `c`.
    scores = [(data[idx]['vizrank'].value,
               data[idx]['number of attributes'].value,
               idx)
              for idx in cluster]
    # max() picks the same tuple that sort() would have placed last.
    include.append(max(scores)[2])

# Build the reduced matrix over the selected indices. Only cells with j < i
# are assigned; the diagonal keeps whatever orange.SymMatrix initialises it to.
new_smx = orange.SymMatrix(len(include))
for i in range(new_smx.dim):
    for j in range(i):
        new_smx[i, j] = smx[include[i], include[j]]
new_smx.items = data.getitems(include)

def saveSymMatrix(matrix, file):
    """Write *matrix* to ``file + ".dst"`` and its items to ``file + ".tab"``.

    The .dst file starts with a ``"<dim> labeled"`` header line, followed by
    one line per row: the row item's 'label' value, then the entries
    ``matrix[i, 0]`` .. ``matrix[i, i]``, each preceded by a tab and printed
    with six decimals.

    Arguments:
        matrix: object exposing ``dim``, ``matrix[i, j]`` access and
            ``items`` (indexable per row, each item with a 'label' field,
            and offering ``save(path)``).
        file: output path without extension.
    """
    out = open(file + ".dst", 'w')
    # try/finally so the handle is closed even if a row fails to format.
    try:
        out.write("%d labeled\n" % matrix.dim)
        for i in range(matrix.dim):
            cells = ["%s" % matrix.items[i]['label']]
            cells.extend("%.6f" % matrix[i, j] for j in range(i + 1))
            out.write("\t".join(cells) + "\n")
    finally:
        out.close()

    matrix.items.save(file + ".tab")

# Write the reduced matrix under `root` as projections-dss.dst / .tab.
saveSymMatrix(matrix=new_smx, file=root + 'projections-dss')