orange-modelmaps / examples / projections / distance_metric_comparisson.py

Miha Stajdohar e49e39e 


Miha Stajdohar 67ef2ae 
Miha Stajdohar e49e39e 

Aleš Erjavec 4f4446f 
Miha Stajdohar e49e39e 






Miha Stajdohar e68d359 
Miha Stajdohar e49e39e 
Miha Stajdohar c6b442c 
Miha Stajdohar e49e39e 

Miha Stajdohar 67ef2ae 








Miha Stajdohar e49e39e 

Miha Stajdohar c6b442c 
Miha Stajdohar e49e39e 



Miha Stajdohar c6b442c 

Miha Stajdohar e49e39e 






































































Miha Stajdohar c6b442c 

Miha Stajdohar 67ef2ae 

Miha Stajdohar c6b442c 

Miha Stajdohar e49e39e 
Miha Stajdohar c6b442c 

Miha Stajdohar e49e39e 
Miha Stajdohar c6b442c 

Miha Stajdohar e49e39e 
Miha Stajdohar c6b442c 

__author__ = 'Miha Stajdohar'

import cPickle as pickle
import os, os.path, sys
import scipy
import numpy as np
import orangecontrib.modelmaps as mm

from time import time
from Orange.orng import orngVizRank as vr
from Orange import utils

# Base directory of the local modelmaps resources. It is joined with "tab" to
# locate <DATASET>.tab files that are not found in
# utils.environ.dataset_install_dir. Adjust it to the local machine; the
# commented-out lines are alternative locations.
ROOT = "/home/miha/work/res/modelmaps"
#ROOT = "/Network/Servers/xgridcontroller.private/lab/mihas/modelmaps"
#ROOT = "C:\\Users\\Miha\\work\\res\\modelmaps"

def build_map_for_metric_comparisson(DATASET, N, max_features=None):
    """Build one model distance matrix per distance metric for a data set.

    The data file ``<DATASET>.tab`` is looked up first in
    ``utils.environ.dataset_install_dir`` and then in ``ROOT/tab``.
    Projection models (linear projection, RadViz and PolyViz for every
    sampled feature subset, plus scatterplots for feature pairs) are built
    and compared with five distance functions from ``mm``.

    The name of the data set, the number of feature subsets, and the time
    (in minutes) spent on each distance matrix are printed to stdout.

    :param DATASET: data set name without the ``.tab`` extension.
    :param N: passed to ``mm.get_feature_subsets`` (presumably the number of
        feature subsets to sample -- confirm against that function).
    :param max_features: passed to ``mm.get_feature_subsets`` as
        ``max_features``; ``min_features`` is fixed at 3.
    :returns: dict mapping the keys "5.1", "5.2", "5.3", "5.4" and "5.5" to
        the matrices returned by ``build_map.build_model_matrix``.
    :raises IOError: if the data file is found in neither location.
    """
    print("DATA SET: %s" % DATASET)

    tab_name = "%s%s" % (DATASET, ".tab")
    fname = os.path.join(utils.environ.dataset_install_dir, tab_name)
    if not os.path.isfile(fname):
        # Fall back to the local resource directory.
        fname = os.path.join(ROOT, "tab", tab_name)
        if not os.path.isfile(fname):
            raise IOError("File %s not found." % fname)

    build_map = mm.BuildModelMap(fname)
    domain = build_map.data_d.domain

    nfeatures = len(domain.features)
    features = mm.get_feature_subsets(domain, N, min_features=3, max_features=max_features)

    # Number of unordered feature pairs: n * (n - 1) / 2. The product is
    # always even, so floor division is exact.
    max_nfeatures_scatterplot = (nfeatures ** 2 - nfeatures) // 2
    features_scatterplot = mm.get_feature_subsets_scatterplot(domain, max_nfeatures_scatterplot)

    print("N: %d  N scatterplot: %d" % (len(features), len(features_scatterplot)))

    # Model order matters for the resulting matrices: all linear projections
    # first, then RadViz, then PolyViz, and finally the scatterplots.
    models = [build_map.build_projection_model(f, method)
              for method in (vr.LINEAR_PROJECTION, vr.RADVIZ, vr.POLYVIZ)
              for f in features]
    models.extend(build_map.build_projection_model(attrs, vr.SCATTERPLOT)
                  for attrs in features_scatterplot)

    # NOTE(review): the returned table is not used here; the call is kept in
    # case build_model_data has side effects on build_map -- confirm.
    build_map.build_model_data(models)

    # (result key, distance function) in the order they are evaluated.
    # NOTE(review): the keys look like section/table numbers -- confirm.
    metrics = (
        ("5.1", mm.distance_class),
        ("5.3", mm.distance_euclidean),
        ("5.4", mm.distance_manhattan),
        ("5.5", mm.distance_rank),
        ("5.2", mm.distance_mi),
    )

    smxs = {}
    for key, distance in metrics:
        start = time()
        smxs[key] = build_map.build_model_matrix(models, distance)
        # Elapsed time in minutes, reported consistently for every metric.
        print((time() - start) / 60)

    return smxs

def _print_lower_triangle(keys, res, field, cell_format):
    """Print, for each key, ``res[key][other][field]`` for all earlier keys."""
    for i, row_key in enumerate(keys):
        cells = "  ".join([cell_format % (col_key, res[row_key][col_key][field])
                           for col_key in keys[:i]])
        # Four spaces reproduce the spacing of the original pair of
        # Python 2 print statements (soft spaces around the "  " item).
        print("%s    %s" % (row_key, cells))


def matrix_correlation(smxs):
    """Compare distance matrices row by row with Spearman rank correlation.

    For every pair of matrices (including a matrix with itself) the Spearman
    correlation and its p-value are computed between corresponding rows and
    averaged over all rows. The strict lower triangle of both tables is
    printed to stdout.

    :param smxs: dict mapping a name to a matrix object that exposes ``dim``
        and supports row indexing ``smx[n]`` for ``n`` in ``range(dim)``.
    :returns: nested dict ``res[k1][k2] == {"rank": ..., "rank p": ...}``;
        ``res[k1][k2]`` and ``res[k2][k1]`` are the same dict object.
    """
    # A plain ``import scipy`` does not guarantee that the ``stats``
    # submodule is loaded, so import the function explicitly.
    from scipy.stats import spearmanr

    keys = sorted(smxs.keys())
    res = {k1: {k2: {} for k2 in keys} for k1 in keys}
    for i, key_i in enumerate(keys):
        smx_i = smxs[key_i]
        for key_j in keys[:i + 1]:
            smx_j = smxs[key_j]

            # Each spearmanr call yields (correlation, p-value); averaging
            # over axis 0 gives the mean of each across all rows.
            rank, rank_p = np.average(
                [spearmanr(smx_i[n], smx_j[n]) for n in range(smx_i.dim)],
                axis=0)
            r = {"rank": rank, "rank p": rank_p}

            res[key_i][key_j] = r
            res[key_j][key_i] = r

    print("rank")
    _print_lower_triangle(keys, res, "rank", "%s: %lf")
    print("")
    print("rank p")
    _print_lower_triangle(keys, res, "rank p", "%s: %e")

    return res


# Data sets to process, in run order, as (DATASET, N, max_features).
# A ("breast-cancer-wisconsin", None, None) run is currently disabled.
# NOTE: for "zoo", a previously saved result could be loaded instead with
# smxs, table, data = mm.load(os.path.join(ROOT, "_projections_", "proj_alldist_4_zoo_1000"))
_RUNS = (
    ("dermatology", 1000, 8),
    ("iris", None, None),
    ("voting", 1000, 8),
    ("zoo", 1000, 8),
)

for _dataset, _n, _max_features in _RUNS:
    # Build the per-metric distance matrices, then print their correlations.
    smxs = build_map_for_metric_comparisson(_dataset, N=_n, max_features=_max_features)
    matrix_correlation(smxs)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.