# orange-modelmaps/examples/projections/parse_entropy.py

import os.path, re
import numpy as np, scipy.stats
import matplotlib.pyplot as plt

from Orange import evaluation

# Directory that contains the "_projections_" folder with the result files.
# NOTE: the second assignment overrides the first, so the Windows path is the
# one actually used.
ROOT = "/home/miha/work/res/modelmaps"
ROOT = "C:\\Users\\Miha\\work\\res\\modelmaps"

# Result files to parse, relative to ROOT/_projections_.
fnames = ["entropy_results_15.txt"]

# Parsed values: res[dataset][method]["<abbr>_<measure>"] -> list of floats.
res = dict()

# Parser state: the dataset / method section currently being read, plus the
# set of every method name encountered.
dataset = method = None
methods = set()

# (display name, key abbreviation) of every compared algorithm, in table order.
algorithms = [
    ("VizRank", "vr"),
    ("K-means", "mm"),
    ("K-means iterative", "mmit"),
    ("Hierarchical (average)", "mmha"),
    ("Hierarchical (complete)", "mmhc"),
    ("Hierarchical (single)", "mmhs"),
]

# Parse every results file into ``res``.
#
# The files are read line by line and split on single spaces.  "Dataset:" and
# "Method:" lines select the section being filled, a series header line
# selects the key abbreviation (``type_``), and "uncertainty:" / "score:" /
# "entropy:" lines store their numbers under
# res[dataset][method]["<type_>_<measure>"].

# (number of leading tokens, their stripped concatenation, key abbreviation).
# Every entry is tested in this order and the last match wins, so a
# "kmeans iterative" header ends up as "mmit" even though its first three
# tokens also match the plain "kmeans" entry.
series_headers = [
    (1, "VizRank", "vr"),
    (3, "ModelMap:kmeans", "mm"),
    (4, "ModelMap:kmeansiterative", "mmit"),
    (4, "ModelMap:hierarchicalAVERAGE", "mmha"),
    (4, "ModelMap:hierarchicalSINGLE", "mmhs"),
    (4, "ModelMap:hierarchicalCOMPLETE", "mmhc"),
]

# Line labels that carry a list of numbers; the stored key drops the colon.
measure_labels = ("uncertainty:", "score:", "entropy:")

for fname in fnames:
    # ``with`` closes the file even if a malformed line raises while parsing.
    with open(os.path.join(ROOT, "_projections_", fname)) as fp:
        for line in fp:
            vals = line.split(" ")

            if len(vals) > 1 and vals[0] == "Dataset:":
                dataset = vals[1].strip()
                res.setdefault(dataset, {})

            if len(vals) > 1 and vals[0] == "Method:":
                method = vals[1].strip()
                # Look the method up inside the current dataset.  Looking it
                # up in ``res`` itself (which is keyed by dataset) always
                # produced a fresh dict and discarded anything parsed earlier
                # for the same dataset/method pair.
                res[dataset].setdefault(method, {})
                methods.add(method)

            for width, label, abbr in series_headers:
                if len(vals) >= width and "".join(v.strip() for v in vals[:width]) == label:
                    type_ = abbr

            if len(vals) > 1 and vals[0] in measure_labels:
                # Numbers are space separated and may carry commas, which are
                # removed before conversion; blank tokens are skipped.
                res[dataset][method][type_ + "_" + vals[0][:-1]] = [
                    float(val.strip().replace(",", ""))
                    for val in vals[1:] if val.strip() != '']

def add_subplot(fig, dataset, type, y_label, method, counter):
    """Draw one dataset panel into cell ``counter`` of the figure's 4x3 grid.

    Every series stored as ``res[dataset][method]["<abbr>_<type>"]`` is
    plotted against its 1-based position, each algorithm with a fixed line
    style and colour.

    Args:
        fig: matplotlib figure that receives the subplot.
        dataset: key into the module-level ``res``; also used (title-cased,
            split on "_" and "-", with "Sample" removed) as the panel title.
        type: measure suffix of the series keys, e.g. "entropy".
        y_label: label of the y axis.
        method: key into ``res[dataset]``.
        counter: 1-based index of the grid cell.

    Returns:
        None.  Nothing is drawn when ``method`` has no results for the
        dataset or when none of the series for ``type`` is available.
    """
    meta = res[dataset]
    if method not in meta:
        return

    # (key prefix, line style, colour) of each algorithm's curve.
    series_styles = [("vr_", "-", "k"), ("mmha_", "-", "0.7"), ("mm_", "--", "k"),
                     ("mmhc_", "--", "0.7"), ("mmit_", ":", "k"), ("mmhs_", ":", "0.7")]

    results = meta[method]
    series = [(results[prefix + type], style, color)
              for prefix, style, color in series_styles
              if results.get(prefix + type)]

    # Without this guard the axis set-up below referenced an x range that was
    # never assigned and raised UnboundLocalError.
    if not series:
        return

    ax = fig.add_subplot(4, 3, counter)

    ax.set_xlabel(r"$\mathrm{Number\/of\/projections\/}[i]$", size='medium')
    ax.set_ylabel(y_label, size='medium')

    for values, style, color in series:
        ax.plot(list(range(1, len(values) + 1)), values, style, color=color)

    ax.locator_params(axis='y', tight=False, nbins=4)

    # Size the x axis by the longest curve; label every position for short
    # curves and only the odd positions otherwise.
    n_points = max(len(values) for values, _, _ in series)
    step = 1 if n_points < 13 else 2
    ax.set_xticks(list(range(1, n_points + 1, step)))

    for label in ax.get_xticklabels():
        label.set_fontsize('small')

    for label in ax.get_yticklabels():
        label.set_fontsize('small')

    ax.set_xbound(1, n_points)
    # The y axis starts at zero and leaves a small margin above the maximum.
    ax.set_ybound(0, max(max(values) for values, _, _ in series) + 0.003)

    # s[:1] instead of s[0]: an empty segment (e.g. a doubled separator in
    # the dataset name) must not raise IndexError.
    subtitle = " ".join([s[:1].upper() + s[1:].lower() for s in re.split("_|-", dataset)])
    ax.set_title(subtitle.replace("Sample", ""), weight='bold', size='large', position=(0.5, 1.1),
        horizontalalignment='center', verticalalignment='center')


def print_results(method):
    """Print a LaTeX longtable of the "uncertainty" values of ``method``.

    For every dataset in the module-level ``res`` (except car, wine, lenses
    and zoo) one row per entry of ``algorithms`` is written to standard
    output, showing the 5th, 10th and 15th uncertainty value.  In each of
    the three columns the smallest value(s) among the algorithms are set in
    bold.

    Args:
        method: key into ``res[dataset]``; also used, title-cased and split
            on "_", as the table caption.
    """
    def titled(words):
        # Capitalise the first letter of every word, lower-case the rest.
        return " ".join([w[:1].upper() + w[1:].lower() for w in words])

    def cell(value, is_best):
        template = "$\\boldsymbol{%.3f}$" if is_best else "$%.3f$"
        return template % value

    print(r"""\begin{longtable}{llccc}
 \caption{%s\label{proj:measure}} \\
 \hline
  & & Top 5 & Top 10 & Top 15 \\
 \hline""" % titled(method.split("_")))

    skipped = ["car", "wine", "lenses", "zoo"]

    for dataset in sorted(res):
        if dataset.lower() in skipped:
            continue

        print(r" \multirow{6}{2.7cm}{%s}" % titled(re.split("_|-", dataset)))

        results = res[dataset][method]

        # columns[k][j]: value of algorithm j at the k-th cutoff (top 5/10/15).
        columns = [[results["%s_uncertainty" % abbr][cutoff] for _, abbr in algorithms]
                   for cutoff in (4, 9, 14)]
        lowest = [min(column) for column in columns]

        for j, (name, _) in enumerate(algorithms):
            lead = r"  \nopagebreak" if j > 0 else "              "
            cells = [cell(column[j], column[j] == low)
                     for column, low in zip(columns, lowest)]
            row = "& %s & %s & %s & %s \\\\" % (name, cells[0], cells[1], cells[2])
            # One line per row: lead-in, a single space, then the cells.
            print("%s %s" % (lead, row))

        print(r" \hline")

    print(r"\end{longtable}")

def draw_graphs(method, type, yaxis_title, fname):
    """Render one PDF with a panel per dataset plus a legend panel.

    Datasets are taken from the module-level ``res`` in sorted order,
    skipping car, wine and lenses.  Each one is drawn by ``add_subplot``
    into consecutive cells of a 4x3 grid; the cell after the last dataset
    holds only the legend.  The figure is written to
    ``ROOT/_projections_/<fname>_<method>.pdf``.

    Args:
        method: key into ``res[dataset]`` selecting the results to plot.
        type: measure suffix of the series keys, e.g. "entropy".
        yaxis_title: y-axis label shared by all panels.
        fname: prefix of the output file name.
    """
    fig = plt.figure(figsize=(9, 12), dpi=300)
    try:
        fig.subplots_adjust(wspace=0.3, hspace=0.6, top=0.9, bottom=0.05, left=0.1, right=0.95)

        counter = 0
        for dataset in sorted(res):
            if dataset.lower() in ["car", "wine", "lenses"]:
                continue

            counter += 1
            add_subplot(fig, dataset, type, yaxis_title, method, counter)

        ax = fig.add_subplot(4, 3, counter + 1)

        # Dummy one-point lines, drawn only so the legend has one handle per
        # line style; the axes themselves are hidden.
        ax.plot([0], [0], 'k-', [0], [0], 'k--', [0], [0], 'k:')
        ax.plot([0], [0], "-", color='0.7')
        ax.plot([0], [0], "--", color='0.7')
        ax.plot([0], [0], ":", color='0.7')
        ax.set_axis_off()
        leg = ax.legend(('VizRank', 'Kmeans', 'Kmeans iterative', 'Hierarchical average', 'Hierarchical complete', 'Hierarchical single'), frameon=False)
        plt.setp(leg.get_texts(), fontsize='medium')

        #fig.text(0.5, 0.965,  method[0].upper() + method[1:].replace("_", " "), ha='center', color='black', weight='bold', size='medium')
        #fig.text(0.5, 0.950,  "(remaining class entropy)", ha='center', color='black', weight='bold', size='small')
        fig.savefig(os.path.join(ROOT, "_projections_", "%s_%s.pdf" % (fname, method)))
    finally:
        # pyplot keeps every figure it created alive until it is closed;
        # release it here so repeated calls do not accumulate open figures.
        plt.close(fig)

# For every method found in the result files, render the joint-entropy and
# the remaining-entropy figure, then emit a blank line on standard output.
# (measure suffix, y-axis title, output file prefix)
plots = [
    ("entropy", r"$H(P_1 \ldots P_i)$", "entropy_joint"),
    ("uncertainty", r"$H(Y \mid P_1 \ldots P_i)$", "entropy_remaining"),
]

for method in methods:
    #print_results(method)
    for measure, axis_title, stem in plots:
        draw_graphs(method, measure, axis_title, stem)
    print("")

#for top in [5, 10, 15]:
#    vals = {}
#    for name, abbr in algorithms:
#        vals[abbr] = []
#
#        for method in methods:
#            for dataset in res.iterkeys():
#                if dataset.lower() in ["car", "wine", "lenses", "zoo"]:
#                    continue
#
#                vals[abbr].append(res[dataset][method]["%s_uncertainty" % abbr][top - 1])
#
#    print "Considering top %d projections: Friedman Chi Square Test" % top
#    print "Chi Square: %.3f; p-value: %e" % scipy.stats.friedmanchisquare(*[np.array(val) for val in vals.itervalues()])
#    print
#    print "Ranks"
#    ranks = scipy.stats.mstats.rankdata(np.array([vals[abbr] for name, abbr in algorithms]), axis=0)
#    print "\n".join("%s: %.3f" % (name, r) for r, (name, abbr) in zip(np.mean(ranks, axis=1), algorithms))
#    print
#    print "Critical distance (Nemenyi)"
#    CD = 2.850 * np.sqrt(6. * 7 / 6 / 30)
#    print CD
#
#    cd = evaluation.scoring.compute_CD(list(np.mean(ranks, axis=1)), 30)
#    print "Orange CD:", cd
#    print
#    print
#
#
#    evaluation.scoring.graph_ranks(os.path.join(ROOT, "_projections_", "dermatology-cd-%d.png" % top), np.mean(ranks, axis=1), zip(*algorithms)[0], cd=cd, width=7, textspace=2.2)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.