Commits

Miha Stajdohar committed e177655

Parse entropy results: draw charts, print-out tables, ...

Comments (0)

Files changed (1)

examples/projections/parse_entropy.py

+import os.path, re
+import numpy as np, scipy.stats
+import matplotlib.pyplot as plt
+
+from Orange import evaluation
+
+ROOT = "/home/miha/work/res/modelmaps"
+ROOT = "C:\\Users\\Miha\\work\\res\\modelmaps"
+
+fnames = ["entropy_results_15.txt"]
+
+res = {}
+dataset = None
+method = None
+methods = set()
+
+algorithms = [("VizRank", "vr"), ("K-means", "mm"), ("K-means iterative", "mmit"),
+    ("Hierarchical (average)", "mmha"), ("Hierarchical (complete)", "mmhc"), ("Hierarchical (single)", "mmhs")]
+
+for fname in fnames:
+    fp = open(os.path.join(ROOT, "_projections_", fname))
+    for line in fp:
+        vals = line.split(" ")
+
+        if len(vals) > 1 and vals[0] == "Dataset:":
+            dataset = vals[1].strip()
+            res[dataset] = res.get(dataset ,{})
+
+        if len(vals) > 1 and vals[0] == "Method:":
+            method = vals[1].strip()
+            res[dataset][method] = res.get(method ,{})
+            methods.add(method)
+
+        if len(vals) > 0 and vals[0].strip() == "VizRank":
+            type_ = "vr"
+
+        if len(vals) > 2 and vals[0].strip() + vals[1].strip() + vals[2].strip() == "ModelMap:kmeans":
+            type_ = "mm"
+
+        if len(vals) > 3 and vals[0].strip() + vals[1].strip() + vals[2].strip() + vals[3].strip() == "ModelMap:kmeansiterative":
+            type_ = "mmit"
+
+        if len(vals) > 3 and vals[0].strip() + vals[1].strip() + vals[2].strip() + vals[3].strip() == "ModelMap:hierarchicalAVERAGE":
+            type_ = "mmha"
+
+        if len(vals) > 3 and vals[0].strip() + vals[1].strip() + vals[2].strip() + vals[3].strip() == "ModelMap:hierarchicalSINGLE":
+            type_ = "mmhs"
+
+        if len(vals) > 3 and vals[0].strip() + vals[1].strip() + vals[2].strip() + vals[3].strip() == "ModelMap:hierarchicalCOMPLETE":
+            type_ = "mmhc"
+
+        if len(vals) > 1 and vals[0] == "uncertainty:":
+            res[dataset][method][type_ + "_uncertainty"] = [float(val.strip().replace(",", "")) for val in vals[1:] if val.strip() != '']
+
+        if len(vals) > 1 and vals[0] == "score:":
+            res[dataset][method][type_ + "_score"] = [float(val.strip().replace(",", "")) for val in vals[1:] if val.strip() != '']
+
+        if len(vals) > 1 and vals[0] == "entropy:":
+            res[dataset][method][type_ + "_entropy"] = [float(val.strip().replace(",", "")) for val in vals[1:] if val.strip() != '']
+
+    fp.close()
+
+def add_subplot(fig, dataset, type, y_label, method, counter):
+
+    meta = res[dataset]
+    if not method in meta:
+        return
+
+    ax = fig.add_subplot(4, 3, counter)
+
+    ax.set_xlabel(r"$\mathrm{Number\/of\/projections\/}[i]$", size='medium')
+    ax.set_ylabel(y_label, size='medium')
+    y_all = []
+    for name, style, color in [("vr_", "-", "k"), ("mmha_", "-", "0.7"), ("mm_", "--", "k"),
+        ("mmhc_", "--", "0.7"), ("mmit_", ":", "k"), ("mmhs_", ":", "0.7")]:
+
+        if name + type in meta[method]:
+            x = range(1, len(meta[method][name + type]) + 1)
+            y = meta[method][name + type]
+            y_all.append(y)
+            ax.plot(x, y, style, color=color)
+
+    ax.locator_params(axis='y', tight=False, nbins=4)
+
+    #print "x", len(vr_x)
+    ax.set_xticks(list(x) if len(x) < 13 else [x_ for x_ in x if x_ % 2 == 1])
+    #ax.set_yticks([0, 1])
+
+    for label in ax.get_xticklabels():
+        label.set_fontsize('small')
+
+    for label in ax.get_yticklabels():
+        label.set_fontsize('small')
+
+    ax.set_xbound(1, len(x))
+    y_all = np.array(y_all)
+    ax.set_ybound(np.min(y_all), np.max(y_all) + 0.003)
+
+    subtitle = " ".join([s[0].upper() + s[1:].lower() for s in re.split("_|-", dataset)])
+    ax.set_title(r"%s" % (subtitle.replace("Sample", "")), weight='bold', size='large', position=(0.5, 1.1),
+        horizontalalignment='center', verticalalignment='center')
+
+
+def print_results(method):
+    print r"""\begin{longtable}{llccc}
+ \caption{%s\label{proj:measure}} \\
+ \hline
+  & & Top 5 & Top 10 & Top 15 \\
+ \hline""" % " ".join([v[:1].upper() + v[1:].lower() for v in method.split("_")])
+
+    for i, dataset in enumerate(sorted(res.iterkeys())):
+
+        if dataset.lower() in ["car", "wine", "lenses", "zoo"]:
+            continue
+
+        meta = res[dataset]
+        print " \multirow{6}{2.7cm}{%s}" % " ".join([v[:1].upper() + v[1:].lower() for v in re.split("_|-", dataset)])
+
+        best5 = [meta[method]["%s_uncertainty" % abbr][4] for name, abbr in algorithms]
+        best5 = [i for i, val in enumerate(best5) if val == min(best5)]
+        best10 = [meta[method]["%s_uncertainty" % abbr][9] for name, abbr in algorithms]
+        best10 = [i for i, val in enumerate(best10) if val == min(best10)]
+        best15 = [meta[method]["%s_uncertainty" % abbr][14] for name, abbr in algorithms]
+        best16 = [i for i, val in enumerate(best15) if val == min(best15)]
+
+        for j, (name, abbr) in enumerate(algorithms):
+
+            top5 = meta[method]["%s_uncertainty" % abbr][4]
+            top10 = meta[method]["%s_uncertainty" % abbr][9]
+            top15 = meta[method]["%s_uncertainty" % abbr][14]
+
+
+            print r"  \nopagebreak" if j > 0 else "              ",
+            print "& %s & %s & %s & %s \\\\" % (name,
+                        "$\\boldsymbol{%.3f}$" % top5 if j in best5 else "$%.3f$" % top5,
+                        "$\\boldsymbol{%.3f}$" % top10 if j in best10 else "$%.3f$" % top10,
+                        "$\\boldsymbol{%.3f}$" % top15 if j in best16 else "$%.3f$" % top15)
+
+        print r" \hline"
+
+    print r"\end{longtable}"
+
+def draw_graphs(method, type, yaxis_title, fname):
+    fig = plt.figure(figsize=(9, 12), dpi=300)
+    fig.subplots_adjust(wspace=0.3, hspace=0.6, top=0.9, bottom=0.05, left=0.1, right=0.95)
+
+    counter = 0
+    for i, dataset in enumerate(sorted(res.iterkeys())):
+        meta = res[dataset]
+        if dataset.lower() in ["car", "wine", "lenses"]:
+            continue
+
+        counter += 1
+        add_subplot(fig, dataset, type, yaxis_title, method, counter)
+
+    ax = fig.add_subplot(4, 3, counter + 1)
+
+    ax.plot([0], [0], 'k-', [0], [0], 'k--', [0], [0], 'k:')
+    ax.plot([0], [0], "-", color='0.7')
+    ax.plot([0], [0], "--", color='0.7')
+    ax.plot([0], [0], ":", color='0.7')
+    ax.set_axis_off()
+    leg = plt.legend(('VizRank', 'Kmeans', 'Kmeans iterative', 'Hierarchical average', 'Hierarchical complete', 'Hierarchical single'), frameon=False)
+    leg = plt.gca().get_legend()
+    ltext = leg.get_texts()
+    plt.setp(ltext, fontsize='medium')
+
+    #fig.text(0.5, 0.965,  method[0].upper() + method[1:].replace("_", " "), ha='center', color='black', weight='bold', size='medium')
+    #fig.text(0.5, 0.950,  "(remaining class entropy)", ha='center', color='black', weight='bold', size='small')
+    fig.savefig(os.path.join(ROOT, "_projections_", "%s_%s.pdf" % (fname, method)))
+
+for method in methods:
+    #print_results(method)
+    #draw_graphs(method, "entropy", r"$H(P_1 \ldots P_i)$", "entropy_joint")
+    #draw_graphs(method, "uncertainty", r"$H(Y \mid P_1 \ldots P_i)$", "entropy_remaining")
+    print
+
+for top in [5, 10, 15]:
+    vals = {}
+    for name, abbr in algorithms:
+        vals[abbr] = []
+
+        for method in methods:
+            for dataset in res.iterkeys():
+                if dataset.lower() in ["car", "wine", "lenses", "zoo"]:
+                    continue
+
+                vals[abbr].append(res[dataset][method]["%s_uncertainty" % abbr][top - 1])
+
+    print "Considering top %d projections: Friedman Chi Square Test" % top
+    print "Chi Square: %.3f; p-value: %e" % scipy.stats.friedmanchisquare(*[np.array(val) for val in vals.itervalues()])
+    print
+    print "Ranks"
+    ranks = scipy.stats.mstats.rankdata(np.array([vals[abbr] for name, abbr in algorithms]), axis=0)
+    print "\n".join("%s: %.3f" % (name, r) for r, (name, abbr) in zip(np.mean(ranks, axis=1), algorithms))
+    print
+    print "Critical distance (Nemenyi)"
+    CD = 2.850 * np.sqrt(6. * 7 / 6 / 30)
+    print CD
+
+    cd = evaluation.scoring.compute_CD(list(np.mean(ranks, axis=1)), 30)
+    print "Orange CD:", cd
+    print
+    print
+
+
+    evaluation.scoring.graph_ranks(os.path.join(ROOT, "_projections_", "dermatology-cd-%d.png" % top), np.mean(ranks, axis=1), zip(*algorithms)[0], cd=cd, width=7, textspace=2.2)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.