Miha Stajdohar committed 407dae6

Added Brier score to modelmap.

Comments (0)

Files changed (3)

_modelmaps/model.py

         inst = data.Instance(domain)
 
         inst['uuid'] = self.uuid
-        inst['number of attributes'] = len(self.attributes)
+        inst['number of attributes'] = len(set(self.attributes))
         results = [p == c for p, c in zip(self.instance_predictions, self.instance_classes)]
         inst['CA'] = sum(results) / float(len(results))
         inst['P'] = np.mean([p[self.class_values[c]] for p, c in zip(self.probabilities, self.instance_classes)])
+
+        classes = zip(*sorted(self.class_values.items(), key=itemgetter(1)))[0]
+        outcomes = np.array([c == self.instance_classes for c in classes]).T
+
+        inst['Brier'] = np.sum(np.square(self.probabilities - outcomes)) / len(self.probabilities)
+        inst['Brier by class'] = ', '.join(map(str, zip(*sorted(zip(classes, np.sum(np.square(self.probabilities - outcomes), axis=0) / len(self.probabilities))))[1]))
         inst['type'] = self.type
         inst['model'] = self
         inst['attributes'] = ', '.join(self.attributes)

_modelmaps/modelmap.py

 
 """
 
-import bz2, itertools, math, random, os.path, time, uuid, sys
+import bz2, itertools, math, random, os.path, time, uuid, re, sys
 import cPickle as pickle
 
 import scipy.stats
 
 from Orange import data, distance, ensemble, feature, misc, projection
 from Orange.classification.knn import kNNLearner
-from Orange.classification.tree import TreeLearner
+from Orange.classification.tree import SimpleTreeLearner
 
-MODEL_LIST = ["", "SCATTERPLOT", "RADVIZ", "SPCA", "POLYVIZ", "TREE", "NaiveLearner", "kNNLearner", "SVM"]
+MODEL_LIST = ["", "SCATTERPLOT", "RADVIZ", "SPCA", "POLYVIZ", "TREE", "NaiveLearner", "kNNLearner", "SVM", "RF"]
 
 def distance_mi(m1, m2):
     """Return inverted normalized mutual information.
     varAttrs.numberOfDecimals = 0
     attrs.append(varAttrs)
     attrs.append(feature.Continuous("CA"))
+    attrs.append(feature.String("CA by class"))
     attrs.append(feature.Continuous("P"))
     attrs.append(feature.Continuous("AUC"))
-    attrs.append(feature.String("CA by class"))
+    attrs.append(feature.Continuous("Brier"))
+    attrs.append(feature.Continuous("Brier by class"))
     attrs.append(feature.Continuous("cluster CA"))
     attrs.append(feature.String("label"))
     attrs.append(feature.String("attributes"))
                      YAnchors=YAnchors)
 
 
-    def build_rf_models(self, data):
-        probabilities = [[] for fold in self.folds]
+    def build_rf_models(self, trees=50, max_depth=2):
+        indices = data.sample.SubsetIndices2(p0=0.5, stratified=data.sample.SubsetIndices.Stratified, randseed=42)(self.data_d)
+        train = self.data_d.select(indices, 0)
+        test = self.data_d.select(indices, 1)
 
-        # estimate class probabilities using CV
-        for fold in range(self.folds):
-            learnset = data.selectref(indices, fold, negate=1)
-            testset = data.selectref(indices, fold, negate=0)
+        class SimpleTreeLearnerSetProb():
+            """
+            Orange.classification.tree.SimpleTreeLearner which sets the skip_prob
+            so that on average a square root of the attributes will be
+            randomly chosen for each split.
+            """
+            def __init__(self, wrapped):
+                self.wrapped = wrapped
 
-            tree = TreeLearner(storeNodeClassifier=1,
-                       storeContingencies=0, storeDistributions=1, minExamples=5,
-                       storeExamples=1).instance()
-            gini = feature.scoring.Gini()
-            tree.split.discreteSplitConstructor.measure = tree.split.continuousSplitConstructor.measure = gini
-            tree.maxDepth = 4
-            tree.split = ensemble.forest.SplitConstructor_AttributeSubset(tree.split, 3)
-            forestLearner = ensemble.forest.RandomForestLearner(learner=tree, trees=self.model_limit)
-            forestClassifier = forestLearner(learnset)
+            def __call__(self, examples, weight=0):
+                self.wrapped.skip_prob = 1-len(examples.domain.attributes)**0.5/len(examples.domain.attributes)
+                return self.wrapped(examples)
 
-            for classifier in forestClassifier.classifiers:
-                tcn = 0
-                for i in range(len(data)):
-                    if (indices[i] == fold):
-                        ex = data.Instance(testset[tcn])
-                        ex.setclass("?")
-                        tcn += 1
-                        cr = classifier(ex, classifier.GetBoth)
-                        if cr[0].isSpecial():
-                            raise "Classifier %s returned unknown value" % (classifier.name)
-                        probabilities.append(cr)
-        model_classifier = learner(data)
-        model_classifier.probabilities = probabilities
+        min_instances = 5
+        # uses gain ratio
+        #tree = SimpleTreeLearnerSetProb(SimpleTreeLearner(max_depth=max_depth, min_instances=min_instances))
+        #rf_learner = ensemble.forest.RandomForestLearner(learner=tree, trees=trees, name="RF: %d trees; max depth: %d; min instances: %d" % (trees, max_depth, min_instances))
+        rf_learner = ensemble.forest.RandomForestLearner(trees=trees, name="RF: %d trees; max depth: None; min instances: %d" % (trees, min_instances))
+        rf_classifier = rf_learner(train)
+
+        def get_features(cls, domain):
+            features = re.findall('{ [01] \d+ (\d+)', pickle.dumps(cls))
+            return [domain[i].name for i in map(int, features)]
+
+        models = []
+        for c in rf_classifier.classifiers:
+            probabilities = []
+            instance_predictions = []
+            instance_classes = []
+            for ex in test:
+                ex = data.Instance(ex)
+                instance_classes.append(ex.get_class().value)
+                ex.setclass("?")
+                cl, prob = c(ex, c.GetBoth)
+                if cl.isSpecial():
+                    raise "Classifier %s returned unknown value" % c.name
+                probabilities.append(list(prob))
+                instance_predictions.append(cl.value)
+
+            models.append(Model("RF",
+                                c,
+                                np.array(probabilities),
+                                {val: i for i, val in enumerate(test.domain.class_var.values)},
+                                get_features(c, test.domain),
+                                np.array(instance_predictions),
+                                np.array(instance_classes),
+                                XAnchors=None,
+                                YAnchors=None))
+
+        return models, rf_classifier
 
 
     def _print_time(self, time_start, iter, numiter):

_modelmaps/widgets/OWModelMap.py

               ("BAYES", "Classify/icons/NaiveBayes"),
               ("kNNLearner", "Classify/icons/kNearestNeighbours"),
               ("KNN", "Classify/icons/kNearestNeighbours"),
-              ("SVM", "Classify/icons/BasicSVM")]
+              ("SVM", "Classify/icons/BasicSVM"),
+              ("RF", "Classify/icons/RandomForest")]
 
 ICON_SIZES = ["16", "32", "40", "48", "60"]
 
     def paint(self, painter, option, widget):
         orangeqt.ModelItem.paint(self, painter, option, widget)
 
-        lbl = self.text()
-
-        if lbl == "":
-            return
-
-        metrics = painter.fontMetrics()
-        th = metrics.height()
-
-        pen = painter.pen()
-        pen.setColor(QtCore.Qt.black)
-        painter.setPen(pen)
-
-        for i, l in enumerate(lbl.split(", ")):
-            tw = metrics.width(l)
-            r = QtCore.QRectF(-tw / 2., self.size() / 2. + 5 + i * th, tw, th);
-            painter.drawText(r, QtCore.Qt.AlignCenter, l);
+#        lbl = self.text()
+#
+#        if lbl == "":
+#            return
+#
+#        metrics = painter.fontMetrics()
+#        th = metrics.height()
+#
+#        pen = painter.pen()
+#        pen.setColor(QtCore.Qt.black)
+#        painter.setPen(pen)
+#
+#        for i, l in enumerate(lbl.split(", ")):
+#            tw = metrics.width(l)
+#            r = QtCore.QRectF(-tw / 2., self.size() / 2. + 5 + i * th, tw, th);
+#            painter.drawText(r, QtCore.Qt.AlignCenter, l);
 
 class ModelCurve(NetworkCurve):
     def __init__(self, parent=None, pen=QPen(Qt.black), xData=None, yData=None):
 
         vizPredAcc = OWGUI.widgetBox(self.modelTab, "Attribute lists", orientation="vertical")
         OWGUI.checkBox(vizPredAcc, self, "vizAttributes", "Display attribute lists", callback=self.visualize_info)
-        self.attrIntersectionBox = OWGUI.listBox(vizPredAcc, self, "attrIntersection", "attrIntersectionList", "Attribute intersection", selectionMode=QListWidget.NoSelection)
-        self.attrDifferenceBox = OWGUI.listBox(vizPredAcc, self, "attrDifference", "attrDifferenceList", "Attribute difference", selectionMode=QListWidget.NoSelection)
+
+        self.attrGraph = OWDistributionGraph(self, vizPredAcc)
+        self.attrGraph.setMaximumSize(QSize(300, 300))
+        self.attrGraph.setYRlabels(None)
+        self.attrGraph.setAxisScale(QwtPlot.xBottom, 0.0, 1.0, 0.1)
+        self.attrGraph.numberOfBars = 2
+        self.attrGraph.barSize = 200 / (self.attrGraph.numberOfBars + 1)
+        vizPredAcc.layout().addWidget(self.attrGraph)
+
+
+        #self.attrIntersectionBox = OWGUI.listBox(vizPredAcc, self, "attrIntersection", "attrIntersectionList", "Attribute intersection", selectionMode=QListWidget.NoSelection)
+        #self.attrDifferenceBox = OWGUI.listBox(vizPredAcc, self, "attrDifference", "attrDifferenceList", "Attribute difference", selectionMode=QListWidget.NoSelection)
 
         self.attBox.hide()
         self.visualize_info()
             ckey.setData([currentBarsHeight[cn], currentBarsHeight[cn] + subBarHeight, currentBarsHeight[cn] + subBarHeight, currentBarsHeight[cn]], [tmpx, tmpx, tmpx2, tmpx2])
             currentBarsHeight[cn] += subBarHeight
 
+            self.predGraph.addMarker("%.4f" % score, 0, cn, Qt.AlignRight | Qt.AlignBottom)
+
+
         self.predGraph.replot()
 
     def display_attribute_info(self, vertices=None):
 
         attrList = [self.graph.items()[v]["attributes"].value.split(", ") for v in vertices]
 
-        attrIntersection = set(attrList[0])
-        attrUnion = set()
-        for attrs in attrList:
-            attrIntersection = attrIntersection.intersection(attrs)
-            attrUnion = attrUnion.union(attrs)
+        #attrIntersection = set(attrList[0])
+        #attrUnion = set()
+        #for attrs in attrList:
+        #    attrIntersection = attrIntersection.intersection(attrs)
+        #    attrUnion = attrUnion.union(attrs)
 
-        self.attrIntersectionList = attrIntersection
-        self.attrDifferenceList = attrUnion.difference(attrIntersection)
+        #self.attrIntersectionList = attrIntersection
+        #self.attrDifferenceList = attrUnion.difference(attrIntersection)
+
+
+
+        self.attrGraph.tips.removeAll()
+        self.attrGraph.clear()
+        #self.predGraph.setAxisScale(QwtPlot.yRight, 0.0, 1.0, 0.2)
+        self.attrGraph.setAxisScale(QwtPlot.xBottom, 0.0, 1.0, 0.2)
+
+        if not vertices:
+            self.attrGraph.replot()
+            return
+
+        labels = [attr.name for attr in self.matrix.originalData.domain.attributes]
+        attrList = [{label:len([a for a in attrs if a == label]) for label in labels} for attrs in attrList]
+
+
+        self.attrGraph.setAxisScale(QwtPlot.yLeft, -0.5, len(labels) - 0.5, 1)
+
+        scores = [sum([attributes[label] for attributes in attrList]) for label in labels]
+        #scores = [sum(score) / len(score) for score in zip(*scores)]
+
+        self.attrGraph.setAxisScale(QwtPlot.xBottom, 0.0, max(scores), 1)
+
+        currentBarsHeight = [0] * len(scores)
+        for cn, score in enumerate(scores):
+            subBarHeight = score
+            ckey = PolygonCurve(pen=QPen(self.attrGraph.discPalette[cn]), brush=QBrush(self.attrGraph.discPalette[cn]))
+            ckey.attach(self.attrGraph)
+            ckey.setRenderHint(QwtPlotItem.RenderAntialiased, self.attrGraph.useAntialiasing)
+
+            tmpx = cn - (self.attrGraph.barSize / 2.0) / 100.0
+            tmpx2 = cn + (self.attrGraph.barSize / 2.0) / 100.0
+            ckey.setData([currentBarsHeight[cn], currentBarsHeight[cn] + subBarHeight, currentBarsHeight[cn] + subBarHeight, currentBarsHeight[cn]], [tmpx, tmpx, tmpx2, tmpx2])
+            currentBarsHeight[cn] += subBarHeight
+
+            self.attrGraph.addMarker("%d" % score, 0, cn, Qt.AlignRight | Qt.AlignBottom)
+
+
+        self.attrGraph.replot()
 
     def visualize_info(self):
         self.networkCanvas.radius = self.radius
         self.set_node_styles()
         self.set_node_colors()
 
-        self.networkCanvas.set_node_labels(["attributes"])
+        #self.networkCanvas.set_node_labels(["attributes"])
 
         labels = self.matrix.originalData.domain.classVar.values.native()
         self.predGraph.numberOfBars = len(labels)
         self.predGraph.barSize = 200 / (self.predGraph.numberOfBars + 1)
         self.predGraph.setYLlabels(labels)
-        #self.predGraph.setShowMainTitle(self.showMainTitle)
-        #self.predGraph.setYLaxisTitle(self.matrix.originalData.domain.classVar.name)
-        #self.predGraph.setShowYLaxisTitle(True)
         self.predGraph.setAxisScale(QwtPlot.xBottom, 0.0, 1.0, 0.2)
-        self.predGraph.setAxisScale(QwtPlot.yLeft, -0.5, len(self.matrix.originalData.domain.classVar.values) - 0.5, 1)
+        self.predGraph.setAxisScale(QwtPlot.yLeft, -0.5, len(labels) - 0.5, 1)
 
         self.predGraph.enableYRaxis(0)
         self.predGraph.setYRaxisTitle("")
         self.predGraph.setShowXaxisTitle(True)
         self.predGraph.replot()
 
+        labels = [attr.name for attr in self.matrix.originalData.domain.attributes]
+        self.attrGraph.numberOfBars = len(labels)
+        self.attrGraph.barSize = 200 / (self.attrGraph.numberOfBars + 1)
+        self.attrGraph.setYLlabels(labels)
+        self.attrGraph.setAxisScale(QwtPlot.xBottom, 0.0, 1.0, 0.2)
+        self.attrGraph.setAxisScale(QwtPlot.yLeft, -0.5, len(labels) - 0.5, 1)
+
+        self.attrGraph.enableYRaxis(0)
+        self.attrGraph.setYRaxisTitle("")
+        self.attrGraph.setXaxisTitle("models")
+        self.attrGraph.setShowXaxisTitle(True)
+        self.attrGraph.replot()
+
         self.visualize_info()
 
 if __name__ == "__main__":
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.