Commits

Amela Rakanovic committed 91c4b27

Refactored OWComparisonDistributions.

Comments (0)

Files changed (2)

Orange/OrangeWidgets/Statistics/OWComparisonDistributionsQt.py

 from plot.owtools import *
 
 from scipy import stats
+from scipy import interpolate
 import numpy as np
 
 """
- ____ __ ____ ____ ____ __ ____  _  _ ____ __ __  __ _ 
-(    (  ) ___|_  _|  _ (  |  _ \/ )( (_  _|  )  \(  ( \
- ) D ()(\___ \ )(  )   /)( ) _ () \/ ( )(  )(  O )    /
-(____(__|____/(__)(__\_|__|____/\____/(__)(__)__/\_)__)
 Statistics: Comparison of distributions
 """
 
 class OWComparisonDistributionsQt(OWWidget):
-    settingsList = ["numberOfBars", "sig_threshold" "showProbabilities", "showConfidenceIntervals", "smoothLines", "lineWidth", "showMainTitle", "showXaxisTitle", "showYaxisTitle", "showYPaxisTitle"]
-    contextHandlers = {"": DomainContextHandler("", ["attribute", "targetValue", "visibleOutcomes", "mainTitle", "xaxisTitle", "yaxisTitle", "yPaxisTitle"], matchValues=DomainContextHandler.MatchValuesClass)}
-
+    settingsList = ["variables_wb"]
+    
     #UPORABNO
     #XX = myplot.add_curve('test', xData = [0.5,1,2], yData = [0.5,0.6,1], enableLegend = False, autoScale=1)
     #XX.set_style(OWCurve.Lines)
         "Constructor"
         OWWidget.__init__(self, parent, signalManager, "Comparison of distributions")
         # settings
-        self.inputs = [("Data", ExampleTable, self.setData, Default)]
+        self.inputs = [("Data", ExampleTable, self.set_data, Default)]
         self.data = None
         
         self.loadSettings()
         
-        self.variables = []
-        self.variables_select = [0]
+        self.graph_select = 0
+
+        self.variables_wb = []
+        self.variables_select = []
         self.sig_threshold = 0.05
         self.main_tab = self.controlArea 
 
         b = OWGUI.widgetBox(self.main_tab, "Variable / Distribution", addSpace=True)
-        self.attr_list_box = OWGUI.listBox(b, self, "variables_select", "variables", selectionMode=QListWidget.MultiSelection, callback = self.run_selected_test) 
+        self.attr_list_box = OWGUI.listBox(b, self, "variables_select", "variables_wb", selectionMode=QListWidget.MultiSelection, callback = self.run_selected_test) 
         
-        b = OWGUI.widgetBox(self.main_tab, "Sig threshold", addSpace=True)
+        #b = OWGUI.widgetBox(self.main_tab, "Sig threshold", addSpace=True)
         #OWGUI.spin(b, self, "barSize", label="Sig threshold", min=30, max=100, step=5, callbackOnReturn=True)
-        OWGUI.doubleSpin(b, self, "sig_threshold", 0.05, 0.5, step=0.05, label="Significance threshold", controlWidth=75, alignment=Qt.AlignRight)
+        self.tests = OWGUI.radioButtonsInBox(self.main_tab, self, "graph_select", box = "Statistics", btnLabels = ["compare cumulatives", "q-q plot"], callback = self.draw_graph) 
+        #OWGUI.doubleSpin(b, self, "sig_threshold", 0.05, 0.5, step=0.05, label="Significance threshold", controlWidth=75, alignment=Qt.AlignRight)
         
         self.graph=OWPlot()
         self.mainArea.layout().addWidget(self.graph)
         self.info = OWGUI.widgetLabel(e, "<center>No test results.</center>")
     
     def run_selected_test(self):
+        """Run the statistical test matching the current selection and redraw the graph.
+
+        Index 0 in ``self.variables_select`` refers to the 'Normal' list entry;
+        an empty selection only clears the graph.
+        """
+
         if self.variables_select:
-            
-            if len(self.variables_select) < 2:
-                    self.info.setText('<center>No test results.</center>')
+            # a single entry: nothing to compare, only plot it
+            if 0 < len(self.variables_select) < 2:
+                self.info.setText('<center>No test results. Select two variables.</center>')
+                self.draw_graph(one=1)
             
+            # two data variables: compare them with stat_kolmogorov
             elif len(self.variables_select) == 2 and self.variables_select[0] != 0:
-                a = stat_kolmogorov(self.data, [str(self.variables[self.variables_select[0]][0]), str(self.variables[self.variables_select[1]][0])])
-                self.info.setText("<center>Kolmogorov-Smirnof statistic: %.3f (p=%.3f)</center>" % (a[0], a[1]))
-                
+                a = stat_kolmogorov(self.data, [self.variables[self.variables_select[0]].name, self.variables[self.variables_select[1]].name])
+                self.info.setText("<center>Kolmogorov-Smirnov statistic: %.3f (p=%.3f)</center>" % (a[0], a[1]))
                 self.draw_graph()
                 
+            # 'Normal' plus one data variable: test that variable with stat_shapirov
             elif len(self.variables_select) == 2 and self.variables_select[0] == 0:
-                a = stat_shapirov(self.data, str(self.variables[self.variables_select[1]][0]))
+                a = stat_shapirov(self.data, self.variables[self.variables_select[1]].name)
                 self.info.setText("<center>Shapiro-Wilk test: %.3f (p=%.3f)</center>" % (a[0], a[1]))
-                
+
                 self.draw_graph(normal=1)
                 
-            elif len(self.variables_select) == 3:
+            # three entries: keep only the first two selected entries and redraw
+            # NOTE(review): the info label is not refreshed in the two
+            # three-entry branches - confirm this is intended
+            elif len(self.variables_select ) == 3 and self.variables_select[0] == 0:
                 self.variables_select = [self.variables_select[0], self.variables_select[1]]
+                self.draw_graph(normal=1)
 
-    def draw_graph(self, normal=0):
-        at1 = self.variables[self.variables_select[1]][0]
-        tab1 = data_to_npcol(self.data, at1)
+            elif len(self.variables_select ) == 3:
+                self.variables_select = [self.variables_select[0], self.variables_select[1]]
+                self.draw_graph()
+        else:
+            self.graph.clear()
+
+    def draw_graph(self, normal=0, one=0):
+        """Redraw the plot for the current selection and graph type.
+
+        With ``self.graph_select == 0`` the cumulative distribution of the
+        first two selected entries is plotted (a single curve when only one
+        entry is selected).  Otherwise a q-q plot is drawn, which needs
+        exactly two selected entries; any other count clears the plot.
+        Entry 0 of the list stands for the 'Normal' pseudo-variable and is
+        represented by 100 random samples drawn with np.random.normal().
+
+        :param normal:  unused; kept so existing callers keep working
+        :param one:     unused; kept so existing callers keep working
+        """
+        selected = self.variables_select
+        if not selected:
+            self.graph.clear()
+            return
+
+        if self.graph_select == 0:
+            # cumulative distributions; with two entries the second selected
+            # one is loaded and drawn first, so it gets the green pen
+            if len(selected) > 1:
+                columns = [self._selected_column(selected[1]), self._selected_column(selected[0])]
+            else:
+                columns = [self._selected_column(selected[0])]
+            self._plot_cumulatives(columns)
+
+        elif len(selected) == 2:
+            # q-q plot: 'Normal' (index 0) goes on the x axis when selected,
+            # otherwise the second selected variable does
+            if selected[0] != 0:
+                x_index, y_index = selected[1], selected[0]
+            else:
+                x_index, y_index = selected[0], selected[1]
+            at1, tab1 = self._selected_column(x_index)
+            at2, tab2 = self._selected_column(y_index)
+            self.qq_calc(tab1, tab2, at1, at2)
+
+        else:
+            self.graph.clear()
+
+    def _selected_column(self, index):
+        "Return (name, values) for list entry `index`; entry 0 yields 100 random normal samples"
+        if index == 0:
+            return 'Normal', [np.random.normal() for _ in xrange(100)]
+        name = self.variables[index].name
+        return name, data_to_npcol(self.data, name)
+
+    def _plot_cumulatives(self, columns):
+        "Replace the plot content with one cumulative curve per (name, values) pair (at most two)"
+        pens = [QColor(154, 205, 50, 200), QColor(255, 165, 0, 150)]
+        # compute every curve before clearing, so a failure leaves the old plot intact
+        curves = [(name, comulative(values)) for name, values in columns]
+        self.graph.clear()
+        for (name, points), pen in zip(curves, pens):
+            # legend labels are cut to six characters
+            label = name[:6] + ("..." if len(name) > 6 else "")
+            curve = self.graph.add_curve(str(label), xData = points[0], yData = points[1], enableLegend = True, autoScale=1, penColor = pen)
+            curve.set_style(OWCurve.Points)
+        self.graph.set_show_axis_title(yLeft, 0)
+        self.graph.set_show_axis_title(xBottom, 0)
+
+    def qq_calc(self, tab1, tab2, at1, at2):
+        """Calculate and draw the q-q plot of two samples.
+
+        Both samples are turned into cumulative curves with comulative();
+        the curves are inverted by interpolation at the cumulative fractions
+        they share, and the resulting value pairs are plotted against each
+        other together with a red reference line.
         
-        if not normal:
-            at2 = self.variables[self.variables_select[0]][0]
-            tab2 = data_to_npcol(self.data, at2)
+        Usage::
+          self.qq_calc(tab1, tab2, at1, at2)
+          
+        :param tab1:    values of the first sample (x axis)
+        :param tab2:    values of the second sample (y axis)
+        :param at1:     name of the variable behind tab1
+        :param at2:     name of the variable behind tab2
+        :return:        None; the graph is redrawn as a side effect
+        """
         
-        else:
-            tab2 = []
-            for i in xrange(len(tab1)):
-                tab2.append(np.random.normal()) 
-            
-        data_tab1 = comulative(tab1, tab2, 0)
-        data_tab2 = comulative(tab2, tab1, 1)
+        # each result is [sorted values, cumulative fractions]
+        data_tab1 = comulative(tab1)
+        data_tab2 = comulative(tab2)
         
+        # fractions of both samples, limited to those strictly inside the
+        # range covered by both curves so interpolation stays in bounds
+        want_y = sorted(data_tab1[1]+data_tab2[1])
+        my_min=max(min(data_tab1[1]),min(data_tab2[1]))
+        my_max=min(max(data_tab1[1]),max(data_tab2[1]))
+        want_y = [x for x in want_y if x>my_min and x<my_max ]
+
+        # interpolate fraction -> value, i.e. the inverse of each cumulative curve
+        interp_fun1 = interpolate.interp1d(data_tab1[1],data_tab1[0])
+        interp_fun2 = interpolate.interp1d(data_tab2[1],data_tab2[0])
+        data_x1 = [ interp_fun1(y) for y in want_y ] 
+        data_x2 = [ interp_fun2(y) for y in want_y ]
+
+        # legend labels are cut to six characters
+        att1 = at1[:6] + ("..." if len(at1) > 6 else "")
+        att2 = at2[:6] + ("..." if len(at2) > 6 else "")
+
         self.graph.clear()
-        XX = self.graph.add_curve('c1', xData = data_tab1[0], yData = data_tab1[1], enableLegend = False, autoScale=1, penColor = QColor(154, 205, 50, 200))
-        YY = self.graph.add_curve('c2', xData = data_tab2[0], yData = data_tab2[1], enableLegend = False, autoScale=1, penColor = QColor(255, 165, 0, 150))
+        XX = self.graph.add_curve(str(att1 + "-" + att2), xData = data_x1, yData = data_x2, enableLegend = True, autoScale=1, penColor = QColor(154, 205, 50, 200))
         XX.set_style(OWCurve.Points)
-        YY.set_style(OWCurve.Points)
 
-    def setData(self, data):
+        # red reference line from (min, min) to (max, max) over all plotted values
+        # NOTE(review): min()/max() raise ValueError when want_y is empty
+        # (e.g. very small samples) - confirm callers never hit this
+        lin_curve = self.graph.add_curve('test', xData = [min(data_x1+data_x2), max(data_x1+data_x2)], yData = [min(data_x1+data_x2), max(data_x1+data_x2)], enableLegend = False, autoScale=1, penColor = Qt.red)
+        lin_curve.set_style(OWCurve.Lines)
+
+        self.graph.set_axis_title(yLeft, at2)
+        self.graph.set_axis_title(xBottom, at1)
+        
+        self.graph.set_show_axis_title(yLeft, 1)
+        self.graph.set_show_axis_title(xBottom, 1)
+
+        self.graph.replot()
+
+    def set_data(self, data):
+        """Read data from imported dataset"""
+
         self.closeContext()
         self.graph.clear()
         
         else:
             self.data = data
       
-        variables = []
         self.variables = ['Normal']
-        
+
         for attr in self.data.domain:
             if attr.varType == orange.VarTypes.Discrete:
                 pass
             else:
-                self.variables.append((attr.name, attr.varType))
-                variables.append(attr)
+                self.variables.append((attr))
+
         #force redrawing
+        self.variables_wb = [(a.name, a.varType) for a in self.variables[1:]]
+        self.variables_wb.insert(0, 'Normal')
+        self.variables_wb = self.variables_wb
+
         self.variables = self.variables
+        
 
 # ***************************************************************************
 # ***************************************************************************
     else:
         return dataset_np.T
 
-def comulative(tab1, tab2, n):
-    cum_1x = sorted(tab1)
-    cum_1y = np.cumsum(sorted(tab1))
-    cum_1y = cum_1y / float(cum_1y[-1])
+def comulative(tab1):
+    """Return the points of the empirical cumulative distribution of tab1.
+
+    :param tab1:    sequence of values
+    :return:        [x, y] where x is tab1 sorted ascending and
+                    y[i] = i / len(tab1), so y runs from 0 to (n-1)/n
+    """
+    x = sorted(tab1)
+    y = [a/float(len(tab1)) for a in range(len(tab1))]
+    return [x, y]
 
-    cum_2x = sorted(tab2)
-    cum_2y = np.cumsum(sorted(tab2))
-    cum_2y = cum_2y / float(cum_2y[-1])
-    
-    novi_1 = [] # calculated new x
-    for i in xrange(len(cum_1x)):
-        novi_1.append(np.interp(cum_1y[i], xp=cum_2y, fp=cum_2x))
-    
-    #print novi_1
-    
-    novi_2 = [] # calculated new x
-    for i in xrange(len(cum_2x)):
-        novi_2.append(np.interp(cum_2y[i], xp=cum_1y, fp=cum_1x))
-    
-    if n == 0:
-        return [novi_1, cum_1x]
-    else:
-        return [cum_1x, novi_1]
 def stat_shapirov(arr, selected):
     """shapirov test"""
     tab1 = data_to_npcol(arr, selected)
     a = QApplication(sys.argv)
     owd = OWComparisonDistributionsQt()
     owd.show()
-    data=orange.ExampleTable("../doc/datasets/heart_disease.tab")
-    owd.setData(data)
+    data=orange.ExampleTable("heart_disease.tab")
+    owd.set_data(data)
     a.exec_()

Orange/OrangeWidgets/Statistics/OWIndDiscrete.py

 import textwrap
 
 """
-  __ __ _ ____ ____ ____ ____ __ _ ____ ____ __ _  ___ ____ 
- (  |  ( (    (  __|  _ (  __|  ( (    (  __|  ( \/ __|  __)
-  )(/    /) D () _) ) __/) _)/    /) D () _)/    ( (__ ) _) 
- (__)_)__|____(____|__) (____)_)__|____(____)_)__)\___|____)
  Statistics: Independence for discrete attributes
 """
 
         self.icons = self.createAttributeIconDict() 
         
         
-        self.inputs = [("Data", ExampleTable, self.setData, Default)]
+        self.inputs = [("Data", ExampleTable, self.set_data, Default)]
         self.outputs = []
         
         self.chii = self.residual = self.obsexp = 1
         #GUI
         self.controlArea.setMinimumWidth(500)
         self.mainArea.setMinimumWidth(350)
+        self.mainArea.setMinimumHeight(350)
         
         radioboxx = OWGUI.widgetBox(self.controlArea, "", addSpace=True, orientation='horizontal')
         self.buttons = OWGUI.radioButtonsInBox(radioboxx, self, "show_values", box = "", orientation="horizontal", btnLabels = ["Show p-values", "Show chi-square"], callback = self.set_table)
         self.all_stats = self.compute_all_stats()
         self.set_table()
 
-    def setData(self, data):
+    def set_data(self, data):
         self.closeContext()
 
         if data:
                 
             else:
                 self.full_values.hide()
+            
             if len(self.attributes) > 9:
                 self.attributes = []
                 self.table.clear()
                 self.table2.clear()
                 self.buttons.setDisabled(1)  
-                #self.updateGraph()
+                #self.update_graph()
                 
             else:            
                 self.init_table()
     
-                self.setShownAttributes(self.attributeSelectionList)
+                self.set_shown_attributes(self.attributeSelectionList)
                 self.set_list()
                 self.openContext("", self.data)
                   
             self.table2.clear()
             self.buttons.setDisabled(1)
             self.attributes = []
-            self.updateGraph()
+            self.update_graph()
         
-    def rowColAttributeChanged(self):
+    def row_col_attribute_changed(self):
         self.rowAttribute = str(self.attributes[self.table.currentColumn()][0])
         self.columnAttribute = str(self.attributes[self.table.currentRow()][0])
         self.realRowAttr = self.data.domain[self.rowAttribute]
         self.horizontalValues = list(self.realColAttr.values)
         self.verticalValues = list(self.realRowAttr.values)
         self.table2.show()
-        self.updateMatrix()
+        self.update_matrix()
         
-    def updateMatrix(self):
+    def update_matrix(self):
         self.table2.clearSelection()
         attr = self.data.domain[self.attribute]
         vtype = attr.varType
                 self.table2.setItem(rrowi, 0, w)
                 
                 w = QTableWidgetItem(shw)
+                
                 if shw:
                     w.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                     w.setBackground(aggBrush)
             
     def set_table(self):       
         """ Sets table for basic statistics """        
-
+        
         attrs = [ a[0]  for a in self.attributes ]
         
         self.table.setColumnCount(0)
                 
         self.table.selectionModel().selectionChanged.connect(self.set_list)
         
-        self.rowColAttributeChanged()
+        self.row_col_attribute_changed()
     
     def set_list(self):
         if not self.attributes:
         
         self.attrX = str(self.attributes[self.table.currentColumn()][0])
         self.attrY = str(self.attributes[self.table.currentRow()][0])
-        self.updateGraph()
-        self.rowColAttributeChanged()
+        self.update_graph()
+        self.row_col_attribute_changed()
 
-    def setShownAttributes(self, attrList):     
+    def set_shown_attributes(self, attrList):     
         """Attribute selection signal"""
+
+        # Store the requested list; its first two names become attrX/attrY
+        # only when the list has at least two entries and both names are
+        # attributes of the current data domain. The graph is redrawn either way.
         self.attributeSelectionList = attrList
         if self.data and self.attributeSelectionList and len(attrList) >= 2:
             attrs = [attr.name for attr in self.data.domain]
             if attrList[0] in attrs and attrList[1] in attrs:
                 self.attrX = attrList[0]
                 self.attrY = attrList[1]
-        self.updateGraph()
+        self.update_graph()
     
-    def getConditionalData(self, xAttr = None, yAttr = None, dropMissingData = 1):
+    def get_conditional_data(self, xAttr = None, yAttr = None, dropMissingData = 1):
         """create data subset depending on conditional attribute and value"""
+
         if not self.data: return None
 
         if not xAttr: xAttr = self.attrX
         if dropMissingData: return orange.Preprocessor_dropMissing(data)
         else: return data
 
-    def initCombos(self):
+    def init_combos(self):
         """initialize lists for shown and hidden attributes"""
         self.attrCondition = str('(None)')
 
-    def resizeEvent(self, e):
-        OWWidget.resizeEvent(self,e)
-        self.updateGraph()
+    def resize_event(self, e):
+        OWWidget.resize_event(self,e)
+        self.update_graph()
 
-    def showEvent(self, ev):
-        OWWidget.showEvent(self, ev)
-        self.updateGraph()
+    def show_event(self, ev):
+        OWWidget.show_event(self, ev)
+        self.update_graph()
         
     def compute_all_stats(self):
         """Compute all statistics for all combinations of attributes."""
             for a in range(len(self.attributes)):
                 attrX = self.attributes[a][0]
 
-                data = self.getConditionalData(xAttr = attrX, yAttr = attrY)
+                data = self.get_conditional_data(xAttr = attrX, yAttr = attrY)
                 (contX, contY, probs, chi2, chi2_p) = stats_attrattr(data, attrX, attrY)
                 
                 all_stats_x.append({
         
         return all_stats
     
-    def updateGraph(self, *args):
+    def update_graph(self, *args):
         for item in self.canvas.items():
             self.canvas.removeItem(item)    # remove all canvas items
         if not self.data: return
         if not self.attrX or not self.attrY: return
         
-        data = self.getConditionalData()
+        data = self.get_conditional_data()
         if not data or len(data) == 0: return
         
         (contX, contY, probs, chi2, chi2_p) = stats_attrattr(data, self.attrX, self.attrY)
 
                 # create rectangle
                 rect = OWCanvasRectangle(self.canvas, currX+2, currY+2, width-4, height-4, z = -10)
-                self.addRectIndependencePearson(rect, currX+2, currY+2, width-4, height-4, (xAttr, xVal), (yAttr, yVal), actual, sum)
+                self.add_rect_independence_pearson(rect, currX+2, currY+2, width-4, height-4, (xAttr, xVal), (yAttr, yVal), actual, sum)
                 
                 expected = float(xVal*yVal)/float(sum)
                 pearson = (actual - expected) / sqrt(expected)
 
         #self.canvas.update()
 
-    def addRectIndependencePearson(self, rect, x, y, w, h, (xAttr, xVal), (yAttr, yVal), actual, sum):
-        """show deviations from attribute independence with standardized pearson residuals"""
+    def add_rect_independence_pearson(self, rect, x, y, w, h, (xAttr, xVal), (yAttr, yVal), actual, sum):
+        """Show deviations from attribute independence with standardized pearson residuals"""
         
         expected = float(xVal*yVal)/float(sum)
         pearson = (actual - expected) / sqrt(expected)
             b = 255
             r = g = 255 - intPearson*20
             r = g = max(r, 55)  #
+        
         elif pearson < 0:
             intPearson = ceil(pearson)
             pen = QPen(QColor(255,0,0), 1)
             r = 255
             b = g = 255 + intPearson*20
             b = g = max(b, 55)
+        
         else:
             pen = QPen(QColor(255,255,255), 1)
             r = g = b = 255         # white
+        
         color = QColor(r,g,b)
         brush = QBrush(color); rect.setBrush(brush)
 
         if pearson > 0:
             pearson = min(pearson, 10)
             kvoc = 1 - 0.08 * pearson       #  if pearson in [0..10] --> kvoc in [1..0.2]
+        
         else:
             pearson = max(pearson, -10)
             kvoc = 1 - 0.4*pearson
         
-        self.addLines(x,y,w,h, kvoc, pen)
+        self.add_lines(x,y,w,h, kvoc, pen)
 
-    def addLines(self, x,y,w,h, diff, pen):
-        """add lines"""
+    def add_lines(self, x,y,w,h, diff, pen):
+        """Add lines"""
+
         if not self.showLines: return
         if w == 0 or h == 0: return
 
             OWCanvasLine(self.canvas, x, y+temp, x+w, y+temp, 1, pen.color())
             temp += dist
 
-    def closeEvent(self, ce):
-        QDialog.closeEvent(self, ce)
+    def close_event(self, ce):
+        QDialog.close_event(self, ce)
 
 # ***************************************************************************
 # ***************************************************************************
     
     contX = orange.ContingencyAttrAttr(attrX, attrX, data)   # distribution of X attribute
     contY = orange.ContingencyAttrAttr(attrY, attrY, data)   # distribution of Y attribute
-    
+
     # compute contingency of x and y attributes
     for key in contX.keys():
         sum = 0
     probs = {}
     actual = []
     expected = []
+    
     for i in range(len(valsX)):
         valx = valsX[i]
         for j in range(len(valsY)):
     a=QApplication(sys.argv)
     ow=OWIndDiscrete()
     ow.show()
-    data = orange.ExampleTable('../doc/datasets/heart_disease.tab')
-    ow.setData(data)
+    data = orange.ExampleTable('heart_disease.tab')
+    ow.set_data(data)
     a.exec_()