Commits

Amela Rakanovic committed 0dd4cf7

Refactored functions - OWIndContiniousQt.py.

Comments (0)

Files changed (1)

Orange/OrangeWidgets/Statistics/OWIndContiniousQt.py

 from OWScatterPlotGraphQt import *
 
 """
-  ___ __ ____ ____ ____ __     __ ____ __ __  __ _ 
- / __)  (  _ (  _ (  __|  )   / _(_  _|  )  \(  ( \
-( (_(  O )   /)   /) _)/ (_/\/    \)(  )(  O )    /
- \___)__(__\_|__\_|____)____/\_/\_(__)(__)__/\_)__)
  Statistics: Correlation between continuous variables
 """
 
         self.linregression_dirty = 0
         
         self.inputs = [("Data", ExampleTable, self.data)]
-        #self.outputs = [("Corelation data", ExampleTable), ("Sampled Data2", ExampleTable)]
         
         self.loadSettings()
         
         ## Main Area
         up = OWGUI.widgetBox(self.mainArea, "", addSpace=False, orientation="horizontal")
         self.tests = OWGUI.radioButtonsInBox(up, self, "test_select", box = "Statistics", orientation="horizontal", btnLabels = ["Pearson", "Spearman"], callback = self.run_selected_test) 
-        c = OWGUI.widgetBox(up, "Significance threshold for variable selection", addSpace=True)
-        self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(c), self, "slider_intervals", None, 1, 100, labelFormat='%2.2f', divideFactor=100.0, callback = self.update_data)
+        OWGUI.rubber(up)
               
         b = OWGUI.widgetBox(self.mainArea, "", addSpace=True)  
         self.table = OWGUI.table(b, selectionMode=QTableWidget.SingleSelection)
         if not (0 <= self.table.currentRow() < self.table.rowCount and 0 <= self.table.currentColumn() < self.table.columnCount):
             self.table.setCurrentCell(0,0) 
        
+        print self.table.currentColumn()
+
         self.graph.updateData(self.attributes[self.table.currentColumn()][0], self.attributes[self.table.currentRow()][0], "")
         
         k = self.linear_array_slope[self.table.currentColumn()][self.table.currentRow()]
             self.linregression_dirty = 1
             self.linear_array_slope = []
             self.linear_array_intersect = []
-            for i in xrange(len(self.attributes)):
-                slopes, intercepts = stat_lin_regression(self.ddataset, i, len(self.attributes))
-                self.linear_array_slope.append(slopes)
-                self.linear_array_intersect.append(intercepts)
-            #print self.linear_array_slope
+            self.linear_array_slope, self.linear_array_intersect = fun_lin_regression(self.ddataset, self.attributes, stats.linregress)
         
         if self.test_select == 0:
             if self.pearson_dirty == 0:
                 print 'Running Pearson'
                 self.pearson_array = []
                 
-                
-                for i in xrange(len(self.attributes)):
-                    self.pearson_array.append(stat_pearson(self.ddataset, i, len(self.attributes))[0])
-                    tab = stat_pearson(self.ddataset, i, len(self.attributes))[1]
-                    
-                    #dirty = 0
-                    #for j in xrange(len(self.attributes)):
-                        #if tab[j] > self.slider_intervals/100.0 and dirty == 0:
-                            #print self.slider_intervals/100.0
-                            #self.sig_attr.remove(self.attributes_select[i])
-                            #dirty = 1
-                    ##print self.sig_attr   
-                
+                self.pearson_array = fun_lin_regression(self.ddataset, self.attributes, stats.pearsonr)[0]
                 self.correlation_np = np.array(self.pearson_array)
+
                 self.set_table()
                 self.to_graph()
-                
-                #self.corelation_data = Orange.data.Table(self.correlation_np)        
-                #self.send("Corelation data", self.corelation_data)
-                
-                #new_domain_table = []
-                #for i in self.sig_attr:
-                    #new_domain_table.append(str(self.attributes[i][0]))
-                #print new_domain_table
-                
-                #domain = self.ddataset.domain
-                #new_domain = Orange.data.Domain(new_domain_table, domain)
-                #data2 = Orange.data.Table(new_domain, self.ddataset)
-                #self.send("Corelation data", data2)
             
             else:
                 self.correlation_np = np.array(self.pearson_array)
                 self.spearman_dirty = 1
                 print 'Running Spearman'
                 self.spearman_array = []
-                for i in xrange(len(self.attributes)):
-                    self.spearman_array.append(stat_spearman(self.ddataset, i, len(self.attributes))[0])
-                    tab = stat_spearman(self.ddataset, i, len(self.attributes))[1]
+                self.spearman_array = fun_lin_regression(self.ddataset, self.attributes, stats.spearmanr)[0]
+                    #tab = fun_lin_regression(self.ddataset, self.attributes, i[0], stats.spearmanr)[1]
                     #print tab
                     
                 self.correlation_np = np.array(self.spearman_array)
             self.graph.clear()
             self.reset_all_data()
             #self.send("Sampled Data", None)
-        
-        
+              
     def update_data(self):
         pass
                     
         return dataset_np.T
     
 
-# TESTS  
-# Pearson test ----------------------------------------------------------------- 
+# Statistics function  -----------------------------------------------------------
 
-def stat_pearson(dataset, selected, leng):
-    """Pearson test
-    :param dataset: orange dataset
-    :param attr:    string of selected attribute
-    :return:        (C ,P)
+def fun_lin_regression(dataset,  attributes, alpha):
+    """Run a two-sample statistics function on every pair of attributes
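+    :param dataset:     orange dataset - non filtered
+    :param attributes:  continuous attributes
+    :param alpha:       function to be run on each pair of columns
+    :return:            (C, P) - two nested lists with the first and second value returned by alpha for each pair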
     """  
-    pear_tab = []
-    pear_tab_p = []
-    for i in xrange(leng):
-      (tab1, tab2) = data_to_npcol(dataset, [selected, i])
-      #tab2 = data_to_npcol(dataset, i)
-      (p, t) = stats.pearsonr(tab1,tab2)
-      pear_tab.append(p)
-      pear_tab_p.append(t)
-    return (pear_tab, pear_tab_p)
+    table1 = []
+    table2 = []
 
-# Spearman test ---------------------------------------------------------------
+    for i in attributes:
+      val1 = []
+      val2 = [] 
+      
+      for attr in attributes:  
+        (tab1, tab2) = data_to_npcol(dataset, [i[0], attr[0]])
+        solve = alpha(tab1,tab2)
+        val1.append(solve[0])
+        val2.append(solve[1])
 
-def stat_spearman(dataset, selected, leng):
-    """Sperman test
-    :param dataset: orange dataset
-    :param attr:    string of selected attribute
-    :return:        (C ,P)
-    """  
-    spear_tab = []
-    spear_tab_p = []
-    for i in xrange(leng):
-      (tab1, tab2) = data_to_npcol(dataset, [selected, i])
-      #tab2 = data_to_npcol(dataset, i)
-      (p, t) = stats.spearmanr(tab1,tab2)
-      spear_tab.append(p)
-      spear_tab_p.append(t)
-    return (spear_tab, spear_tab_p)
-    
-# Linear regression -----------------------------------------------------------
-
-def stat_lin_regression(dataset, selected, leng):
-    """Linear regression
-    :param dataset: orange dataset
-    :param attr:    string of selected attribute
-    :return:        (C ,P)
-    """  
-    lin_slope = []
-    lin_intercept = []
-    for i in xrange(leng):  
-      (tab1, tab2) = data_to_npcol(dataset, [selected, i])
-
-      (slope, intercept, r_value, p_value, std_err) = stats.linregress(tab1,tab2)
-      #print slope, intercept, r_value, p_value, std_err
-      lin_slope.append(slope)
-      lin_intercept.append(intercept)
-    return (lin_slope, lin_intercept)
+      slopes, intercepts  = val1, val2
+      table1.append(slopes)
+      table2.append(intercepts)
+    return (table1, table2)
 # ***************************************************************************
 # ***************************************************************************