1. biolab
  2. Untitled project
  3. orange-bioinformatics

Commits

Marko Toplak  committed 98ff35c

Refactoring of obiGeneSetSig.

  • Participants
  • Parent commits f888bd8
  • Branches default

Comments (0)

Files changed (1)

File obiGeneSetSig.py

View file
  • Ignore whitespace
 import Orange
-import obiAssess
 import Orange.misc
 import obiGeneSets
 import obiGene
 import numpy
 from collections import defaultdict
 import stats
-import obiGsea
+from obiGsea import takeClasses
+from obiAssess import pca
 
 def setSig_example_geneset(ex, data):
     """ Gets learning data and example with the same domain, both
     def __call__(self, data, weight_id=None):
 
         #selection of classes and gene sets
-        data = obiGsea.takeClasses(data, classValues=self.class_values)
+        data = takeClasses(data, classValues=self.class_values)
         nm,_ =  self._mat_ni(data)
         gene_sets = select_genesets(nm, self.gene_sets, self.min_size, self.max_size, self.min_part)
 
         newdomain = Orange.data.Domain(newfeatures, data.domain.class_var)
         return Orange.data.Table(newdomain, data)
 
+    def build_features(self, data, gene_sets):
+        return [ self.build_feature(data, gs) for gs in gene_sets ]
+
 def vou(ex, gn, indices):
     """ returns the value or "?" for the given gene name gn"""
     if gn not in indices:
 
 class SetSig(GeneSetTrans):
 
-    def build_features(self, data, gene_sets):
+    def build_feature(self, data, gs):
 
-        attributes = []
+        at = Orange.feature.Continuous(name=str(gs))
 
-        for gs in gene_sets:
-            at = Orange.feature.Continuous(name=str(gs))
+        def t(ex, w, gs=gs, data=data): #copy of the data
+            geneset = list(gs.genes)
 
-            def t(ex, w, gs=gs, data=data): #copy od the data
-                geneset = list(gs.genes)
+            nm, name_ind = self._mat_ni(data)
+            nm2, name_ind2 = self._mat_ni(ex)
 
-                nm, name_ind = self._mat_ni(data)
-                nm2, name_ind2 = self._mat_ni(ex)
+            genes = [ nm.umatch(gene) for gene in geneset ]
+            genes2 = [ nm2.umatch(gene) for gene in geneset ]
 
-                genes = [ nm.umatch(gene) for gene in geneset ]
-                genes2 = [ nm2.umatch(gene) for gene in geneset ]
+            takegenes = [ i for i,a in enumerate(genes) if a != None ]
 
-                takegenes = [ i for i,a in enumerate(genes) if a != None ]
+            genes = [ genes[i] for i in takegenes ]
+            genes2 = [ genes2[i] for i in takegenes ]
 
-                genes = [ genes[i] for i in takegenes ]
-                genes2 = [ genes2[i] for i in takegenes ]
+            domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
+            datao = Orange.data.Table(domain, data)
+           
+            #convert the example to the same domain
+            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
+            example = Orange.data.Instance(domain, exvalues)
 
-                domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
-                datao = Orange.data.Table(domain, data)
-               
-                #convert the example to the same domain
-                exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
-                example = Orange.data.Instance(domain, exvalues)
-
-                return setSig_example_geneset(example, datao) #only this one is setsig specific
-         
-            at.get_value_from = t
-            attributes.append(at)
-
-        return attributes 
-
-from obiAssess import pca
+            return setSig_example_geneset(example, datao) #only this one is setsig specific
+     
+        at.get_value_from = t
+        return at
 
 class PCA(GeneSetTrans):
 
-    def build_features(self, data, gene_sets):
+    def build_feature(self, data, gs):
 
-        attributes = []
+        at = Orange.feature.Continuous(name=str(gs))
 
-        for gs in gene_sets:
-            at = Orange.feature.Continuous(name=str(gs))
+        geneset = list(gs.genes)
 
-            geneset = list(gs.genes)
+        nm, name_ind = self._mat_ni(data)
+        genes = [ nm.umatch(gene) for gene in geneset ]
+        takegenes = [ i for i,a in enumerate(genes) if a != None ]
+        genes = [ genes[i] for i in takegenes ]
 
-            nm, name_ind = self._mat_ni(data)
-            genes = [ nm.umatch(gene) for gene in geneset ]
-            takegenes = [ i for i,a in enumerate(genes) if a != None ]
-            genes = [ genes[i] for i in takegenes ]
-    
-            domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
+        domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
 
-            datao = Orange.data.Table(domain, data)
+        datao = Orange.data.Table(domain, data)
 
-            evals, evect, xmean = pca(datao)
-            constructt = evals, evect, xmean
+        evals, evect, xmean = pca(datao)
+        constructt = evals, evect, xmean
 
-            def t(ex, w, geneset=geneset, constructt=constructt, takegenes=takegenes, domain=domain):
+        def t(ex, w, geneset=geneset, constructt=constructt, takegenes=takegenes, domain=domain):
 
-                nm2, name_ind2 = self._mat_ni(ex)
-                genes2 = [ nm2.umatch(gene) for gene in geneset ]
-                genes2 = [ genes2[i] for i in takegenes ]
-              
-                #convert the example to the same domain
-                exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
-                
-                arr = numpy.array(exvalues[:-1])
-                
-                evals, evect, xmean = constructt
+            nm2, name_ind2 = self._mat_ni(ex)
+            genes2 = [ nm2.umatch(gene) for gene in geneset ]
+            genes2 = [ genes2[i] for i in takegenes ]
+          
+            #convert the example to the same domain
+            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
+            
+            arr = numpy.array(exvalues[:-1])
+            
+            evals, evect, xmean = constructt
 
-                arr = arr - xmean # same input transformation - a row in a matrix
-                ev0 = evect[0] #this is a row in a matrix - do a dot product
-                a = numpy.dot(arr, ev0)
+            arr = arr - xmean # same input transformation - a row in a matrix
+            ev0 = evect[0] #this is a row in a matrix - do a dot product
+            a = numpy.dot(arr, ev0)
 
-                return a
-                     
-            at.get_value_from = t
-            attributes.append(at)
-
-        return attributes 
+            return a
+                 
+        at.get_value_from = t
+        return at
 
 class SimpleFun(GeneSetTrans):
 
-    def build_features(self, data, gene_sets):
+    def build_feature(self, data, gs):
 
-        attributes = []
+        at = Orange.feature.Continuous(name=str(gs))
 
-        for gs in gene_sets:
-            at = Orange.feature.Continuous(name=str(gs))
+        def t(ex, w, gs=gs):
+            geneset = list(gs.genes)
+            nm2, name_ind2 = self._mat_ni(ex)
+            genes2 = [ nm2.umatch(gene) for gene in geneset ]
+           
+            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
+            exvalues = filter(lambda x: x != "?", exvalues)
 
-            def t(ex, w, gs=gs):
-                geneset = list(gs.genes)
-                nm2, name_ind2 = self._mat_ni(ex)
-                genes2 = [ nm2.umatch(gene) for gene in geneset ]
-               
-                exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
-                exvalues = filter(lambda x: x != "?", exvalues)
-
-                return self.fn(exvalues)
-         
-            at.get_value_from = t
-            attributes.append(at)
-
-        return attributes 
+            return self.fn(exvalues)
+     
+        at.get_value_from = t
+        return at
 
 class Mean(SimpleFun):
 
        self.fn = numpy.median
        super(Median, self).__init__(**kwargs)
 
-
-
 if __name__ == "__main__":
 
     data = Orange.data.Table("iris")
         ol =  sorted(ar.items())
         print '\n'.join([ a + ": " +str(b) for a,b in ol])
 
-    ass = PCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)
+    ass = Mean(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)
     ar = to_old_dic(ass.domain, data[:5])
     pp2(ar)