Commits

Marko Toplak  committed f888bd8

Added PCA transformation to obiGeneSetSig.

  • Participants
  • Parent commits c39f563

Comments (0)

Files changed (2)

File obiAssess.py

 
     ev0 = evect[0] #this is a row in a matrix - do a dot product
     a = numpy.dot(arr, ev0)
-
     return a
 
 def pca(data, snapshot=0):
     "Perform PCA on M, return eigenvectors and eigenvalues, sorted."
     M = data.toNumpy("a")[0]
     XMean = numpy.mean(M, axis = 0)
-    print XMean.shape, M.shape
     M = M - XMean
 
     T, N = numpy.shape(M)
     choosen_cv = ["Iris-setosa", "Iris-versicolor"]
     #ass = AssessLearner()(data, matcher, gsets, rankingf=AT_loessLearner())
     #ass = MeanLearner()(data, matcher, gsets, default=False)
-    ass = MedianLearner()(data, matcher, gsets)
+    #ass = MedianLearner()(data, matcher, gsets)
     #ass = PLSLearner()(data, matcher, gsets, classValues=choosen_cv)
     #ass = SetSigLearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
-    #ass = PCALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
+    ass = PCALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
     #ass = GSALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
 
     ar = defaultdict(list)

File obiGeneSetSig.py

                 genes = [ nm.umatch(gene) for gene in geneset ]
                 genes2 = [ nm2.umatch(gene) for gene in geneset ]
 
-                genes, genes2 = zip(*[ (g,g2) for g,g2 in zip(genes, genes2) if g != None])
+                takegenes = [ i for i,a in enumerate(genes) if a != None ]
+
+                genes = [ genes[i] for i in takegenes ]
+                genes2 = [ genes2[i] for i in takegenes ]
 
                 domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
                 datao = Orange.data.Table(domain, data)
 
         return attributes 
 
+from obiAssess import pca
+
+class PCA(GeneSetTrans):
+    """Gene-set transformation that builds one PCA-based feature per gene set.
+
+    For every gene set, PCA is fitted (via ``pca``) on the columns of the
+    training data whose names match the genes of the set. The value of the
+    resulting feature for an example is the dot product of the example's
+    mean-centered values for those genes with the first eigenvector
+    returned by ``pca``.
+    """
+
+    def build_features(self, data, gene_sets):
+        """Return a list of continuous features, one for each gene set.
+
+        data      -- table the PCA models are fitted on.
+        gene_sets -- iterable of gene sets; each must provide ``genes`` and
+                     is named with ``str(gs)``.
+
+        Each returned feature has ``get_value_from`` set to a function that
+        computes the projection for a single example.
+        """
+
+        attributes = []
+
+        # _mat_ni is called with the same data for every gene set,
+        # so it is enough to call it once before the loop.
+        nm, name_ind = self._mat_ni(data)
+
+        for gs in gene_sets:
+            at = Orange.feature.Continuous(name=str(gs))
+
+            geneset = list(gs.genes)
+
+            genes = [ nm.umatch(gene) for gene in geneset ]
+            # Positions (within geneset) of the genes that were matched in
+            # data; the same positions are reused when transforming examples.
+            takegenes = [ i for i,a in enumerate(genes) if a is not None ]
+            genes = [ genes[i] for i in takegenes ]
+
+            # NOTE(review): if no gene of the set matches, the domain below
+            # has no attributes; it is not visible here how pca handles
+            # that - confirm that such sets are filtered out by the caller.
+            domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var)
+
+            datao = Orange.data.Table(domain, data)
+
+            # Only the eigenvectors and the mean are needed for projection.
+            evals, evect, xmean = pca(datao)
+            ev0 = evect[0] #this is a row in a matrix
+
+            # Per-gene-set values are bound as default arguments so that
+            # every feature keeps its own model (closures bind late).
+            def t(ex, w, geneset=geneset, ev0=ev0, xmean=xmean, takegenes=takegenes):
+                """Project example ex on ev0; w is not used."""
+
+                nm2, name_ind2 = self._mat_ni(ex)
+                genes2 = [ nm2.umatch(gene) for gene in geneset ]
+                genes2 = [ genes2[i] for i in takegenes ]
+
+                #values of the example in the same order as in the fitted domain
+                arr = numpy.array([ vou(ex, gn, name_ind2) for gn in genes2 ])
+
+                arr = arr - xmean # same input transformation - a row in a matrix
+
+                return numpy.dot(arr, ev0)
+
+            at.get_value_from = t
+            attributes.append(at)
+
+        return attributes 
+
+
 class SimpleFun(GeneSetTrans):
 
     def build_features(self, data, gene_sets):
        self.fn = numpy.median
        super(Median, self).__init__(**kwargs)
 
+
+
 if __name__ == "__main__":
 
     data = Orange.data.Table("iris")
         ol =  sorted(ar.items())
         print '\n'.join([ a + ": " +str(b) for a,b in ol])
 
-    ass = Median(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)
+    ass = PCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)
     ar = to_old_dic(ass.domain, data[:5])
     pp2(ar)