Commits

Marko Toplak committed b366563

Started obiGeneSetSig (new interface for gene set signatures) with Minca's SetSig.

Comments (0)

Files changed (2)

             minSize=minSize, maxSize=maxSize, minPart=minPart, classValues=classValues)
         return SetSig(learndata=data, genesets=gsetsnum)
 
-class SetSigLearner2(object):
-
-    def __call__(self, data, matcher, geneSets, minSize=3, maxSize=1000, minPart=0.1, classValues=None):
-        data, oknames, gsetsnum = selectGenesetsData(data, matcher, geneSets, \
-            minSize=minSize, maxSize=maxSize, minPart=minPart, classValues=classValues)
-
-        def setSig_example_geneset(ex, data):
-            """ ex contains only selected genes """
-
-            distances = [ [], [] ]    
-
-            def pearsonr(v1, v2):
-                try:
-                    return statc.pearsonr(v1, v2)[0]
-                except:
-                    return numpy.corrcoef([v1, v2])[0,1]
-
-            def pearson(ex1, ex2):
-                attrs = range(len(ex1.domain.attributes))
-                vals1 = [ ex1[i].value for i in attrs ]
-                vals2 = [ ex2[i].value for i in attrs ]
-                return pearsonr(vals1, vals2)
-
-            def ttest(ex1, ex2):
-                try:
-                    return stats.lttest_ind(ex1, ex2)[0]
-                except:
-                    return 0.0
-            
-            #maps class value to its index
-            classValueMap = dict( [ (val,i) for i,val in enumerate(data.domain.classVar.values) ])
-         
-            #create distances to all learning data - save or other class
-            for c in data:
-                distances[classValueMap[c[-1].value]].append(pearson(c, ex))
-
-            return ttest(distances[0], distances[1])
-
-        attributes = []
-
-        for name, gs in gsetsnum.items(): #for each geneset
-            #for each gene set: take the attribute subset and work on the attribute subset only
-            #only select the subset of genes from the learning data
-            print name, gs
-            at = Orange.feature.Continuous(name=name.id)
-
-            def t(ex, w, gs=gs, ldata=data):
-                domain = orange.Domain([ldata.domain.attributes[ai] for ai in gs], ldata.domain.classVar)
-                datao = orange.ExampleTable(domain, ldata)
-                example = orange.Example(domain, ex) #domains need to be the same
-                return setSig_example_geneset(example, datao)
-         
-            at.get_value_from = t
-            attributes.append(at)
-       
-        newdomain = Orange.data.Domain(attributes, data.domain.class_var)
-
-        return newdomain
-
 if __name__ == "__main__":
 
     data = Orange.data.Table("iris")
     #ass = PCALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
     #ass = GSALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
 
-    def to_old_dic(d, data):
-        ar = defaultdict(list)
-        for ex1 in data:
-            ex = d(ex1)
-            for a,x in zip(d.attributes, ex):
-                ar[a.name].append(x.value)
-        return ar
-
     ar = defaultdict(list)
     for d in (list(ldata) + list(tdata))[:5]:
         for a,b in ass(d).items():
         ol =  sorted(ar.items())
         print '\n'.join([ a.id + ": " +str(b) for a,b in ol])
 
-    def pp2(ar):
-        ol =  sorted(ar.items())
-        print '\n'.join([ a + ": " +str(b) for a,b in ol])
+    pp1(ar)
 
-    pp1(ar)
-    ass = SetSigLearner2()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
-    ar = to_old_dic(ass, data[:5])
-    pp2(ar)
+import Orange
+import obiAssess
+import Orange.misc
+import orange
+import obiGeneSets
+import obiGene
+import numpy
+from collections import defaultdict
+import statc
+import stats
+
+class SetSig(object):
+
+    __new__ = Orange.misc._orange__new__(object)
+
+    def __init__(self, matcher, geneSets, minSize=3, maxSize=1000, minPart=0.1, classValues=None):
+        self.matcher = matcher
+        self.geneSets = geneSets
+        self.minSize = minSize
+        self.maxSize = maxSize
+        self.minPart = minPart
+        self.classValues = classValues
+
+    def __call__(self, data, weight_id=None):
+        data, oknames, gsetsnum = obiAssess.selectGenesetsData(data, 
+            self.matcher, self.geneSets,
+            minSize=self.minSize, maxSize=self.maxSize, 
+            minPart=self.minPart, classValues=self.classValues)
+
+        def setSig_example_geneset(ex, data):
+            """ ex contains only selected genes """
+
+            distances = [ [], [] ]    
+
+            def pearsonr(v1, v2):
+                try:
+                    return statc.pearsonr(v1, v2)[0]
+                except:
+                    return numpy.corrcoef([v1, v2])[0,1]
+
+            def pearson(ex1, ex2):
+                attrs = range(len(ex1.domain.attributes))
+                vals1 = [ ex1[i].value for i in attrs ]
+                vals2 = [ ex2[i].value for i in attrs ]
+                return pearsonr(vals1, vals2)
+
+            def ttest(ex1, ex2):
+                try:
+                    return stats.lttest_ind(ex1, ex2)[0]
+                except:
+                    return 0.0
+            
+            #maps class value to its index
+            classValueMap = dict( [ (val,i) for i,val in enumerate(data.domain.classVar.values) ])
+         
+            #create distances to all learning data - save or other class
+            for c in data:
+                distances[classValueMap[c[-1].value]].append(pearson(c, ex))
+
+            return ttest(distances[0], distances[1])
+
+        attributes = []
+
+        for name, gs in gsetsnum.items(): #for each geneset
+            #for each gene set: take the attribute subset and work on the attribute subset only
+            #only select the subset of genes from the learning data
+            at = Orange.feature.Continuous(name=name.id)
+
+            def t(ex, w, gs=gs, ldata=data):
+                domain = orange.Domain([ldata.domain.attributes[ai] for ai in gs], ldata.domain.classVar)
+                datao = orange.ExampleTable(domain, ldata)
+                example = orange.Example(domain, ex) #domains need to be the same
+                return setSig_example_geneset(example, datao)
+         
+            at.get_value_from = t
+            attributes.append(at)
+       
+        newdomain = Orange.data.Domain(attributes, data.domain.class_var)
+        return Orange.data.Table(newdomain, data)
+
+if __name__ == "__main__":
+
+    data = Orange.data.Table("iris")
+    gsets = obiGeneSets.collections({
+        "ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'],
+        "f3": ['sepal length', 'sepal width', 'petal length'],
+        "l3": ['sepal width', 'petal length', 'petal width'],
+        })
+
+    fp = 120
+    ldata = orange.ExampleTable(data.domain, data[:fp])
+    tdata = orange.ExampleTable(data.domain, data[fp:])
+
+    matcher = obiGene.matcher([])
+
+    choosen_cv = ["Iris-setosa", "Iris-versicolor"]
+    def to_old_dic(d, data):
+        ar = defaultdict(list)
+        for ex1 in data:
+            ex = d(ex1)
+            for a,x in zip(d.attributes, ex):
+                ar[a.name].append(x.value)
+        return ar
+
+    def pp2(ar):
+        ol =  sorted(ar.items())
+        print '\n'.join([ a + ": " +str(b) for a,b in ol])
+
+    ass = SetSig(ldata, matcher=matcher, geneSets=gsets, classValues=choosen_cv, minPart=0.0)
+    print ass.domain
+    ar = to_old_dic(ass.domain, data[:5])
+    pp2(ar)