Commits

Marko Toplak committed 0234560

SetSig2 (which returns a domain) returns the same results as SetSig.

  • Participants
  • Parent commits 950a2fb

Comments (0)

Files changed (1)

             minSize=minSize, maxSize=maxSize, minPart=minPart, classValues=classValues)
         return SetSig(learndata=data, genesets=gsetsnum)
 
+class SetSigLearner2(object):
+
+    def __call__(self, data, matcher, geneSets, minSize=3, maxSize=1000, minPart=0.1, classValues=None):
+        data, oknames, gsetsnum = selectGenesetsData(data, matcher, geneSets, \
+            minSize=minSize, maxSize=maxSize, minPart=minPart, classValues=classValues)
+
+        def setSig_example_geneset(ex, data):
+            """ ex contains only selected genes """
+
+            distances = [ [], [] ]    
+
+            def pearsonr(v1, v2):
+                try:
+                    return statc.pearsonr(v1, v2)[0]
+                except:
+                    return numpy.corrcoef([v1, v2])[0,1]
+
+            def pearson(ex1, ex2):
+                attrs = range(len(ex1.domain.attributes))
+                vals1 = [ ex1[i].value for i in attrs ]
+                vals2 = [ ex2[i].value for i in attrs ]
+                return pearsonr(vals1, vals2)
+
+            def ttest(ex1, ex2):
+                try:
+                    return stats.lttest_ind(ex1, ex2)[0]
+                except:
+                    return 0.0
+            
+            #maps class value to its index
+            classValueMap = dict( [ (val,i) for i,val in enumerate(data.domain.classVar.values) ])
+         
+            #create distances to all learning data - save or other class
+            for c in data:
+                distances[classValueMap[c[-1].value]].append(pearson(c, ex))
+
+            return ttest(distances[0], distances[1])
+
+        attributes = []
+
+        for name, gs in gsetsnum.items(): #for each geneset
+            #for each gene set: take the attribute subset and work on the attribute subset only
+            #only select the subset of genes from the learning data
+            print name, gs
+            at = Orange.feature.Continuous(name=name.id)
+
+            def t(ex, w, gs=gs, ldata=data):
+                domain = orange.Domain([ldata.domain.attributes[ai] for ai in gs], ldata.domain.classVar)
+                datao = orange.ExampleTable(domain, ldata)
+                example = orange.Example(domain, ex) #domains need to be the same
+                return setSig_example_geneset(example, datao)
+         
+            at.get_value_from = t
+            attributes.append(at)
+       
+        newdomain = Orange.data.Domain(attributes, data.domain.class_var)
+
+        return newdomain
+
 if __name__ == "__main__":
 
     data = Orange.data.Table("iris")
     choosen_cv = ["Iris-setosa", "Iris-versicolor"]
     #ass = AssessLearner()(data, matcher, gsets, rankingf=AT_loessLearner())
     #ass = MeanLearner()(data, matcher, gsets, default=False))
-    ass = PLSLearner()(data, matcher, gsets, classValues=choosen_cv)
-    #ass = SetSigOLDLearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
-    #ass = SetSigLearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
+    #ass = PLSLearner()(data, matcher, gsets, classValues=choosen_cv)
+    ass = SetSigLearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
     #ass = PCALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
     #ass = GSALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
 
+    def to_old_dic(d, data):
+        ar = defaultdict(list)
+        for ex1 in data:
+            ex = d(ex1)
+            for a,x in zip(d.attributes, ex):
+                ar[a.name].append(x.value)
+        return ar
+
     ar = defaultdict(list)
     for d in (list(ldata) + list(tdata))[:5]:
         for a,b in ass(d).items():
             ar[a].append(b)
 
-    ol =  sorted(ar.items())
-    print '\n'.join([ a.id + ": " +str(b) for a,b in ol])
+    def pp1(ar):
+        ol =  sorted(ar.items())
+        print '\n'.join([ a.id + ": " +str(b) for a,b in ol])
 
+    def pp2(ar):
+        ol =  sorted(ar.items())
+        print '\n'.join([ a + ": " +str(b) for a,b in ol])
+
+    pp1(ar)
+    ass = SetSigLearner2()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0)
+    ar = to_old_dic(ass, data[:5])
+    pp2(ar)