Marko Toplak avatar Marko Toplak committed 58718fc

obiGeneSetSig: CORGs were built one gene too large (fixed). SetSig now has an option to check whether two examples are exactly alike and, if so, to discard the distance between them.

Comments (0)

Files changed (1)

_bioinformatics/obiGeneSetSig.py

             # The domain has the transformer that is build on all samples,
             # while the transformed data table uses cross-validation
             # internally
-            folds = 5
-            cvi = Orange.data.sample.SubsetIndicesCV(data, folds)
+            if self.cv == True:
+                cvi = Orange.data.sample.SubsetIndicesCV(data, 5)
+            elif self.cv != False:
+                cvi = self.cv(data)
             data_cv = [ [] for _ in range(len(data)) ]
-            for f in range(folds):
+            for f in set(cvi):
                 learn = data.select(cvi, f, negate=True)
                 test = data.select(cvi, f)
                 lf = self.build_features(learn, gene_sets)
 
         return attributes
    
-def setSig_example_geneset(ex, data, no_unknowns):
+def setSig_example_geneset(ex, data, no_unknowns, check_same=False):
     """ Gets learning data and example with the same domain, both
     containing only genes from the gene set. """
 
         vals1 = ex1.native(0)[:-1]
         vals2 = ex2.native(0)[:-1]
 
+        if check_same and vals1 == vals2:
+            return 10 #they are the same
+
         #leaves undefined elements out
         if not no_unknowns:
             common = [ True if v1 != "?" and v2 != "?" else False \
  
     #create distances to all learning data - save or other class
     for c in data:
-        distances[classValueMap[c[-1].value]].append(pearson(c, ex))
+        p = pearson(c, ex)
+        if p != 10:
+             distances[classValueMap[c[-1].value]].append(pearson(c, ex))
 
     return ttest(distances[0], distances[1])
 
 
     def __init__(self, **kwargs):
         self.no_unknowns = kwargs.pop("no_unknowns", False)
+        self.check_same = kwargs.pop("check_same", False)
         super(SetSig, self).__init__(**kwargs)
 
     def build_feature(self, data, gs):
             exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ]
             example = Orange.data.Instance(domain, exvalues)
 
-            return setSig_example_geneset(example, datao, self.no_unknowns) #only this one is setsig specific
+            return setSig_example_geneset(example, datao, self.no_unknowns, check_same=self.check_same) #only this one is setsig specific
      
         at.get_value_from = t
         return at
             ex = Orange.data.Instance(domain, exvalues)
 
             return self._use_par(ex, constructt)
-            
+        
         at.get_value_from = t
+        at.dbg = constructt #for debugging
+        
         return at
 
 class PLS(ParametrizedTransformation):
     bg = 1
     for a in range(2, len(sortedinds)+1):
         tg = S(sortedinds[:a])
-        if tg > g:
+        if tg > g: #improvement
             g = tg
             bg = a
         else:
             break
         
-    return sortedinds[:a]
+    return sortedinds[:bg] #FIXED - one too many was taken
 
 class CORGs(ParametrizedTransformation):
     """
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.