Commits

Marko Toplak committed 87468c6

Removed use of the old obiKEGG module. Hacked a REST version for gene aliases in obiKEGG2.

Comments (0)

Files changed (5)

_bioinformatics/obiGene.py

         self.ignore_case = ignore_case
         self.filename() # test if valid filename can be built
 
+from Orange.utils import ConsoleProgressBar
+
 class MatcherAliasesKEGG(MatcherAliasesPickled):
 
     def _organism_name(self, organism):
-        from . import obiKEGG 
-        return obiKEGG.organism_name_search(organism)
+        from . import obiKEGG2
+        return obiKEGG2.organism_name_search(organism)
 
     def create_aliases(self):
         organism = self._organism_name(self.organism)
-        from . import obiKEGG
-        org = obiKEGG.KEGGOrganism(self.organism, genematcher=GMDirect())
-        genes = org.genes
-        osets = [ set([name]) | set(b.alt_names) for 
-                name,b in genes.items() ]
+        from . import obiKEGG2
+        org = obiKEGG2.KEGGOrganism(self.organism, genematcher=GMDirect())
+        osets = org._gm_gene_aliases()
         return osets
 
     def create_aliases_version(self):
-        from . import obiKEGG
-        return obiKEGG.KEGGOrganism.organism_version(self.organism) + ".1"
+        from . import obiKEGG2
+        return obiKEGG2.KEGGOrganism.organism_version(self.organism) + ".1"
 
     def filename(self):
-        return "kegg_" + self._organism_name(self.organism) 
+        return "kegg_2_" + self._organism_name(self.organism) 
 
     def __init__(self, organism, ignore_case=True):
         self.organism = organism

_bioinformatics/obiKEGG2/__init__.py

 """
 from __future__ import absolute_import
 
-
+import urllib2
 import os, sys
 from collections import defaultdict
 
     def enzymes(self, genes=None):
         raise NotImplementedError()
     
+    def _gm_gene_aliases(self):
+        """
+        Return a list of sets of equal genes. This is a hack for
+        gene matchers to work faster until the whole implementation
+        transitions to REST. Does not include links to DBs.
+
+        Fetches http://rest.kegg.jp/list/<org_code> and builds one set
+        per non-empty line of the response.
+        """
+        s1 = urllib2.urlopen("http://rest.kegg.jp/list/%s" % self.org_code).read()
+        out = []
+        for l in s1.split('\n'):
+            if l:
+                # The first tab-separated field always goes into the set.
+                tabs = l.split("\t")
+                cset = set([tabs[0]])
+                try:
+                    # Second field: keep the text before the first ";" and
+                    # split it on ", " -- presumably the gene's alternative
+                    # names; confirm against the KEGG REST "list" format.
+                    rest = tabs[1].split(";")[0]
+                    cset |= set(rest.split(", "))
+                except:
+                    # NOTE(review): a bare except also hides errors other
+                    # than a missing second field (the expected IndexError).
+                    pass #do not crash if a line does not conform
+                out.append(cset)
+        return out
+
     def get_enriched_pathways(self, genes, reference=None, prob=obiProb.Binomial(), callback=None):
         """ Return a dictionary with enriched pathways ids as keys
         and (list_of_genes, p_value, num_of_reference_genes) tuples 
 def create_gene_sets():
     pass
 
-from .. import obiGene
-from Orange.utils import ConsoleProgressBar
-
-class MatcherAliasesKEGG(obiGene.MatcherAliasesPickled):
-    DOMAIN = "KEGG"
-    VERSION = "v3.0"
-    def create_aliases(self):
-        import cPickle
-        files = set(serverfiles.ServerFiles().listfiles(self.DOMAIN))
-        ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle"
-        if ids_filename in files:
-            filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)
-            
-            aliases = cPickle.load(open(filename, "rb"))
-        else:
-            pb = ConsoleProgressBar("Retriving KEGG ids:")
-            kegg_org = KEGGOrganism(self.organism)
-            genes = kegg_org.genes
-            genes.pre_cache(progress_callback=pb.set_state)
-            aliases = []
-            for key, entry in genes.iteritems():
-                aliases.append(set([key]) | set(entry.alt_names))
-            filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)
-            cPickle.dump(aliases, open(filename, "wb"))
-            
-        return aliases
-    
-    def filename(self):
-        return "kegg3_" + self.organism
-    
-    def aliases_path(self):
-        ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle"
-        return serverfiles.localpath(self.DOMAIN, ids_filename)
-    
-    def create_aliases_version(self):
-        files = set(serverfiles.listfiles(self.DOMAIN))
-        ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle"
-        if ids_filename in files:
-            version = serverfiles.info(self.DOMAIN, ids_filename)["datetime"]
-        else:
-            kegg_org = KEGGOrganism(self.organism)
-            genes = kegg_org.genes
-            version = genes.info.release
-        return version
-        
-    def __init__(self, organism, **kwargs):
-        self.organism = organism
-        sf = serverfiles.ServerFiles()
-        files = set(sf.listfiles(self.DOMAIN))
-        ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle"
-        if ids_filename in files:
-            serverfiles.update(self.DOMAIN, ids_filename)
-            
-        obiGene.MatcherAliasesPickled.__init__(self, **kwargs)
-
 def main():
     KEGGGenome()
     import doctest

_bioinformatics/obiTaxonomy.py

 def to_taxid(code, mapTo=None):
     """ See if the code is a valid code in any database and return a set of its taxids.
     """
-    from . import obiKEGG, obiGO
+    from . import obiKEGG2, obiGO
     results = set()
-    for test in [obiKEGG.to_taxid, obiGO.to_taxid]:
+    for test in [obiKEGG2.to_taxid, obiGO.to_taxid]:
         try:
             r = test(code)
             if type(r) == set:

_bioinformatics/obimiRNA.py

 from Orange.orng import orngServerFiles as osf
 import statc
 
-from . import obiGene as ge, obiGO as go, obiKEGG as kg, obiProb as op, obiTaxonomy
+from . import obiGene as ge, obiGO as go, obiKEGG2 as kg, obiProb as op, obiTaxonomy
 
 mirnafile = osf.localpath_download('miRNA','miRNA.txt')
 premirnafile = osf.localpath_download('miRNA','premiRNA.txt')
     for m in mirna_list:
         kegg_genes = [keggNames[g] for g in get_info(m).targets.split(',') if g in keggNames]
         if enrichment:
-            mirnaPathways[m] = [path_id for path_id,(geneList,p,geneNum) in org.get_enriched_pathways_by_genes(kegg_genes).items() if p < pVal]
+            mirnaPathways[m] = [path_id for path_id,(geneList,p,geneNum) in org.get_enriched_pathways(kegg_genes).items() if p < pVal]
         else:
             paths = filter(None,[list(org.get_pathways_by_genes([k])) for k in kegg_genes])                   
             if paths:

_bioinformatics/widgets/OWKEGGPathwayBrowser.py

 from Orange.OrangeWidgets import OWGUI
 from Orange.OrangeWidgets.OWWidget import *
 
-from .. import obiKEGG
 from .. import obiTaxonomy
 from .. import obiKEGG2 as obiKEGG
 from .. import obiGeneSets
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.