Marko Toplak avatar Marko Toplak committed bcd52ea

Classes for storing gene set data moved to Orange.bio.geneset. obiGeneSets adapted to make new pickled classes.

Comments (0)

Files changed (6)

_bioinformatics/geneset.py

+def only_option(a):
+    """
+    Return the single element of the sized collection `a`.
+
+    Raises ValueError (a subclass of Exception, so existing handlers
+    keep working) when `a` does not contain exactly one element.
+    """
+    if len(a) != 1:
+        # Report the offending size so that failures can be diagnosed.
+        raise ValueError("expected exactly one option, got %d" % len(a))
+    return next(iter(a))
+
+class GenesetRegException(Exception):
+    """
+    Error raised by GeneSets when the collection is empty or when
+    its gene sets do not share a single organism.
+    """
+
+class GeneSet(object):
+    """
+    A single set of genes together with its descriptive attributes:
+    genes (stored as a set), name, id, description, link, organism
+    and hierarchy.
+    """
+
+    def __init__(self, genes=None, name=None, id=None,
+        description=None, link=None, organism=None, hierarchy=None, pair=None):
+        """
+        pair can be (id, listofgenes) - when given, it overrides the
+        id and genes arguments.
+        """
+        # Identity test: "== None" is evaluated element-wise (and is
+        # ambiguous) for array-like gene containers.
+        if genes is None:
+            genes = []
+
+        self.hierarchy = hierarchy
+        self.genes = set(genes)
+        self.name = name
+        self.id = id
+        self.description = description
+        self.link = link
+        self.organism = organism
+
+        if pair:
+            self.id, self.genes = pair[0], set(pair[1])
+
+    # The following functions are needed for sets of gene sets to be
+    # able to assess equality.
+
+    def __hash__(self):
+        # Only id and name take part in the hash. Gene sets that compare
+        # equal have identical attribute dicts, hence equal hashes.
+        return hash((self.id, self.name))
+
+    def __eq__(self, other):
+        """ Gene sets are equal when all their attributes are equal. """
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def size(self):
+        """ Returns the number of genes in the gene set. """
+        return len(self.genes)
+
+    def cname(self, source=True, name=True):
+        """
+        Constructs a gene set name with the hierarchy: the id,
+        prefixed by "[ hierarchy ] " if source is set and followed
+        by the name if name is set.
+        """
+        oname = self.id
+        if source and self.hierarchy:
+            oname = "[ " + ", ".join(self.hierarchy) + " ] " + oname
+        if name and self.name:
+            oname = oname + " " + self.name
+        return oname
+
+    def to_odict(self, source=True, name=True):
+        """
+        Returns a pair (id, listofgenes), like in old format. The id
+        is built with cname(); the genes are returned as the stored set.
+        """
+        return self.cname(source=source, name=name), self.genes
+
+    def __repr__(self):
+        return "GeneSet(" + ", ".join( [
+            "id=" + str(self.id),
+            "genes=" + str(self.genes),
+            "name=" + str(self.name),
+            "link=" + str(self.link),
+            "hierarchy=" + str(self.hierarchy)
+        ]) + ")"
+
+class GeneSetIDException(Exception):
+    """
+    Exception type for gene set id problems.
+    NOTE(review): nothing in the visible code raises it - confirm usage.
+    """
+
+class GeneSets(set):
+    """
+    A set of GeneSet objects with helpers for the old dictionary
+    format, organisms and hierarchies.
+    """
+
+    def __init__(self, input=None):
+        """
+        input can be another GeneSets, gene sets in the old dict
+        format (id -> genes), or an iterable of GeneSet objects
+        and/or (id, genes) pairs.
+        """
+        # Identity test against None and no len() call, so that
+        # generators and other unsized iterables are accepted too.
+        # Updating with an empty input is a no-op.
+        if input is not None:
+            self.update(input)
+
+    def update(self, input):
+        """
+        Adds the gene sets from input (see __init__ for the accepted
+        forms). Nothing is added if any element fails to parse.
+        """
+        if isinstance(input, GeneSets):
+            super(GeneSets, self).update(input)
+            return
+
+        # Old dict format: iterate over its (id, genes) items.
+        if hasattr(input, "items"):
+            elements = input.items()
+        else:
+            elements = input
+
+        prepared_genesets = [] #parse them all before adding,
+                               #so that it fails on error
+        for element in elements:
+            if isinstance(element, GeneSet):
+                prepared_genesets.append(element)
+            else:
+                i, g = element
+                prepared_genesets.append(GeneSet(pair=(i, g)))
+
+        for g in prepared_genesets:
+            self.add(g)
+
+    def to_odict(self):
+        """ Return gene sets in old dictionary format. """
+        return dict(gs.to_odict() for gs in self)
+
+    def set_hierarchy(self, hierarchy):
+        """ Sets hierarchy for all gene sets """
+        for gs in self:
+            gs.hierarchy = hierarchy
+
+    def __repr__(self):
+        return "GeneSets(" + set.__repr__(self) + ")"
+
+    def common_org(self):
+        """
+        Returns the common organism. Raises GenesetRegException if
+        there are no gene sets or if they do not share one organism.
+        """
+        if len(self) == 0:
+            raise GenesetRegException("Empty gene sets.")
+
+        organisms = set(a.organism for a in self)
+
+        try:
+            return only_option(organisms)
+        except Exception:
+            # Not a bare except: KeyboardInterrupt and SystemExit
+            # must not be reported as an organism mismatch.
+            raise GenesetRegException("multiple organisms: " + str(organisms))
+
+    def hierarchies(self):
+        """
+        Returns all hierarchies. Raises GenesetRegException if there
+        are no gene sets.
+        """
+        if len(self) == 0:
+            raise GenesetRegException("Empty gene sets.")
+        return set(a.hierarchy for a in self)
+
+    def common_hierarchy(self):
+        """ Returns the longest prefix shared by all hierarchies. """
+        hierarchies = self.hierarchies()
+
+        def prefixes(length):
+            return set(h[:length] for h in hierarchies)
+
+        length = max(len(h) for h in hierarchies)
+        # Shorten the prefix until all hierarchies agree. Stopping at 0
+        # prevents an endless loop when the prefixes never converge;
+        # only_option then raises instead.
+        while length > 0 and len(prefixes(length)) > 1:
+            length -= 1
+        return only_option(prefixes(length))
+
+    def split_by_hierarchy(self):
+        """
+        Splits gene sets by hierarchies. Returns a list of GeneSets,
+        one for each distinct hierarchy.
+        """
+        hd = dict((h, GeneSets()) for h in self.hierarchies())
+        for gs in self:
+            hd[gs.hierarchy].add(gs)
+        # A real list on Python 3 as well, where values() is a view.
+        return list(hd.values())
+

_bioinformatics/obiGO.py

         file = "gene_association.%s.tar.gz" % code
 
         path = os.path.join(orngServerFiles.localpath("GO"), file)
+
         if not os.path.exists(path):
+            sf = orngServerFiles.ServerFiles()
+            available = sf.listfiles("GO")
+            if file not in available:
+                from . import obiKEGG2
+                raise obiKEGG2.OrganismNotFoundError(org + str(code))
             orngServerFiles.download("GO", file)
+
         return cls(path, ontology=ontology, genematcher=genematcher, progressCallback=progressCallback)
     
     def ParseFile(self, file, progressCallback=None):

_bioinformatics/obiGeneSets.py

 def nth(l,n):
     return [ a[n] for a in l]
 
-class GeneSet(object):
-
-    def __init__(self, genes=None, name=None, id=None, \
-        description=None, link=None, organism=None, hierarchy=None, pair=None):
-        """
-        pair can be (id, listofgenes) - it is used before anything else.
-        """
-        if genes == None:
-            genes = []
-
-        self.hierarchy = hierarchy
-        self.genes = set(genes)
-        self.name = name
-        self.id = id
-        self.description = description
-        self.link = link
-        self.organism = organism
-
-        if pair:
-            self.id, self.genes = pair[0], set(pair[1])
-
-    """
-    the following functions are needed for sets of gene sets to be able
-    to assess equality
-    """
-
-    def __hash__(self):
-        return self.id.__hash__() + self.name.__hash__()
-
-    def __eq__(self, other):
-        if isinstance(other, self.__class__):
-            return self.__dict__ == other.__dict__
-        else:
-            return False
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def size(self):
-        return len(self.genes)
-
-    def cname(self, source=True, name=True):
-        """ Constructs a gene set name with the hierarchy. """
-        oname = self.id
-        if source and self.hierarchy:
-            oname = "[ " + ", ".join(self.hierarchy) + " ] " + oname
-        if name and self.name:
-            oname = oname + " " + self.name
-        return oname
-
-    def to_odict(self, source=True, name=True):
-        """
-        Returns a pair (id, listofgenes), like in old format.
-        """
-        return self.cname(source=source, name=name), self.genes
-
-    def __repr__(self):
-        return "GeneSet(" + ", ".join( [ 
-            "id=" + str(self.id),
-            "genes=" + str(self.genes),
-            "name=" + str(self.name),
-            "link=" + str(self.link),
-            "hierarchy=" + str(self.hierarchy)
-        ]) + ")"
-
-class GeneSetIDException(Exception):
-    pass
-
-class GeneSets(set):
-    
-    def __init__(self, input=None):
-        """
-        odict are genesets in old dict format.
-        gs are genesets in new format
-        """
-        if input != None and len(input) > 0:
-            self.update(input)
-
-    def update(self, input):
-        from . import obiGeneSets
-        if isinstance(input, obiGeneSets.GeneSets):
-            super(GeneSets, self).update(input)
-        elif hasattr(input, "items"):
-            for i, g in input.items():
-                self.add(obiGeneSets.GeneSet(pair=(i, g)))
-        else:
-            for i in input:
-                if isinstance(i, obiGeneSets.GeneSet):
-                    self.add(i)
-                else:
-                    i, g = i
-                    self.add(obiGeneSets.GeneSet(pair=(i, g)))
-
-    def to_odict(self):
-        """ Return gene sets in old dictionary format. """
-        return dict(gs.to_odict() for gs in self)
-
-    def set_hierarchy(self, hierarchy):
-        """ Sets hierarchy for all gene sets """
-        for gs in self:
-            gs.hierarchy = hierarchy
-
-    def __repr__(self):
-        return "GeneSets(" + set.__repr__(self) + ")"
-
-    def common_org(self):
-        """ Returns the common organism. """
-        if len(self) == 0:
-            raise GenesetRegException("Empty gene sets.")
-
-        organisms = set(a.organism for a in self)
-
-        try:
-            return only_option(organisms)
-        except:
-            raise GenesetRegException("multiple organisms: " + str(organisms))
-
-    def hierarchies(self):
-        """ Returns all hierachies """
-        if len(self) == 0:
-            raise GenesetRegException("Empty gene sets.")
-        return set(a.hierarchy for a in self)
-
-    def common_hierarchy(self):
-        hierarchies = self.hierarchies()
-
-        def common_hierarchy1(hierarchies):
-            def hier(l): return set(map(lambda x: x[:currentl], hierarchies))
-            currentl = max(map(len, hierarchies))
-            while len(hier(currentl)) > 1:
-                currentl -= 1
-            return only_option(hier(currentl))
-
-        return common_hierarchy1(hierarchies)
-
-    def split_by_hierarchy(self):
-        """ Splits gene sets by hierarchies. """
-        from . import obiGeneSets
-        hd = dict((h,obiGeneSets.GeneSets()) for h in  self.hierarchies())
-        for gs in self:
-            hd[gs.hierarchy].add(gs)
-        return hd.values()
+from Orange.bio.geneset import GeneSet, GeneSets, GenesetRegException
 
 def goGeneSets(org):
     """Returns gene sets from GO."""
-    from . import obiGeneSets
-
     ontology = obiGO.Ontology()
     annotations = obiGO.Annotations(org, ontology=ontology)
 
         genes = annotations.GetAllGenes(termn)
         hier = ("GO", term.namespace)
         if len(genes) > 0:
-            gs = obiGeneSets.GeneSet(id=termn, name=term.name, genes=genes, hierarchy=hier, organism=org, link=link_fmt % termn) 
+            gs = GeneSet(id=termn, name=term.name, genes=genes, hierarchy=hier, organism=org, link=link_fmt % termn) 
             genesets.append(gs)
 
-    return obiGeneSets.GeneSets(genesets)
+    return GeneSets(genesets)
 
 def keggGeneSets(org):
     """
     Returns gene sets from KEGG pathways.
     """
-    from . import obiKEGG2 as obiKEGG, obiGeneSets
+    from . import obiKEGG2 as obiKEGG
     
     kegg = obiKEGG.KEGGOrganism(org)
 
     genesets = []
     for id in kegg.pathways():
+        print id
         pway = obiKEGG.KEGGPathway(id)
         hier = ("KEGG","pathways")
-        gs = obiGeneSets.GeneSet(id=id,
+        gs = GeneSet(id=id,
                                  name=pway.title,
                                  genes=kegg.get_genes_by_pathway(id),
                                  hierarchy=hier,
                                  link=pway.link)
         genesets.append(gs)
 
-    return obiGeneSets.GeneSets(genesets)
+    return GeneSets(genesets)
 
 def omimGeneSets():
     """
     
     go_sets = obimiRNA.filter_GO(go_sets, annotations, treshold=treshold)
     
-    from . import obiGeneSets as gs
     link_fmt = "http://amigo.geneontology.org/cgi-bin/amigo/term-details.cgi?term=%s"
-    gsets = [gs.GeneSet(id=key, name=ontology[key].name, genes=value, hierarchy=("miRNA", "go_sets",),
+    gsets = [GeneSet(id=key, name=ontology[key].name, genes=value, hierarchy=("miRNA", "go_sets",),
                         organism=org, link=link_fmt % key) for key, value in go_sets.items()]
-    gset = gs.GeneSets(gsets)
+    gset = GeneSets(gsets)
     return gset
 
 
     For now the description is skipped.
     """
 
-    from . import obiGeneSets
-
     def hline(s):
         tabs = [tab.strip() for tab in s.split("\t")]
-        return obiGeneSets.GeneSet(id=tabs[0], description=tabs[1],
+        return GeneSet(id=tabs[0], description=tabs[1],
                                    hierarchy=(name,), genes=tabs[2:])
 
     def handleNELines(s, fn):
         lines = (l.strip() for l in s.splitlines())
         return [fn(l) for l in lines if l]
 
-    return obiGeneSets.GeneSets(handleNELines(contents, hline))
+    return GeneSets(handleNELines(contents, hline))
 
 """
 We have multiple paths for gene set data:
     """ Returns gene set availability index for some folder. """
     pass
 
-class GenesetRegException(Exception): pass
-
-def only_option(a):
-    if len(a) == 1:
-        return list(a)[0]
-    else:
-        raise Exception()
-
 def filename(hierarchy, organism):
     """ Obtain a filename for given hierarchy and organism. """
     return "gs_" + "_._".join(hierarchy + \
     finally:
         os.remove(tfname)
 
-def register_local(genesets):
+def _register_local(genesets):
     """ Registers using the common hierarchy and organism. """
     pth = local_path()
 
     return fn
 
 def pickle_temp(obj):
-    """ Pickle a file to a temporary file returns its name """
+    """ Pickle a file to a temporary file and returns its name """
     fd,tfname = tempfile.mkstemp()
     os.close(fd)
     f = open(tfname, 'wb')
     f.close()
     return tfname
 
-def register_serverfiles(genesets, serverFiles):
+def _register_serverfiles(genesets, serverFiles):
     """ Registers using the common hierarchy and organism. """
     org = genesets.common_org()
     hierarchy = genesets.common_hierarchy()
     Hierarchy is induced from the gene set names.
     """
     if serverFiles == None:
-        register_local(genesets)
+        _register_local(genesets)
     else:
-        register_serverfiles(genesets, serverFiles)
+        _register_serverfiles(genesets, serverFiles)
 
 def build_hierarchy_dict(files):
     hierd = defaultdict(list)
 
 def load_local(hierarchy, organism):
     files = map(lambda x: x[:2], list_local())
-
     hierd = build_hierarchy_dict(files)
 
     out = GeneSets()
 def load_serverfiles(hierarchy, organism):
     files = map(lambda x: x[:2], list_serverfiles())
     hierd = build_hierarchy_dict(files)
-
     out = GeneSets()
     for (h, o) in [ files[i] for i in hierd[(hierarchy, organism)]]:
         fname = orngServerFiles.localpath_download(sfdomain, 
     return out
 
 def load(hierarchy, organism):
-    """ First try to load from the local registred folder, then
-    from the server files. """
+    """ First try to load from the local registred folder. If the file
+    is not available, load it from the server files. """
     ret = load_local(hierarchy, organism)
-    ret.update(load_serverfiles(hierarchy, organism))
+    if len(ret) == 0:
+        ret.update(load_serverfiles(hierarchy, organism))
     return ret
 
 def collections(*args):
     Collection can either be a tuple (hierarchy, orgranism), where
     hierarchy is a tuple also.
     """
-    from . import obiGeneSets
-    result = obiGeneSets.GeneSets()
+    result = GeneSets()
 
     for collection in args:
         try:
     """
     orngServerFiles.update_local_files()
 
-    genesetsfn = [ keggGeneSets, goGeneSets, miRNAGeneSets]
+    from . import obiKEGG2 as obiKEGG
+
+    #genesetsfn = [ keggGeneSets, goGeneSets, miRNAGeneSets]
+    genesetsfn = [ goGeneSets, miRNAGeneSets]
     organisms = obiTaxonomy.common_taxids()
     for fn in genesetsfn:
         for org in organisms:
-        #for org in [ "9606" ]:
-            print "Uploading ORG", org, fn
             try:
+                print "Uploading ORG", org, fn
                 genesets = fn(org).split_by_hierarchy()
                 for gs in genesets:
                     print "registering", gs.common_hierarchy()
-                    register_serverfiles(gs, rsf)
+                    #register(gs, rsf) #server files
+                    register(gs)
                     print "successful", gs.common_hierarchy()
-            except Exception, e:
-                print "Not successful"
+            except (obiKEGG.OrganismNotFoundError, GenesetRegException):
+                print "organism not found", org
+
 
 if __name__ == "__main__":
     rsf = orngServerFiles.ServerFiles(username=sys.argv[1], password=sys.argv[2])
     upload_genesets(rsf)
+    pass

_bioinformatics/obiKEGG2/__init__.py

 from .. import obiProb
 from Orange.utils import deprecated_keywords, deprecated_attribute
 
+class OrganismNotFoundError(Exception): pass
+
 class Organism(object):
     def __init__(self, org, genematcher=None):
         self.org_code = self.organism_name_search(org)
         try:
             return genome[name].entry_key
         except KeyError:
-            raise ValueError("Organism with name='%s' not found in KEGG." % name)
+            raise OrganismNotFoundError(name)
         
     @classmethod
     def organism_version(cls, name):

_bioinformatics/obiKEGG2/caching.py

     
     def __exit__(self, *args):
         pass
-    
 
 class Sqlite3Store(Store, UserDict.DictMixin):
     def __init__(self, filename):
         self.filename = filename
         self.con = sqlite3.connect(filename)
+        #self.con = sqlite3.connect(":memory:")
         self.con.execute("""
         CREATE TABLE IF NOT EXISTS cache 
             (key TEXT UNIQUE,
         
     for png_filename in glob.glob(os.path.join(path, "*.png")):
         os.remove(png_filename)
-    
+    

_bioinformatics/widgets/OWKEGGPathwayBrowser.py

 
 from __future__ import absolute_import, with_statement 
 
+if __name__ == "__main__": 
+    __package__ = "Orange.bio.widgets"
+
 import sys
 from collections import defaultdict
 import webbrowser
 
+import Orange
 import orange
 from Orange.orng import orngMisc, orngServerFiles
 from Orange.orng.orngDataCaching import data_hints
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.