Source

orange-bioinformatics / server_update / updateGO.py

Marko Toplak b9f3957 


Marko Toplak 921b378 

Marko Toplak 6e88615 

Marko Toplak 921b378 
Marko Toplak b9f3957 


Marko Toplak 921b378 
Marko Toplak b9f3957 


















Marko Toplak 921b378 
Marko Toplak b9f3957 
Marko Toplak 921b378 
Marko Toplak b9f3957 












































Marko Toplak 921b378 
Flashpoint 43e5f69 
Marko Toplak b9f3957 
Marko Toplak 921b378 
Marko Toplak b9f3957 



Marko Toplak 921b378 
Marko Toplak b9f3957 







Marko Toplak 921b378 
Marko Toplak b9f3957 
Marko Toplak 921b378 
##!interval=7
##!contact=ales.erjavec@fri.uni-lj.si

from common import *

from Orange.bio import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate

import urllib2, tarfile

from collections import defaultdict

tmpDir = os.path.join(environ.buffer_dir, "tmp_GO")
try:
    os.mkdir(tmpDir)
except Exception:
    pass

u = obiGO.Update(local_database_path = tmpDir)

uncompressedSize = lambda filename: sum(info.size for info in tarfile.open(filename).getmembers())

def pp(*args, **kw): print args, kw

if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
    u.UpdateOntology()
    filename = os.path.join(tmpDir, "gene_ontology_edit.obo.tar.gz")
    ##load the ontology to test it
    o = obiGO.Ontology(filename)
    del o
    ##upload the ontology
    print "Uploading gene_ontology_edit.obo.tar.gz"
    sf_server.upload("GO", "gene_ontology_edit.obo.tar.gz", filename, title = "Gene Ontology (GO)",
                       tags=["gene", "ontology", "GO", "essential", "#uncompressed:%i" % uncompressedSize(filename), "#version:%i" % obiGO.Ontology.version])
    sf_server.unprotect("GO", "gene_ontology_edit.obo.tar.gz")

#from obiGeneMatch import _dbOrgMap
#
#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])

#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]

orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}

#commonOrgs = dict([(obiGO.from_taxid(orgMap.get(id, id)).pop(), orgMap.get(id, id)) for id in obiTaxonomy.common_taxids() if orgMap.get(id, id) != None])
commonOrgs = dict([(obiGO.from_taxid(id), id) for id in obiTaxonomy.common_taxids() if obiGO.from_taxid(id) != None])

essentialOrgs = [obiGO.from_taxid(id) for id in obiTaxonomy.essential_taxids()]

exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]

updatedTaxonomy = defaultdict(set)

for org in u.GetAvailableOrganisms():
    if org in exclude or org not in commonOrgs:
        continue
    
    if u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
        u.UpdateAnnotation(org)
        filename = os.path.join(tmpDir, "gene_association." + org + ".tar.gz")
        
        ## Load the annotations to test them and collect all taxon ids from them
        a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
        taxons = set([ann.taxon for ann in a.annotations])
        for taxId in [t.split(":")[-1] for t in taxons if "|" not in t]: ## exclude taxons with cardinality 2
            updatedTaxonomy[taxId].add(org)
        del a
        ## Upload the annotation
#        if org in _dbOrgMap:
#            orgName = keggOrgNames[_dbOrgMap[org]].split("(")[0].strip()
#        elif org in additionalNames:
#            orgName = additionalNames[org]
#        else:
#            orgName = org
        orgName = obiTaxonomy.name(commonOrgs[org])
#            print "unknown organism name translation for:", org
        print "Uploading", "gene_association." + org + ".tar.gz"
        sf_server.upload("GO", "gene_association." + org + ".tar.gz", filename, title = "GO Annotations for " + orgName,
                           tags=["gene", "annotation", "ontology", "GO", orgName + obiTaxonomy.shortname(org), "#uncompressed:%i" % uncompressedSize(filename),
                                 "#organism:"+orgName, "#version:%i" % obiGO.Annotations.version] + (["essential"] if org in essentialOrgs else []))
        sf_server.unprotect("GO", "gene_association." + org + ".tar.gz")
        
try:
    import cPickle
#    tax = cPickle.load(open(os.path.join(tmpDir, "taxonomy.pickle"), "rb"))
    tax = cPickle.load(open(sf_local.localpath_download("GO", "taxonomy.pickle"), "rb"))
except Exception:
    tax = {}

## Upload taxonomy if any differences in the updated taxonomy
if any(tax.get(key, set()) != updatedTaxonomy.get(key, set()) for key in set(updatedTaxonomy)):
    tax.update(updatedTaxonomy)
    cPickle.dump(tax, open(os.path.join(tmpDir, "taxonomy.pickle"), "wb"))
    print "Uploading", "taxonomy.pickle"
    sf_server.upload("GO", "taxonomy.pickle", os.path.join(tmpDir, "taxonomy.pickle"), title="GO taxon IDs",
                       tags = ["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
    sf_server.unprotect("GO", "taxonomy.pickle")
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.