Commits

Marko Toplak  committed b9f3957

Copied server update scripts from Orange source.

  • Participants
  • Parent commits 4c4266f

Comments (0)

Files changed (14)

File server_update/updateAll.py

+# Master update driver: runs each update* script below as a subprocess,
+# honoring the "##!interval=<days>" directive inside each script, and
+# e-mails the captured log to the "##!contact=" address on failure.
+import sys, os
+import subprocess
+import time
+
+from getopt import getopt
+from datetime import datetime
+
+# Server credentials from the command line, forwarded to every child script.
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+age = datetime.now() - datetime.fromtimestamp(0) ## age of the universe
+
+files = ["updateTaxonomy.py", "updateGO.py", "updateMeSH.py", "updateNCBI_geneinfo.py",
+         "updateHomoloGene.py", "updateDictyBase.py", "updatePPI.py"]
+
+for filename in files:
+    # Parse "##!key=value" directives from the script's source
+    # (e.g. ##!interval=7, ##!contact=someone@example.org).
+    options = dict([line[3:].split("=") for line in open(filename).readlines() if line.startswith("##!")])
+    # Run the script only every `interval` days (days-since-epoch modulo).
+    if age.days % int(options.get("interval", "7")) == 0:
+        output = open("log.txt", "w")
+        process = subprocess.Popen([sys.executable, filename, "-u", username, "-p", password], stdout=output, stderr=output)
+        # NOTE(review): polling busy-wait; process.wait() would block without it.
+        while process.poll() == None:
+            time.sleep(3)
+#        print "/sw/bin/python2.5 %s -u %s -p %s" % (filename, username, password)
+#        print os.system("/sw/bin/python2.5 %s -u %s -p %s" % (filename, username, password))
+        output.close()
+        # Non-zero exit status: print the log and mail it to the contact address.
+        if process.poll() != 0:
+            content = open("log.txt", "r").read()
+            print content
+            toaddr = options.get("contact", "ales.erjavec@fri.uni-lj.si")
+            fromaddr = "orange@fri.uni-lj.si"
+            msg = "From: %s\r\nTo: %s\r\nSubject: Exception in server update script - %s\r\n\r\n" % (fromaddr, toaddr, filename) + content
+            try:
+                import smtplib
+                s = smtplib.SMTP('212.235.188.18', 25)
+                s.sendmail(fromaddr, toaddr, msg)
+                s.quit()
+            except Exception, ex:
+                print "Failed to send error report due to:", ex
+                

File server_update/updateDictyBase.py

+##interval:7
+# NOTE(review): updateAll.py only recognizes "##!key=value" directives;
+# this "##interval:7" line is ignored there, so the default interval applies.
+# Upload the pickled dictyBase gene-alias data to the Orange server.
+import orngServerFiles, orngEnviron
+import sys, os
+from gzip import GzipFile
+from getopt import getopt
+import tempfile
+from obiDicty import DictyBase
+import shutil
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+tmpdir = tempfile.mkdtemp("dictybase")
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+print username, password
+
+# Serialize the dictyBase alias data into a temporary file for upload.
+base = DictyBase.pickle_data()
+filename = os.path.join(tmpdir, "tf")
+
+f = open(filename, 'wb')
+f.write(base)
+f.close()
+
+dom = DictyBase.domain
+fn = DictyBase.filename
+
+sf = orngServerFiles.ServerFiles(username, password)
+
+# The domain may already exist on the server; ignore the error in that case.
+try:
+    sf.create_domain('dictybase')
+except:
+    pass
+
+print filename
+
+sf.upload(dom, fn, filename, title="dictyBase gene aliases",
+    tags=DictyBase.tags)
+# Make the uploaded file publicly downloadable.
+sf.unprotect(dom, fn)
+
+shutil.rmtree(tmpdir)

File server_update/updateGEO.py

+##!interval=7
+##!contact=blaz.zupan@fri.uni-lj.si
+
+# Maintain the GEO GDS index (gds_info.pickled) on the Orange server:
+# scan the NCBI FTP GDS directory and add any new or recently modified
+# data sets for commonly used organisms.
+import obiTaxonomy
+import sys
+import orngServerFiles
+from getopt import getopt
+import cPickle
+import re
+import ftplib
+import time
+from datetime import datetime
+import obiGEO
+import os
+
+DOMAIN = "GEO"
+GDS_INFO = "gds_info.pickled"
+TITLE = "Gene Expression Omnibus data sets information"
+TAGS = ["Gene Expression Omnibus", "data sets", "GEO", "GDS"]
+
+FTP_NCBI = "ftp.ncbi.nih.gov"
+NCBI_DIR = "pub/geo/DATA/SOFT/GDS"
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+server = orngServerFiles.ServerFiles(username, password)
+
+force_update = False
+# check if the DOMAIN/files are already on the server, else, create
+if DOMAIN not in server.listdomains():
+    # DOMAIN does not exist on the server, create it
+    server.create_domain(DOMAIN)
+
+localfile = orngServerFiles.localpath(DOMAIN, GDS_INFO)
+
+def _create_path_for_file(target): #KEGG uses this!
+    # Ensure the parent directory of `target` exists (ignores "already
+    # exists" and other OSErrors).
+    try:
+        os.makedirs(os.path.dirname(target))
+    except OSError:
+        pass
+
+path = orngServerFiles.localpath(DOMAIN)
+if GDS_INFO in server.listfiles(DOMAIN):
+    # An index already exists on the server: refresh the local copy and
+    # remember its timestamp so only newer GDS entries are (re)fetched.
+    print "Updating info file from server ..."
+    orngServerFiles.update(DOMAIN, GDS_INFO)
+    info = orngServerFiles.info(DOMAIN, GDS_INFO)
+    gds_info_datetime = datetime.strptime(info["datetime"], "%Y-%m-%d %H:%M:%S.%f")
+    
+else:
+    # First run: upload an empty (info, excluded) index and treat every
+    # entry on the FTP site as new (epoch timestamp).
+    print "Creating a local path..."
+    _create_path_for_file(localfile)
+    f = file(localfile, "wb")
+    cPickle.dump(({}, {}), f, True)
+    f.close()
+    server.upload(DOMAIN, GDS_INFO, localfile, TITLE, TAGS)
+    server.protect(DOMAIN, GDS_INFO, "0")
+    gds_info_datetime = datetime.fromtimestamp(0)
+    
+
+
+# read the information from the local file
+gds_info, excluded = cPickle.load(file(localfile, "rb"))
+# excluded should be a dictionary (GEO_ID, TAX_ID)
+
+# if need to refresh the data base
+if force_update:
+    gds_info, excluded = ({}, {})
+
+# list of common organisms may have changed, rescan excluded list
+excluded = dict([(id, taxid) for id, taxid in excluded.items() 
+                 if taxid not in obiTaxonomy.common_taxids()])
+excluded.update([(id, info["taxid"]) for id, info in gds_info.items() 
+                 if info["taxid"] not in obiTaxonomy.common_taxids()])
+gds_info = dict([(id, info) for id, info in gds_info.items() 
+                 if info["taxid"] in obiTaxonomy.common_taxids()])
+
+# get the list of GDS files from NCBI directory
+
+
+print "Retreiving ftp directory ..."
+ftp = ftplib.FTP(FTP_NCBI)
+ftp.login()
+ftp.cwd(NCBI_DIR)
+dirlist = []
+ftp.dir(dirlist.append)
+
+# NOTE(review): duplicate import; datetime is already imported above.
+from datetime import datetime
+def modified(line):
+    # Parse the modification date out of one FTP LIST line.  Listings show
+    # either "Mon DD HH:MM" (current year implied) or "Mon DD YYYY";
+    # falls back to "now" when neither form parses.
+    line = line.split()
+    try:
+        date  = " ".join(line[5: 8] + [str(datetime.today().year)])
+        return datetime.strptime(date, "%b %d %H:%M %Y")
+    except ValueError:
+        pass
+    try:
+        date = " ".join(line[5: 8])
+        return datetime.strptime(date, "%b %d %Y")
+    except ValueError:
+        print "Warning: could not retrieve modified date for\n%s" % line
+    return datetime.today()
+    
+m = re.compile("GDS[0-9]*")
+gds_names = [(m.search(d).group(0), modified(d)) for d in dirlist if m.search(d)]
+#gds_names = [name for name, time_m in gds_names if time_t > gds_info_datetime]
+#gds_names = [m.search(d).group(0) for d in dirlist if m.search(d)]
+#gds_names = [name for name in gds_names if not(name in gds_info or name in excluded)]
+# Keep GDS entries that are new (neither indexed nor excluded) or were
+# modified since the server index was last written.
+gds_names = [name for name, time_m in gds_names if not(name in gds_info or name in excluded) or time_m > gds_info_datetime]
+skipped = []
+
+if len(gds_names):
+    for count, gds_name in enumerate(gds_names):
+        print "%3d of %3d -- Adding %s ..." % (count+1, len(gds_names), gds_name)
+        try:
+            time.sleep(1)
+            gds = obiGEO.GDS(gds_name)
+            if gds.info["taxid"] not in obiTaxonomy.common_taxids():
+                # Uncommon organism: remember it so it is not re-fetched.
+                excluded[gds_name] = gds.info["taxid"]
+                print "... excluded (%s)." % gds.info["sample_organism"]
+            else:
+                gds_info.update({gds_name: gds.info})
+                # Persist after every addition so a crash loses little work.
+                f = file(localfile, "wb")
+                cPickle.dump((gds_info, excluded), f, True)
+                f.close()
+                print "... added."
+        except Exception, ex:
+            print "... skipped (error):", str(ex)
+            skipped.append(gds_name)
+    
+    print "Updating %s:%s on the server ..." % (DOMAIN, GDS_INFO)
+ 
+    server.upload(DOMAIN, GDS_INFO, localfile, TITLE, TAGS)
+    server.protect(DOMAIN, GDS_INFO, "0")
+else:
+    print "No update required."
+
+print
+print "GDS data sets: %d" % len(gds_info)
+print "Organisms:"
+organisms = [info["sample_organism"] for info in gds_info.values()]
+for org in set(organisms):
+    print "  %s (%d)" % (org, organisms.count(org))

File server_update/updateGO.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Update Gene Ontology files on the Orange server: the ontology itself,
+# per-organism annotation archives, and a taxon-id -> annotation map.
+import obiGO, obiTaxonomy, obiGene, obiGenomicsUpdate, orngEnviron, orngServerFiles
+import os, sys, shutil, urllib2, tarfile
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+from collections import defaultdict
+
+tmpDir = os.path.join(orngEnviron.bufferDir, "tmp_GO")
+try:
+    os.mkdir(tmpDir)
+except Exception:
+    pass
+
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+u = obiGO.Update(local_database_path = tmpDir)
+
+# Total size of a tar archive's members, recorded as an "#uncompressed" tag.
+uncompressedSize = lambda filename: sum(info.size for info in tarfile.open(filename).getmembers())
+
+# Debug helper: print positional and keyword arguments.
+def pp(*args, **kw): print args, kw
+
+if u.IsUpdatable(obiGO.Update.UpdateOntology, ()):
+    u.UpdateOntology()
+    filename = os.path.join(tmpDir, "gene_ontology_edit.obo.tar.gz")
+    ##load the ontology to test it
+    o = obiGO.Ontology(filename)
+    del o
+    ##upload the ontology
+    print "Uploading gene_ontology_edit.obo.tar.gz"
+    serverFiles.upload("GO", "gene_ontology_edit.obo.tar.gz", filename, title = "Gene Ontology (GO)",
+                       tags=["gene", "ontology", "GO", "essential", "#uncompressed:%i" % uncompressedSize(filename), "#version:%i" % obiGO.Ontology.version])
+    serverFiles.unprotect("GO", "gene_ontology_edit.obo.tar.gz")
+
+#from obiGeneMatch import _dbOrgMap
+#
+#exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
+#lines = [line.split("\t") for line in urllib2.urlopen("ftp://ftp.genome.jp/pub/kegg/genes/taxonomy").readlines() if not line.startswith("#")]
+#keggOrgNames = dict([(line[1].strip(), line[-1][:-5].strip().replace("(", "").replace(")", "") if line[-1].endswith("(EST)\n") else line[-1].strip()) for line in lines if len(line)>1])
+
+#additionalNames = {"goa_arabidopsis":"Arabidopsis thaliana", "sgn":"Solanaceae", "PAMGO_Oomycetes":"Oomycete"}
+#essentialOrgs = ["goa_human", "sgd", "mgi", "dictyBase"]
+
+# Manual taxid remappings (None = organism dropped); appears unused by the
+# active commonOrgs line below -- kept for reference.
+orgMap = {"352472":"44689", "562":"83333", "3055":None, "7955":None, "11103":None, "2104":None, "4754":None, "31033":None, "8355":None, "4577":None}
+
+#commonOrgs = dict([(obiGO.from_taxid(orgMap.get(id, id)).pop(), orgMap.get(id, id)) for id in obiTaxonomy.common_taxids() if orgMap.get(id, id) != None])
+# GO organism code -> taxonomy id, for all common organisms known to GO.
+commonOrgs = dict([(obiGO.from_taxid(id), id) for id in obiTaxonomy.common_taxids() if obiGO.from_taxid(id) != None])
+
+essentialOrgs = [obiGO.from_taxid(id) for id in obiTaxonomy.essential_taxids()]
+
+exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
+
+# organism code -> set of taxon ids seen in its uploaded annotations.
+updatedTaxonomy = defaultdict(set)
+# NOTE(review): duplicate import; obiTaxonomy is already imported above.
+import obiTaxonomy
+
+for org in u.GetAvailableOrganisms():
+    if org in exclude or org not in commonOrgs:
+        continue
+    
+    if u.IsUpdatable(obiGO.Update.UpdateAnnotation, (org,)):
+        u.UpdateAnnotation(org)
+        filename = os.path.join(tmpDir, "gene_association." + org + ".tar.gz")
+        
+        ## Load the annotations to test them and collect all taxon ids from them
+        a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
+        taxons = set([ann.taxon for ann in a.annotations])
+        for taxId in [t.split(":")[-1] for t in taxons if "|" not in t]: ## exclude taxons with cardinality 2
+            updatedTaxonomy[taxId].add(org)
+        del a
+        ## Upload the annotation
+#        if org in _dbOrgMap:
+#            orgName = keggOrgNames[_dbOrgMap[org]].split("(")[0].strip()
+#        elif org in additionalNames:
+#            orgName = additionalNames[org]
+#        else:
+#            orgName = org
+        orgName = obiTaxonomy.name(commonOrgs[org])
+#            print "unknown organism name translation for:", org
+        print "Uploading", "gene_association." + org + ".tar.gz"
+        serverFiles.upload("GO", "gene_association." + org + ".tar.gz", filename, title = "GO Annotations for " + orgName,
+                           tags=["gene", "annotation", "ontology", "GO", orgName, "#uncompressed:%i" % uncompressedSize(filename),
+                                 "#organism:"+orgName, "#version:%i" % obiGO.Annotations.version] + (["essential"] if org in essentialOrgs else []))
+        serverFiles.unprotect("GO", "gene_association." + org + ".tar.gz")
+        
+# Merge the freshly collected taxon map with the one already on the server.
+try:
+    import cPickle
+#    tax = cPickle.load(open(os.path.join(tmpDir, "taxonomy.pickle"), "rb"))
+    tax = cPickle.load(open(orngServerFiles.localpath_download("GO", "taxonomy.pickle"), "rb"))
+except Exception:
+    tax = {}
+
+## Upload taxonomy if any differences in the updated taxonomy
+if any(tax.get(key, set()) != updatedTaxonomy.get(key, set()) for key in set(updatedTaxonomy)):
+    tax.update(updatedTaxonomy)
+    cPickle.dump(tax, open(os.path.join(tmpDir, "taxonomy.pickle"), "wb"))
+    print "Uploading", "taxonomy.pickle"
+    serverFiles.upload("GO", "taxonomy.pickle", os.path.join(tmpDir, "taxonomy.pickle"), title="GO taxon IDs",
+                       tags = ["GO", "taxon", "organism", "essential", "#version:%i" % obiGO.Taxonomy.version])
+    serverFiles.unprotect("GO", "taxonomy.pickle")

File server_update/updateHomoloGene.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Update HomoloGene homolog data and InParanoid ortholog groups on the
+# Orange server.
+import obiHomoloGene
+import orngServerFiles
+
+import orngEnviron
+import os, sys
+import gzip, shutil
+
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+path = os.path.join(orngEnviron.bufferDir, "tmp_HomoloGene")
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+try:
+    os.mkdir(path)
+except OSError:
+    pass
+filename = os.path.join(path, "homologene.data")
+obiHomoloGene.HomoloGene.download_from_NCBI(filename)
+uncompressed = os.stat(filename).st_size
+# NOTE(review): duplicate import; gzip and shutil are imported above.
+import gzip, shutil
+# Gzip the downloaded file for upload.
+f = gzip.open(filename + ".gz", "wb")
+shutil.copyfileobj(open(filename), f)
+f.close()
+
+#serverFiles.create_domain("HomoloGene")
+print "Uploading homologene.data"
+serverFiles.upload("HomoloGene", "homologene.data", filename + ".gz", title="HomoloGene",
+                   tags=["genes", "homologs", "HomoloGene", "#compression:gz",
+                         "#uncompressed:%i" % uncompressed, 
+                         "#version:%i" % obiHomoloGene.HomoloGene.VERSION])
+serverFiles.unprotect("HomoloGene", "homologene.data")
+
+####
+# InParanioid Orthologs update
+####
+
+# taxid -> InParanoid organism-name code used in the download URLs below.
+organisms = {"3702": "A.thaliana",
+            "9913": "B.taurus",
+            "6239": "C.elegans",
+            "3055": "C.reinhardtii",
+            "7955": "D.rerio",
+            "352472": "D.discoideum",
+            "7227":  "D.melanogaster",
+            "562":  "E.coliK12",
+            #"11103", # Hepatitis C virus
+            "9606": "H.sapiens",
+            "10090": "M.musculus",
+            #"2104",  # Mycoplasma pneumoniae
+            "4530": "O.sativa",
+            "5833": "P.falciparum",
+            #"4754",  # Pneumocystis carinii
+            "10116": "R.norvegicus",
+            "4932": "S.cerevisiae",
+            "4896":  "S.pombe",
+            "31033": "T.rubripes"
+            #"8355",  # Xenopus laevis
+            #"4577",  # Zea mays
+            }
+
+import urllib2
+combined_orthologs = []
+        
+# Infinite generator of consecutive integer strings: "0", "1", "2", ...
+def gen(i=0):
+    while True:
+        yield str(i)
+        i += 1
+
+from collections import defaultdict
+# Assigns a fresh sequential id the first time an (org1, org2, cluster)
+# key is seen; returns the same id on subsequent lookups.
+unique_cluster_id = defaultdict(gen().next)
+         
+organisms = sorted(organisms.values())
+
+import time
+# Download each pairwise orthoXML file and merge all clusters.
+for i, org1 in enumerate(organisms):
+    for org2 in organisms[i+1:]:
+        print "http://inparanoid.sbc.su.se/download/current/orthoXML/InParanoid.%s-%s.orthoXML" % (org1, org2)
+        try:
+            stream = urllib2.urlopen("http://inparanoid.sbc.su.se/download/current/orthoXML/InParanoid.%s-%s.orthoXML" % (org1, org2))
+        except Exception, ex:
+            print ex
+            continue
+        orthologs = obiHomoloGene._parseOrthoXML(stream)
+        orthologs = [(unique_cluster_id[org1, org2, clid], taxid, gene_symbol) for (clid, taxid , gene_symbol) in orthologs]
+        
+        combined_orthologs.extend(orthologs)
+        # Be polite to the InParanoid server between downloads.
+        time.sleep(10)
+        
+#import cPickle
+#cPickle.dump(combined_orthologs, open("orthologs.pck", "wb"))
+#combined_orthologs = cPickle.load(open("orthologs.pck"))
+
+import sqlite3
+
+# Store the merged ortholog groups in an sqlite database for upload.
+filename  = os.path.join(path, "InParanoid.sqlite")
+con = sqlite3.connect(filename)
+con.execute("drop table if exists homologs")
+con.execute("create table homologs (groupid text, taxid text, geneid text)")
+con.execute("create index group_index on homologs(groupid)")
+con.execute("create index geneid_index on homologs(geneid)")
+con.executemany("insert into homologs values (?, ?, ?)", combined_orthologs)
+con.commit()
+
+
+
+# NOTE(review): `file` shadows the builtin and is never closed; the sqlite
+# connection above is also left open (commit() was called, so data is safe).
+file = open(filename, "rb")
+gzfile = gzip.GzipFile(filename + ".gz", "wb")
+shutil.copyfileobj(file, gzfile)
+gzfile.close()
+
+print "Uploading InParanoid.sqlite"
+serverFiles.upload("HomoloGene", "InParanoid.sqlite", filename + ".gz", title="InParanoid: Eukaryotic Ortholog Groups",
+                   tags=["genes", "homologs", "orthologs", "InParanoid", "#compression:gz",
+                         "#uncompressed:%i" % os.stat(filename).st_size,
+                         "#version:%i" % obiHomoloGene.InParanoid.VERSION])
+serverFiles.unprotect("HomoloGene", "InParanoid.sqlite")
+        
+        
+            

File server_update/updateKEGG.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Update KEGG pathway gene sets on the Orange server for every common
+# organism known to the KEGG genome database.
+import obiKEGG2, obiGene, obiTaxonomy, obiGeneSets
+import os, sys, tarfile, urllib2, shutil, cPickle
+from getopt import getopt
+
+from Orange.misc import serverfiles
+from Orange.utils import ConsoleProgressBar
+
+DOMAIN = "KEGG"
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+sf = serverfiles.ServerFiles(username, password)
+
+genome = obiKEGG2.KEGGGenome()
+common = genome.common_organisms()
+
+# Map KEGG-side taxids back to the taxonomy ids used elsewhere in Orange.
+rev_taxmap = dict([(v, k) for k, v in genome.TAXID_MAP.items()])
+
+
+for org in common:
+    
+    #####################
+    # Create gene aliases
+    #####################
+    
+#    genes = obiKEGG2.KEGGGenes(org)
+#    
+#    pb = ConsoleProgressBar("Retriving KEGG ids for %r:" % org)
+#    genes.pre_cache(progress_callback=pb.set_state)
+#    aliases = []
+#    for key, entry in genes.iteritems():
+#        aliases.append(set([key]) | set(entry.alt_names))
+#    pb.finish()
+#    
+#    taxid = obiKEGG2.to_taxid(org)
+#    ids_filename = "kegg_gene_id_aliases_" + taxid + ".pickle"
+#    filename = serverfiles.localpath(DOMAIN, ids_filename)
+#    
+#    cPickle.dump(aliases, open(filename, "wb"))
+#    
+#    print "Uploading", ids_filename
+#    sf.upload(DOMAIN, ids_filename, filename,
+#              "KEGG Gene id aliases",
+#              tags=["KEGG", "genes", "aliases", 
+#                    "#version:%s" % obiKEGG2.MatcherAliasesKEGG.VERSION
+#                    ],
+#              )
+#    sf.unprotect(DOMAIN, ids_filename)
+    
+    ##########################
+    # Create pathway gene sets
+    ##########################
+    
+    organism = obiKEGG2.KEGGOrganism(org)
+    ge = genome[org]
+    
+    taxid = rev_taxmap.get(ge.taxid, ge.taxid)
+    gene_sets = obiGeneSets.keggGeneSets(taxid)
+    
+    print "Uploading pathway gene sets for", taxid, "(%s)" % org
+    obiGeneSets.register_serverfiles(gene_sets, sf)
+    

File server_update/updateMeSH.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Download the MeSH ASCII ontology, convert it to the tab-separated
+# "mesh-ontology.dat" format (heading \t tree-numbers \t scope-note)
+# and upload it to the Orange server.
+from urllib import urlopen
+import orngServerFiles
+import os, sys
+
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+
+ontology = urlopen("ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/d2008.bin")
+size = int(ontology.info().getheader("Content-Length"))
+rsize = 0
+# Each parsed record is [heading (MH), tree numbers (MN), scope note (MS)].
+results = list()
+for i in ontology:
	rsize += len(i)
	line = i.rstrip("\t\n")
	if(line == "*NEWRECORD"):
		# Start a new record; a previous record without any tree number
		# (missing MeSH id) is overwritten instead of kept.
		if(len(results) > 0 and results[-1][1] == []): # we skip nodes with missing mesh id
			results[-1] = ["",[],"No description."]
		else:
			results.append(["",[],"No description."])	
	parts = line.split(" = ")
	if(len(parts) == 2 and len(results)>0):
		if(parts[0] == "MH"):
			results[-1][0] = parts[1].strip("\t ") 

		if(parts[0] == "MN"):
			results[-1][1].append(parts[1].strip("\t "))
		if(parts[0] == "MS"):
			results[-1][2] = parts[1].strip("\t ")
+ontology.close()
+
+output = file('mesh-ontology.dat', 'w')
+
+# Write one line per record: heading, ';'-joined tree numbers, scope note.
+for i in results:
	print i[0] + "\t"
	output.write(i[0] + "\t")
	g=len(i[1])			
	for k in i[1]:
		g -= 1
		if(g > 0):
			output.write(k + ";")
		else:
			output.write(k + "\t" + i[2] + "\n")
+output.close()
+print "Ontology downloaded."
+
+
+
+
+ordinary = orngServerFiles.ServerFiles()
+authenticated = orngServerFiles.ServerFiles(username, password)
+
+authenticated.upload('MeSH', 'mesh-ontology.dat', 'mesh-ontology.dat', title="MeSH ontology", tags=['MeSH', 'ontology', 'orngMeSH'])
+#authenticated.upload('MeSH', 'cid-annotation.dat', 'cid-annotation.dat', title="Annotation for chemicals (CIDs)", tags =['CID','MeSH','orngMeSH','annotation'])
+
+authenticated.unprotect('MeSH', 'mesh-ontology.dat')
+os.remove('mesh-ontology.dat')
+print "Ontology uploaded to server."

File server_update/updateNCBI_geneinfo.py

+##interval:7
+# NOTE(review): updateAll.py parses "##!key=value" lines; this
+# "##interval:7" form is ignored there, so the default interval applies.
+# Update per-organism NCBI gene_info and gene_history files on the server.
+import obiGene, obiTaxonomy
+import orngServerFiles, orngEnviron
+import sys, os
+from gzip import GzipFile
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+tmpdir = os.path.join(orngEnviron.bufferDir, "tmp_NCBIGene_info")
+try:
+    os.mkdir(tmpdir)
+except Exception, ex:
+    pass
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+gene_info_filename = os.path.join(tmpdir, "gene_info")
+gene_history_filename = os.path.join(tmpdir, "gene_history")
+
+obiGene.NCBIGeneInfo.get_geneinfo_from_ncbi(gene_info_filename)
+obiGene.NCBIGeneInfo.get_gene_history_from_ncbi(gene_history_filename)
+
+info = open(gene_info_filename, "rb")
+hist = open(gene_history_filename, "rb")
+
+taxids = obiGene.NCBIGeneInfo.common_taxids()
+essential = obiGene.NCBIGeneInfo.essential_taxids()
+
+# Bucket gene_info lines by the leading taxid column.
+genes = dict([(taxid, []) for taxid in taxids])
+for gi in info:
+    if any(gi.startswith(id + "\t") for id in taxids):
+        genes[gi.split("\t", 1)[0]].append(gi.strip())
+
+# Same bucketing for gene_history lines.
+history = dict([(taxid, []) for taxid in taxids])
+for hi in hist:
+    if any(hi.startswith(id + "\t") for id in taxids): 
+        history[hi.split("\t", 1)[0]].append(hi.strip())
+
+        
+sf = orngServerFiles.ServerFiles(username, password)
+
+# NOTE(review): the loop variable `genes` rebinds the dict being iterated;
+# items() snapshots the pairs so this works, but it is fragile.
+for taxid, genes in genes.items():
+    filename = os.path.join(tmpdir, "gene_info.%s.db" % taxid)
+    f = open(filename, "wb")
+    f.write("\n".join(genes))
+    f.flush()
+    f.close()
+    print "Uploading", filename
+    sf.upload("NCBI_geneinfo", "gene_info.%s.db" % taxid, filename,
+              title = "NCBI gene info for %s" % obiTaxonomy.name(taxid),
+              tags = ["NCBI", "gene info", "gene_names", obiTaxonomy.name(taxid)] + (["essential"] if taxid in essential else []))
+    sf.unprotect("NCBI_geneinfo", "gene_info.%s.db" % taxid)
+    
+    filename = os.path.join(tmpdir, "gene_history.%s.db" % taxid)
+    f = open(filename, "wb")
+    f.write("\n".join(history.get(taxid, "")))
+    f.flush()
+    f.close()
+    print "Uploading", filename
+    sf.upload("NCBI_geneinfo", "gene_history.%s.db" % taxid, filename,
+              title = "NCBI gene history for %s" % obiTaxonomy.name(taxid),
+              tags = ["NCBI", "gene info", "history", "gene_names", obiTaxonomy.name(taxid)] + (["essential"] if taxid in essential else []))
+    sf.unprotect("NCBI_geneinfo", "gene_history.%s.db" % taxid)

File server_update/updateOMIM.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+import obiOMIM
+import orngServerFiles
+
+import orngEnviron
+import os, sys
+
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+path = os.path.join(orngEnviron.bufferDir, "tmp_OMIM")
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+try:
+    os.mkdir(path)
+except OSError:
+    pass
+filename = os.path.join(path, "morbidmap")
+obiOMIM.OMIM.download_from_NCBI(filename)
+
+serverFiles.upload("OMIM", "morbidmap", filename, title="Online Mendelian Inheritance in Man (OMIM)",
+                   tags=["genes", "diseases", "human", "OMIM" "#version:%i" % obiOMIM.OMIM.VERSION])
+serverFiles.unprotect("OMIM", "morbidmap")

File server_update/updatePPI.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Update MIPS (and, when enabled, BioGRID) protein interaction files on
+# the Orange server.
+import obiPPI, orngServerFiles
+import os, sys, shutil, urllib2, tarfile
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+try:
+    os.mkdir(orngServerFiles.localpath("PPI"))
+except OSError:
+    pass
+
+obiPPI.MIPS.download()
+
+# Creating an already-existing domain raises; report and continue.
+try:
+    serverFiles.create_domain("PPI")
+except Exception, ex:
+    print ex
+filename = orngServerFiles.localpath("PPI", "mppi.gz")
+serverFiles.upload("PPI", "allppis.xml", filename, "MIPS Protein interactions",
+                   tags=["protein interaction", "MIPS", "#compression:gz", "#version:%i" % obiPPI.MIPS.VERSION]
+                   )
+serverFiles.unprotect("PPI", "allppis.xml") 
+
+# Disabled branch: BIOGRID-ALL.tab must be placed locally by hand first.
+if False: ## download BIOGRID-ALL manually
+    import gzip
+    filename = orngServerFiles.localpath("PPI", "BIOGRID-ALL.tab")
+    gz = gzip.GzipFile(filename + ".gz", "wb")
+    gz.write(open(filename, "rb").read())
+    gz.close()
+    serverFiles.upload("PPI", "BIOGRID-ALL.tab", filename + ".gz", title="BioGRID Protein interactions", 
+                       tags=["protein interaction", "BioGrid", "#compression:gz", "#version:%i" % obiPPI.BioGRID.VERSION]
+                       )
+    serverFiles.unprotect("PPI", "BIOGRID-ALL.tab")
+

File server_update/updateSTRING.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+import obiPPI, orngServerFiles
+import os, sys, shutil, urllib2, gzip
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+import obiPPI
+
+filename = orngServerFiles.localpath("PPI", obiPPI.STRING.FILENAME)
+
+if os.path.exists(filename):
+    os.remove(filename)
+
+obiPPI.STRING.download_data("v9.0")
+
+gzfile = gzip.GzipFile(filename + ".gz", "wb")
+shutil.copyfileobj(open(filename, "rb"), gzfile)
+
+serverFiles.upload("PPI", obiPPI.STRING.FILENAME, filename + ".gz", 
+                   "STRING Protein interactions (Creative Commons Attribution 3.0 License)",
+                   tags=["protein interaction", "STRING", 
+                         "#compression:gz", "#version:%s" % obiPPI.STRING.VERSION]
+                   )
+serverFiles.unprotect("PPI", obiPPI.STRING.FILENAME)
+
+# The second part
+filename = orngServerFiles.localpath("PPI", obiPPI.STRINGDetailed.FILENAME_DETAILED)
+
+if os.path.exists(filename):
+    os.remove(filename)
+
+obiPPI.STRINGDetailed.download_data("v9.0")
+
+gzfile = gzip.GzipFile(filename + ".gz", "wb")
+shutil.copyfileobj(open(filename, "rb"), gzfile)
+
+serverFiles.upload("PPI", obiPPI.STRINGDetailed.FILENAME_DETAILED, filename + ".gz", 
+                   "STRING Protein interactions (Creative Commons Attribution-Noncommercial-Share Alike 3.0 License)" ,
+                   tags=["protein interaction", "STRING",
+                         "#compression:gz", "#version:%s" % obiPPI.STRINGDetailed.VERSION]
+                   )
+serverFiles.unprotect("PPI", obiPPI.STRINGDetailed.FILENAME_DETAILED)
+    

File server_update/updateTaxonomy.py

+##!interval=7
+##!contact=ales.erjavec@fri.uni-lj.si
+
+# Update the NCBI taxonomy archive on the Orange server.
+import obiTaxonomy
+import orngServerFiles
+
+import orngEnviron
+import os, sys, tarfile
+import socket
+
+from getopt import getopt
+
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+path = os.path.join(orngEnviron.bufferDir, "tmp_Taxonomy")
+serverFiles = orngServerFiles.ServerFiles(username, password)
+u = obiTaxonomy.Update(local_database_path=path)
+
+# Total size of the archive's members, recorded as an "#uncompressed" tag.
+uncompressedSize = lambda filename: sum(info.size for info in tarfile.open(filename).getmembers())
+
+if u.IsUpdatable(obiTaxonomy.Update.UpdateTaxonomy, ()):
+    # Retry the download up to three times on socket timeouts.
+    for i in range(3):
+        try:
+            u.UpdateTaxonomy()
+            break
+        except socket.timeout, ex:
+            print ex
+            pass
+    serverFiles.upload("Taxonomy", "ncbi_taxonomy.tar.gz", os.path.join(path, "ncbi_taxonomy.tar.gz"), title ="NCBI Taxonomy",
+                       tags=["NCBI", "taxonomy", "organism names", "essential", "#uncompressed:%i" % uncompressedSize(os.path.join(path, "ncbi_taxonomy.tar.gz"))])
+    serverFiles.unprotect("Taxonomy", "ncbi_taxonomy.tar.gz")

File server_update/updatemiRNA.py

+
+import urllib
+import re
+import pylab
+import random
+import os
+import math
+import locale
+import gzip
+import StringIO
+import sys
+from getopt import getopt
+import zipfile
+
+import obiTaxonomy as tax
+import orngServerFiles
+import orngEnviron
+
+def fastprint(filename,mode,what):
+    # Write `what` to `filename` using open mode `mode` ('a' is used by
+    # callers for incremental appends) and close the file immediately.
+    # NOTE(review): `file` shadows the builtin of the same name.
+    
+    file = open(filename,mode)
+    file.write(what)
+    file.close()
+    
+
+def sendMail(subject):
+    # Best-effort error notification: e-mail `subject` to the hard-coded
+    # maintainer address via the hard-coded SMTP relay.  Any failure is
+    # only printed, never raised.
+    
+    toaddr = "rsberex@yahoo.it"
+    fromaddr = "orange@fri.uni-lj.si";
+    msg = "From: %s\r\nTo: %s\r\nSubject: %s" % (fromaddr, toaddr, subject)
+    try:
+        import smtplib
+        s = smtplib.SMTP('212.235.188.18', 25)
+        s.sendmail(fromaddr, toaddr, msg)
+        s.quit()
+    except Exception, ex:
+        print "Failed to send error report due to:", ex
+
+        
+def format_checker(content):
+    # Sanity-check downloaded miRBase data: require at least two "ID"
+    # markers in the flattened content.  Returns True when the format
+    # looks valid, otherwise mails an alert and returns False.
+    
+    if len(re.findall('(ID.*?)ID',content.replace('\n',''))):        
+        return True
+    else:
+        sendMail('Uncorrect format of miRBase data-file.')        
+        return False
+
+    
+def get_intoFiles(path, data_webPage):
+    # Split the miRBase flat file (records terminated by "//") into one
+    # "<org>_sections.txt" file per organism prefix under `path`, write
+    # the list of produced files to fileList.txt, and return its path.
+    
+    sections = data_webPage.split('//\n')
+    sections.pop()
+    
+    files = []
+    # NOTE(review): shells out to `rm`; silently ineffective on non-Unix
+    # hosts and when no previous *_sections.txt files exist.
+    os.system('rm %s/*_sections.txt' % path)
+    
+    for s in sections:
+        # Organism code is the prefix of the record ID, e.g. "hsa" in
+        # "hsa-mir-21".
+        org = str(re.findall('ID\s*(\S*)\s*standard;',s.splitlines()[0])[0]).split('-')[0]
+        fastprint(os.path.join(path,'%s_sections.txt' % org),'a',s+'//\n')
+        
+        if not('%s_sections.txt' % org) in files:
+            files.append('%s_sections.txt' % org)
+            
+    content = '\n'.join(list(set(files)))    
+    fastprint(os.path.join(path,'fileList.txt'),'w',content)
+            
+    return os.path.join(path,'fileList.txt')
+    
+            
+        
+def miRNA_info(path,object,org_name):
+    """Parse one per-organism '<org>_sections.txt' miRBase file and write
+    two TAB-separated files under *path*: '<org>_premiRNA.txt' (precursor
+    records) and '<org>_matmiRNA.txt' (mature records with TargetScan
+    targets).  Returns the two file paths as [matmiRNA, premiRNA].
+
+    NOTE(review): reads the module-level TargetScanLib dict; if the
+    TargetScan download failed earlier this raises NameError here.
+    NOTE(review): *org_name* is accepted but never used.
+    """
+    
+    address = os.path.join(path,'%s' % object)
+    # organism prefix, e.g. 'hsa' from 'hsa_sections.txt'
+    prefix = str(re.findall('(\S*)_sections\.txt',object)[0])
+    
+    try:
+        data_webPage = urllib.urlopen(address).read()
+    except IOError:
+        # NOTE(review): data_webPage stays unbound after this message, so
+        # the check below raises NameError instead of aborting cleanly.
+        print "miRNA_info Error: Check the web-address."
+    
+    # NOTE(review): data_webPage is a str; comparing it to [] is always
+    # False, so the intended "empty download" branch never triggers.
+    if data_webPage == []:
+        sendMail('Cannot read %s ' % address)
+    else:
+        # NOTE(review): the format_checker result is ignored -- parsing
+        # continues even when validation fails (a mail is still sent).
+        format_checker(data_webPage)
+            
+        print 'I have read: %s' % address
+        # records end with a '//' line; drop the empty tail element
+        sections = data_webPage.split('//\n')
+        sections.pop()
+        print 'Sections found: ', str(len(sections))
+            
+        num_s = 0
+        
+        ### files to write: headers first, records appended per section
+        fastprint(os.path.join(path,'%s_premiRNA.txt' % prefix),'w','preID'+'\t'+'preACC'+'\t'+'preSQ'+'\t'+'matACCs'+'\t'+'pubIDs'+'\t'+'clusters'+'\t'+'web_addr'+'\n')
+        fastprint(os.path.join(path,'%s_matmiRNA.txt' % prefix),'w','matID'+'\t'+'matACC'+'\t'+'matSQ'+'\t'+'pre_forms'+'\t'+'targets'+'\n')
+        
+        # dictG: mature ID -> [mature ACC, mature sequence slice]
+        # dictP: mature ID -> list of precursor IDs it appears in
+        dictG = {}
+        dictP = {}
+            
+        for s in sections:
+            num_s = num_s+1
+            print 'section: ', num_s, '/', str(len(sections)),
+                            
+            pubIDs = []
+            matIDs = ''
+            matACCs = ''
+            preSQ=[]
+            
+            my_ids =[]
+            my_accs=[]
+            my_locs=[] # if it's [61..81] you have to take from 60 to 81.
+            
+            rows = s.splitlines()
+                
+            # Line-prefix dispatch over the EMBL-like record format; each
+            # branch depends on its regex actually matching ([0] would
+            # raise IndexError on a malformed line).
+            for r in rows:
+                
+                if r[0:2] == 'ID':
+                    preID = str(re.findall('ID\s*(\S*)\s*standard;',r)[0])
+                    print preID
+                        
+                elif r[0:2] == 'AC':
+                    preACC = str(re.findall('AC\s*(\S*);',r)[0])
+                    web_addr = 'http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc=%s' % preACC
+                        
+                elif r[0:2] == 'RX' and not(re.findall('RX\s*PUBMED;\s(\d*).',r)==[]):
+                    pubIDs.append(str(re.findall('RX\s*PUBMED;\s(\d*).',r)[0]))
+                            
+                elif r[0:2]=='FT' and not(re.findall('FT\s*miRNA\s*(\d{1,}\.\.\d{1,})',r)==[]):
+                    loc_mat = str(re.findall('FT\s*miRNA\s*(\d{1,}\.\.\d{1,})',r)[0])
+                        
+                    # NOTE(review): loc_mat is a str here, so the == []
+                    # guard is always True (dead check).
+                    if not(loc_mat==[]):
+                         my_locs.append(loc_mat)
+                
+                elif r[0:2]=='FT' and not(re.findall('FT\s*/accession="(MIMAT[0-9]*)"', r)==[]):
+                     mat_acc = str(re.findall('FT\s*/accession="(MIMAT[0-9]*)"', r)[0])
+                        
+                     if matACCs == '':
+                         matACCs = mat_acc
+                     else:
+                         matACCs = matACCs + ',' + mat_acc
+                            
+                     if not(mat_acc == []):
+                         my_accs.append(mat_acc)    
+                                
+                elif r[0:2]=='FT' and not(re.findall('FT\s*/product="(\S*)"', r)==[]):
+                     mat_id = str(re.findall('FT\s*/product="(\S*)"', r)[0])
+                        
+                     if matIDs == '':
+                         matIDs = mat_id
+                     else:
+                         matIDs = matIDs + ',' + mat_id     
+                        
+                     if not(mat_id == []):
+                         my_ids.append(mat_id)
+                                          
+                elif r[0:2]=='SQ':
+            
+                     preSQ_INFO = str(re.findall('SQ\s*(.*other;)', r)[0])
+                     # latch: every following '  '-prefixed line is sequence
+                     seq = 'on'
+            
+                # NOTE(review): 'seq' is only bound by the SQ branch; a
+                # leading '  ' line before SQ would raise NameError.
+                elif r[0:2]=='  ' and seq == 'on':
+                     preSQ.append(str(re.findall('\s*([a-z\s]*)\s*\d*',r)[0]).replace(' ',''))
+                     
+            ### cluster search: scrape the precursor's miRBase entry page
+            clusters = ''
+            try:
+                mirna_page = urllib.urlopen('http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc=%s' % preACC).read()
+            except IOError:
+                # NOTE(review): mirna_page may stay unbound after this,
+                # making the findall below raise NameError.
+                print "miRNA_info Error: Check the address for the miRNA page."
+                pass
+            
+            clust_check = re.findall('<td class="\S*">(Clustered miRNAs)</td>',mirna_page)
+                
+            if clust_check != [] and str(clust_check[0]) == 'Clustered miRNAs':    
+                 clusters = ','.join(re.findall('<td><a href="/cgi-bin/mirna_entry.pl\?acc=MI\d*">(\S*?)</a></td>',mirna_page))
+                      
+            if clusters == '':
+                clusters = 'None'
+            
+            ### before printing: normalize empty collections to 'None'
+            if pubIDs == []:
+                 pubIDs = 'None'
+            else:
+                pubIDs = ','.join(pubIDs)
+            
+            preSQ = ''.join(preSQ)
+            
+            fastprint(os.path.join(path,'%s_premiRNA.txt' % prefix),'a',preID+'\t'+preACC+'\t'+preSQ+'\t'+matACCs+'\t'+pubIDs+'\t'+clusters+'\t'+web_addr+'\n')
+                
+            # mature coordinates are 1-based inclusive ranges 'a..b'
+            for tup in zip(my_ids, my_accs, my_locs):
+                
+                [start,stop] = tup[2].split('..')
+                
+                if not(tup[0] in dictG):
+                    dictG[tup[0]]=[]
+                
+                # NOTE(review): assignment (not append) -- a mature ID seen
+                # in several precursors keeps only the last ACC/sequence.
+                dictG[tup[0]] = [tup[1],preSQ[int(start)-1:int(stop)]]
+                
+                if not(tup[0] in dictP):
+                    dictP[tup[0]]=[]
+                
+                dictP[tup[0]].append(preID)
+                
+        for k,v in dictG.items():                
+            pre_forms = ','.join(dictP[k]) 
+            
+            ### targets from the module-level TargetScan lookup
+            targets = 'None'
+            if k in TargetScanLib:
+                targets = ','.join(TargetScanLib[k])
+           
+            fastprint(os.path.join(path,'%s_matmiRNA.txt' % prefix),'a',k+'\t'+v[0]+'\t'+v[1]+'\t'+pre_forms+'\t'+targets+'\n')
+        
+            
+        return [os.path.join(path,'%s_matmiRNA.txt' % prefix), os.path.join(path,'%s_premiRNA.txt' % prefix)]
+
+
+
+##############################################################################################################################################################
+##############################################################################################################################################################
+
+# ---------------------------------------------------------------------------
+# Main update script: download TargetScan targets and the miRBase flat file,
+# split per organism, parse each organism and upload the result files to the
+# Orange server.  Runs at import time (flat script, no main() guard).
+# ---------------------------------------------------------------------------
+opt = dict(getopt(sys.argv[1:], "u:p:", ["user=", "password="])[0])
+
+# NOTE(review): falls back to the literal strings "username"/"password"
+# when the options are missing instead of failing fast.
+username = opt.get("-u", opt.get("--user", "username"))
+password = opt.get("-p", opt.get("--password", "password"))
+
+path = os.path.join(orngEnviron.bufferDir, "tmp_miRNA")
+print 'path: ', path
+
+serverFiles = orngServerFiles.ServerFiles(username, password)
+
+try:
+    os.mkdir(path)
+except OSError:
+    # directory already exists -- reuse it
+    pass
+
+
+# names of the taxonomies we actually process
+org_taxo = [tax.name(id) for id in tax.common_taxids()]
+
+### targets library from TargetScan: mature miRNA id -> list of gene ids
+try:
+    tarscan_url = 'http://www.targetscan.org//vert_50//vert_50_data_download/Conserved_Site_Context_Scores.txt.zip'
+    
+    # download the zip fully into memory, take the first member,
+    # skip the header line and drop the trailing empty line
+    zf = zipfile.ZipFile(StringIO.StringIO(urllib.urlopen(tarscan_url).read()))
+    arch = zf.read(zf.namelist()[0]).splitlines()[1:]
+    arch.pop()
+    mirnas = [a.split('\t')[3] for a in arch]
+    gene_ids = [a.split('\t')[1] for a in arch]
+    
+    TargetScanLib = {}
+    for m,t in zip(mirnas,gene_ids):
+        if not(m in TargetScanLib):
+            TargetScanLib[m] = []
+        if not(t in TargetScanLib[m]):           
+            TargetScanLib[m].append(t)
+except IOError:
+    # NOTE(review): TargetScanLib stays undefined on failure; miRNA_info
+    # will later raise NameError instead of degrading gracefully.
+    sendMail('Targets not found on: %s' % tarscan_url)    
+
+### miRNA library form miRBase (gzipped EMBL-like flat file)
+print "\nBuilding miRNA library..."
+address = 'ftp://mirbase.org/pub/mirbase/CURRENT/miRNA.dat.gz'
+# flag == 1 while the download succeeded; 0 skips all processing below
+flag = 1
+try:
+    data_webPage = gzip.GzipFile(fileobj=StringIO.StringIO(urllib.urlopen(address).read())).read()    
+except IOError:
+    flag = 0
+    sendMail('Database file of miRNAs not found on: %s' % address)
+     
+        
+if flag:
+    # organism prefix (e.g. 'hsa') -> organism description, built by zipping
+    # the ID and DE header lines.  NOTE(review): assumes every record has
+    # exactly one ID and one DE line in the same order.
+    orgs_des = dict(zip([re.findall('ID\s*(\S{3,4})-\S*\s*standard;',l)[0] for l in data_webPage.splitlines() if l[0:2]=='ID'],[re.findall('DE\s*(.*)\s\S*.*\sstem[\s|-]loop',l)[0] for l in data_webPage.splitlines() if l[0:2]=='DE']))
+    
+    file_org = get_intoFiles(path,data_webPage)
+    
+    # aggregated files across all organisms
+    miRNA_path = os.path.join(path,'miRNA.txt')
+    print 'miRNA file path: %s' % miRNA_path
+    premiRNA_path = os.path.join(path,'premiRNA.txt')
+    print 'pre-miRNA file path: %s' % premiRNA_path
+    
+    fastprint(miRNA_path,'w','matID'+'\t'+'matACC'+'\t'+'matSQ'+'\t'+'pre_forms'+'\t'+'targets'+'\n')
+    fastprint(premiRNA_path,'w','preID'+'\t'+'preACC'+'\t'+'preSQ'+'\t'+'matACCs'+'\t'+'pubIDs'+'\t'+'clusters'+'\t'+'web_addr'+'\n')
+    
+    # process only organisms whose taxonomy we know
+    for fx in [l.rstrip() for l in open(file_org).readlines()]:
+        if orgs_des[fx.split('_')[0]] in org_taxo:
+            
+            end_files = miRNA_info(path, fx,orgs_des[fx.split('_')[0]])
+            
+            for filename in end_files:
+                print "Now reading %s..." % filename            
+                # NOTE(review): these regexes assume '/'-separated paths;
+                # they would not match on Windows-style paths.
+                org = re.findall('/(\S{3,4})_\S{3}miRNA\.txt',filename)[0]
+                type_file = re.findall(org+'_(\S*)miRNA\.txt',filename)[0]
+                label = re.findall('/(\S{3,4}_\S{3}miRNA?)\.txt',filename)[0]
+                
+                if type_file == 'mat':
+                    serverFiles.upload("miRNA", label, filename, title="miRNA: %s mature form" % org, tags=["tag1", "tag2"])
+                    serverFiles.unprotect("miRNA", label)
+                    print '%s mat uploaded' % org
+                    
+                    # append records (skip header) to the aggregate file
+                    for file_line in open(filename).readlines()[1:]:
+                        fastprint(miRNA_path,'a',file_line)                 
+                    
+                elif type_file == 'pre':
+                    serverFiles.upload("miRNA", label, filename, title="miRNA: %s pre-form" % org, tags=["tag1", "tag2"])
+                    serverFiles.unprotect("miRNA", label)
+                    print '%s pre uploaded' % org
+                    
+                    for file_line in open(filename).readlines()[1:]:
+                        fastprint(premiRNA_path,'a',file_line)
+                        
+                else:
+                    print 'Check the label.'
+    
+    serverFiles.upload("miRNA", "miRNA.txt", miRNA_path)
+    serverFiles.unprotect("miRNA", "miRNA.txt")
+    print '\nmiRNA.txt uploaded'
+    
+    serverFiles.upload("miRNA", "premiRNA.txt", premiRNA_path)
+    serverFiles.unprotect("miRNA", "premiRNA.txt")
+    print 'premiRNA.txt uploaded\n'
+else:
+    print "Check the address of miRNA file on %s" % address
+
+                
+            
+
+
+
+   

File server_update/updater.py

+import sys, os
+import subprocess
+import time, glob
+import optparse
+
+from getopt import getopt
+from datetime import datetime
+
+usage="""usage: %prog [options] [update_script ...]
+
+Run update scripts"""
+
+# Driver: run each update script as a subprocess, capture its output to a
+# per-script log and optionally mail the log afterwards.
+parser = optparse.OptionParser(usage=usage)
+parser.add_option("-u", "--user", help="User name")
+parser.add_option("-p", "--password", help="Password")
+parser.add_option("-l", "--log-dir", dest="log_dir", help="Directory to store the logs", default="./")
+parser.add_option("-m", "--mailto", help="e-mail the results to EMAIL", metavar="EMAIL", default=None)
+
+option, args = parser.parse_args()
+
+# no scripts given on the command line -> run the full default set
+if not args:
+    args = ["updateTaxonomy.py", "updateGO.py", "updateMeSH.py", "updateNCBI_geneinfo.py",
+            "updateHomoloGene.py", "updateDictyBase.py", "updatePPI.py"]
+    
+for script in args:
+    log = open(os.path.join(option.log_dir, script + ".log.txt"), "wb")
+    # NOTE(review): option.user/option.password may be None when the flags
+    # are omitted, which makes Popen raise TypeError on the argv list.
+    p = subprocess.Popen([sys.executable, script, "-u", option.user, "-p", option.password], stdout=log, stderr=log)
+    # poll every 3 s until the child exits
+    while p.poll() is None:
+        time.sleep(3)
+    log.write("\n" + script + " exited with exit status %s" % p.poll())
+    log.close()
+    # NOTE(review): the log is mailed whenever --mailto is set, even when
+    # the script exited with status 0, despite the "Error running" subject.
+    if option.mailto:
+        fromaddr = "orange@fri.uni-lj.si"
+        toaddr = option.mailto.split(",")
+        msg = open(os.path.join(option.log_dir, script + ".log.txt"), "rb").read()
+        msg = "From: %s\r\nTo: %s\r\nSubject: Error running %s update script\r\n\r\n" % (fromaddr, ",".join(toaddr), script) + msg
+        try:
+            import smtplib
+            s = smtplib.SMTP('212.235.188.18', 25)
+            s.sendmail(fromaddr, toaddr, msg)
+            s.quit()
+        except Exception, ex:
+            print "Failed to send error report due to:", ex
+    
+
def files_report():
    """Return an HTML fragment listing every file hosted on the server,
    grouped per domain (internal/demo domains are skipped)."""
    import orngServerFiles
    server = orngServerFiles.ServerFiles()
    chunks = []
    for domain in server.listdomains():
        if domain in ["demo", "demo2", "test", "gad"]:
            continue
        allinfo = server.allinfo(domain)
        chunks.append("<h2>%s</h2>" % domain)
        chunks.append("<table><tr><th>Title</th><th>Date</th><th>Filename</th></tr>")
        for fname, info in allinfo.items():
            chunks.append("<tr><td>%s</td><td>%s</td><td>%s</td></tr>"
                          % (info["title"], info["datetime"], fname))
        chunks.append("</table>")
    return "\n".join(chunks)
+  
+open(os.path.join(option.log_dir, "serverFiles.html"), "wb").write(files_report())