Commits

Aleš Erjavec  committed deedfc2

Moved scripts to scripts directory.

  • Participants
  • Parent commits 1dc1f35

Comments (0)

Files changed (8)

File scripts/fragment_mapper.py

+import orange
+import orngChem_Old as orngChem
+import sys, getopt
+
+# Map every chemical in a SMILES table onto a list of fragments and save the
+# result as a new table with one extra float attribute per fragment.
+#
+# Options:
+#   -s FILE  table with the SMILES codes (default "smiles.tab")
+#   -f FILE  text file with one fragment per line (default "fragments.txt")
+#   -o FILE  output table (default "fragmentmap.tab")
+#   -a ATTR  attribute holding the SMILES code (default: attribute at index 1)
+#   -b       handed to orngChem.map_fragments as its third argument
+
+smilesFilename="smiles.tab"
+fragmentsFilename="fragments.txt"
+outputFilename="fragmentmap.tab"
+
+opt=dict(getopt.getopt(sys.argv[1:], "s:f:o:a:b")[0])
+
+smilesFilename=opt.get("-s", None) or smilesFilename
+fragmentsFilename=opt.get("-f", None) or fragmentsFilename
+outputFilename=opt.get("-o", None) or outputFilename
+attrName=opt.get("-a", 1)
+binary="-b" in opt
+
+smilesData=orange.ExampleTable(smilesFilename)
+# Drop the examples whose SMILES code is missing.
+smilesData=smilesData.filter(orange.Filter(lambda e:not e[attrName].isSpecial()))
+smilesCodes=[str(e[attrName]) for e in smilesData]
+
+# Close the fragment file explicitly instead of leaving it to the garbage collector.
+fragmentsFile=open(fragmentsFilename)
+try:
+    fragments=[s.strip() for s in fragmentsFile.read().split("\n")]
+finally:
+    fragmentsFile.close()
+fragmentMap=orngChem.map_fragments(fragments, smilesCodes, binary)
+
+# Output domain: original attributes, one float attribute per fragment and,
+# if the input has one, the original class variable last.
+fragmentVars=[orange.FloatVariable(frag) for frag in fragments]
+classVars=smilesData.domain.classVar and [smilesData.domain.classVar] or []
+variables=smilesData.domain.attributes+fragmentVars+classVars
+
+domain=orange.Domain(variables,1)
+domain.addmetas(smilesData.domain.getmetas())
+table=orange.ExampleTable(domain)
+# Variables (including metas) whose values are copied over unchanged;
+# the list does not depend on the example, so it is built only once.
+copiedVars=smilesData.domain.variables+smilesData.domain.getmetas().values()
+for e in smilesData:
+    ex=orange.Example(domain)
+    for v in copiedVars:
+        ex[v]=e[v]
+    # fragmentMap is indexed by SMILES code and yields a fragment -> value mapping.
+    for frag, val in fragmentMap[str(e[attrName])].items():
+        ex[frag]=val
+    table.append(ex)
+table.save(outputFilename)

File scripts/fragmenter.py

+import orange
+import orngChem_Old as orngChem
+import getopt
+import sys
+
+# Find fragments in a set of SMILES codes and write them one per line.
+#
+# Options:
+#   -s FILE  input: a ".tab" table, or any other file read as plain text with
+#            one SMILES code per line (default "smiles.tab")
+#   -o FILE  output text file (default "fragments.txt")
+#   -f FREQ  frequency handed to orngChem.find_fragments (default 0.4)
+#   -a ATTR  attribute of the table holding the SMILES code (default: index 1)
+
+smilesFilename="smiles.tab"
+fragmentFilename="fragments.txt"
+freq=0.4
+opt=dict(getopt.getopt(sys.argv[1:], "s:f:o:a:")[0])
+smilesFilename=opt.get("-s", None) or smilesFilename
+fragmentFilename=opt.get("-o", None) or fragmentFilename
+freq=float(opt.get("-f", freq))
+attrName=opt.get("-a",None) or 1
+
+if smilesFilename.endswith(".tab"):
+    smilesData=orange.ExampleTable(smilesFilename)
+    # Examples with a missing SMILES code are skipped.
+    smiles=[str(e[attrName]) for e in smilesData if not e[attrName].isSpecial()]
+else:
+    smilesFile=open(smilesFilename)
+    try:
+        smiles=[s.strip() for s in smilesFile.read().split("\n")]
+    finally:
+        smilesFile.close()
+
+fragments=orngChem.find_fragments(smiles,freq)
+# try/finally so the output file is closed even if writing fails.
+outputFile=open(fragmentFilename, "w")
+try:
+    outputFile.write("\n".join(fragments))
+finally:
+    outputFile.close()
+
+
+

File scripts/map_genes_to_terms.py

+import go
+go.loadAnnotation()
+go.loadGO()
+from sets import Set
+# GO terms for which the annotated genes are written out.
+terms=["GO:0030036",
+       "GO:0043037",
+       "GO:0030528",
+       "GO:0016044",
+       "GO:0006873",
+       "GO:0006281",
+       "GO:0042026",
+       "GO:0007047",
+       "GO:0000754",
+       "GO:0008202",
+       "GO:0007017"]
+
+# Gene names from boone's paper, one per line.
+genesFile=open("genes.txt")
+try:
+    booneGenes=genesFile.read().split("\n")
+finally:
+    genesFile.close()
+
+# Output: one line per term - term id, all genes mapped to it, and the subset
+# of those genes that also appears in genes.txt (tab separated, genes comma separated).
+outputFile=open("terms_with_genes.txt","w")
+try:
+    #for each term find all genes that map to this term and those that are from boone's paper
+    for term in terms:
+        genes=go.findGenes([term]).keys()
+        # A Set gives constant-time membership tests in the filter below; the
+        # list is still used for the output so the gene order is unchanged.
+        geneSet=Set(genes)
+        # NOTE(review): the results of GOTermFinder and findTerms are never
+        # used (they fed sanity checks that are no longer here). The calls are
+        # kept in case the go module relies on them - confirm and remove.
+        go.GOTermFinder(genes, aspect="P")
+        genesBoone=[g for g in booneGenes if go.mapGeneName(g) in geneSet]
+        go.findTerms(genesBoone, aspect=["P"])
+        outputFile.write(term+"\t"+",".join(genes)+"\t"+",".join(genesBoone)+"\n")
+finally:
+    outputFile.close()

File scripts/map_to_closest_terms.py

+import orngCA
+import sys, math, getopt
+import orange
+import numpy
+
+def dist(a,b):
+    """Return the Euclidean distance between a and b using only their first two coordinates."""
+    dx=a[0]-b[0]
+    dy=a[1]-b[1]
+    return math.sqrt(dx**2+dy**2)
+
+# Run a correspondence analysis on a table and write two listings: for every
+# row the k closest columns, and for every column the j closest rows, using
+# the distance between their principal profile coordinates.
+#
+# Options:
+#   -i FILE   input table (required)
+#   -m INDEX  position in the list of the table's meta attribute ids; that
+#             meta attribute supplies the row names (default 0)
+#   -k N      number of closest columns listed per row (default 5)
+#   -j N      number of closest rows listed per column (default 3)
+#   -o1 FILE  row listing (default "term-chem.tab")
+#   -o2 FILE  column listing (default "chem-term.tab")
+
+def _writeNearest(filename, itemLabel, neighbourLabel, count, points, pointNames, others, otherNames):
+    """Write, for every point, the names of and distances to its count nearest others.
+
+    The file starts with a three-line header (names, types, an empty line)
+    followed by one tab-separated line per point.
+    """
+    out=open(filename, "w")
+    try:
+        out.write(itemLabel+"\t")
+        out.write("\t".join([neighbourLabel+str(i)+"\t"+"dist"+str(i) for i in range(count)])+"\n")
+        out.write("d\t"+"\t".join(["d\tc" for i in range(count)])+"\n\n")
+        for point, name in zip(points, pointNames):
+            out.write(name)
+            nearest=[(n, dist(point, other)) for other, n in zip(others, otherNames)]
+            nearest.sort(key=lambda pair: pair[1])
+            for n, distance in nearest[:count]:
+                out.write("\t"+str(n)+"\t"+str(distance))
+            out.write("\n")
+    finally:
+        out.close()
+
+k=5
+j=3
+
+# getopt only knows one-letter options: "-o1 FILE" is parsed as the flag "-o"
+# followed by option "-1" with argument FILE (and likewise "-o2"). The option
+# string therefore declares "1:" and "2:", and the file names are looked up
+# under "-1" and "-2"; looking for "-o1"/"-o2" can never succeed.
+optlist, args=getopt.getopt(sys.argv[1:],"i:m:k:j:o1:2:")
+opt=dict(optlist)
+print(opt)
+data=orange.ExampleTable(opt["-i"])
+mid="-m" in opt and int(opt["-m"]) or 0
+k="-k" in opt and int(opt["-k"]) or k
+j="-j" in opt and int(opt["-j"]) or j
+
+names1=[v.name for v in data.domain.variables]
+mid=data.domain.getmetas().keys()[mid]
+names2=[str(d[mid]) for d in data]
+
+# Zero entries are replaced by a small positive number before the analysis.
+data=[[float(a) or 1e-6 for a in e] for e in data]
+ca=orngCA.CA(data, names2, names1)
+row=ca.getPrincipalRowProfilesCoordinates()
+col=ca.getPrincipalColProfilesCoordinates()
+print(row)
+print(col)
+
+_writeNearest(opt.get("-1") or "term-chem.tab", "term", "chemical", k, row, names2, col, names1)
+# The column listing is limited to j entries, matching its j header columns.
+_writeNearest(opt.get("-2") or "chem-term.tab", "chemical", "term", j, col, names1, row, names2)

File scripts/plot.py

+import orngCA
+import sys
+import orange
+# Draw a correspondence analysis biplot of a table.
+# Usage: plot.py [-METAINDEX] TABLE
+# METAINDEX (default 0) is a position in the list of the table's meta
+# attribute ids; the values of that meta attribute label the rows.
+if len(sys.argv)>2:
+    mid=int(sys.argv[1].strip("-"))
+    filename=sys.argv[2]
+else:
+    mid=0
+    filename=sys.argv[1]
+data=orange.ExampleTable(filename)
+# Column labels are the variable names, row labels the chosen meta values.
+names1=[v.name for v in data.domain.variables]
+metaId=data.domain.getmetas().keys()[mid]
+names2=[str(d[metaId]) for d in data]
+# Zero entries are replaced by a small positive number before the analysis.
+matrix=[[float(a) or 1e-6 for a in e] for e in data]
+ca=orngCA.CA(matrix, names2, names1)
+ca.Biplot()

File scripts/profile_analisys.py

+import orange
+import go
+import orngChem_Old as orngChem
+import sys, getopt
+
+# Build a sensitivity profile table with one row per GO term: the sensitivity
+# values of the genes annotated to the term are averaged either per fragment
+# (over all chemicals containing the fragment) or per chemical.
+#
+# Options:
+#   -s FILE    table of chemicals; column 0 is used as the chemical's name and
+#              column 1 as its SMILES code (default "smiles.tab")
+#   -S FILE    sensitivity table; column 0 is used as the gene name, the other
+#              columns are indexed by chemical (default "sens.tab")
+#   -f FILE    text file with one fragment per line (default "fragments.txt")
+#   -o FILE    output table (default "sens_profile.tab")
+#   -b FILE    optional file with GO term ids or names, one per line; only
+#              the listed terms are kept
+#   -g DIR     data directory handed to go.setDataDir
+#   -l         passed as slimsOnly to go.findTerms
+#   -m         average per chemical instead of per fragment
+#   -a ASPECT  aspect handed to go.findTerms (default "P")
+
+def readLines(filename):
+    """Return the whitespace-stripped lines of a text file; the file is always closed."""
+    f=open(filename)
+    try:
+        return [s.strip() for s in f.read().split("\n")]
+    finally:
+        f.close()
+
+def makeProfileDomain(variables):
+    """Return (domain, idMeta, nameMeta): a classless domain over variables with
+    two string metas, "GOTerm id" and "GOTerm name"."""
+    domain=orange.Domain(variables,0)
+    mid1=orange.newmetaid()
+    mid2=orange.newmetaid()
+    domain.addmeta(mid1, orange.StringVariable("GOTerm id"))
+    domain.addmeta(mid2, orange.StringVariable("GOTerm name"))
+    return domain, mid1, mid2
+
+smilesFilename="smiles.tab"
+sensFilename="sens.tab"
+fragmentsFilename="fragments.txt"
+outputFilename="sens_profile.tab"
+aspect="P"
+
+opt=dict(getopt.getopt(sys.argv[1:], "s:S:b:f:a:g:o:lm")[0])
+
+smilesFilename=opt.get("-s",None) or smilesFilename
+fragmentsFilename=opt.get("-f", None) or fragmentsFilename
+sensFilename=opt.get("-S", None) or sensFilename
+outputFilename=opt.get("-o", None) or outputFilename
+subsetFilename=opt.get("-b", None)
+goDataDir=opt.get("-g", None)
+slimsSubset="-l" in opt
+fragmentBased="-m" not in opt
+aspect=opt.get("-a", None) or aspect
+
+smilesData=orange.ExampleTable(smilesFilename)
+# Drop chemicals without a SMILES code.
+smilesData=smilesData.filter(orange.Filter(lambda e:not e[1].isSpecial()))
+smilesDict=dict([(str(e[0]), str(e[1])) for e in smilesData])
+revSmilesDict=dict([(val, key) for key, val in smilesDict.items()])
+
+sensData=orange.ExampleTable(sensFilename)
+sensDict=dict([(str(e[0]), e) for e in sensData])
+
+genes=[str(e[0]) for e in sensData]
+
+if goDataDir:
+    go.setDataDir(goDataDir)
+go.loadGO()
+go.loadAnnotation("sgd")
+go.setSlims("goslim_yeast")
+
+# terms is used below as a mapping: GO term id -> genes annotated to it.
+terms=go.findTerms(genes, slimsOnly=slimsSubset, aspect=aspect, reportEvidence=False)
+
+if subsetFilename:
+    # Keep only the terms listed in the subset file, by id or by name.
+    subset=readLines(subsetFilename)
+    terms=dict([(term, termGenes) for term, termGenes in terms.items()
+                if term in subset or go.loadedGO.termDict[term].name in subset])
+
+if fragmentBased:
+    fragments=readLines(fragmentsFilename)
+    fragmentMap=orngChem.map_fragments(fragments, smilesDict.values())
+    # Chemicals containing each fragment; this does not depend on the term,
+    # so it is computed once instead of once per term.
+    chemicalsByFragment={}
+    for frag in fragments:
+        chemicalsByFragment[frag]=[c for c in fragmentMap.keys() if fragmentMap[c][frag]]
+    domain, mid1, mid2=makeProfileDomain([orange.FloatVariable(frag) for frag in fragments])
+    table=orange.ExampleTable(domain)
+
+    for term, termGenes in terms.items():
+        ex=orange.Example(domain)
+        for frag in fragments:
+            chemicals=chemicalsByFragment[frag]
+            count=len(termGenes)*len(chemicals)
+            if not count:
+                # Nothing to average (no gene, or no chemical contains the
+                # fragment): leave the value unset instead of dividing by zero.
+                # NOTE(review): a freshly created orange.Example presumably
+                # holds unknown values - confirm.
+                continue
+            total=0.0
+            for g in termGenes:
+                for c in chemicals:
+                    total+=float(sensDict[g][revSmilesDict[c]])
+            ex[frag]=total/count
+        ex[mid1]=term
+        ex[mid2]=go.loadedGO.termDict[term].name
+        table.append(ex)
+    table.save(outputFilename)
+else:
+    chemicalVars=sensData.domain.variables[1:]
+    domain, mid1, mid2=makeProfileDomain(chemicalVars)
+    table=orange.ExampleTable(domain)
+    for term, termGenes in terms.items():
+        ex=orange.Example(domain)
+        # Same guard as above: a term without genes gets no averaged values.
+        if termGenes:
+            for chem in chemicalVars:
+                total=0.0
+                for g in termGenes:
+                    total+=float(sensDict[g][chem])
+                ex[chem]=total/len(termGenes)
+        ex[mid1]=term
+        ex[mid2]=go.loadedGO.termDict[term].name
+        table.append(ex)
+    table.save(outputFilename)
+
+            
+                
+    
+    
+
+        
+
+
+
+

File scripts/profile_transpose.py

+import orange
+import Numeric
+import sys, getopt
+
+# Transpose a table: every variable of the input becomes a row of the output
+# and every input row becomes a float column named after the row's label.
+#
+# Options:
+#   -i FILE  input table (default "sens_profile.tab")
+#   -o FILE  output table (default "sens_profile_t.tab")
+#   -n NAME  name of the output meta attribute that holds the original
+#            variable names (default "name")
+#   -m META  attribute supplying the row labels: a position in the list of
+#            meta attribute ids, or otherwise a key into the domain (default 0)
+
+inputFilename="sens_profile.tab"
+outputFilename="sens_profile_t.tab"
+metaVarName="name"
+metaindex=0
+
+opt=dict(getopt.getopt(sys.argv[1:], "i:n:m:o:")[0])
+
+inputFilename=opt.get("-i", None) or inputFilename
+outputFilename=opt.get("-o", None) or outputFilename
+metaVarName=opt.get("-n", None) or metaVarName
+metaindex=opt.get("-m") or metaindex
+try:
+    metaindex=int(metaindex)
+except ValueError:
+    # Not a number: keep the string and use it as a key into the domain below.
+    pass
+
+data=orange.ExampleTable(inputFilename)
+try:
+    meta=data.domain.getmetas().keys()[metaindex]
+    # Meta attributes are not part of domain.variables, nothing to exclude.
+    labelName=None
+except (TypeError, IndexError):
+    # metaindex is a name or is not a valid position in the meta id list:
+    # look the attribute up in the domain itself.
+    meta=data.domain[metaindex]
+    # The label attribute must not become a row of its own. Compare by name:
+    # comparing a name with the variable object itself never matches.
+    labelName=meta.name
+names=[str(var.name) for var in data.domain.variables if str(var.name)!=labelName]
+metavals=[str(e[meta]) for e in data]
+domain=orange.Domain([orange.FloatVariable(m) for m in metavals],0)
+mid=orange.newmetaid()
+domain.addmeta(mid, orange.StringVariable(metaVarName))
+table=orange.ExampleTable(domain)
+print(names)
+for name in names:
+    ex=orange.Example(domain)
+    for e, m in zip(data, metavals):
+        ex[m]=e[name]
+    ex[mid]=name
+    table.append(ex)
+table.save(outputFilename)
+
+

File scripts/vis.py

+from openeye.oechem import *
+from openeye.oedepict import *
+
+def moleculeFragment2BMP(molSmiles, fragSmiles, filename, size=200, title=""):
+    """Draw the molecule given by molSmiles and save the picture to filename.
+
+    The matches of fragSmiles found in the molecule are handed to colorSubset
+    before rendering. size is passed on to createMolView; title is set as the
+    molecule's title.
+    """
+    molecule=OEGraphMol()
+    OEParseSmiles(molecule, molSmiles)
+    depict(molecule)
+    molecule.SetTitle(title)
+    # The search runs before the view is created, as in the original order.
+    matches=subsetSearch(molecule, fragSmiles)
+    molView=createMolView(molecule, size)
+    colorSubset(molView, molecule, matches)
+    renderImage(molView, filename)
+
+def molecule2BMP(molSmiles, filename, size=200, title=""):
+    """Draw the molecule given by molSmiles and save the picture to filename.
+
+    size is passed on to createMolView; title is set as the molecule's title.
+    """
+    molecule=OEGraphMol()
+    OEParseSmiles(molecule, molSmiles)
+    molecule.SetTitle(title)
+    depict(molecule)
+    renderImage(createMolView(molecule, size), filename)
+
+def depict(mol):
+    """Prepare mol for depiction, i.e. assign 2D coordinates to its atoms.
+
+    For a molecule reporting dimension 3 the chirality and the bond and atom
+    stereo information are derived from the 3D data first.
+    """
+    if mol.GetDimension()==3:
+        for convert in (OEPerceiveChiral, OE3DToBondStereo, OE3DToAtomStereo):
+            convert(mol)
+    for prepare in (OEAddDepictionHydrogens, OEDepictCoordinates, OEMDLPerceiveBondStereo):
+        prepare(mol)
+
+def subsetSearch(mol, pattern):
+    """Return the result of matching pattern against mol with an OESubSearch.
+
+    The search object is initialised with pattern, and Match is called with
+    mol and 1 as its second argument.
+    """
+    search=OESubSearch()
+    search.Init(pattern)
+    matches=search.Match(mol,1)
+    return matches
+
+def createMolView(mol, size=200, title=""):
+    """Create and return a square OEDepictView of size x size for mol.
+
+    The logo is switched off and the title size is set to 12.
+    The title argument is accepted but not used.
+    """
+    molView=OEDepictView()
+    molView.SetMolecule(mol)
+    molView.SetLogo(False)
+    molView.SetTitleSize(12)
+    molView.AdjustView(size, size)
+    return molView
+
+def colorSubset(view, mol, match):
+    """Color red the atoms and bonds shown in view that are present in match.
+
+    view  - object providing AStyle(idx) and BStyle(idx) style lookups
+    mol   - not used; kept so existing callers keep working
+    match - iterable of matches, each providing GetAtoms() and GetBonds()
+
+    Atoms and bonds are handled in a single pass over match, so every match
+    gets its own bonds colored, match may be a one-shot iterator, and an
+    empty match is simply a no-op.
+    """
+    def paintRed(style):
+        style.r=255
+        style.g=0
+        style.b=0
+
+    for matchbase in match:
+        for mpair in matchbase.GetAtoms():
+            paintRed(view.AStyle(mpair.target.GetIdx()))
+        for mpair in matchbase.GetBonds():
+            paintRed(view.BStyle(mpair.target.GetIdx()))
+
+def renderImage(view, filename):
+    """Render view into an 8-bit image of the view's size and write it to filename as a BMP."""
+    width=view.XRange()
+    height=view.YRange()
+    image=OE8BitImage(width, height)
+    view.RenderImage(image)
+    stream=oeofstream(filename)
+    OEWriteBMP(stream, image)
+
+def render2OE8BitImage(view):
+    """Render view into a new OE8BitImage of the view's size and return that image."""
+    img=OE8BitImage(view.XRange(), view.YRange())
+    view.RenderImage(img)
+    # Return the rendered image, not the view that was passed in.
+    return img
+
+if __name__=="__main__":
+    import sys
+    # Usage: vis.py MOLECULE_SMILES FRAGMENT_SMILES
+    # With any other number of arguments a built-in example is drawn instead.
+    if len(sys.argv)==3:
+        molSmiles, fragSmiles=sys.argv[1:3]
+    else:
+        molSmiles="CN(C)CCCN1C2=CC=CC=C2SC3=C1C=C(C=C3)Cl.Cl"
+        fragSmiles="C-N"
+    # Writes the plain depiction and the one with the fragment highlighted.
+    molecule2BMP(molSmiles, "mol.bmp")
+    moleculeFragment2BMP(molSmiles, fragSmiles, "mol_sub.bmp")