Commits

Marko Toplak committed 251e5d2

Removed old orngChem scripts (they were moved into ./scripts).

Comments (0)

Files changed (8)

fragment_mapper.py

-import orange
-import orngChem_Old as orngChem
-import sys, getopt
-
-smilesFilename="smiles.tab"
-fragmentsFilename="fragments.txt"
-outputFilename="fragmentmap.tab"
-binary=False
-
-opt=dict(getopt.getopt(sys.argv[1:], "s:f:o:a:b")[0])
-
-smilesFilename=opt.get("-s",None) or smilesFilename
-fragmetnsFilename=opt.get("-f", None) or fragmentsFilename
-outputFilename=opt.get("-o", None) or outputFilename
-attrName=opt.get("-a", 1)
-binary=opt.has_key("-b")
-
-smilesData=orange.ExampleTable(smilesFilename)
-smilesData=smilesData.filter(orange.Filter(lambda e:not e[attrName].isSpecial()))
-#smilesDict=dict([(str(e[0]), str(e[1])) for e in smilesData])
-#revSmilesDict=dict([(val, key) for key, val in smilesDict.items()])
-smilesCodes=[str(e[attrName]) for e in smilesData]
-
-fragments=map(lambda s:s.strip(), open(fragmetnsFilename).read().split("\n"))
-fragmentMap=orngChem.map_fragments(fragments, smilesCodes, binary)
-
-vars=[orange.FloatVariable(frag) for frag in fragments]
-mid=orange.newmetaid()
-cvar=orange.StringVariable("chemical")
-vars=smilesData.domain.attributes+vars+(smilesData.domain.classVar and [smilesData.domain.classVar] or [])
-
-domain=orange.Domain(vars,1)
-domain.addmetas(smilesData.domain.getmetas())
-#domain.addmeta(mid,cvar)
-table=orange.ExampleTable(domain)
-for e in smilesData:
-    ex=orange.Example(domain)
-    for v in smilesData.domain.variables+smilesData.domain.getmetas().values():
-        ex[v]=e[v]
-    fragmap=fragmentMap[str(e[attrName])]
-    for frag, val in fragmap.items():
-        ex[frag]=val
-    table.append(ex)
-"""
-for chem, fragmap in fragmentMap.items():
-    ex=orange.Example(domain)
-    for fragment, val in fragmap.items():
-        ex[fragment]=val
-    ex[mid]=chem
-    table.append(ex)
-"""
-table.save(outputFilename)

fragmenter.py

-import orange
-import orngChem_Old as orngChem
-import getopt
-import sys
-
-smilesFilename="smiles.tab"
-fragmentFilename="fragments.txt"
-freq=0.4
-opt=dict(getopt.getopt(sys.argv[1:], "s:f:o:a:")[0])
-smilesFilename=opt.get("-s", None) or smilesFilename
-fragmentFilename=opt.get("-o", None) or fragmentFilename
-freq=float(opt.get("-f", freq))
-attrName=opt.get("-a",None) or 1
-
-if smilesFilename.endswith(".tab"):
-    smilesData=orange.ExampleTable(smilesFilename)
-    smiles=map(str, [e[attrName] for e in smilesData if not e[attrName].isSpecial()])
-else:
-    smiles=map(lambda s:s.strip(), open(smilesFilename).read().split("\n"))
-    
-fragments=orngChem.find_fragments(smiles,freq)
-file=open(fragmentFilename, "w")
-file.write("\n".join(fragments))
-file.close()
-
-
-

map_genes_to_terms.py

-import go
-go.loadAnnotation()
-go.loadGO()
-from sets import Set
-terms=["GO:0030036",
-       "GO:0043037",
-       "GO:0030528",
-       "GO:0016044",
-       "GO:0006873",
-       "GO:0006281",
-       "GO:0042026",
-       "GO:0007047",
-       "GO:0000754",
-       "GO:0008202",
-       "GO:0007017"]
-
-booneGenes=open("genes.txt").read().split("\n")
-
-file=open("terms_with_genes.txt","w")
-#for each term find all genes that map to this term and those that are from boone's paper
-for term in terms:
-    genes=go.findGenes([term]).keys()
-    #test
-    t=go.GOTermFinder(genes, aspect="P")
-    #t=go.findTerms(genes)
-    #if term not in t.keys():
-    #    print "!!!!!ERROR!!!!!", term, t
-    #print genes
-    genesBoone=filter(lambda a: go.mapGeneName(a) in genes, booneGenes)
-    #t1=go.GOTermFinder(genesBoone, aspect="P")
-    #print genesBoone
-    t=go.findTerms(genesBoone, aspect=["P"])
-    #print t
-    #print genesBoone
-    #print Set(t).difference(Set(t1)), Set(t1).difference(Set(t))
-    #if term not in t.keys():
-    #    print "!!!!!ERROR BOONE!!!!!", term, go.loadedGO.termDict[term].name
-    #print genesBoone
-    file.write(term+"\t"+",".join(genes)+"\t"+",".join(genesBoone)+"\n")
-file.close()

map_to_closest_terms.py

-import orngCA
-import sys, math, getopt
-import orange
-import numpy
-
-def dist(a,b):
-    return math.sqrt((a[0]-b[0])**2+(a[1]-b[1])**2)
-
-k=5
-j=3
-
-optlist, args=getopt.getopt(sys.argv[1:],"i:m:k:j:o1:o2")
-opt=dict(optlist)
-print opt
-data=orange.ExampleTable(opt["-i"])
-mid="-m" in opt and int(opt["-m"]) or 0
-k="-k" in opt and int(opt["-k"]) or k
-j="-j" in opt and int(opt["-j"]) or j
-
-names1=[v.name for v in data.domain.variables]
-mid=data.domain.getmetas().keys()[mid]
-names2=[str(d[mid]) for d in data]
-
-data=[map(lambda a:float(a) or 1e-6, e) for e in data]
-ca=orngCA.CA(data, names2, names1)
-row=ca.getPrincipalRowProfilesCoordinates()
-col=ca.getPrincipalColProfilesCoordinates()
-print row
-print col
-ofile="-o1" in opt and opt["-o1"] or "term-chem.tab"
-file=open(ofile, "w")
-file.write("term\t")
-file.write("\t".join(["chemical"+str(i)+"\t"+"dist"+str(i) for i in range(k)])+"\n")
-file.write("d\t"+"\t".join(["d\tc" for i in range(k)])+"\n\n")
-for r, name in zip(row, names2):
-    file.write(name)
-    d=[(n, dist(r,c)) for c, n in zip(col, names1)]
-    d.sort(lambda a,b:cmp(a[1],b[1]))
-    #print d
-    d=d[0:min(k,len(d))]
-    for f in d:
-        file.write("\t"+str(f[0])+"\t"+str(f[1]))
-    file.write("\n")
-file.close()
-
-
-ofile="-o2" in opt and opt["-o2"] or "chem-term.tab"
-file=open(ofile, "w")
-file.write("chemical\t")
-file.write("\t".join(["term"+str(i)+"\t"+"dist"+str(i) for i in range(j)])+"\n")
-file.write("d\t"+"\t".join(["d\tc" for i in range(j)])+"\n\n")
-for r, name in zip(col, names1):
-    file.write(name)
-    d=[(n, dist(r,c)) for c, n in zip(row, names2)]
-    d.sort(lambda a,b:cmp(a[1],b[1]))
-    d=d[0:min(k,len(d))]
-    for f in d:
-        file.write("\t"+str(f[0])+"\t"+str(f[1]))
-    file.write("\n")
-file.close()

plot.py

-import orngCA
-import sys
-import orange
-if len(sys.argv)>2:
-    mid=int(sys.argv[1].strip("-"))
-    data=orange.ExampleTable(sys.argv[2])
-else:
-    mid=0
-    data=orange.ExampleTable(sys.argv[1])
-names1=[v.name for v in data.domain.variables]
-mid=data.domain.getmetas().keys()[mid]
-names2=[str(d[mid]) for d in data]
-#import go
-#go.setDataDir("./")
-#go.loadGO()
-#map go term id's to term names
-#if mapTermNames:
-#    names2=[go.loadedGO.termDict[t.strip()].name for t in names2]
-data=[map(lambda a:float(a) or 1e-6, e) for e in data]
-ca=orngCA.CA(data, names2, names1)
-ca.Biplot()

profile_analisys.py

-import orange
-import go
-import orngChem_Old as orngChem
-import sys, getopt
-
-smilesFilename="smiles.tab"
-sensFilename="sens.tab"
-fragmentsFilename="fragments.txt"
-outputFilename="sens_profile.tab"
-subsetFilename=None
-goDataDir=None
-slimsSubset=True
-aspect="P"
-fragmentBased=True
-
-opt=dict(getopt.getopt(sys.argv[1:], "s:S:b:f:a:g:o:lm")[0])
-
-smilesFilename=opt.get("-s",None) or smilesFilename
-fragmetnsFilename=opt.get("-f", None) or fragmentsFilename
-sensFilename=opt.get("-S", None) or sensFilename
-outputFilename=opt.get("-o", None) or outputFilename
-subsetFilename=opt.get("-b", None)
-goDataDir=opt.get("-g", None)
-slimsSubset=opt.has_key("-l")
-fragmentBased=not opt.has_key("-m")
-aspect=opt.get("-a", None) or aspect
-
-smilesData=orange.ExampleTable(smilesFilename)
-smilesData=smilesData.filter(orange.Filter(lambda e:not e[1].isSpecial()))
-smilesDict=dict([(str(e[0]), str(e[1])) for e in smilesData])
-revSmilesDict=dict([(val, key) for key, val in smilesDict.items()])
-
-sensData=orange.ExampleTable(sensFilename)
-sensDict=dict([(str(e[0]), e) for e in sensData])
-
-genes=map(str, [e[0] for e in sensData])
-
-if goDataDir:
-    go.setDataDir(goDataDir)
-go.loadGO()
-go.loadAnnotation("sgd")
-go.setSlims("goslim_yeast")
-
-terms=go.findTerms(genes, slimsOnly=slimsSubset, aspect=aspect, reportEvidence=False)
-
-if subsetFilename:
-    file=open(subsetFilename)
-    subset=map(lambda s:s.strip(), file.read().split("\n"))
-    l=filter(lambda t:t[0] in subset or go.loadedGO.termDict[t[0]].name in subset, terms.items())
-    terms=dict(l)
-
-if fragmentBased:
-    #print terms
-    fragments=map(lambda s:s.strip(), open(fragmetnsFilename).read().split("\n"))
-    fragmentMap=orngChem.map_fragments(fragments, smilesDict.values())
-    domain=orange.Domain([orange.FloatVariable(frag) for frag in fragments],0)
-    mid1=orange.newmetaid()
-    mid2=orange.newmetaid()
-    domain.addmeta(mid1, orange.StringVariable("GOTerm id"))
-    domain.addmeta(mid2, orange.StringVariable("GOTerm name"))
-    table=orange.ExampleTable(domain)
-    matrix=[]
-    
-    for term, genes in terms.items():
-        ex=orange.Example(domain)
-        for frag in fragments:
-            chemicals=filter(lambda a: fragmentMap[a][frag], fragmentMap.keys())
-            avgSens=0.0
-            for g in genes:
-                for c in chemicals:
-                    avgSens+=float(sensDict[g][revSmilesDict[c]])
-            avgSens/=len(genes)*len(chemicals)
-            ex[frag]=avgSens
-        ex[mid1]=term
-        ex[mid2]=go.loadedGO.termDict[term].name
-        table.append(ex)
-    table.save(outputFilename)
-else:
-    domain=orange.Domain(sensData.domain.variables[1:],0)
-    mid1=orange.newmetaid()
-    mid2=orange.newmetaid()
-    domain.addmeta(mid1, orange.StringVariable("GOTerm id"))
-    domain.addmeta(mid2, orange.StringVariable("GOTerm name"))
-    table=orange.ExampleTable(domain)
-    for term, genes in terms.items():
-        ex=orange.Example(domain)
-        for chem in sensData.domain.variables[1:]:
-            avgSens=0.0
-            for g in genes:
-                avgSens+=float(sensDict[g][chem])
-            avgSens/=len(genes)
-            ex[chem]=avgSens
-        ex[mid1]=term
-        ex[mid2]=go.loadedGO.termDict[term].name
-        table.append(ex)
-    table.save(outputFilename)
-
-            
-                
-    
-    
-
-        
-
-
-
-

profile_transpose.py

-import orange
-import Numeric
-import sys, getopt
-
-inputFilename="sens_profile.tab"
-outputFilename="sens_profile_t.tab"
-metaVarName="name"
-metaindex=0
-
-opt=dict(getopt.getopt(sys.argv[1:], "i:n:m:o:")[0])
-
-inputFilename=opt.get("-i", None) or inputFilename
-outputFilename=opt.get("-o", None) or outputFilename
-metaVarName=opt.get("-n", None) or metaVarName
-metaindex=opt.get("-m") or metaindex
-try:
-    metaindex=int(metaindex)
-except:
-    pass
-
-data=orange.ExampleTable(inputFilename)
-try:
-    meta=data.domain.getmetas().keys()[metaindex]
-except:
-    meta=data.domain[metaindex]
-vars=[str(var.name) for var in data.domain.variables if str(var.name)!=meta]
-metavals=map(str,[e[meta] for e in data])
-domain=orange.Domain([orange.FloatVariable(m) for m in metavals],0)
-mid=orange.newmetaid()
-domain.addmeta(mid, orange.StringVariable(metaVarName))
-table=orange.ExampleTable(domain)
-print vars
-#print metavals
-for var in vars:
-    ex=orange.Example(domain)
-    for e, m in zip(data, metavals):
-        ex[m]=e[var]
-    ex[mid]=var
-    table.append(ex)
-table.save(outputFilename)
-
-

vis.py

-from openeye.oechem import *
-from openeye.oedepict import *
-
-def moleculeFragment2BMP(molSmiles, fragSmiles, filename, size=200, title=""):
-    """given smiles codes of molecle and a fragment will draw the molecule and save it
-    to a file"""
-    mol=OEGraphMol()
-    OEParseSmiles(mol, molSmiles)
-    depict(mol)
-    mol.SetTitle(title)
-    match=subsetSearch(mol, fragSmiles)
-    view=createMolView(mol, size)
-    colorSubset(view, mol, match)
-    renderImage(view, filename)
-
-def molecule2BMP(molSmiles, filename, size=200, title=""):
-    """given smiles code of a molecule will draw the molecule and save it
-    to a file"""
-    mol=OEGraphMol()
-    OEParseSmiles(mol, molSmiles)
-    mol.SetTitle(title)
-    depict(mol)
-    view=createMolView(mol, size)
-    renderImage(view, filename)
-
-def depict(mol):
-    """Depict a molecule, i.e. assign 2D coordinates to its atoms.
-
-    mol is modified in place and nothing is returned.
-    """
-    # A molecule reporting dimension 3 first gets its chirality perceived and
-    # its bond and atom stereo derived from the 3D data.
-    if mol.GetDimension()==3:
-        OEPerceiveChiral(mol)
-        OE3DToBondStereo(mol)
-        OE3DToAtomStereo(mol)
-    # Common path: depiction hydrogens, depiction coordinates, bond stereo.
-    OEAddDepictionHydrogens(mol)
-    OEDepictCoordinates(mol)
-    OEMDLPerceiveBondStereo(mol)
-
-def subsetSearch(mol, pattern):
-    """finds the matches of pattern in mol"""
-    pat=OESubSearch()
-    pat.Init(pattern)
-    return pat.Match(mol,1)
-
-def createMolView(mol, size=200, title=""):
-    """creates a view for the molecule mol"""
-    view=OEDepictView()
-    view.SetMolecule(mol)
-    view.SetLogo(False)
-    view.SetTitleSize(12)
-    view.AdjustView(size, size)
-    return view
-
-def colorSubset(view, mol, match):
-    """assigns a differnet color to atoms and bonds of mol in view that are present in match"""
-    for matchbase in match:
-        for mpair in matchbase.GetAtoms():
-            style=view.AStyle(mpair.target.GetIdx())
-            #set style
-            style.r=255
-            style.g=0
-            style.b=0
-
-    for matchbasem in match:
-        for mpair in matchbase.GetBonds():
-            style=view.BStyle(mpair.target.GetIdx())            
-            #set style
-            style.r=255
-            style.g=0
-            style.b=0
-
-def renderImage(view, filename):
-    """renders the view to a filename"""
-    img=OE8BitImage(view.XRange(), view.YRange())
-    view.RenderImage(img)
-    ofs=oeofstream(filename)
-    OEWriteBMP(ofs, img)
-
-def render2OE8BitImage(view):
-    """renders the view to a OE8BitImage"""
-    img=OE8BitImage(view.XRange(), view.YRange())
-    view.RenderImage(img)
-    return view
-
-if __name__=="__main__":
-    import sys
-    if len(sys.argv)!=3:
-        molSmiles="CN(C)CCCN1C2=CC=CC=C2SC3=C1C=C(C=C3)Cl.Cl"
-        fragSmiles="C-N"
-    else:
-        molSmiles=sys.argv[1]
-        fragSmiles=sys.argv[2]
-    molecule2BMP(molSmiles, "mol.bmp")
-    moleculeFragment2BMP(molSmiles, fragSmiles, "mol_sub.bmp")