Commits

mitar committed 427f187

Revamped documentation.

Comments (0)

Files changed (139)

docs/reference-html/assess1.py

+import orange
+import obiAssess
+import obiGeneSets
+
+gs = obiGeneSets.collections([":kegg:hsa"])
+data = orange.ExampleTable("DLBCL.tab")
+
+asl = obiAssess.AssessLearner()
+ass = asl(data, "hsa", geneSets=gs)
+
+print "Enrichments for the first example (10 pathways)"
+enrichments = ass(data[0])
+for patw, enric in sorted(enrichments.items())[:10]:
+    print patw, enric
+

docs/reference-html/assess2.py

+import orange
+import obiAssess
+import obiGeneSets
+
+gs = obiGeneSets.collections([":kegg:hsa"])
+data = orange.ExampleTable("DLBCL.tab")
+
+asl = obiAssess.AssessLearner()
+ass = asl(data, "hsa", geneSets=gs)
+
+def genesetsAsAttributes(data, ass, domain=None):
+    """
+    Construct new data set with gene sets as attributes from data
+    set "data" with assess model "ass".
+    """
+
+    ares = {}
+    for ex in data:
+        cres = ass(ex)
+        for name,val in cres.items():
+            aresl = ares.get(name, [])
+            aresl.append(val)
+            ares[name] = aresl
+
+    ares = sorted(ares.items())
+
+    if not domain: #construct new domain instance if needed
+        domain = orange.Domain([ orange.FloatVariable(name=name) \
+            for name in [ a[0] for a in ares]], data.domain.classVar )
+
+    examples = [ [ b[zap] for a,b in ares ] + \
+        [ data[zap][-1] ]   for zap in range(len(data)) ]
+
+    et = orange.ExampleTable(domain, examples)
+    return et
+
+tdata = genesetsAsAttributes(data, ass)
+
+print "First 10 attributes of the first example in transformed data set"
+for pathw, enric in zip(tdata.domain,tdata[0])[:10]:
+    print pathw.name, enric.value

docs/reference-html/assess3.py

+import orange
+import obiAssess
+import obiGeneSets
+
+gs = obiGeneSets.collections([":kegg:hsa"])
+data = orange.ExampleTable("DLBCL.tab")
+
+asl = obiAssess.AssessLearner()
+
+def genesetsAsAttributes(data, ass, domain=None):
+    """
+    Construct new data set with gene sets as attributes from data
+    set "data" with assess model "ass".
+    """
+
+    ares = {}
+    for ex in data:
+        cres = ass(ex)
+        for name,val in cres.items():
+            aresl = ares.get(name, [])
+            aresl.append(val)
+            ares[name] = aresl
+
+    ares = sorted(ares.items())
+
+    if not domain: #construct new domain instance if needed
+        domain = orange.Domain([ orange.FloatVariable(name=name) \
+            for name in [ a[0] for a in ares]], data.domain.classVar )
+
+    examples = [ [ b[zap] for a,b in ares ] + \
+        [ data[zap][-1] ]   for zap in range(len(data)) ]
+
+    et = orange.ExampleTable(domain, examples)
+    return et
+
+offer = None
+
+def transformLearningS(data):
+    ass = asl(data, "hsa", geneSets=gs)
+    et = genesetsAsAttributes(data, ass)
+
+    global offer
+    offer = (et.domain, ass) #save assess model
+
+    return et
+   
+def transformTestingS(data):
+    global offer
+    if not offer:
+        a = fdfsdsdd #exception
+
+    domain, ass = offer
+    offer = None
+
+    return genesetsAsAttributes(data, ass, domain)
+
+
+import orngBayes, orngTest, orngStat
+learners = [ orngBayes.BayesLearner() ]
+
+resultsOriginal = orngTest.crossValidation(learners, data, folds=10)
+resultsTransformed = orngTest.crossValidation(learners, data, folds=10, 
+    pps = [("L", transformLearningS), ("T", transformTestingS)])
+
+print "Original", "CA:", orngStat.CA(resultsOriginal), "AUC:", orngStat.AUC(resultsOriginal)
+print "Transformed", "CA:", orngStat.CA(resultsTransformed), "AUC:", orngStat.AUC(resultsTransformed)
+

docs/reference-html/enrichment_graph.png

Added
New image

docs/reference-html/geneMatch.py

+import obiGene
+import obiKEGG
+
+targets = obiKEGG.KEGGOrganism("9606").get_genes() #human NCBI ID
+
+gmkegg = obiGene.GMKEGG("9606")
+gmgo = obiGene.GMGO("9606")
+gmkegggo = obiGene.matcher([[gmkegg, gmgo]], direct=False)
+
+gmkegg.set_targets(targets)
+gmgo.set_targets(targets)
+gmkegggo.set_targets(targets)
+
+genes = [ "cct7", "pls1", "gdi1", "nfkb2", "dlg7" ]
+
+print "%12s" % "gene", "%12s" % "KEGG", "%12s" % "GO", "%12s" % "KEGG+GO"
+for gene in genes:
+    print "%12s" % gene, "%12s" % gmkegg.umatch(gene), \
+          "%12s" % gmgo.umatch(gene), \
+          "%12s" % gmkegggo.umatch(gene)
+

docs/reference-html/geneMatch1.py

+import obiGene
+import obiKEGG
+
+keggorg = obiKEGG.KEGGOrganism("mmu")
+kegg_genes = keggorg.get_genes() 
+
+query = [ "Fndc4", "Itgb8", "Cdc34", "Olfr1403" ] 
+
+gm = obiGene.GMKEGG("mmu") #use KEGG aliases for gene matching
+gm.set_targets(kegg_genes) #set KEGG gene aliases as targets
+
+pnames = keggorg.list_pathways()
+
+for name in query:
+    match = gm.umatch(name) # matched kegg alias or None
+    if match:
+    	pwys = keggorg.get_pathways_by_genes([match])
+        print name, "is in", [ pnames[p] for p in pwys ] 

docs/reference-html/geo_gds1.py

+"""
+Print out some information on specific GEO's data set.
+Does not download the data set.
+"""
+
+import obiGEO
+import textwrap
+
+gdsinfo = obiGEO.GDSInfo()
+gds = gdsinfo["GDS10"]
+
+print "ID:", gds["dataset_id"]
+print "Features:", gds["feature_count"]
+print "Genes:", gds["gene_count"]
+print "Organism:", gds["platform_organism"]
+print "PubMed ID:", gds["pubmed_id"]
+print "Sample types:"
+for sampletype in set([sinfo["type"] for sinfo in gds["subsets"]]):
+    ss = [sinfo["description"] for sinfo in gds["subsets"] if sinfo["type"]==sampletype]
+    print "  %s (%s)" % (sampletype, ", ".join(ss))
+print
+print "Description:"
+print "\n".join(textwrap.wrap(gds["description"], 70))

docs/reference-html/geo_gds2.py

+import obiGEO
+reload(obiGEO)
+
+# gds = obiGEO.GDS("GDS10")
+gds = obiGEO.GDS("GDS1210")
+
+data = gds.getdata(report_genes=True, transpose=False)
+print "report_genes=True, transpose=False"
+print "Report=Genes, Rows=Genes/Spots"
+print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
+print
+
+data = gds.getdata(report_genes=False, transpose=False)
+print "report_genes=False, transpose=False"
+print "Report=Spots, Rows=Genes/Spots"
+print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
+print
+
+data = gds.getdata(report_genes=True, transpose=True)
+print "report_genes=True, transpose=True"
+print "Report=Genes, Rows=Samples"
+print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
+print "Class values:", " ".join([str(cv) for cv in data.domain.classVar.values]) 
+print
+
+
+data = gds.getdata(report_genes=True, transpose=True, sample_type="tissue")
+print 'report_genes=True, transpose=True sample_type="tissue"'
+print "Report=Genes, Rows=Samples"
+print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
+print "Class values:", " ".join([str(cv) for cv in data.domain.classVar.values]) 
+print

docs/reference-html/geo_gds3.py

+import obiGEO
+
+gds = obiGEO.GDS("GDS1676")
+data = gds.getdata(sample_type="infection")
+print "Genes: %d, Samples: %d" % (len(data), len(data.domain.attributes))
+
+for a in data.domain.attributes:
+    print a.name, a.attributes

docs/reference-html/geo_gds4.py

+import orngServerFiles
+import glob
+import re
+
+filenames = glob.glob(orngServerFiles.localpath("GEO") + "/GDS*.soft.gz")
+m = re.compile("(GDS[0-9]*).soft")
+print "%d data files cached:" % len(filenames)
+print " ".join([m.search(fn).group(1) for fn in filenames])
+

docs/reference-html/geo_gds5.py

+"""
+Check all data files from GEO, find those which include at least N
+samples in all sample subsets of at least one sample type. Useful
+when, for instance, filtering out the data sets that could be used for
+supervised machine learning.
+"""
+
+import obiGEO
+
+def valid(info, n=40):
+    """Return the set of subset types whose every subset contains at least n samples.
+
+    info["subsets"] is iterated as a sequence of dictionaries; each one is
+    read for its "type" and its "sample_id" (whose length is the sample count).
+    """
+    invalid = set()
+    subsets = set([sinfo["type"] for sinfo in info["subsets"]])
+    for sampleinfo in info["subsets"]:
+        # one subset that is too small disqualifies its whole sample type
+        if len(sampleinfo["sample_id"]) < n:
+            invalid.add(sampleinfo["type"])
+    return subsets.difference(invalid)
+
+def report(stypes, info):
+    """Pretty-print GDS ids and their valid subset types.
+
+    stypes is a sequence of (id, subset types) pairs; info maps an id to
+    a record whose "subsets" entries are printed as description/size.
+    """
+    for id, sts in stypes:
+        print id
+        for st in sts:
+            # trailing comma keeps the subset list on the same output line
+            print "  %s:" % st,
+            gds = info[id]
+            print ", ".join(["%s/%d" % (sinfo["description"], len(sinfo["sample_id"])) \
+                             for sinfo in gds["subsets"] if sinfo["type"]==st])
+
+gdsinfo = obiGEO.GDSInfo()
+valid_subset_types = [(id, valid(info)) for id, info in gdsinfo.items() if valid(info)]
+report(valid_subset_types, gdsinfo)

docs/reference-html/geo_gds6.py

+import obiGEO
+import orange
+import orngTest
+import orngStat
+
+gds = obiGEO.GDS("GDS2960")
+data = gds.getdata(sample_type="disease state", transpose=True)
+print "Samples: %d, Genes: %d" % (len(data), len(data.domain.attributes))
+
+learners = [orange.LinearLearner]
+results = orngTest.crossValidation(learners, data, folds=10)
+print "AUC = %.3f" % orngStat.AUC(results)[0]

docs/reference-html/gsea1.py

+import orange, obiGsea, obiGene
+
+data = orange.ExampleTable("iris")
+
+gen1 = dict([
+    ("sepal",["sepal length", "sepal width"]), 
+    ("petal",["petal length", "petal width", "petal color"])
+    ])
+
+res = obiGsea.runGSEA(data, matcher=obiGene.matcher([]), minSize=2, geneSets=gen1)
+print "%5s  %6s %6s %s" % ("LABEL", "NES", "P-VAL", "GENES")
+for name,resu in res.items():
+    print "%5s  %6.3f %6.3f %s" % (name, resu["nes"], resu["p"], str(resu["genes"]))

docs/reference-html/gsea2.py

+import obiDicty
+import obiGeneSets
+import obiGsea
+import orange
+import obiGene
+
+dbc = obiDicty.DatabaseConnection()
+data = dbc.getData(sample='pkaC-', time="8")[0] #get first chip
+
+print "First 10 examples"
+for ex in data[:10]:
+    print ex
+
+matcher=obiGene.matcher([[obiGene.GMKEGG("ddi"),obiGene.GMDicty()]])
+
+genesets =  obiGeneSets.collections([":kegg:ddi"])
+res = obiGsea.runGSEA(data, matcher=matcher, minPart=0.05, geneSets=genesets, 
+    permutation="gene")
+
+print "GSEA results"
+print "%-40s %6s %6s %6s %7s" % ("LABEL", "NES", "P-VAL", "SIZE", "MATCHED") 
+for name,resu in res.items()[:10]: 
+    print "%-40s %6.3f %6.3f %6d %7d" % (name[:30], resu["nes"], resu["p"], 
+        resu["size"], resu["matched_size"]) 
+

docs/reference-html/gsea3.py

+import obiGeneSets
+import obiGsea
+import orange
+import obiGene
+import obiGEO
+
+import obiGEO
+gds = obiGEO.GDS("GDS10")
+data = gds.getdata() 
+
+print "Possible phenotype descriptors:"
+print map(lambda x: x[0], obiGsea.allgroups(data).items())
+
+matcher=obiGene.matcher([obiGene.GMKEGG("9606")])
+
+phenVar = "tissue"
+geneVar = "gene" #use gene meta variable for gene names
+
+genesets =  obiGeneSets.collections([":kegg:hsa"])
+res = obiGsea.runGSEA(data, matcher=matcher, minPart=0.05, geneSets=genesets, 
+    permutation="class", n=10, phenVar=phenVar, geneVar=geneVar)
+
+print
+print "GSEA results (choosen descriptor: tissue)"
+print "%-40s %6s %6s %6s %7s" % ("LABEL", "NES", "FDR", "SIZE", "MATCHED") 
+for name,resu in sorted(res.items(), key=lambda x: x[1]["fdr"])[:10]: 
+    print "%-40s %6.3f %6.3f %6d %7d" % (name[:30], resu["nes"], resu["fdr"], 
+        resu["size"], resu["matched_size"]) 

docs/reference-html/mirnaExamle1.py

+import random
+import obimiRNA
+
+miRNAs = obimiRNA.ids()
+
+print 'miRNA name\tAccession_Number\t\tSequence\t\tPre-forms\n'
+for m in random.sample(miRNAs, 10):
+    accession = obimiRNA.get_info(m).matACC
+    sequence = obimiRNA.get_info(m).matSQ
+    preForms = obimiRNA.get_info(m).pre_forms
+    print '%s\t%s\t\t%s\t\t%s' % (m, accession, sequence, preForms)

docs/reference-html/mirnaExamle2.py

+import random
+import obimiRNA
+
+mirnaHSA = obimiRNA.ids('hsa')
+
+for pm in reduce(lambda x,y: x+y, [obimiRNA.get_info(m).pre_forms.split(',') for m in random.sample(mirnaHSA,3)]):                                    
+    pre_miRNA = obimiRNA.get_info(pm,type='pre')
+    print
+    print 'Pre-miRNA name: %s' % pm
+    print 'Accession Number: %s' % pre_miRNA.preACC
+    print 'Accession Number of mature form(s): %s' % pre_miRNA.matACCs
+    print 'PubMed accession number(s): %s' % pre_miRNA.pubIDs
+    print 'Pre-miRNAs clustered together with %s: %s' % (pm, pre_miRNA.clusters)
+    print 'Link to miRBase: %s' % pre_miRNA.web_addr

docs/reference-html/mirnaExamle3.py

+import random
+import obiGO
+import obimiRNA
+
+annotations = obiGO.Annotations('hsa',obiGO.Ontology())
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+print 'miRNA\tNumber of annotations\tGO_IDs\n'
+for mi,goList in obimiRNA.get_GO(miRNAs, annotations, goSwitch=False).items():
+    if goList:
+        print '%s\t%d\t%s' % (mi, len(goList), ','.join(goList[0:4])+'...')

docs/reference-html/mirnaExamle4.py

+import random
+import obiGO
+import obimiRNA
+
+annotations = obiGO.Annotations('hsa',obiGO.Ontology())
+
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+dict_all = obimiRNA.get_GO(miRNAs, annotations, goSwitch=False)
+dict_enr = obimiRNA.get_GO(miRNAs, annotations, enrichment=True, goSwitch=False)
+
+dict_tfidf = obimiRNA.filter_GO(dict_all, annotations, reverse=False)
+
+print '#\tmiRNA name\t# All GO terms\t# Enriched GO terms\t# Filtred GO terms\n'
+for n,m in enumerate(miRNAs):
+    print '%d\t%s\t\t%d\t\t%d\t\t%d' % (n+1,m,len(dict_all[m]),len(dict_enr[m]),len(dict_tfidf[m]))

docs/reference-html/mirnaExamle5.py

+import random
+import obimiRNA
+
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+mirPath_all= obimiRNA.get_pathways(miRNAs,enrichment=False, pathSwitch=False)
+mirPath_enr = obimiRNA.get_pathways(miRNAs,enrichment=True, pathSwitch=False)
+
+print 'miRNA_name\t# of pathways\t# of enriched pathways\n'
+for m in miRNAs:
+    print '%s\t\t%d\t\t%d' % (m,len(mirPath_all[m]),len(mirPath_enr[m]))

docs/reference-html/mirnaExample1.py

+import random
+import obimiRNA
+
+miRNAs = obimiRNA.ids()
+
+print 'miRNA name\tAccession_Number\t\tSequence\t\tPre-forms\n'
+for m in random.sample(miRNAs, 10):
+    accession = obimiRNA.get_info(m).matACC
+    sequence = obimiRNA.get_info(m).matSQ
+    preForms = obimiRNA.get_info(m).pre_forms
+    print '%s\t%s\t\t%s\t\t%s' % (m, accession, sequence, preForms)

docs/reference-html/mirnaExample2.py

+import random
+import obimiRNA
+
+mirnaHSA = obimiRNA.ids('hsa')
+
+for pm in reduce(lambda x,y: x+y, [obimiRNA.get_info(m).pre_forms.split(',') for m in random.sample(mirnaHSA,3)]):                                    
+    pre_miRNA = obimiRNA.get_info(pm,type='pre')
+    print
+    print 'Pre-miRNA name: %s' % pm
+    print 'Accession Number: %s' % pre_miRNA.preACC
+    print 'Accession Number of mature form(s): %s' % pre_miRNA.matACCs
+    print 'PubMed accession number(s): %s' % pre_miRNA.pubIDs
+    print 'Pre-miRNAs clustered together with %s: %s' % (pm, pre_miRNA.clusters)
+    print 'Link to miRBase: %s' % pre_miRNA.web_addr

docs/reference-html/mirnaExample3.py

+import random
+import obiGO
+import obimiRNA
+
+annotations = obiGO.Annotations('hsa',obiGO.Ontology())
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+print 'miRNA\tNumber of annotations\tGO_IDs\n'
+for mi,goList in obimiRNA.get_GO(miRNAs, annotations, goSwitch=False).items():
+    if goList:
+        print '%s\t%d\t%s' % (mi, len(goList), ','.join(goList[0:4])+'...')

docs/reference-html/mirnaExample4.py

+import random
+import obiGO
+import obimiRNA
+
+annotations = obiGO.Annotations('hsa',obiGO.Ontology())
+
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+dict_all = obimiRNA.get_GO(miRNAs, annotations, goSwitch=False)
+dict_enr = obimiRNA.get_GO(miRNAs, annotations, enrichment=True, goSwitch=False)
+
+dict_tfidf = obimiRNA.filter_GO(dict_all, annotations, reverse=False)
+
+print '#\tmiRNA name\t# All GO terms\t# Enriched GO terms\t# Filtred GO terms\n'
+for n,m in enumerate(miRNAs):
+    print '%d\t%s\t\t%d\t\t%d\t\t%d' % (n+1,m,len(dict_all[m]),len(dict_enr[m]),len(dict_tfidf[m]))

docs/reference-html/mirnaExample5.py

+import random
+import obimiRNA
+
+miRNAs = random.sample(obimiRNA.ids('hsa'),10)
+
+mirPath_all= obimiRNA.get_pathways(miRNAs,enrichment=False, pathSwitch=False)
+mirPath_enr = obimiRNA.get_pathways(miRNAs,enrichment=True, pathSwitch=False)
+
+print 'miRNA_name\t# of pathways\t# of enriched pathways\n'
+for m in miRNAs:
+    print '%s\t\t%d\t\t%d' % (m,len(mirPath_all[m]),len(mirPath_enr[m]))

docs/reference-html/obiArrayExpress-test.py

+import obiArrayExpress
+from pprint import pprint
+
+# test the gene_atlas_summary
+summary = obiArrayExpress.get_atlas_summary(["Kalrn", "Ptprd", "Mbp", "Cyfip2"], "Mus musculus")
+pprint(summary)
+
+# test query_atlas_simple
+results = obiArrayExpress.query_atlas_simple(genes=["Kalrn", "Ptprd", "Mbp", "Cyfip2"], organism="Mus musculus", regulation="up", condition="brain")
+pprint(results)
+
+# test Atlas Conditions
+gene_cond1 = obiArrayExpress.AtlasConditionGeneProperty("Goterm", "Is", "translation")
+gene_cond2 = obiArrayExpress.AtlasConditionGeneProperty("Disease", "Is", "cancer")
+org_cond = obiArrayExpress.AtlasConditionOrganism("Homo sapiens")
+
+conditions = obiArrayExpress.AtlasConditionList([gene_cond1, gene_cond2, org_cond])
+results = obiArrayExpress.query_atlas(conditions)
+pprint(results)
+
+# test ArrayExpress experiments, files query
+
+results = obiArrayExpress.query_experiments(accession="E-MEXP-31")
+pprint(results)
+
+results = obiArrayExpress.query_experiments(species="Homo sapines", expdesign="dose+response", ef="CellType")
+pprint(results)
+
+results = obiArrayExpress.query_experiments(species="Homo sapiens", gxa=True, assaycount=(1, 5), miamescore=(3, 5))
+pprint(results)
+
+results = obiArrayExpress.query_files(species="Mus musculus", gxa=True, keywords=["lung", "cancer"], miamescore=(3, 5), format="xml")
+print results

docs/reference-html/obiAssess.htm

+<html>
+
+<head>
+<title>obiAssess: pathway enrichment for each sample</title>
+<link rel=stylesheet href="style.css" type="text/css">
+<link rel=stylesheet href="style-print.css" type="text/css" media=print>
+</head>
+
+<body>
+<h1>obiAssess: pathway enrichment for each sample</h1>
+<index name="modules/assess enrichment gsea">
+
+<p>Gene Set Enrichment Analysis (GSEA) is a method which tries to identify groups of genes that are regulated together. It computes pathway enrichments for the whole data set. ASSESS is inspired by GSEA and computes enrichments for each sample in the data set.</p>
+
+<p>ASSESS takes gene expression with sample phenotypes and computes gene set enrichments for given gene sets. First pathway &quot;models&quot; have to be created with AssessLearner. Afterwards they are used to calculate enrichments for each pair of sample and pathway.</p>
+
+<h2>AssessLearner</h2>
+
+This class is used to build models that can later be used to determine enrichment scores for each example. Note that the domains of the input data to <code>AssessLearner</code> and <code>Assess</code> instances must be the same.
+
+<dl class=attributes>
+
+<dt>__call__(self, data, organism, geneSets, minSize=3, maxSize=1000, minPart=0.1, classValues=None, rankingf=None)</dt>
+<dd>Function <code>__call__</code> returns an instance of class <code>Assess</code> which can be given an example and returns its enrichment in all pathways. Argument descriptions follow.
+
+<dl class=arguments>
+
+  <dt>data</dt>
+  <dd>An <A href="ExampleTable.htm"><CODE>ExampleTable</CODE></A> with gene expression data. An example
+  should correspond to a sample with its phenotype (class value). Attributes represent individual genes. Their names 
+  should be meaningful gene aliases.</dd>
+
+  <dt>organism</dt>
+  <dd>Organism code as used in KEGG. Needed for matching gene names in data to those in gene sets. Some
+  examples: <code>hsa</code> for human, <code>mmu</code> for mouse. This is a required argument.</dd>
+
+  <dt>classValues</dt>
+  <dd>A pair of class values describing phenotypes that are chosen as two distinct phenotypes on which gene correlations
+  are computed. Only examples with one of chosen class values are considered for analysis. If not specified, first
+  two class values in <code>classVar</code> attribute descriptor are used.</dd>
+
+  <dt>geneSets</dt>
+  <dd>A python dictionary of gene sets, where key is a gene set name which points to a list of gene aliases for genes
+  in the gene set. Default: gene sets in your collection directory.</dd>
+
+  <dt>minSize, maxSize</dt>
+  <dd>Minimum and maximum number of genes from gene set also present in the data set for that gene set to be analysed.
+  Defaults: 3 and 1000.</dd>
+
+  <dt>minPart</dt>
+  <dd>Minimum fraction of genes from the gene set also present in the data set for that gene set to be analysed. Default: 0.1.</dd> 
+
+  <dt>rankingf</dt>
+  <dd>Used to specify model type for individual gene sets. See source code for reference. We recommend leaving the parameter blank. In that case, a parametric model from Edelman, 2006 is used.</dd>
+
+</dl>
+
+</dd>
+
+</dl>
+
+<h2>Assess</h2>
+
+<dl class=attributes>
+
+<dt>__init__(**kwargs)</dt>
+<dd>Function <code>__init__</code> is usually called only by <code>AssessLearner</code>. It is used to save built &quot;model&quot; data. Saves all keyword arguments into object's namespace.</dd>
+
+<dt>__call__(example)</dt>
+<dd>Returns enrichments of all gene sets for this example. Enrichments are returned in a dictionary, where keys are gene set names and values are their enrichments. Note that the example's domain must be the same as the domain on which the &quot;model&quot; was built.</dd>
+
+
+</dl>
+
+<h3>Example 1</h3>
+
+This example prints enrichments for the first sample in the data set. It uses KEGG as a gene set source.
+
+<p class="header"><a href="assess1.py">assess1.py</a> (uses <a href="http://www.ailab.si/orange/datasets/DLBCL.tab">DLBCL.tab</a>)</p>
+
+<xmp class=code>import orange
+import obiAssess
+import obiGeneSets
+
+gs = obiGeneSets.collections([":kegg:hsa"])
+data = orange.ExampleTable("DLBCL.tab")
+
+asl = obiAssess.AssessLearner()
+ass = asl(data, "hsa", geneSets=gs)
+
+print "Enrichments for the first example (10 pathways)"
+enrichments = ass(data[0])
+for patw, enric in sorted(enrichments.items())[:10]:
+    print patw, enric
+</xmp>
+
+<p>Output:</p>
+
+<xmp class=code>Enrichments for the first example (10 pathways)
+[KEGG] 1- and 2-Methylnaphthalene degradation -0.84674671525
+[KEGG] 3-Chloroacrylic acid degradation -0.587923507915
+[KEGG] ABC transporters - General -0.292198856631
+[KEGG] Acute myeloid leukemia 0.305086037192
+[KEGG] Adherens junction 0.387903973883
+[KEGG] Adipocytokine signaling pathway 0.404448748545
+[KEGG] Alanine and aspartate metabolism 0.400113861834
+[KEGG] Alkaloid biosynthesis I -0.677360944415
+[KEGG] Alkaloid biosynthesis II -0.437492650183
+[KEGG] Allograft rejection 0.491535468415
+</xmp>
+
+<h3>Example 2: transforming data sets</h3>
+
+This example builds a new data set, where attributes are gene sets instead of genes. It prints the first 10 attributes for the first example of the transformed data set. Note that the output matches the previous example (with the exception of floating point discrepancies).
+
+<p class="header"><a href="assess2.py">assess2.py</a> (uses <a href="http://www.ailab.si/orange/datasets/DLBCL.tab">DLBCL.tab</a>)</p>
+
+<xmp class=code>import orange
+import obiAssess
+import obiGeneSets
+
+gs = obiGeneSets.collections([":kegg:hsa"])
+data = orange.ExampleTable("DLBCL.tab")
+
+asl = obiAssess.AssessLearner()
+ass = asl(data, "hsa", geneSets=gs)
+
+def genesetsAsAttributes(data, ass, domain=None):
+    """
+    Construct new data set with gene sets as attributes from data
+    set "data" with assess model "ass".
+    """
+
+    ares = {}
+    for ex in data:
+        cres = ass(ex)
+        for name,val in cres.items():
+            aresl = ares.get(name, [])
+            aresl.append(val)
+            ares[name] = aresl
+
+    ares = sorted(ares.items())
+
+    if not domain: #construct new domain instance if needed
+        domain = orange.Domain([ orange.FloatVariable(name=name) \
+            for name in [ a[0] for a in ares]], data.domain.classVar )
+
+    examples = [ [ b[zap] for a,b in ares ] + \
+        [ data[zap][-1] ]   for zap in range(len(data)) ]
+
+    et = orange.ExampleTable(domain, examples)
+    return et
+
+tdata = genesetsAsAttributes(data, ass)
+
+print "First 10 attributes of the first example in transformed data set"
+for pathw, enric in zip(tdata.domain,tdata[0])[:10]:
+    print pathw.name, enric.value
+</xmp>
+
+<p>Output:</p>
+
+<xmp class=code>First 10 attributes of the first example in transformed data set
+[KEGG] 1- and 2-Methylnaphthalene degradation -0.846746742725
+[KEGG] 3-Chloroacrylic acid degradation -0.587923526764
+[KEGG] ABC transporters - General -0.292198866606
+[KEGG] Acute myeloid leukemia 0.305086046457
+[KEGG] Adherens junction 0.387903988361
+[KEGG] Adipocytokine signaling pathway 0.404448747635
+[KEGG] Alanine and aspartate metabolism 0.400113850832
+[KEGG] Alkaloid biosynthesis I -0.6773609519
+[KEGG] Alkaloid biosynthesis II -0.437492638826
+[KEGG] Allograft rejection 0.491535454988
+</xmp>
+
+<h3>Example 3: testing transformed data set quality</h3>
+
+We measure CA and AUC of transformed data set using cross validation and compare them to the original data set. Care needs to be taken to prevent overfitting: we must not use any knowledge about testing set when creating &quot;ASSESS models&quot; and we have to use the same &quot;ASSESS model&quot; for both learning and testing set. We solve this by saving the model to a global variable.
+
+<p class="header">part of <a href="assess3.py">assess3.py</a> (uses <a href="http://www.ailab.si/orange/datasets/DLBCL.tab">DLBCL.tab</a>)</p>
+
+<xmp class=code>offer = None
+
+def transformLearningS(data):
+    ass = asl(data, "hsa", geneSets=gs)
+    et = genesetsAsAttributes(data, ass)
+
+    global offer
+    offer = (et.domain, ass) #save assess model
+
+    return et
+   
+def transformTestingS(data):
+    global offer
+    if not offer:
+        a = fdfsdsdd #exception
+
+    domain, ass = offer
+    offer = None
+
+    return genesetsAsAttributes(data, ass, domain)
+
+
+import orngBayes, orngTest, orngStat
+learners = [ orngBayes.BayesLearner() ]
+
+resultsOriginal = orngTest.crossValidation(learners, data, folds=10)
+resultsTransformed = orngTest.crossValidation(learners, data, folds=10, 
+    pps = [("L", transformLearningS), ("T", transformTestingS)])
+
+print "Original", "CA:", orngStat.CA(resultsOriginal), "AUC:", orngStat.AUC(resultsOriginal)
+print "Transformed", "CA:", orngStat.CA(resultsTransformed), "AUC:", orngStat.AUC(resultsTransformed)
+</xmp>
+
+<p>Output:</p>
+
+<xmp class=code>Original CA: [0.8214285714285714] AUC: [0.78583333333333338]
+Transformed CA: [0.80714285714285716] AUC: [0.84250000000000003]
+</xmp>
+
+<HR>
+<H2>References</H2>
+
+<p>Edelman E, Porrello A, Guinney J, Balakumaran B, Bild A, Febbo PG, Mukherjee S. Analysis of sample set enrichment scores: assaying the enrichment of sets of genes for individual samples in genome-wide expression profiles. Bioinformatics. 2006 Jul 15; 22(14):e108-16. </p>
+
+</body>
+</html>
+

docs/reference-html/obiBioMart-query.py

+from obiBioMart import *
+
+## Printing attribute configurations 
+
+connection = BioMartConnection("http://www.biomart.org/biomart/martservice")
+registry = connection.registry()
+#for schema in registry.virtual_schemas()[:1]:
+#    for database in schema.marts()[:1]:
+#        for dataset in database.datasets()[:2]:
+#            for attrTree in dataset.configuration().attributes():
+#                if not getattr(attrTree, "hidden", "false") == "true":
+#                    print dataset.name, "has attribute", getattr(attrTree, "displayName", "<unknown>")
+                    
+## Printing dataset attributes
+
+database = registry["ensembl"]
+dataset = database["hsapiens_gene_ensembl"]
+
+for attr in dataset.attributes():
+    print attr
+
+for filter in dataset.filters():
+    print filter
+                    
+query = BioMartQuery(connection, dataset="hsapiens_gene_ensembl", attributes=["ensembl_transcript_id", "chromosome_name"], 
+                     filters=[("chromosome_name", ["22"])])
+print query.get_count()
+
+print query.run()
+
+query = BioMartQuery(connection)
+query.set_dataset("hsapiens_gene_ensembl")
+query.add_filter("chromosome_name", ["22"])
+query.add_attribute("ensembl_transcript_id")
+query.add_attribute("chromosome_name")
+query.add_attribute("uniprot_swissprot")
+print query.get_count()
+print query.run()

docs/reference-html/obiBioMart.htm

Empty file added.

docs/reference-html/obiChem.htm

+<html>
+<head>
+<link rel=stylesheet href="style.css" type="text/css">
+</head>
+<body>
+
+<h1>obiChem: A library for searching frequent molecular fragments</h1>
+<index name="modules/molecular fragments">
+<p>obi implements the following classes
+<ul>
+    <li>FragmentMiner   : The main class that does the search</li>
+    <li>Fragment        : Representation of the fragment</li>
+    <li>Fragmenter      : A class that is used to fragment an ExampleTable</li>
+    <li>FragmentBasedLearner    : A learner wrapper class that first runs the molecular fragmentation on the data</li>
+</ul>
+</p>
+
+<h2>FragmentMiner</h2>
+<p>A class for finding frequent molecular fragments</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+    <dt>active</dt>
+    <dd>list of smiles codes of active molecules</dd>
+    <dt>inactive</dt>
+    <dd>list of smiles codes of inactive molecules</dd>
+    <dt>minSupport</dt>
+    <dd>minimum frequency in the active set of the fragments to search for</dd>
+    <dt>maxSupport</dt>
+    <dd>maximum frequency in the inactive set of the fragments to search for</dd>
+    <dt>addWholeRings</dt>
+    <dd>if True rings will be added as a whole rather than atom by atom</dd>
+    <dt>canonicalPruning</dt>
+    <dd>if True a cache of the canonical codes of all fragments will be kept to avoid redundant search</dd>
+    <dt>findClosed</dt>
+    <dd>finds only fragments that are not sub-structures of any other fragment with the same support (default: True)</dd>
+</dl>
+<p class=section>Methods</p>
+<dl class=methods>
+    <dt>Search()</dt>
+    <dd>Runs the fragment search algorithm and returns a list of found fragments</dd>
+</dl>
+<h3>Example</h3>
+<XMP class=code>miner = FragmentMiner(active = ["NC(C)C(=O)O", "NC(CS)C(=O)O", "NC(CO)C(=O)O"], inactive = [], minSupport = 0.6)
+for fragment in miner.Search():
+    print fragment.ToSmiles() , "Support: %.3f" %fragment.Support()</XMP>
+
+<h2>Fragment</h2>
+<p>A class representing a molecular fragment</p>
+<p class=section>Methods</p>
+<dl class=methods>
+    <dt>ToOBMol()</dt>
+    <dd>Returns an openbabel.OBMol object representation</dd>
+    <dt>ToSmiles()</dt>
+    <dd>Returns a SMILES code representation</dd>
+    <dt>ToCanonicalSmiles()</dt>
+    <dd>Returns a canonical SMILES code representation</dd>
+    <dt>Support()</dt>
+    <dd>Returns the support of the fragment in the active set</dd>
+    <dt>OcurrencesIn(smiles)</dt>
+    <dd>Returns the number of times a fragment is contained in the molecule represented by the <code>smiles</code> code argument</dd>
+    <dt>ContainedIn(smiles)</dt>
+    <dd>Returns True if the fragment is present in the molecule represented by the <code>smiles</code> code argument</dd>
+</dl>
+
+<h2>Fragmenter</h2>
+<p>An object that is used to fragment an ExampleTable</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+    <dt>minSupport</dt>
+    <dd>minimum frequency in the active set of the fragments to search for (default: 0.2)</dd>
+    <dt>maxSupport</dt>
+    <dd>maximum frequency in the inactive set of the fragments to search for (default: 0.2)</dd>
+    <dt>findClosed</dt>
+    <dd>finds only fragments that are not sub-structures of any other fragment with the same support (default: True)</dd>
+</dl>
+<p class=section>Methods</p>
+<dl class=methods>
+    <dt>__call__(data, smilesAttr, activeFunc)</dt>
+    <dd>Takes a data-set, and runs the FragmentMiner on it. Returns a new data-set and the fragments.
+        The new data-set contains new attributes that represent the presence of a fragment that was found.
+        <p class=section>Arguments</p>
+        <dl class=arguments>
+            <dt>data</dt>
+            <dd>the dataset</dd>
+            <dt>smilesAttr</dt>
+            <dd>the attribute in the data that contains the SMILES codes (if none is provided it will try to make a smart guess)</dd>
+            <dt>activeFunc</dt>
+            <dd>a function that takes an example from the data-set and returns True if the example should be
+                    considered as active (if none is provided all examples are considered active)</dd>
+       </dl>
+    </dd>
+</dl>
+<h3>Example</h3>
+<XMP class=code>fragmenter=Fragmenter(minSupport=0.1, maxSupport=0.05)
+data, fragments=fragmenter(data, "SMILES")
+</XMP>
+
+<h2>FragmentBasedLearner</h2>
+<p>A learner wrapper class that first runs the molecular fragmentation on the data.</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+    <dt>smilesAttr</dt>
+    <dd>Attribute in the data that contains the smiles codes (if none is provided it will try to make a smart guess)</dd>
+    <dt>learner</dt>
+    <dd>learner that will be used to actually learn on the fragmented data (default: orngSVM.SVMLearner)</dd>
+    <dt>minSupport</dt>
+    <dd>minimum frequency in the active set of the fragments to search for</dd>
+    <dt>maxSupport</dt>
+    <dd>maximum frequency in the inactive set of the fragments to search for</dd>
+    <dt>activeFunc</dt>
+    <dd>a function that takes an example from the learning data-set and returns True if the example should be
+                    considered as active (if none is provided all examples are considered active)</dd>
+    <dt>findClosed</dt>
+    <dd>finds only fragments that are not sub-structures of any other fragment with the same support (default: True)</dd>
+    
+</dl>

docs/reference-html/obiGEO.htm

+<html>
+<HEAD>
+<LINK REL=StyleSheet HREF="style.css" TYPE="text/css">
+<LINK REL=StyleSheet HREF="../style-print.css" TYPE="text/css" MEDIA=print></LINK>
+</HEAD>
+
+<BODY>
+<h1>obiGEO: an interface to NCBI's Gene Expression Omnibus</h1>
+
+<index name="NCBI">
+<index name="Gene Expression Omnibus">
+<index name="microarray data sets">
+
+<p>obiGEO provides an interface
+to <a href="http://www.ncbi.nlm.nih.gov/">NCBI</a>'s 
+<a href="http://www.ncbi.nlm.nih.gov/geo/">Gene Expression Omnibus</a>
+repository. Currently, it only supports
+<a href="http://www.ncbi.nlm.nih.gov/sites/GDSbrowser">GEO
+DataSets</a> information querying and retrieval.</p>
+
+<h2>GDSInfo</h2>
+
+<p><INDEX name="classes/GDSInfo (in obiGEO)">GDSInfo is the class that
+    can be used to retrieve the information about
+    <a href=http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>GEO Data
+    Sets</a>. The class accesses the Orange server file
+    that either resides on the local computer or is
+    automatically retrieved from Orange server. Notice that the call
+    of this class does not access any of NCBI's servers directly.</p>
+
+<p class=section>Methods</p>
+<dl class=attributes>
+<dt>GDSInfo(force_update=False)</dt>
+<dd><p>Constructor returning the object with GEO DataSets
+  information. If <code>force_update</code> is set
+  to <code>True</code>, the constructor will download GEO DataSets
+  information file (gds_info.pickled) from Orange server, otherwise,
+  it will first check if the local copy exists. The object returned
+  behaves like a dictionary: the keys are GEO DataSets IDs, and each
+  value is a dictionary providing various information
+  about the particular data set.</p>
+
+<xmp class=code>>>> import obiGEO
+>>> info = obiGEO.GDSInfo()
+>>> info.keys()[:5]
+['GDS2526', 'GDS2524', 'GDS2525', 'GDS2522', 'GDS1618']
+>>> info['GDS2526']['title']
+'c-MYC depletion effect on carcinoma cell lines'
+>>> info['GDS2526']['platform_organism']
+'Homo sapiens'
+</xmp>
+</dd>
+</dl>
+
+<h2>GDS</h2>
+
+<p><INDEX name="classes/GDS (in obiGEO)">GDS is a class that
+    provides methods for retrieval of a specific GEO DataSet. The data
+    is provided as Orange's ExampleTable.</p>
+
+<p class=section>Methods</p>
+<dl class=attributes>
+<dt>GDS(gdsname, verbose=False, force_download=False)</dt>
+<dd><p>Constructor returning the object to be used to retrieve GEO
+  DataSet table (samples and gene expressions). <code>gdsname</code>
+  is NCBI's ID for the data set in the form "GDSn" where "n" is a
+  GDS ID number. The constructor checks the local cache directory for the
+  particular data file; if it is not there, it downloads it from
+  <a href="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/GDS/">NCBI's GEO
+  FTP site</a>. The download is forced
+  if <code>force_download=True</code>. The compressed data file
+  resides in the cache directory after the call of the constructor
+  (call to <code>orngServerFiles.localpath("GEO")</code> reveals the
+  path of this directory).</p>
+
+<xmp class=code>>>> import obiGEO
+>>> gds = obiGEO.GDS("GDS1676")
+>>> print ", ".join(gds.genes[:10])
+EXO1, BUB1B, LTB4R2, FOXA1, MEN1, LIFR, L1CAM, TRAF3, AKAP1, PIK3CD
+>>> gds.info["title"]
+'T cell leukemia cell response to human herpesvirus 6 infection: time course'
+>>> print gds
+GDS1676 (Homo sapiens), samples=8, features=2100, genes=667, subsets=8
+</xmp>
+</dd>
+
+<dt>getdata(report_genes=True, transpose=False,
+merge_function=variableMean, sample_type=None,
+remove_unknown=None)</dt>
+<dd><p>The call of this method returns the data from GEO DataSet in
+  Orange format. Microarray spots reported in the GEO data set can
+  either be merged according to their gene id's
+  (<code>report_genes=True</code>) or can be left as spots. The data
+  matrix can have spots/genes in rows and samples in columns
+  (default, <code>transpose=False</code>) or samples in rows and
+  spots/genes in columns
+  (<code>transpose=True</code>). Argument <code>sample_type</code>
+  defines the type of annotation, or (if <code>transpose=True</code>)
+  the type of class labels to be included in the data set. Namely,
+  with <code>sample_type</code>, the entire annotation of samples will
+  be included either in the class value or in
+  the <code>.attributes</code> field of each data set
+  attributes. Spots with sample profiles that include unknown values
+  are retained by default (<code>remove_unknown=None</code>). They are
+  removed if the proportion of samples with unknown values
+  is above the threshold set by <code>remove_unknown</code>.</p>
+
+<p>The following illustrates how <code>getdata</code> is used to
+  construct a data set with genes in rows and samples in
+  columns. Notice that the annotation about each sample is retained
+  in <code>.attributes</code>.</p>
+
+<xmp class=code>>>> import obiGEO
+>>> gds = obiGEO.GDS("GDS1676") 
+>>> data = gds.getdata()
+>>> len(data)
+667
+>>> data[0]
+[?, ?, -0.803, 0.128, 0.110, -2.000, -1.000, -0.358], {"gene":'EXO1'}
+>>> data.domain.attributes[0]
+FloatVariable 'GSM63816'
+>>> data.domain.attributes[0].attributes
+{'dose': '20 U/ml IL-2', 'infection': 'acute ', 'time': '1 d'}
+</xmp>
+
+</dd>
+</dl>
+
+<h2>Examples</h2>
+
+<p>The following script prints out some information about a specific data set. It does not download the data set, just uses the (local) GEO data sets information file.</p>
+
+<p class="header"><a href="geo_gds1.py">geo_gds1.py</a></p>
+<xmp class=code>import obiGEO
+import textwrap
+
+gdsinfo = obiGEO.GDSInfo()
+gds = gdsinfo["GDS10"]
+
+print "ID:", gds["dataset_id"]
+print "Features:", gds["feature_count"]
+print "Genes:", gds["gene_count"]
+print "Organism:", gds["platform_organism"]
+print "PubMed ID:", gds["pubmed_id"]
+print "Sample types:"
+for sampletype in set([sinfo["type"] for sinfo in gds["subsets"]]):
+    ss = [sinfo["description"] for sinfo in gds["subsets"] if sinfo["type"]==sampletype]
+    print "  %s (%s)" % (sampletype, ", ".join(ss))
+print
+print "Description:"
+print "\n".join(textwrap.wrap(gds["description"], 70))
+</xmp>
+
+<p>The output of this script is:</p>
+
+<xmp class=code>ID: GDS10
+Features: 39114
+Genes: 20094
+Organism: Mus musculus
+PubMed ID: 11827943
+Sample types:
+  disease state (diabetic, diabetic-resistant, nondiabetic)
+  strain (NOD, Idd3, Idd5, Idd3+Idd5, Idd9, B10.H2g7, B10.H2g7 Idd3)
+  tissue (spleen, thymus)
+
+Description:
+Examination of spleen and thymus of type 1 diabetes nonobese diabetic
+(NOD) mouse, four NOD-derived diabetes-resistant congenic strains and
+two nondiabetic control strains.
+</xmp>
+
+<p>GEO data sets provide a sort of mini ontology for sample labeling. Samples belong to sample subsets, which in turn belong to specific types. For example, GDS10 above has three sample types, and the subsets of the tissue type are spleen and thymus. If you want to use data sets for supervised data mining, it is useful to find out which of the data sets provide enough samples for each label. It is (semantically) convenient to perform classification within sample subsets of the same type. We therefore need a script that goes through the entire collection of data sets and finds those for which, for a specific type, there are enough samples within each of the subsets. The following script does the work. The function <code>valid</code> is passed the information about the data set and determines which subset types (if any) satisfy the "validity" criteria. The number of samples required in each subset is by default set to <code>n=40</code>.</p>
+
+<p class="header"><a href="geo_gds5.py">geo_gds5.py</a></p>
+<xmp class=code>import obiGEO
+
+def valid(info, n=40):
+    """Return a set of subset types containing at least n samples in every subset"""
+    invalid = set()
+    subsets = set([sinfo["type"] for sinfo in info["subsets"]])
+    for sampleinfo in info["subsets"]:
+        if len(sampleinfo["sample_id"]) < n:
+            invalid.add(sampleinfo["type"])
+    return subsets.difference(invalid)
+
+def report(stypes, info):
+    """Pretty-print GDS and valid subset types"""
+    for id, sts in stypes:
+        print id
+        for st in sts:
+            print "  %s:" % st,
+            gds = info[id]
+            print ", ".join(["%s/%d" % (sinfo["description"], len(sinfo["sample_id"])) \
+                             for sinfo in gds["subsets"] if sinfo["type"]==st])
+
+gdsinfo = obiGEO.GDSInfo()
+valid_subset_types = [(id, valid(info)) for id, info in gdsinfo.items() if valid(info)]
+report(valid_subset_types, gdsinfo)
+</xmp>
+
+<p>The requested number of samples, <code>n=40</code>, seems to be quite a stringent criterion, met - at the time of writing of this documentation - by only a few data sets (you may try to lower this threshold):</p>
+
+<xmp class="code">GDS1611
+  genotype/variation: wild type/48, upf1 null mutant/48
+GDS968
+  agent: none/57, UV/57, IR/57
+GDS1490
+  other: non-neural/50, neural/100
+GDS2373
+  gender: male/82, female/48
+GDS1293
+  tissue: raphe magnus/40, somatomotor cortex/41
+GDS2960
+  disease state: control/41, Marfan syndrome/60
+GDS1292
+  tissue: raphe magnus/40, somatomotor cortex/43
+GDS1412
+  protocol: no treatment/47, hormone replacement therapy/42
+</xmp>
+
+<p>Let us now pick one data file from the above (GDS2960) and see if we can predict the disease state. We will use LinearLearner, a fast variant of support vector machines with linear kernel, and within 10-fold cross validation measure AUC, the area under the ROC curve. AUC is the probability of correctly distinguishing between two classes when picking one sample from the target class (e.g., the disease) and one from the non-target class (e.g., control).</p>
+
+<p class="header"><a href="geo_gds6.py">geo_gds6.py</a></p>
+<xmp class="code">import obiGEO
+import orange
+import orngTest
+import orngStat
+
+gds = obiGEO.GDS("GDS2960")
+data = gds.getdata(sample_type="disease state", transpose=True)
+print "Samples: %d, Genes: %d" % (len(data), len(data.domain.attributes))
+
+learners = [orange.LinearLearner]
+results = orngTest.crossValidation(learners, data, folds=10)
+print "AUC = %.3f" % orngStat.AUC(results)[0]
+</xmp>
+
+<p>The output of this script is:</p>
+
+<xmp class="code">Samples: 101, Genes: 3979
+AUC = 0.985</xmp>
+
+<p>The AUC for this data set is very high, indicating that using this particular gene expression data it is almost trivial to separate the two classes.</p>
+
+
+</body>
+</html>

docs/reference-html/obiGO-enrichment.py

+import obiGO
+
+ontology = obiGO.Ontology()
+annotations = obiGO.Annotations("sgd", ontology=ontology)
+
+res = annotations.GetEnrichedTerms(["YGR270W", "YIL075C", "YDL007W"])
+print "Enriched terms:"
+for GOId, (genes, p_value, ref) in res.items():
+    if p_value < 0.05:
+        print ontology[GOId].name, "with p-value: %.4f" %p_value, ", ".join(genes)
+
+# And again for slims        
+ontology.SetSlimsSubset("goslim_yeast")
+
+res = annotations.GetEnrichedTerms(["YGR270W", "YIL075C", "YDL007W"], slimsOnly=True)
+print "Enriched slim terms:"
+for GOId, (genes, p_value, _) in res.items():
+    if p_value < 0.05:
+        print ontology[GOId].name, "with p-value: %.4f" %p_value, ", ".join(genes)
+        
+# Print names and definitions of all terms with "apoptosis" in the name
+##for term in [term for term in ontology.terms.values() if "apoptosis" in term.name.lower()]:
+##	print term.name, term.id
+##	print term.def_ if hasattr(term, "def_") else ""

docs/reference-html/obiGO-gene-annotations.py

+import obiGO
+
+ontology = obiGO.Ontology()
+annotations = obiGO.Annotations("sgd", ontology=ontology)
+
+gene = annotations.aliasMapper["YIL075C"]
+print gene, "(YIL075C) directly annotated to the folowing terms:"
+for a in annotations.geneAnnotations[gene]:
+    print ontology[a.GO_ID].name, "with evidence code", a.Evidence_code
+    
+# Get all genes annotated to the same terms as YIL075C
+ids = set([a.GO_ID for a in annotations.geneAnnotations[gene]])
+for GOID in ids:
+	ants = annotations.GetAllAnnotations(GOID)
+	genes = set([a.geneName for a in ants])
+	print ", ".join(genes), "annotated to", GOID, ontology[a.GO_ID].name
+	
+	

docs/reference-html/obiGO-slim-mapping.py

+import obiGO
+
+ontology = obiGO.Ontology()
+annotations = obiGO.Annotations("sgd", ontology=ontology)
+
+ontology.SetSlimsSubset("goslim_yeast")
+terms = annotations.GetAnnotatedTerms(["YGR270W", "YIL075C", "YDL007W"], directAnnotationOnly=True)
+slims = set()
+for term in terms:
+    print term
+    slims.update(ontology.GetSlimTerms(term))
+
+print "Genes: YGR270W, YIL075C and YDL007W map to the folowing slims terms:"
+for term in slims:
+    print term, ontology[term].name

docs/reference-html/obiGO-yeast.py

+from __future__ import division
+import obiGO, obiProb
+import orange
+
+data = orange.ExampleTable("../../../../doc/datasets/brown-selected.tab")
+
+ontology = obiGO.Ontology()
+annotations = obiGO.Annotations("yeast", ontology=ontology)
+
+cluster_genes = [str(ex["gene"]) for ex in data if ex.getclass() == "Resp"]
+
+gg = dict([(b, a) for a, b in annotations.GetGeneNamesTranslator(cluster_genes).items()])
+cluster_genes = [gg.get(name, name) for name in cluster_genes]
+gg = dict([(ann.DB_Object_Symbol, ann.DB_Object_ID) for ann in annotations])
+cluster_genes = [gg.get(name, name) for name in cluster_genes]
+cluster_genes = cluster_genes[:10]
+reference_genes = annotations.geneNames
+
+david_date = "August 3, 2009"
+
+david_results_bp = """Category	Term	Count	%	PValue	Genes	List Total	Pop Hits	Pop Total	Fold Enrichment	Bonferroni	Benjamini	FDR
+GOTERM_BP_ALL	GO:0015992~proton transport	8	57.14%	1.3041103299910861E-11	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	53	5164	55.676549865229106	3.115524416230642E-8	2.077016336698989E-9	2.2887058914733416E-8
+GOTERM_BP_ALL	GO:0015986~ATP synthesis coupled proton transport	8	57.14%	2.3084091771234847E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	42	5164	70.25850340136054	5.514709577880694E-9	1.8382365629676656E-9	4.051181612396704E-9
+GOTERM_BP_ALL	GO:0044249~cellular biosynthetic process	8	57.14%	0.0026654367159150284	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	878	5164	3.360885128538887	0.9982984740500304	0.14734967010922517	4.5760545698257165
+GOTERM_BP_ALL	GO:0015672~monovalent inorganic cation transport	8	57.14%	6.501499399871959E-11	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	66	5164	44.70995670995671	1.5532080710478624E-7	8.62893434483425E-9	1.1410075106965678E-7
+GOTERM_BP_ALL	GO:0006818~hydrogen transport	8	57.14%	1.3041103299910861E-11	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	53	5164	55.676549865229106	3.115524416230642E-8	2.077016336698989E-9	2.2887058914733416E-8
+GOTERM_BP_ALL	GO:0051234~establishment of localization	9	64.29%	0.004162009675774264	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, S000003439, 	14	1235	5164	2.6880277617119726	0.9999529260234311	0.2111926048706756	7.058073886583582
+GOTERM_BP_ALL	GO:0006812~cation transport	8	57.14%	3.2348983576564486E-8	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	158	5164	18.67631103074141	7.727873692198184E-5	2.5760541179087326E-6	5.677217334021378E-5
+GOTERM_BP_ALL	GO:0009165~nucleotide biosynthetic process	8	57.14%	2.884205297126469E-9	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	112	5164	26.346938775510203	6.8903427785649285E-6	2.460844882312685E-7	5.061755459223605E-6
+GOTERM_BP_ALL	GO:0006811~ion transport	8	57.14%	9.322978491193589E-8	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	184	5164	16.03726708074534	2.2270116490286096E-4	6.960162230784661E-6	1.6361734598291378E-4
+GOTERM_BP_ALL	GO:0006810~transport	9	64.29%	0.003689044915217824	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, S000003439, 	14	1213	5164	2.7367801201271935	0.9998536375393898	0.19374226311229126	6.280333887049161
+GOTERM_BP_ALL	GO:0009058~biosynthetic process	8	57.14%	0.012969438334012132	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	1153	5164	2.5592863337876346	0.9999999999999715	0.49994490614887377	20.47521247842694
+GOTERM_BP_ALL	GO:0042775~organelle ATP synthesis coupled electron transport	3	21.43%	0.001698878089256512	S000003155, S000004997, S000004387, 	14	25	5164	44.26285714285714	0.9827866703378599	0.098914987364349	2.93996869915667
+GOTERM_BP_ALL	GO:0006164~purine nucleotide biosynthetic process	8	57.14%	2.1784520172080657E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	78	5164	37.83150183150183	5.20432014816663E-7	2.1684672724120446E-8	3.8231643406660965E-7
+GOTERM_BP_ALL	GO:0042773~ATP synthesis coupled electron transport	3	21.43%	0.001698878089256512	S000003155, S000004997, S000004387, 	14	25	5164	44.26285714285714	0.9827866703378599	0.098914987364349	2.93996869915667
+GOTERM_BP_ALL	GO:0006163~purine nucleotide metabolic process	8	57.14%	2.8583856087082035E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	81	5164	36.430335097001766	6.828681227144529E-7	2.6264167174439024E-8	5.016442483629646E-7
+GOTERM_BP_ALL	GO:0007571~age-dependent general metabolic decline	2	14.29%	0.02491247592376542	S000003882, S000003882, 	14	10	5164	73.77142857142856	1.0	0.7226114630464955	35.77322472636095
+GOTERM_BP_ALL	GO:0009152~purine ribonucleotide biosynthetic process	8	57.14%	1.3504564734308501E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	73	5164	40.422700587084144	3.226239850828705E-7	1.5363049277183904E-8	2.3700393869674485E-7
+GOTERM_BP_ALL	GO:0006796~phosphate metabolic process	11	78.57%	6.609360672311857E-11	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000000195, S000005999, S000003882, S000005999, S000004387, 	14	290	5164	13.991133004926107	1.578975383775827E-7	8.310397370259182E-9	1.1599365912218218E-7
+GOTERM_BP_ALL	GO:0009260~ribonucleotide biosynthetic process	8	57.14%	1.9849931357144687E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	77	5164	38.32282003710575	4.7421478244213944E-7	2.061803872077661E-8	3.4836461493270576E-7
+GOTERM_BP_ALL	GO:0006793~phosphorus metabolic process	11	78.57%	6.609360672311857E-11	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000000195, S000005999, S000003882, S000005999, S000004387, 	14	290	5164	13.991133004926107	1.578975383775827E-7	8.310397370259182E-9	1.1599365912218218E-7
+GOTERM_BP_ALL	GO:0009150~purine ribonucleotide metabolic process	8	57.14%	1.6417078391964835E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	75	5164	39.3447619047619	3.922039358839058E-7	1.7827454956709232E-8	2.8811831809250066E-7
+GOTERM_BP_ALL	GO:0009259~ribonucleotide metabolic process	8	57.14%	2.3877846642490237E-10	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	79	5164	37.35262206148282	5.704415340090918E-7	2.2817667644225992E-8	4.1905411363174494E-7
+GOTERM_BP_ALL	GO:0051188~cofactor biosynthetic process	8	57.14%	3.4773191839524677E-9	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	115	5164	25.659627329192546	8.307281102348796E-6	2.864591178708409E-7	6.102665184304357E-6
+GOTERM_BP_ALL	GO:0046034~ATP metabolic process	8	57.14%	3.870915746856979E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	45	5164	65.57460317460317	9.247588694805131E-9	1.5412647824675219E-9	6.793399176530102E-9
+GOTERM_BP_ALL	GO:0006123~mitochondrial electron transport, cytochrome c to oxygen	3	21.43%	2.6032148376007355E-4	S000003155, S000004997, S000004387, 	14	10	5164	110.65714285714284	0.46312446340401836	0.016670004919741466	0.4558791472939139
+GOTERM_BP_ALL	GO:0051186~cofactor metabolic process	8	57.14%	3.7293039277677224E-7	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	225	5164	13.114920634920635	8.905341134267664E-4	2.5454843862848797E-5	6.544876183900428E-4
+GOTERM_BP_ALL	GO:0009145~purine nucleoside triphosphate biosynthetic process	8	57.14%	5.353761737671291E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	47	5164	62.78419452887537	1.2790030923248707E-8	1.4211145593634456E-9	9.39572863956073E-9
+GOTERM_BP_ALL	GO:0009144~purine nucleoside triphosphate metabolic process	8	57.14%	6.261365320746103E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	48	5164	61.476190476190474	1.4958304817014323E-8	1.4958304594969718E-9	1.0988576715220688E-8
+GOTERM_BP_ALL	GO:0006119~oxidative phosphorylation	11	78.57%	2.1818706134527815E-17	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000000195, S000005999, S000003882, S000005999, S000004387, 	14	68	5164	59.668067226890756	5.212488895538695E-14	5.212488895538695E-14	3.829164197571297E-14
+GOTERM_BP_ALL	GO:0008152~metabolic process	14	100.00%	0.019621353851322847	S000004997, S000004387, S000003159, S000003439, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000004028, S000005999, 	14	3818	5164	1.352540597171294	1.0	0.6426933342222417	29.374312706476815
+GOTERM_BP_ALL	GO:0009142~nucleoside triphosphate biosynthetic process	8	57.14%	1.1330305911809188E-11	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	52	5164	56.747252747252745	2.706801482954546E-8	1.933429638611983E-9	1.9884527358016157E-8
+GOTERM_BP_ALL	GO:0006118~electron transport	6	42.86%	1.1045195750922386E-5	S000003155, S000004997, S000004028, S000004387, S000003439, S000003159, 	14	132	5164	16.766233766233768	0.026042020409655975	7.327069494768024E-4	0.019382452143623663
+GOTERM_BP_ALL	GO:0051179~localization	9	64.29%	0.005041519242062774	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, S000003439, 	14	1271	5164	2.611891648870406	0.999994297765288	0.23999017511150011	8.48817753150758
+GOTERM_BP_ALL	GO:0009141~nucleoside triphosphate metabolic process	8	57.14%	1.7133267420743854E-11	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	55	5164	53.65194805194805	4.093144034822416E-8	2.40773179172038E-9	3.006878079858666E-8
+GOTERM_BP_ALL	GO:0006754~ATP biosynthetic process	8	57.14%	3.2721626944045396E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	44	5164	67.06493506493506	7.817191005621282E-9	1.954297723649745E-9	5.742617492643376E-9
+GOTERM_BP_ALL	GO:0006753~nucleoside phosphate metabolic process	8	57.14%	3.2721626944045396E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	44	5164	67.06493506493506	7.817191005621282E-9	1.954297723649745E-9	5.742617492643376E-9
+GOTERM_BP_ALL	GO:0055086~nucleobase, nucleoside and nucleotide metabolic process	8	57.14%	1.7766800583227095E-7	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	202	5164	14.608203677510607	4.2435883801938967E-4	1.248371336348697E-5	3.1180536678165893E-4
+GOTERM_BP_ALL	GO:0006091~generation of precursor metabolites and energy	14	100.00%	1.7111195002613388E-16	S000004997, S000004387, S000003159, S000003439, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000004028, S000005999, 	14	322	5164	16.03726708074534	5.304645611658998E-13	2.652322805829499E-13	3.885780586188048E-13
+GOTERM_BP_ALL	GO:0009117~nucleotide metabolic process	8	57.14%	5.840328188592497E-8	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	172	5164	17.156146179401993	1.3951571125714324E-4	4.500810660945653E-6	1.0249720883148683E-4
+GOTERM_BP_ALL	GO:0006732~coenzyme metabolic process	8	57.14%	1.081918945782046E-7	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	188	5164	15.696048632218842	2.584370494638222E-4	7.832407207586378E-6	1.8987567623751644E-4
+GOTERM_BP_ALL	GO:0009108~coenzyme biosynthetic process	8	57.14%	1.3849814685789753E-9	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	101	5164	29.216407355021214	3.3087152432331735E-6	1.2254520420640347E-7	2.430630552385793E-6
+GOTERM_BP_ALL	GO:0001321~age-dependent general metabolic decline during replicative cell aging	2	14.29%	0.005029005617493512	S000003882, S000003882, 	14	2	5164	368.85714285714283	0.9999941238340904	0.2442972120915523	8.467976406149823
+GOTERM_BP_ALL	GO:0016310~phosphorylation	11	78.57%	4.944705485834868E-12	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000000195, S000005999, S000003882, S000005999, S000004387, 	14	224	5164	18.113520408163268	1.181291520158112E-8	1.6875593145115886E-9	8.677925045219581E-9
+GOTERM_BP_ALL	GO:0009206~purine ribonucleoside triphosphate biosynthetic process	8	57.14%	5.353761737671291E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	47	5164	62.78419452887537	1.2790030923248707E-8	1.4211145593634456E-9	9.39572863956073E-9
+GOTERM_BP_ALL	GO:0009205~purine ribonucleoside triphosphate metabolic process	8	57.14%	6.261365320746103E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	48	5164	61.476190476190474	1.4958304817014323E-8	1.4958304594969718E-9	1.0988576715220688E-8
+GOTERM_BP_ALL	GO:0009201~ribonucleoside triphosphate biosynthetic process	8	57.14%	8.476650445053009E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	50	5164	59.01714285714286	2.0250749632744203E-8	1.68756253415836E-9	1.4876466725155524E-8
+GOTERM_BP_ALL	GO:0009199~ribonucleoside triphosphate metabolic process	8	57.14%	9.815138922270817E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	51	5164	57.85994397759104	2.3448390007452247E-8	1.803722282645026E-9	1.7225498805117923E-8
+"""
+
+david_results_cc = """Category	Term	Count	%	PValue	Genes	List Total	Pop Hits	Pop Total	Fold Enrichment	Bonferroni	Benjamini	FDR
+GOTERM_CC_ALL	GO:0044444~cytoplasmic part	12	85.71%	0.006088819536079676	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000005999, S000003882, S000004028, S000004387, S000003439, S000003159, 	14	2641	5632	1.827879050143344	0.9814660869691836	0.13275524314652598	8.72075140466545
+GOTERM_CC_ALL	GO:0031090~organelle membrane	10	71.43%	3.029187860465467E-6	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	700	5632	5.746938775510205	0.0019761075921682636	1.0410314240905105E-4	0.004525594357451812
+GOTERM_CC_ALL	GO:0031975~envelope	10	71.43%	4.78624197919905E-8	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	434	5632	9.269256089532588	3.125367247458488E-5	2.8412833162905216E-6	7.15077466795222E-5
+GOTERM_CC_ALL	GO:0043234~protein complex	13	92.86%	2.603956264388198E-7	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	1316	5632	3.973947025618758	1.7002391047071796E-4	1.2145523826445626E-5	3.890375158688286E-4
+GOTERM_CC_ALL	GO:0044429~mitochondrial part	10	71.43%	1.0957063379250622E-7	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	477	5632	8.433662773285414	7.154706817014134E-5	5.503802375850242E-6	1.6370141305843688E-4
+GOTERM_CC_ALL	GO:0031967~organelle envelope	10	71.43%	4.503200851575421E-8	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	431	5632	9.333775273450447	2.9405469906707005E-5	2.9405859021114367E-6	6.727903724135231E-5
+GOTERM_CC_ALL	GO:0005754~mitochondrial proton-transporting ATP synthase, catalytic core	3	21.43%	1.4737764835078688E-5	S000003882, S000000195, S000003882, 	14	3	5632	402.2857142857143	0.009577670489531065	4.1833992163242684E-4	0.0220163639806481
+GOTERM_CC_ALL	GO:0031966~mitochondrial membrane	10	71.43%	1.080240517000826E-9	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	283	5632	14.215042907622413	7.05396839717487E-7	1.4107940771168614E-7	1.613909073050479E-6
+GOTERM_CC_ALL	GO:0005753~mitochondrial proton-transporting ATP synthase complex	5	35.71%	9.998508143708598E-8	S000003882, S000002706, S000000195, S000005999, S000003882, 	14	21	5632	95.78231292517006	6.528813007933287E-5	5.440840318726714E-6	1.493803553298534E-4
+GOTERM_CC_ALL	GO:0005751~mitochondrial respiratory chain complex IV	5	35.71%	8.365377472985592E-9	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	12	5632	167.61904761904765	5.462576611603431E-6	6.82823708353375E-7	1.2498103152402962E-5
+GOTERM_CC_ALL	GO:0044425~membrane part	13	92.86%	4.362609746181852E-7	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	1375	5632	3.8034285714285714	2.848379045127247E-4	1.8991718223948517E-5	6.517839032826878E-4
+GOTERM_CC_ALL	GO:0016469~proton-transporting two-sector ATPase complex	8	57.14%	2.9258138389520786E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	47	5632	68.4741641337386	1.910528291126923E-9	9.552640900523102E-10	4.3711811947844126E-9
+GOTERM_CC_ALL	GO:0005746~mitochondrial respiratory chain	5	35.71%	9.653058317842064E-7	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	36	5632	55.87301587301587	6.30146386737862E-4	3.707843095557273E-5	0.0014421837375766522
+GOTERM_CC_ALL	GO:0044422~organelle part	10	71.43%	0.0368673468391256	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	2301	5632	1.7483081889861551	0.9999999999777655	0.5353835078549395	42.94864604210752
+GOTERM_CC_ALL	GO:0033178~proton-transporting two-sector ATPase complex, catalytic domain	4	28.57%	6.414463893121058E-6	S000003882, S000002706, S000000195, S000003882, 	14	17	5632	94.65546218487394	0.0041798981622530595	1.994400309182076E-4	0.009582956878873006
+GOTERM_CC_ALL	GO:0033177~proton-transporting two-sector ATPase complex, proton-transporting domain	3	21.43%	0.0010077286045780168	S000002706, S000005999, S000005999, 	14	21	5632	57.469387755102034	0.4823099464317113	0.024089493962627162	1.4950448954651918
+GOTERM_CC_ALL	GO:0000276~mitochondrial proton-transporting ATP synthase complex, coupling factor F(o)	2	14.29%	0.027376315885587123	S000002706, S000005999, 	14	12	5632	67.04761904761905	0.9999999865729374	0.4427322586888206	33.94699076606338
+GOTERM_CC_ALL	GO:0000275~mitochondrial proton-transporting ATP synthase complex, catalytic core F(1)	3	21.43%	7.340126705958842E-5	S000003882, S000000195, S000003882, 	14	6	5632	201.14285714285714	0.04680214738322663	0.001915474595515665	0.1096074222371568
+GOTERM_CC_ALL	GO:0005743~mitochondrial inner membrane	10	71.43%	3.133119092481637E-11	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	191	5632	21.062079281974572	2.0459247185300455E-8	6.819749098774253E-9	4.680961174230447E-8
+GOTERM_CC_ALL	GO:0000274~mitochondrial proton-transporting ATP synthase, stator stalk	2	14.29%	0.006909968533425321	S000002706, S000005999, 	14	3	5632	268.1904761904762	0.9891963501737456	0.14455501193220344	9.840977861323552
+GOTERM_CC_ALL	GO:0005740~mitochondrial envelope	10	71.43%	3.1435985004983136E-9	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	319	5632	12.610837438423646	2.0527677120352195E-6	2.932527882748559E-7	4.6966224354072494E-6
+GOTERM_CC_ALL	GO:0005739~mitochondrion	12	85.71%	1.328977450330144E-6	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000005999, S000003882, S000004028, S000004387, S000003439, S000003159, 	14	1154	5632	4.183213666749195	8.674464023651351E-4	4.821121845244303E-5	0.0019855104412713764
+GOTERM_CC_ALL	GO:0045277~respiratory chain complex IV	5	35.71%	8.365377472985592E-9	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	12	5632	167.61904761904765	5.462576611603431E-6	6.82823708353375E-7	1.2498103152402962E-5
+GOTERM_CC_ALL	GO:0044455~mitochondrial membrane part	10	71.43%	3.5280572783016857E-13	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	117	5632	34.38339438339438	2.303972568284962E-10	2.303972568284962E-10	5.271338920920243E-10
+GOTERM_CC_ALL	GO:0045267~proton-transporting ATP synthase, catalytic core	3	21.43%	1.4737764835078688E-5	S000003882, S000000195, S000003882, 	14	3	5632	402.2857142857143	0.009577670489531065	4.1833992163242684E-4	0.0220163639806481
+GOTERM_CC_ALL	GO:0032991~macromolecular complex	13	92.86%	6.200301918298537E-6	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	1725	5632	3.0317184265010355	0.0040406243253358065	2.0241999553805012E-4	0.00926302113514188
+GOTERM_CC_ALL	GO:0019866~organelle inner membrane	10	71.43%	5.203286066955664E-11	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	202	5632	19.915134370579917	3.39774325341935E-8	8.494358216815101E-9	7.773847121583799E-8
+GOTERM_CC_ALL	GO:0045265~proton-transporting ATP synthase, stator stalk	2	14.29%	0.006909968533425321	S000002706, S000005999, 	14	3	5632	268.1904761904762	0.9891963501737456	0.14455501193220344	9.840977861323552
+GOTERM_CC_ALL	GO:0045263~proton-transporting ATP synthase complex, coupling factor F(o)	3	21.43%	4.406829632417675E-4	S000002706, S000005999, S000005999, 	14	14	5632	86.20408163265306	0.2501104878313618	0.011009310594931931	0.6563739649655287
+GOTERM_CC_ALL	GO:0045261~proton-transporting ATP synthase complex, catalytic core F(1)	4	28.57%	8.008761082313701E-7	S000003882, S000002706, S000000195, S000003882, 	14	9	5632	178.79365079365078	5.228355818966968E-4	3.268523508093146E-5	0.0011965242358913386
+GOTERM_CC_ALL	GO:0016020~membrane	13	92.86%	7.628592576013883E-6	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	1756	5632	2.978197201431826	0.004969102909115897	2.26405727854595E-4	0.011396720953205097
+GOTERM_CC_ALL	GO:0045259~proton-transporting ATP synthase complex	6	42.86%	1.4166066493008505E-9	S000003882, S000002706, S000000195, S000005999, S000005999, S000003882, 	14	25	5632	96.54857142857142	9.250436889818303E-7	1.541740075605702E-7	2.116449182576474E-6
+GOTERM_CC_ALL	GO:0044446~intracellular organelle part	10	71.43%	0.0368673468391256	S000003882, S000002706, S000003155, S000000195, S000005999, S000004997, S000003882, S000004028, S000004387, S000003159, 	14	2301	5632	1.7483081889861551	0.9999999999777655	0.5353835078549395	42.94864604210752
+"""
+
+david_results_mf = """Category	Term	Count	%	PValue	Genes	List Total	Pop Hits	Pop Total	Fold Enrichment	Bonferroni	Benjamini	FDR
+GOTERM_MF_ALL	GO:0046961~hydrogen ion transporting ATPase activity, rotational mechanism	7	50.00%	1.357188625187203E-10	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	35	4927	70.38571428571429	2.4076526516925156E-7	2.1887753720584158E-8	2.3007645788553077E-7
+GOTERM_MF_ALL	GO:0022804~active transmembrane transporter activity	7	50.00%	4.561468131602496E-6	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	193	4927	12.764248704663212	0.00805941031507107	3.517669381594146E-4	0.0077325147965212615
+GOTERM_MF_ALL	GO:0017111~nucleoside-triphosphatase activity	7	50.00%	1.354991458548405E-4	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	349	4927	7.058739255014327	0.21368025699328053	0.009203226040080392	0.22945586557691078
+GOTERM_MF_ALL	GO:0008553~hydrogen-exporting ATPase activity, phosphorylative mechanism	2	14.29%	0.015735007517678746	S000003882, S000003882, 	14	6	4927	117.30952380952381	0.9999999999993964	0.5849039042925119	23.575548401812018
+GOTERM_MF_ALL	GO:0046933~hydrogen ion transporting ATP synthase activity, rotational mechanism	8	57.14%	1.504413377749123E-12	S000003882, S000002706, S000000195, S000005999, S000000195, S000005999, S000003882, S000005999, 	14	38	4927	74.09022556390977	2.668917753467781E-9	2.965464540594098E-10	2.550426536629402E-9
+GOTERM_MF_ALL	GO:0015078~hydrogen ion transmembrane transporter activity	13	92.86%	8.312898389343022E-21	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	90	4927	50.83412698412699	1.474708174269452E-17	1.474708174269452E-17	1.4092381372956447E-17
+GOTERM_MF_ALL	GO:0015077~monovalent inorganic cation transmembrane transporter activity	13	92.86%	1.6571922570429097E-20	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	95	4927	48.15864661654136	2.939859063994122E-17	1.469929531997061E-17	2.809343288076066E-17
+GOTERM_MF_ALL	GO:0015075~ion transmembrane transporter activity	13	92.86%	1.3266048405395307E-16	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	195	4927	23.461904761904762	1.9695356456850277E-13	3.941291737419306E-14	1.887379141862766E-13
+GOTERM_MF_ALL	GO:0016887~ATPase activity	7	50.00%	2.851574368180932E-5	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	265	4927	9.29622641509434	0.04932940179671719	0.002021460114576268	0.04833011644592711
+GOTERM_MF_ALL	GO:0015405~P-P-bond-hydrolysis-driven transmembrane transporter activity	7	50.00%	9.769490154343295E-8	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	101	4927	24.39108910891089	1.7329574628732747E-4	9.628329410094061E-6	1.6561645203028164E-4
+GOTERM_MF_ALL	GO:0015399~primary active transmembrane transporter activity	7	50.00%	9.769490154343295E-8	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	101	4927	24.39108910891089	1.7329574628732747E-4	9.628329410094061E-6	1.6561645203028164E-4
+GOTERM_MF_ALL	GO:0015662~ATPase activity, coupled to transmembrane movement of ions, phosphorylative mechanism	2	14.29%	0.06406743892539128	S000003882, S000003882, 	14	25	4927	28.154285714285713	1.0	0.9684032470122852	67.45191745405069
+GOTERM_MF_ALL	GO:0046872~metal ion binding	5	35.71%	0.07719554090708378	S000003882, S000002706, S000003155, S000000195, S000003882, 	14	640	4927	2.7494419642857144	1.0	0.9829564745750087	74.38322655397852
+GOTERM_MF_ALL	GO:0043492~ATPase activity, coupled to movement of substances	7	50.00%	4.8652622253470385E-8	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	90	4927	27.372222222222224	8.630602941672727E-5	5.753967045607489E-6	8.24779769370565E-5
+GOTERM_MF_ALL	GO:0015002~heme-copper terminal oxidase activity	5	35.71%	7.691253005950271E-7	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	30	4927	58.654761904761905	0.0013634983981221094	6.201756809931513E-5	0.0013038461507353105
+GOTERM_MF_ALL	GO:0042626~ATPase activity, coupled to transmembrane movement of substances	7	50.00%	4.8652622253470385E-8	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	90	4927	27.372222222222224	8.630602941672727E-5	5.753967045607489E-6	8.24779769370565E-5
+GOTERM_MF_ALL	GO:0042625~ATPase activity, coupled to transmembrane movement of ions	7	50.00%	3.290094897579469E-9	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	58	4927	42.47413793103448	5.836611263854152E-6	4.48971306687973E-7	5.577509576415451E-6
+GOTERM_MF_ALL	GO:0042623~ATPase activity, coupled	7	50.00%	5.780183298730377E-6	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	201	4927	12.256218905472638	0.010201681018396247	4.2716185735414136E-4	0.009798362529434002
+GOTERM_MF_ALL	GO:0008324~cation transmembrane transporter activity	13	92.86%	1.244185803793163E-17	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	161	4927	28.41659272404614	2.207185615929071E-14	5.517964039822678E-15	2.1091970603597518E-14
+GOTERM_MF_ALL	GO:0005215~transporter activity	13	92.86%	9.331263693917314E-12	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	488	4927	9.375146370023419	1.6553750037395787E-8	1.65537505925073E-9	1.581884623291785E-8
+GOTERM_MF_ALL	GO:0019829~cation-transporting ATPase activity	7	50.00%	2.7146665610749877E-10	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	39	4927	63.166666666666664	4.81581680311649E-7	4.0131815537414184E-8	4.60201854401987E-7
+GOTERM_MF_ALL	GO:0043167~ion binding	5	35.71%	0.08465249594692495	S000003882, S000002706, S000003155, S000000195, S000003882, 	14	660	4927	2.666125541125541	1.0	0.9872049046718362	77.67510380858903
+GOTERM_MF_ALL	GO:0016491~oxidoreductase activity	6	42.86%	0.0016156535426757536	S000003155, S000004997, S000004028, S000004387, S000003439, S000003159, 	14	361	4927	5.849228333992876	0.9432152372099303	0.08837982165926195	2.703911735463682
+GOTERM_MF_ALL	GO:0016820~hydrolase activity, acting on acid anhydrides, catalyzing transmembrane movement of substances	7	50.00%	5.934326909367205E-8	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	93	4927	26.489247311827956	1.0526942125099747E-4	6.579663508032851E-6	1.0060120366750525E-4
+GOTERM_MF_ALL	GO:0016818~hydrolase activity, acting on acid anhydrides, in phosphorus-containing anhydrides	7	50.00%	1.8772210180800466E-4	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	370	4927	6.658108108108109	0.28326583700053276	0.012259428518476279	0.3177584959434432
+GOTERM_MF_ALL	GO:0016817~hydrolase activity, acting on acid anhydrides	7	50.00%	1.9634482734341306E-4	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	373	4927	6.604557640750671	0.2941481609246992	0.011940209452176997	0.3323313942078632
+GOTERM_MF_ALL	GO:0003824~catalytic activity	13	92.86%	0.0012913234405410655	S000004997, S000004387, S000003159, S000003439, S000003882, S000002706, S000000195, S000003155, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	2405	4927	1.9023166023166025	0.8989649855976286	0.0735633440992598	2.1667037480417095
+GOTERM_MF_ALL	GO:0016462~pyrophosphatase activity	7	50.00%	1.8772210180800466E-4	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	370	4927	6.658108108108109	0.28326583700053276	0.012259428518476279	0.3177584959434432
+GOTERM_MF_ALL	GO:0016676~oxidoreductase activity, acting on heme group of donors, oxygen as acceptor	5	35.71%	7.691253005950271E-7	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	30	4927	58.654761904761905	0.0013634983981221094	6.201756809931513E-5	0.0013038461507353105
+GOTERM_MF_ALL	GO:0016675~oxidoreductase activity, acting on heme group of donors	5	35.71%	7.691253005950271E-7	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	30	4927	58.654761904761905	0.0013634983981221094	6.201756809931513E-5	0.0013038461507353105
+GOTERM_MF_ALL	GO:0022892~substrate-specific transporter activity	13	92.86%	1.045535361873502E-12	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	407	4927	11.24096174096174	1.8547117175415906E-9	2.3183899244827444E-10	1.7723711387418462E-9
+GOTERM_MF_ALL	GO:0022891~substrate-specific transmembrane transporter activity	13	92.86%	1.1436221376671628E-13	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	339	4927	13.495785924989466	2.0286217150555785E-10	3.381039892502713E-11	1.9385604232979858E-10
+GOTERM_MF_ALL	GO:0022890~inorganic cation transmembrane transporter activity	13	92.86%	2.6125070468606044E-18	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	142	4927	32.21881287726358	4.634587501130712E-15	1.544862500376904E-15	4.428833833827902E-15
+GOTERM_MF_ALL	GO:0004129~cytochrome-c oxidase activity	5	35.71%	7.691253005950271E-7	S000003155, S000004997, S000004028, S000004387, S000003159, 	14	30	4927	58.654761904761905	0.0013634983981221094	6.201756809931513E-5	0.0013038461507353105
+GOTERM_MF_ALL	GO:0016787~hydrolase activity	7	50.00%	0.030218055645008027	S000003882, S000002706, S000000195, S000000195, S000005999, S000003882, S000005999, 	14	988	4927	2.493421052631579	1.0	0.8078538878815004	40.55798333297395
+GOTERM_MF_ALL	GO:0022857~transmembrane transporter activity	13	92.86%	6.05182729099852E-13	S000004997, S000004387, S000003159, S000003882, S000002706, S000000195, S000003155, S000005999, S000000195, S000005999, S000003882, S000005999, S000004028, 	14	389	4927	11.761109070877708	1.0735938804629086E-9	1.5337053849151516E-10	1.0259348925956147E-9
+"""
+
+def proces_david(dav):
+    lines = [line.split("\t") for line in dav.split("\n") if line]
+    keys = lines[0]
+    return dict([(line[1].split("~")[0], dict(zip(keys, line))) for line in lines])
+
+terms = [["GO:0009058", "GO:0006119"], ["GO:0031967", "GO:0044455"], ["GO:0046933", "GO:0003824"]]
+print "Genes:"
+print "\n".join(cluster_genes)
+
+for aspect, terms, david in zip(["P", "C", "F"], terms, [proces_david(david_results_bp), proces_david(david_results_cc), proces_david(david_results_mf)]):
+    enriched_terms = annotations.GetEnrichedTerms(cluster_genes, reference_genes, aspect=aspect, prob=obiProb.Hypergeometric())
+#    print enriched_terms
+    for term in terms:
+        print "Term: %s - %s" % (term, ontology[term].name)
+        print "    Annotated genes in reference: %i from %i" % (enriched_terms[term][2], len(reference_genes))
+        print "    Annotated genes in cluster: %i from %i" % (len(enriched_terms[term][0]), len(cluster_genes))
+        print "    Enrichment: %.3f" % ((len(enriched_terms[term][0]) / len(cluster_genes)) / (enriched_terms[term][2]/len(reference_genes)))
+        print "    p-value (hypergeometric distribution): %f" % (enriched_terms[term][1])
+        print "    Comment: results from NCBI David (%s):" % david_date
+        print "        Annotated genes in reference: %s from %s" % (david[term]["Pop Hits"], david[term]["Pop Total"])
+        print "        Annotated genes in cluster: %s from %s" % (david[term]["Count"], david[term]["List Total"])
+        print "        Enrichment: %s" % david[term]["Fold Enrichment"]
+        print "        p-value: %s" % david[term]["PValue"]
+    
+

docs/reference-html/obiGO.htm

+<html>
+
+<head>
+<title>obiGO: Gene Ontology Handling Library</title>
+<link rel=stylesheet href="style.css" type="text/css">
+<link rel=stylesheet href="style-print.css" type="text/css" media=print>
+</head>
+
+<body>
+<h1>obiGO: Gene Ontology Handling Library</h1>
+<index name="modules/gene ontology GO">
+<p>obiGO is a library for handling gene ontology (GO) databases. <a href="http://www.geneontology.org/GO.doc.shtml">(More about GO).</a></p>
+
+<p class=section>Attributes</p>
+<dl class=attributes>
+	<dt>evidenceTypes</dt>
+	<dd>A dictionary with all evidence codes as keys and short description as values. </dd>
+</dl>
+
+<h2>Ontology</h2>
+<p>Ontology is the main class representing a gene ontology.</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+	<dt>terms</dt>
+	<dd>A dictionary mapping term ids to instances of Term </dd>
+</dl>
+
+<p class=section>Methods</p>
+<dl class=attributes>
+	<dt>__init__(file=None, progressCallback=None)</dt>
+	<dd>Initialize the ontology from <code>file</code> (if not given try and load ontology from default_database_path). The optional <code>progressCallback</code> will be called with a single argument to report on the progress.</dd>
+	<dt>Load(progressCallback=None)</dt>
+	<dd>A class method that tries to load the ontology file from default_database_path. It looks for a filename starting with 'gene_ontology'. The optional <code>progressCallback</code> will be called with a single argument to report on the progress.</dd>
+	<dt>ParseFile(file, progressCallback=None)</dt>
+	<dd>Parse the <code>file</code>. <code>file</code> can be a file name string or an open filelike object. The optional <code>progressCallback</code> will be called with a single argument to report on the progress.</dd>
+	<dt>ExtractSuperGraph(terms)</dt>
+	<dd>Return ids of all super terms of <code>terms</code> up to the most general one.</dd>
+	<dt>ExtractSubGraph(terms)</dt>
+	<dd>Return all sub terms of <code>terms</code>.</dd>
+	<dt>GetTermDepth(term)</dt>
+	<dd>Return the minimum depth of a term (length of the shortest path to this term from the top level term).</dd>
+	<dt>GetDefinedSlimsSubsets()</dt>
+    <dd>Return a list of defined subsets</dd>
+	<dt>SetSlimsSubset(subset)</dt>
+    <dd>Set the slims term subset to subset. If subset is a string it must equal one of the defined subsetdef.</dd>
+	<dt>GetSlimTerms(termId)</dt>
+    <dd>Return a list of slim terms for termId.</dd>
+	<dt>DownloadOntology(file, progressCallback=None)</dt>
+	<dd>A static method that downloads the ontology from the GO website and saves it in <code>file</code>.</dd>
+	<dt>__getitem__(termId)</dt>
+	<dd>return a Term object with <code>termId</code> as id or alt_id</dd>
+	<dt>__len__()</dt>
+	<dd>return the number of terms in the ontology</dd>
+	<dt>__iter__()</dt>
+	<dd>iterator over all term ids</dd>
+	<dt>__contains__(id)</dt>
+	<dd>check if a term with <code>id</code> is in the ontology (also checks alt_ids)</dd>
+</dl>
+
+<h2>Term</h2>
+<p>Term is a class that represents a term in the ontology</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+	<dt>id</dt>
+	<dd>The term id</dd>
+	<dt>name</dt>
+	<dd>The term name</dd>
+	<dt>namespace</dt>
+	<dd>The namespace of the term</dd>
+	<dt>def_</dt>
+	<dd>The term def entry (note the use of a trailing underscore to avoid a conflict with the Python keyword <code>def</code>)</dd>
+	<dt>is_a</dt>
+	<dd>List of term ids this term is a subterm of.</dd>
+	<dt>related</dt>
+	<dd>List of (relType, termId) tuples with relType specifying the relationship type with termId</dd>
+</dl>
+<h2>Annotations</h2>
+<p>Annotations object holds the annotations.</p>
+<p class=section>Attributes</p>
+<dl class=attributes>
+	<dt>geneAnnotations</dt>
+	<dd>A dictionary mapping a gene name (DB_Object_Symbol) to a set of all annotations of that gene</dd>
+	<dt>termAnnotations</dt>
+	<dd>A dictionary mapping a GO term id to a set of all annotations to that term</dd>
+	<dt>geneNames</dt>
+	<dd>A set of all gene names (all entries from DB_Object_Symbol)</dd>
+	<dt>geneNamesDict</dt>
+	<dd>A dictionary mapping each unique identifier from DB_Object_ID, DB_Object_Symbol and DB_Object_Synonym to a list of all equivalent names</dd>
+	<dt>aliasMapper</dt>
+	<dd>A dictionary mapping each unique identifier from DB_Object_ID, DB_Object_Symbol and DB_Object_Synonym to a DB_Object_Symbol equivalent</dd>
+	<dt>annotations</dt>
+	<dd>A list of all AnnotationRecord instances</dd>
+</dl>
+<p class=section>Methods</p>
+<dl class=attributes>
+	<dt> __init__(file=None, ontology=None, genematcher=None, progressCallback=None)</dt>
+	<dd>Initialize the annotations from <code>file</code> by calling <code>ParseFile</code> on it. If file does not exist, assume it is the name of the organism to be loaded from default_database_path. The <code>ontology</code> argument, if present, must be an instance of the Ontology class. <code>genematcher</code> should be an instance of obiGene.Matcher and defaults to obiGene.GMGO. The optional <code>progressCallback</code> will be called with a single argument to report on the progress.</dd>
+	<dt>Load(org, ontology=None, progressCallback=None)</dt>
+	<dd>A class method that tries to load the association file for the given organism from default_database_path. It tries to match the <code>org</code> with GO organism codes and, if it fails, searches for org in NCBI Taxonomy using the obiTaxonomy module.</dd>
+	<dt>ParseFile(self, file, progressCallback=None)</dt>
+	<dd>Parse the <code>file</code>. <code>file</code> can be a file name string or an open filelike object. The optional <code>progressCallback</code> will be called with a single argument to report on the progress.</dd>
+	<dt>GetAllAnnotations(term)</dt>
+	<dd>Return all annotations that are annotated to term whose id entry equals <code>id</code></dd>
+	<dt>GetAllGenes(id, evidenceCodes=None)</dt>
+	<dd>Return a list of genes annotated by specified evidence codes to this and all subterms.</dd>
+	<dt>GetEnrichedTerms(genes, reference=None, evidenceCodes=None, slimsOnly=False, aspect="P", prob=obiProb.Binomial(), progressCallback=None)</dt>
+	<dd>Return a dictionary of enriched terms, with tuples of (list_of_genes, p_value, reference_count) for items and term ids as keys. P-Values are FDR adjusted if useFDR is True (default).</dd>
+	<dt>GetAnnotatedTerms(genes, directAnnotationOnly=False, evidenceCodes=None, progressCallback=None)</dt>
+	<dd>Return all terms that are annotated by genes with evidenceCodes.</dd>
+	<dt>DownloadAnnotations(org, file, progressCallback=None)</dt>
+	<dd>A static method that downloads the annotation file for organism <code>org</code> to <code>file</code></dd>
+	<dt>__contains__(annotation)</dt>
+	<dd>check if <code>annotation</code> is in this Annotations object</dd>
+	<dt>__iter__</dt>
+	<dd>iterate over all annotations in this object</dd>
+	<dt>__len__()</dt>
+	<dd>return the number of annotations in this object</dd>
+	<dt></dt>
+	<dd></dd>
+</dl>
+
+<h2>Examples</h2>
+<p>Searching the annotations (part of <a href="obiGO-gene-annotations.py">obiGO-gene-annotations.py</a>)</p>
+<xmp class=code>import obiGO
+ontology = obiGO.Ontology.Load()
+# Print names and definitions of all terms with "apoptosis" in the name
+for term in [term for term in ontology.terms.values() if "apoptosis" in term.name.lower()]:
+	print term.name, term.id
+	print term.def_
+annotations = obiGO.Annotations.Load("sgd", ontology=ontology)
+annotations.GetEnrichedTerms(["YGR270W", "YIL075C", "YDL007W"])
+
+gene = annotations.aliasMapper["YIL075C"]
+print gene, "(YIL075C) directly annotated to the folowing terms:"
+for a in annotations.geneAnnotations[gene]:
+    print ontology[a.GO_ID].name, "with evidence code", a.Evidence_code
+    
+# Get all genes annotated to the same terms as YIL075C
+ids = set([a.GO_ID for a in annotations.geneAnnotations[gene]])
+for GOID in ids:
+	ants = annotations.GetAllAnnotations(GOID)
+	genes = set([a.geneName for a in ants])
+	print ", ".join(genes), "annotated to", GOID, ontology[a.GO_ID].name
+</xmp>
+
+<p>Term enrichment (part of <a href="obiGO-enrichment.py">obiGO-enrichment.py</a>)</p>
+<xmp class=code>res = annotations.GetEnrichedTerms(["YGR270W", "YIL075C", "YDL007W"])
+print "Enriched terms:"
+for GOId, (genes, p_value, ref) in res.items():
+    if p_value < 0.05:
+        print ontology[GOId].name, "with p-value: %.4f" %p_value, ", ".join(genes)
+
+# And again for slims
+ontology.SetSlimsSubset("goslim_yeast")
+
+res = annotations.GetEnrichedTerms(["YGR270W", "YIL075C", "YDL007W"], slimsOnly=True)
+print "Enriched slim terms:"
+for GOId, (genes, p_value, _) in res.items():
+    if p_value < 0.05:
+        print ontology[GOId].name, "with p-value: %.4f" %p_value, ", ".join(genes)
+</xmp>
+
+<p>Mapping to slim terms (part of <a href="obiGO-slim-mapping.py">obiGO-slim-mapping.py</a>)</p>
+<xmp class=code>ontology.SetSlimsSubset("goslim_yeast")
+terms = annotations.GetAnnotatedTerms(["YGR270W", "YIL075C", "YDL007W"], directAnnotationOnly=True)
+slims = set()
+for term in terms:
+    print term
+    slims.update(ontology.GetSlimTerms(term))
+
+print "Genes: YGR270W, YIL075C and YDL007W map to the folowing slims terms:"
+for term in slims:
+    print term, ontology[term].name
+</xmp>

docs/reference-html/obiGene.htm

+<html>
+
+<head>
+<title>obiGene: gene matching and gene info</title>
+<link rel=stylesheet href="style.css" type="text/css">
+<link rel=stylesheet href="style-print.css" type="text/css" media=print>
+</head>
+
+<body>
+<h1>obiGene: gene matching and gene info</h1>
+<index name="modules/gene match matching info">
+
+<p><code>obiGene</code> module provides access to NCBI gene info and gene name matching.</p>
+
+<hr>
+
+<h2>Gene name matching</h2>
+
+<p>Genes usually have multiple aliases. When combining data from different sources (for example expression data from one dataset with gene sets from another one), care needs to be taken to match gene aliases representing the same genes. The implemented alias matching methods are based on sets of aliases, where each set contains a group of gene aliases for a single gene. Matching gene aliases are target gene aliases residing in the same sets of aliases as the query gene alias. Target gene aliases are gene aliases which the matcher outputs as matching results. </p>
+
+<h2>Common interface</h2>
+
+<p>Since all gene matchers are subclasses of class <code>Matcher</code>, they all support methods <code>set_targets</code>, <code>match</code>, <code>explain</code>, <code>umatch</code>.</p>
+
+<h3>Matcher</h3>
+
+<dl class=attributes>
+<dd>An abstract gene matcher class. All gene matchers should implement functions <code>set_targets</code>, <code>match</code> and <code>explain</code>. </dd>
+<dl class=attributes>
+<dt>set_targets(targets)</dt>
+<dd>Set gene aliases in the input list (of strings) as target gene aliases. Abstract.</dd>
+<dt>match(gene)</dt>
+<dd>Returns a list of target gene aliases which share the same set of aliases with the input gene. If there are no matches it returns an empty list. Abstract.</dd>
+<dt>explain(gene)</dt>
+<dd>Returns gene matches with their explanations as a list of tuples. Each tuple consists of a list of target genes in a set of aliases matched to the input gene. The set of aliases is returned as a second part of the tuple. Abstract.</dd>
+<dt>umatch(gene)</dt>
+<dd>Return unique matching gene aliases. If the <code>match</code> function returns exactly one gene alias, then it is returned. If not, the function returns <code>None</code>.</dd>
+</dl>
+</dl>
+
+<h2>Concrete matchers and their use</h2>
+
+<p>Almost all matchers are subclasses of <code>MatcherAliasesPickled</code> class. The only exception is <code>MatcherDirect</code>, where caching would be pointless.</p>
+
+<h3>MatcherAliasesKEGG or GMKEGG</h3>
+
+<dl class=attributes>
+<dd>Uses aliases from the KEGG database for matching.</dd>
+<dl class=attributes>
+<dt>__init__(organism, ignore_case=True)</dt>
+<dd>Initialization of the gene matcher for the given organism.</dd>
+</dl>
+</dl>
+
+<h3>MatcherAliasesGO or GMGO</h3>
+
+<dl class=attributes>
+<dd>Uses aliases from GO annotations.</dd>
+<dl class=attributes>
+<dt>__init__(organism, ignore_case=True)</dt>
+<dd>Initialization of the gene matcher for the given organism.</dd>
+</dl>
+</dl>
+
+<h3>MatcherAliasesDictyBase or GMDicty</h3>
+
+
+<dl class=attributes>
+<dd>Uses the aliases from the Dictybase.</dd>
+<dl class=attributes>
+<dt>__init__(ignore_case=True)</dt>
+<dd>Initialization of the gene matcher.</dd>
+</dl>
+</dl>
+
+<h3>MatcherAliasesNCBI or GMNCBI</h3>
+
+<dl class=attributes>
+<dd>Uses aliases from NCBI gene info database.</dd>
+<dl class=attributes>
+<dt>__init__(organism, ignore_case=True)</dt>
+<dd>Initialization of the gene matcher for the given organism.</dd>
+</dl>
+</dl>
+
+<h3>MatcherDirect or GMDirect</h3>
+
+<dl class=attributes>
+<dd>Direct matching to target gene aliases (possibly ignoring case).</dd>
+<dl class=attributes>
+<dt>__init__(ignore_case=True)</dt>
+<dd>Initialization.</dd>
+</dl>
+</dl>
+
+<p>Gene name matchers can either be chained (try to apply them in sequence) or joined (overlapping sets of aliases are combined). This can be accomplished using the <code>matcher</code> function.</p>
+
+<h3>matcher(targets, direct=True, ignore_case=True)</h3>
+<dl class=attributes>
+<dd>Builds a new matcher from the list of matchers. Chain matchers in the input list. If a list element is another list, join matchers in the list by joining overlapping sets of aliases.</dd>
+<dl class=arguments>
+<dt>direct</dt> 
+<dd>If True (default), insert an instance of MatcherDirect in front of the specified gene matcher sequence.</dd>
+<dt>ignore_case</dt>
+<dd>Specifies handling of letter case for the added direct matcher.</dd>
+</dl>
+</dl>
+
+<h3>Example: using different gene matchers to match onto KEGG gene aliases</h3>
+
+<p>The following example tries to match input genes onto KEGG gene aliases. As you can see in the results, GO aliases alone can not match onto KEGG database. For the last gene only joined GO and KEGG aliases produce a match.</p>
+
+<p class="header"><a href="geneMatch.py">geneMatch.py</a></p>
+
+<xmp class=code>import obiGene
+import obiKEGG
+
+targets = obiKEGG.KEGGOrganism("9606").get_genes() #human NCBI ID
+
+gmkegg = obiGene.GMKEGG("9606")
+gmgo = obiGene.GMGO("9606")
+gmkegggo = obiGene.matcher([[gmkegg, gmgo]], direct=False)
+
+gmkegg.set_targets(targets)
+gmgo.set_targets(targets)
+gmkegggo.set_targets(targets)
+
+genes = [ "cct7", "pls1", "gdi1", "nfkb2", "dlg7" ]
+
+print "%12s" % "gene", "%12s" % "KEGG", "%12s" % "GO", "%12s" % "KEGG+GO"
+for gene in genes:
+    print "%12s" % gene, "%12s" % gmkegg.umatch(gene), \