Commits

Marko Toplak  committed c62fe1f

Remove reference that was converted into rst.

  • Participants
  • Parent commits 7e1dc12

Comments (0)

Files changed (12)

File docs/reference-html/geneMatch.py

-import obiGene
-import obiKEGG
-
-targets = obiKEGG.KEGGOrganism("9606").get_genes() #human NCBI ID
-
-gmkegg = obiGene.GMKEGG("9606")
-gmgo = obiGene.GMGO("9606")
-gmkegggo = obiGene.matcher([[gmkegg, gmgo]], direct=False)
-
-gmkegg.set_targets(targets)
-gmgo.set_targets(targets)
-gmkegggo.set_targets(targets)
-
-genes = [ "cct7", "pls1", "gdi1", "nfkb2", "dlg7" ]
-
-print "%12s" % "gene", "%12s" % "KEGG", "%12s" % "GO", "%12s" % "KEGG+GO"
-for gene in genes:
-    print "%12s" % gene, "%12s" % gmkegg.umatch(gene), \
-          "%12s" % gmgo.umatch(gene), \
-          "%12s" % gmkegggo.umatch(gene)
-

File docs/reference-html/geneMatch1.py

-import obiGene
-import obiKEGG
-
-keggorg = obiKEGG.KEGGOrganism("mmu")
-kegg_genes = keggorg.get_genes() 
-
-query = [ "Fndc4", "Itgb8", "Cdc34", "Olfr1403" ] 
-
-gm = obiGene.GMKEGG("mmu") #use KEGG aliases for gene matching
-gm.set_targets(kegg_genes) #set KEGG gene aliases as targets
-
-pnames = keggorg.list_pathways()
-
-for name in query:
-    match = gm.umatch(name) # matched kegg alias or None
-    if match:
-    	pwys = keggorg.get_pathways_by_genes([match])
-        print name, "is in", [ pnames[p] for p in pwys ] 

File docs/reference-html/geo_gds1.py

-"""
-Print out some information on specific GEO's data set.
-Does not download the data set.
-"""
-
-import obiGEO
-import textwrap
-
-gdsinfo = obiGEO.GDSInfo()
-gds = gdsinfo["GDS10"]
-
-print "ID:", gds["dataset_id"]
-print "Features:", gds["feature_count"]
-print "Genes:", gds["gene_count"]
-print "Organism:", gds["platform_organism"]
-print "PubMed ID:", gds["pubmed_id"]
-print "Sample types:"
-for sampletype in set([sinfo["type"] for sinfo in gds["subsets"]]):
-    ss = [sinfo["description"] for sinfo in gds["subsets"] if sinfo["type"]==sampletype]
-    print "  %s (%s)" % (sampletype, ", ".join(ss))
-print
-print "Description:"
-print "\n".join(textwrap.wrap(gds["description"], 70))

File docs/reference-html/geo_gds2.py

-import obiGEO
-reload(obiGEO)
-
-# gds = obiGEO.GDS("GDS10")
-gds = obiGEO.GDS("GDS1210")
-
-data = gds.getdata(report_genes=True, transpose=False)
-print "report_genes=True, transpose=False"
-print "Report=Genes, Rows=Genes/Spots"
-print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
-print
-
-data = gds.getdata(report_genes=False, transpose=False)
-print "report_genes=False, transpose=False"
-print "Report=Spots, Rows=Genes/Spots"
-print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
-print
-
-data = gds.getdata(report_genes=True, transpose=True)
-print "report_genes=True, transpose=True"
-print "Report=Genes, Rows=Samples"
-print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
-print "Class values:", " ".join([str(cv) for cv in data.domain.classVar.values]) 
-print
-
-
-data = gds.getdata(report_genes=True, transpose=True, sample_type="tissue")
-print 'report_genes=True, transpose=True sample_type="tissue"'
-print "Report=Genes, Rows=Samples"
-print "rows=%d cols=%d has_class=%s" % (len(data), len(data.domain.attributes), data.domain.classVar<>None)
-print "Class values:", " ".join([str(cv) for cv in data.domain.classVar.values]) 
-print

File docs/reference-html/geo_gds3.py

-import obiGEO
-
-gds = obiGEO.GDS("GDS1676")
-data = gds.getdata(sample_type="infection")
-print "Genes: %d, Samples: %d" % (len(data), len(data.domain.attributes))
-
-for a in data.domain.attributes:
-    print a.name, a.attributes

File docs/reference-html/geo_gds4.py

-import orngServerFiles
-import glob
-import re
-
-filenames = glob.glob(orngServerFiles.localpath("GEO") + "/GDS*.soft.gz")
-m = re.compile("(GDS[0-9]*).soft")
-print "%d data files cached:" % len(filenames)
-print " ".join([m.search(fn).group(1) for fn in filenames])
-

File docs/reference-html/geo_gds5.py

-"""
-Check all data files from GEO, find those which include at least N
-samples in all sample subsets of at least one sample type. Useful
-when, for instance, filtering out the data sets that could be used for
-supervised machine learning.
-"""
-
-import obiGEO
-
-def valid(info, n=40):
-    """Return a set of subset types containing more than n samples in every subset"""
-    invalid = set()
-    subsets = set([sinfo["type"] for sinfo in info["subsets"]])
-    for sampleinfo in info["subsets"]:
-        if len(sampleinfo["sample_id"]) < n:
-            invalid.add(sampleinfo["type"])
-    return subsets.difference(invalid)
-
-def report(stypes, info):
-    """Pretty-print GDS and valid susbset types"""
-    for id, sts in stypes:
-        print id
-        for st in sts:
-            print "  %s:" % st,
-            gds = info[id]
-            print ", ".join(["%s/%d" % (sinfo["description"], len(sinfo["sample_id"])) \
-                             for sinfo in gds["subsets"] if sinfo["type"]==st])
-
-gdsinfo = obiGEO.GDSInfo()
-valid_subset_types = [(id, valid(info)) for id, info in gdsinfo.items() if valid(info)]
-report(valid_subset_types, gdsinfo)

File docs/reference-html/geo_gds6.py

-import obiGEO
-import orange
-import orngTest
-import orngStat
-
-gds = obiGEO.GDS("GDS2960")
-data = gds.getdata(sample_type="disease state", transpose=True)
-print "Samples: %d, Genes: %d" % (len(data), len(data.domain.attributes))
-
-learners = [orange.LinearLearner]
-results = orngTest.crossValidation(learners, data, folds=10)
-print "AUC = %.3f" % orngStat.AUC(results)[0]

File docs/reference-html/obiGEO.htm

-<html>
-<HEAD>
-<LINK REL=StyleSheet HREF="style.css" TYPE="text/css">
-<LINK REL=StyleSheet HREF="../style-print.css" TYPE="text/css" MEDIA=print></LINK>
-</HEAD>
-
-<BODY>
-<h1>obiGEO: an interface to NCBI's Gene Expression Omnibus</h1>
-
-<index name="NCBI">
-<index name="Gene Expression Omnibus">
-<index name="microarray data sets">
-
-<p>obiGEO provides an interface
-to <a href="http://www.ncbi.nlm.nih.gov/">NCBI</a>'s 
-<a href="http://www.ncbi.nlm.nih.gov/geo/">Gene Expression Omnibus</a>
-repository. Currently, it only supports
-<a href="http://www.ncbi.nlm.nih.gov/sites/GDSbrowser">GEO
-DataSets</a> information querying and retrieval.</p>
-
-<h2>GDSInfo</h2>
-
-<p><INDEX name="classes/GDSInfo (in obiGEO)">GDSInfo is the class that
-    can be used to retrieve the information about
-    <a href=http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>GEO Data
-    Sets</a>. The class accesses the Orange server file
-    that either resides on the local computer or is
-    automatically retrieved from the Orange server. Notice that the call
-    of this class does not access any of NCBI's servers directly.</p>
-
-<p class=section>Methods</p>
-<dl class=attributes>
-<dt>GDSInfo(force_update=False)</dt>
-<dd><p>Constructor returning the object with GEO DataSets
-  information. If <code>force_update</code> is set
-  to <code>True</code>, the constructor will download GEO DataSets
-  information file (gds_info.pickled) from Orange server, otherwise,
-  it will first check if the local copy exists. The object returned
-  behaves like a dictionary: the keys are GEO DataSets IDs, and each
-  value is itself a dictionary providing various information
-  about the particular data set.</p>
-
-<xmp class=code>>>> import obiGEO
->>> info = obiGEO.GDSInfo()
->>> info.keys()[:5]
-['GDS2526', 'GDS2524', 'GDS2525', 'GDS2522', 'GDS1618']
->>> info['GDS2526']['title']
-'c-MYC depletion effect on carcinoma cell lines'
->>> info['GDS2526']['platform_organism']
-'Homo sapiens'
-</xmp>
-</dd>
-</dl>
-
-<h2>GDS</h2>
-
-<p><INDEX name="classes/GDS (in obiGEO)">GDS is a class that
-    provides methods for retrieval of a specific GEO DataSet. The data
-    is provided as Orange's ExampleTable.</p>
-
-<p class=section>Methods</p>
-<dl class=attributes>
-<dt>GDS(gdsname, verbose=False, force_download=False)</dt>
-<dd><p>Constructor returning the object to be used to retrieve GEO
-  DataSet table (samples and gene expressions). <code>gdsname</code>
-  is an NCBI's ID for the data set in the form "GDSn" where "n" is a
-  GDS ID number. The constructor checks the local cache directory to see
-  whether the particular data file is already available locally; otherwise, it downloads it from
-  <a href="ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/GDS/">NCBI's GEO
-  FTP site</a>. The download is forced
-  if <code>force_download=True</code>. The compressed data file
-  resides in the cache directory after the call of the constructor
-  (call to <code>orngServerFiles.localpath("GEO")</code> reveals the
-  path of this directory).</p>
-
-<xmp class=code>>>> import obiGEO
->>> gds = obiGEO.GDS("GDS1676")
->>> print ", ".join(gds.genes[:10])
-EXO1, BUB1B, LTB4R2, FOXA1, MEN1, LIFR, L1CAM, TRAF3, AKAP1, PIK3CD
->>> gds.info["title"]
-'T cell leukemia cell response to human herpesvirus 6 infection: time course'
->>> print gds
-GDS1676 (Homo sapiens), samples=8, features=2100, genes=667, subsets=8
-</xmp>
-</dd>
-
-<dt>getdata(report_genes=True, transpose=False,
-merge_function=variableMean, sample_type=None,
-remove_unknown=None)</dt>
-<dd><p>The call of this method returns the data from GEO DataSet in
-  Orange format. Microarray spots reported in the GEO data set can
-  either be merged according to their gene IDs
-  (<code>report_genes=True</code>) or can be left as spots. The data
-  matrix can have spots/genes in rows and samples in columns
-  (default, <code>transpose=False</code>) or samples in rows and
-  spots/genes in columns
-  (<code>transpose=True</code>). Argument <code>sample_type</code>
-  defines the type of annotation, or (if <code>transpose=True</code>)
-  the type of class labels to be included in the data set. Namely,
-  with <code>sample_type</code>, the entire annotation of samples will
-  be included either in the class value or in
-  the <code>.attributes</code> field of each data set
-  attribute. Spots with sample profiles that include unknown values
-  are retained by default (<code>remove_unknown=None</code>). They are
-  removed if the proportion of samples with unknown values
-  is above the threshold set by <code>remove_unknown</code>.</p>
-
-<p>The following illustrates how <code>getdata</code> is used to
-  construct a data set with genes in rows and samples in
-  columns. Notice that the annotation about each sample is retained
-  in <code>.attributes</code>.</p>
-
-<xmp class=code>>>> import obiGEO
->>> gds = obiGEO.GDS("GDS1676") 
->>> data = gds.getdata()
->>> len(data)
-667
->>> data[0]
-[?, ?, -0.803, 0.128, 0.110, -2.000, -1.000, -0.358], {"gene":'EXO1'}
->>> data.domain.attributes[0]
-FloatVariable 'GSM63816'
->>> data.domain.attributes[0].attributes
-{'dose': '20 U/ml IL-2', 'infection': 'acute ', 'time': '1 d'}
-</xmp>
-
-</dd>
-</dl>
-
-<h2>Examples</h2>
-
-<p>The following script prints out some information about a specific data set. It does not download the data set, just uses the (local) GEO data sets information file.</p>
-
-<p class="header"><a href="geo_gds1.py">geo_gds1.py</a></p>
-<xmp class=code>import obiGEO
-import textwrap
-
-gdsinfo = obiGEO.GDSInfo()
-gds = gdsinfo["GDS10"]
-
-print "ID:", gds["dataset_id"]
-print "Features:", gds["feature_count"]
-print "Genes:", gds["gene_count"]
-print "Organism:", gds["platform_organism"]
-print "PubMed ID:", gds["pubmed_id"]
-print "Sample types:"
-for sampletype in set([sinfo["type"] for sinfo in gds["subsets"]]):
-    ss = [sinfo["description"] for sinfo in gds["subsets"] if sinfo["type"]==sampletype]
-    print "  %s (%s)" % (sampletype, ", ".join(ss))
-print
-print "Description:"
-print "\n".join(textwrap.wrap(gds["description"], 70))
-</xmp>
-
-<p>The output of this script is:</p>
-
-<xmp class=code>ID: GDS10
-Features: 39114
-Genes: 20094
-Organism: Mus musculus
-PubMed ID: 11827943
-Sample types:
-  disease state (diabetic, diabetic-resistant, nondiabetic)
-  strain (NOD, Idd3, Idd5, Idd3+Idd5, Idd9, B10.H2g7, B10.H2g7 Idd3)
-  tissue (spleen, thymus)
-
-Description:
-Examination of spleen and thymus of type 1 diabetes nonobese diabetic
-(NOD) mouse, four NOD-derived diabetes-resistant congenic strains and
-two nondiabetic control strains.
-</xmp>
-
-<p>GEO data sets provide a sort of mini ontology for sample labeling. Samples belong to sample subsets, which in turn belong to specific types. For example, GDS10 above has three sample types, of which the subsets for the tissue type are spleen and thymus. If you are into using data sets for supervised data mining, then it would be useful to find out which of the data sets provide enough samples for each label. It is (semantically) convenient to perform classification within sample subsets of the same type. We therefore need a script that goes through the entire set of data sets and finds those for which, for a specific type, there are enough samples within each of the subsets. The following script does the work. The function <code>valid</code> is passed the information about the data set and determines which subset types (if any) satisfy the "validity" criterion. The number of requested samples in the subset is by default set to <code>n=40</code>.</p>
-
-<p class="header"><a href="geo_gds5.py">geo_gds5.py</a></p>
-<xmp class=code>import obiGEO
-
-def valid(info, n=40):
-    """Return a set of subset types containing more than n samples in every subset"""
-    invalid = set()
-    subsets = set([sinfo["type"] for sinfo in info["subsets"]])
-    for sampleinfo in info["subsets"]:
-        if len(sampleinfo["sample_id"]) < n:
-            invalid.add(sampleinfo["type"])
-    return subsets.difference(invalid)
-
-def report(stypes, info):
-    """Pretty-print GDS and valid susbset types"""
-    for id, sts in stypes:
-        print id
-        for st in sts:
-            print "  %s:" % st,
-            gds = info[id]
-            print ", ".join(["%s/%d" % (sinfo["description"], len(sinfo["sample_id"])) \
-                             for sinfo in gds["subsets"] if sinfo["type"]==st])
-
-gdsinfo = obiGEO.GDSInfo()
-valid_subset_types = [(id, valid(info)) for id, info in gdsinfo.items() if valid(info)]
-report(valid_subset_types, gdsinfo)
-</xmp>
-
-<p>The requested number of samples, <code>n=40</code>, seems to be quite a stringent criterion, met - at the time of writing of this documentation - by only a few data sets (you may try to lower this threshold):</p>
-
-<xmp class="code">GDS1611
-  genotype/variation: wild type/48, upf1 null mutant/48
-GDS968
-  agent: none/57, UV/57, IR/57
-GDS1490
-  other: non-neural/50, neural/100
-GDS2373
-  gender: male/82, female/48
-GDS1293
-  tissue: raphe magnus/40, somatomotor cortex/41
-GDS2960
-  disease state: control/41, Marfan syndrome/60
-GDS1292
-  tissue: raphe magnus/40, somatomotor cortex/43
-GDS1412
-  protocol: no treatment/47, hormone replacement therapy/42
-</xmp>
-
-<p>Let us now pick one data file from the above (GDS2960) and see if we can predict the disease state. We will use LinearLearner, a fast variant of support vector machines with linear kernel, and within 10-fold cross validation measure AUC, the area under ROC. AUC is the probability of correctly distinguishing between two classes when picking one sample from the target class (e.g., the disease) and one from the non-target class (e.g., control).</p>
-
-<p class="header"><a href="geo_gds6.py">geo_gds6.py</a></p>
-<xmp class="code">import obiGEO
-import orange
-import orngTest
-import orngStat
-
-gds = obiGEO.GDS("GDS2960")
-data = gds.getdata(sample_type="disease state", transpose=True)
-print "Samples: %d, Genes: %d" % (len(data), len(data.domain.attributes))
-
-learners = [orange.LinearLearner]
-results = orngTest.crossValidation(learners, data, folds=10)
-print "AUC = %.3f" % orngStat.AUC(results)[0]
-</xmp>
-
-<p>The output of this script is:</p>
-
-<xmp class="code">Samples: 101, Genes: 3979
-AUC = 0.985</xmp>
-
-<p>The AUC for this data set is very high, indicating that using this particular gene expression data it is almost trivial to separate the two classes.</p>
-
-
-</body>
-</html>

File docs/reference-html/obiGene.htm

-<html>
-
-<head>
-<title>obiGene: gene matching and gene info</title>
-<link rel=stylesheet href="style.css" type="text/css">
-<link rel=stylesheet href="style-print.css" type="text/css" media=print>
-</head>
-
-<body>
-<h1>obiGene: gene matching and gene info</h1>
-<index name="modules/gene match matching info">
-
-<p><code>obiGene</code> module provides access to NCBI gene info and gene name matching.</p>
-
-<hr>
-
-<h2>Gene name matching</h2>
-
-<p>Genes usually have multiple aliases. When combining data from different sources (for example expression data from one dataset with gene sets from another one), care needs to be taken to match gene aliases representing the same genes. The implemented alias matching methods are based on sets of aliases, where each set contains a group of gene aliases for a single gene. Matching gene aliases are target gene aliases residing in the same sets of aliases as the query gene alias. Target gene aliases are gene aliases which the matcher outputs as matching results. </p>
-
-<h2>Common interface</h2>
-
-<p>Since all gene matchers are subclasses of class <code>Matcher</code>, they all support methods <code>set_targets</code>, <code>match</code>, <code>explain</code>, <code>umatch</code>.</p>
-
-<h3>Matcher</h3>
-
-<dl class=attributes>
-<dd>An abstract gene matcher class. All gene matchers should implement functions <code>set_targets</code>, <code>match</code> and <code>explain</code>. </dd>
-<dl class=attributes>
-<dt>set_targets(targets)</dt>
-<dd>Set gene aliases in the input list (of strings) as target gene aliases. Abstract.</dd>
-<dt>match(gene)</dt>
-<dd>Returns a list of target gene aliases which share the same set of aliases with the input gene. If there are no matches it returns an empty list. Abstract.</dd>
-<dt>explain(gene)</dt>
-<dd>Returns gene matches with their explanations as a list of tuples. Each tuple consists of a list of target genes in a set of aliases matched to the input gene. The set of aliases is returned as a second part of the tuple. Abstract.</dd>
-<dt>umatch(gene)</dt>
-<dd>Return unique matching gene aliases. If the <code>match</code> function returns exactly one gene alias, then it is returned. If not, the function returns <code>None</code>.</dd>
-</dl>
-</dl>
-
-<h2>Concrete matchers and their use</h2>
-
-<p>Almost all matchers are subclasses of <code>MatcherAliasesPickled</code> class. The only exception is <code>MatcherDirect</code>, where caching would be pointless.</p>
-
-<h3>MatcherAliasesKEGG or GMKEGG</h3>
-
-<dl class=attributes>
-<dd>Uses aliases from the KEGG database for matching.</dd>
-<dl class=attributes>
-<dt>__init__(organism, ignore_case=True)</dt>
-<dd>Initialization of the gene matcher for the given organism.</dd>
-</dl>
-</dl>
-
-<h3>MatcherAliasesGO or GMGO</h3>
-
-<dl class=attributes>
-<dd>Uses aliases from GO annotations.</dd>
-<dl class=attributes>
-<dt>__init__(organism, ignore_case=True)</dt>
-<dd>Initialization of the gene matcher for the given organism.</dd>
-</dl>
-</dl>
-
-<h3>MatcherAliasesDictyBase or GMDicty</h3>
-
-
-<dl class=attributes>
-<dd>Uses the aliases from the Dictybase.</dd>
-<dl class=attributes>
-<dt>__init__(ignore_case=True)</dt>
-<dd>Initialization of the gene matcher.</dd>
-</dl>
-</dl>
-
-<h3>MatcherAliasesNCBI or GMNCBI</h3>
-
-<dl class=attributes>
-<dd>Uses aliases from NCBI gene info database.</dd>
-<dl class=attributes>
-<dt>__init__(organism, ignore_case=True)</dt>
-<dd>Initialization of the gene matcher for the given organism.</dd>
-</dl>
-</dl>
-
-<h3>MatcherDirect or GMDirect</h3>
-
-<dl class=attributes>
-<dd>Direct matching to target gene aliases (possibly ignoring case).</dd>
-<dl class=attributes>
-<dt>__init__(ignore_case=True)</dt>
-<dd>Initialization.</dd>
-</dl>
-</dl>
-
-<p>Gene name matchers can either be chained (try to apply them in sequence) or joined (overlapping sets of aliases are combined). This can be accomplished using the <code>matcher</code> function.</p>
-
-<h3>matcher(targets, direct=True, ignore_case=True)</h3>
-<dl class=attributes>
-<dd>Builds a new matcher from the list of matchers. Chain matchers in the input list. If a list element is another list, join matchers in the list by joining overlapping sets of aliases.</dd>
-<dl class=arguments>
-<dt>direct</dt> 
-<dd>If True (default), insert an instance of MatcherDirect in front of the specified gene matcher sequence.</dd>
-<dt>ignore_case</dt>
-<dd>Specifies handling of letter case for the added direct matcher.</dd>
-</dl>
-</dl>
-
-<h3>Example: using different gene matchers to match onto KEGG gene aliases</h3>
-
-<p>The following example tries to match input genes onto KEGG gene aliases. As you can see in the results, GO aliases alone can not match onto KEGG database. For the last gene only joined GO and KEGG aliases produce a match.</p>
-
-<p class="header"><a href="geneMatch.py">geneMatch.py</a></p>
-
-<xmp class=code>import obiGene
-import obiKEGG
-
-targets = obiKEGG.KEGGOrganism("9606").get_genes() #human NCBI ID
-
-gmkegg = obiGene.GMKEGG("9606")
-gmgo = obiGene.GMGO("9606")
-gmkegggo = obiGene.matcher([[gmkegg, gmgo]], direct=False)
-
-gmkegg.set_targets(targets)
-gmgo.set_targets(targets)
-gmkegggo.set_targets(targets)
-
-genes = [ "cct7", "pls1", "gdi1", "nfkb2", "dlg7" ]
-
-print "%12s" % "gene", "%12s" % "KEGG", "%12s" % "GO", "%12s" % "KEGG+GO"
-for gene in genes:
-    print "%12s" % gene, "%12s" % gmkegg.umatch(gene), \
-          "%12s" % gmgo.umatch(gene), \
-          "%12s" % gmkegggo.umatch(gene)
-</xmp>
-
-<p>Output:</p>
-
-<xmp class=code>        gene         KEGG           GO      KEGG+GO
-        cct7    hsa:10574         None    hsa:10574
-        pls1     hsa:5357         None     hsa:5357
-        gdi1     hsa:2664         None     hsa:2664
-       nfkb2     hsa:4791         None     hsa:4791
-        dlg7         None         None     hsa:9787
-</xmp>
-
-
-<h2>Auxiliary functionality</h2>
-
-<h3>MatcherAliases</h3>
-
-<dl class=attributes>
-<dd>Gene matcher based on sets of aliases. A subclass of <code>Matcher</code>.</dd>
-<dl class=attributes>
-<dt>__init__(aliases, ignore_case=True)</dt>
-<dd>Constructs a gene matcher based on sets of aliases. Input aliases have to be represented as a list of sets, where the sets contain equivalent aliases for the given gene.</dd>
-<dt>to_ids(gene)</dt>
-<dd>Returns the index of the sets of aliases (as given to the constructor) which include input gene alias.</dd>
-</dl>
-</dl>
-
-<h3>MatcherAliasesPickled</h3>
-
-<dl class=attributes>
-<dd>An abstract class for alias matchers, which support pickling. A subclass of <code>MatcherAliases</code>. Its subclasses must implement functions <code>filename</code>, <code>create_aliases</code> and <code>create_aliases_version</code>. They are needed for automatic pickling to work. Loading of gene aliases is done lazily - only when really needed, as loading of aliases for individual components of joined gene matcher is often unnecessary.</dd>
-<dl class=attributes>
-<dt>filename()</dt>
-<dd>Returns the filename for the pickled file. Different organism and gene matcher combinations should have different filenames. Abstract.</dd>
-<dt>create_aliases()</dt>
-<dd>Returns a list of sets of gene aliases. Abstract.</dd>
-<dt>create_aliases_version()</dt>
-<dd>Returns the version of the gene aliases. If a file containing pickled gene matcher with the same version exists, it is read from file. If not, it is rebuilt. Abstract.</dd>
-</dl>
-</dl>
-
-<h3>MatcherSequence</h3>
-
-<dl class=attributes>
-<dd>Supports chaining of gene matchers. User defines the order of used gene matchers. Gene matchers are queried in sequence until the match is found. The matching target aliases are then returned.</dd>
-<dl class=attributes>
-<dt>__init__(matchers)</dt>
-<dd>Input is a list of gene matcher objects (subclasses of type <code>Matcher</code>). </dd>
-</dl>
-</dl>
-
-<h3>MatcherAliasesPickledJoined</h3>
-
-<dl class=attributes>
-<dd>Creates a new matcher by joining gene aliases from input gene matchers. Sets of genes are joined if they contain common genes. The joined gene matcher is pickled only if all input gene matchers support pickling.</dd>
-<dl class=attributes>
-<dt>__init__(matchers)</dt>
-<dd>Constructs a joined gene matcher based on input matchers. The parameter <code>ignore_case</code> of the joined matcher is set to a common value of <code>ignore_case</code> in the input matchers. </dd>
-</dl>
-</dl>
-
-
-<h2>Further examples</h2>
-
-<h3>Listing pathways with given genes</h3>
-
-<p>The following example works in conjunction with <code>obiKEGG</code>.  It takes a list of mouse gene names to find pathways with the given gene.</p>
-
-<p class="header"><a href="geneMatch1.py">geneMatch1.py</a></p>
-
-<xmp class=code>import obiGene
-import obiKEGG
-
-keggorg = obiKEGG.KEGGOrganism("mmu")
-kegg_genes = keggorg.get_genes() 
-
-query = [ "Fndc4", "Itgb8", "Cdc34", "Olfr1403" ] 
-
-gm = obiGene.GMKEGG("mmu") #use KEGG aliases for gene matching
-gm.set_targets(kegg_genes) #set KEGG gene aliases as targets
-
-pnames = keggorg.list_pathways()
-
-for name in query:
-    match = gm.umatch(name) # matched kegg alias or None
-    if match:
-    	pwys = keggorg.get_pathways_by_genes([match])
-        print name, "is in", [ pnames[p] for p in pwys ] 
-</xmp>
-
-<p>Output:</p>
-
-<xmp class=code>Fndc4 is in []
-Itgb8 is in ['Cell adhesion molecules (CAMs)', 
-             'ECM-receptor interaction', 
-             'Regulation of actin cytoskeleton', 
-             'Focal adhesion']
-Cdc34 is in ['Ubiquitin mediated proteolysis']
-Olfr1403 is in ['Olfactory transduction']
-</xmp>
-
-</body>
-</html>
-

File docs/reference-html/obiGeneSets.htm

-<html>
-
-<head>
-<title>obiGeneSets</title>
-<link rel=stylesheet href="style.css" type="text/css">
-<link rel=stylesheet href="style-print.css" type="text/css" media=print>
-</head>
-
-<body>
-<h1>obiGeneSets</h1>
-<index name="modules/gene sets">
-
-<p><code>obiGeneSets</code> is a module for gene set management. It can also load gene set collections in <code>gmt</code> format used by Molecular Signatures Database (MSigDB).</p>
-
-<dl class="attributes">
-
-<dt>list_all()</dt>
-<dd>
-Lists gene sets available in the local and <code>orngServerFiles</code> repositories. It returns a list of tuples of (hierarchy, organism, available_locally).
-</dd>
-
-<dt>collections(col1, col2, ....)</dt>
-<dd>
-Loads and combines gene sets from various sources: <code>gmt</code>, GO, KEGG and others. Returns a <code>GeneSets</code> object. All arguments are gene set specifications. They can be either:
-<ul>
-<li>filename of a <code>gmt</code> file,</li>
-<li>tuple <code>(hierarchy, organism)</code> (for example <code>(("KEGG",), "10090")</code>), or</li>
-<li>instance of <code>GeneSets</code></li>
-</ul>
-<h3>Usage one-liners</h3>
-<p>Gene sets from KEGG and GO for mouse.</p>
-<xmp class=code>obiGeneSets.collections((("KEGG",), "10090"), (("GO",), "10090"))
-</xmp>
-<p>Open gene sets from &quot;specific.gmt&quot; from the current working directory.</p>
-<xmp class=code>obiGeneSets.collections("specific.gmt")
-</xmp>
-<p>We can also combine above examples.</p>
-<xmp class=code>obiGeneSets.collections((("KEGG",), "10090"), (("GO",), "10090"), "specific.gmt")
-</xmp>
-</dd>
-
-<dt>register(geneSets, serverFiles=None)</dt>
-<dd>
-Registers given geneSets (<code>GeneSets</code> object) locally if <code>serverFiles</code> is None or to <code>orngServerFiles</code> repository if authenticated connection is passed. The gene set is registered by the common organism (or None if organisms are different) and the most common hierarchy.
-</dd>
-
-</dl>
-
-<h2>GeneSet: a single set of genes</h2>
-
-<code>GeneSet</code> objects contain a single gene set.
-
-<dl class="attributes">
-<dt>GeneSet(genes=None, name=None, id=None, description=None, link=None, organism=None, hierarchy=None, pair=None)</dt>
-<dd>Creates a GeneSet instance. If a tuple (name, genes) is passed as the <code>pair</code> argument, then the tuple is converted to the new representation. Kept for backward compatibility.</dd>
-</dl>
-
-<p class="section">Object properties and functions</p>
-
-<dl class="attributes">
-<dt>organism</dt>
-<dd>Taxid.</dd>
-<dt>hierarchy</dt>
-<dd>Hierarchy should be formatted as a tuple, for example <code>("GO", "biological_process")</code>.</dd>
-<dt>genes</dt>
-<dd>A set of genes. Genes are strings.</dd>
-<dt>id</dt>
-<dt>name</dt>
-<dt>description</dt>
-<dt>link</dt>
-<dt>cname(source=True, name=True)</dt>
-<dd>Constructs a gene set name with the hierarchy.</dd>
-<dt>to_odict(self, source=True, name=True)</dt>
-<dd>Returns a pair (id, listofgenes), like in the old format.</dd>
-</dl>
-
-<h2>GeneSets: a collection of gene sets</h2>
-
-<code>GeneSets</code> objects contain multiple gene sets of type <code>GeneSet</code>. It is a subclass of Python's <code>set</code> type.
-
-<dl class="attributes">
-<dt>GeneSets(input=None)</dt>
-<dd>Creates a GeneSets instance. If <code>input</code> is an iterable, the <code>GeneSets</code> object is initialized with gene sets from input. If input is a dictionary, the gene sets are converted to the current format.</dd>
-</dl>
-
-<p class="section">Object properties and functions</p>
-
-<dl class="attributes">
-<dt>set_hierarchy(hierarchy)</dt>
-<dd>Sets hierarchy for all gene sets.</dd>
-<dt>common_org()</dt>
-<dd>Returns the common organism (or None if there is no common organism).</dd>
-<dt>hierarchies()</dt>
-<dd>Returns a set of all hierarchies.</dd>
-<dt>common_hierarchy</dt>
-<dd>Returns a common hierarchy.</dd>
-<dt>split_by_hierarchy()</dt>
-<dd>Splits gene sets by hierarchies. Returns a list of <code>GeneSets</code> objects.</dd>
-<dt>to_odict()</dt>
-<dd>Return gene sets in old dictionary format.</dd>
-</dl>
-
-</body>
-</html>
-

File docs/reference-html/obiProb.htm

-<html>
-
-<head>
-<title>obiProb: Probability distributions and corrections</title>
-<link rel=stylesheet href="style.css" type="text/css">
-<link rel=stylesheet href="style-print.css" type="text/css" media=print>
-</head>
-
-<body>
-<h1>obiProb: Probability distributions and corrections</h1>
-<p>obiProb provides the functionality to calculate probability distributions and corrections for multiple hypothesis testing.</p>
-
-<h2>Binomial</h2>
-<index name="Binomial">
-<p>A class for computing binomial distribution probabilities. <a href="http://en.wikipedia.org/wiki/Binomial_distribution">Binomial distribution</a> is the discrete probability distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields success with probability p</p>
-<p class=section>Methods</p>
-<dl class=attributes>
-	<dt>__call__(k, N, m, n)</dt>
-	<dd>if m out of N experiments are positive return the probability that k out of n experiments are positive using the binomial distribution.
-	(i.e. if p = m/N then return bin(n,k) * p**k * (1-p)**(n-k) where bin is the binomial coefficient)</dd>
-	<dt>p_value(k, N, m, n)</dt>
-	<dd>the probability that k or more tests are positive using the binomial distribution</dd>
-</dl>
-
-<h2>Hypergeometric</h2>
-<index name="Hypergeometric">
-<p>A class for computing hypergeometric distribution probabilities. <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution</a> is a discrete probability distribution that describes the number of successes in a sequence of n draws from a finite population without replacement</p>
-<p class=section>Methods</p>
-<dl class=attributes>
-	<dt>__call__(k, N, m, n)</dt>
-	<dd>if m out of N experiments are positive return the probability that k out of n experiments are positive using the hypergeometric distribution.
-	(i.e. return bin(m, k)*bin(N-m, n-k)/bin(N,n) where bin is the binomial coefficient)</dd>
-	<dt>p_value(k, N, m, n)</dt>
-	<dd>the probability that k or more tests are positive using the hypergeometric distribution</dd>
-</dl>
-
-<h2>FDR</h2>
-<index name="FDR">
-<p>A function for performing <a href="http://en.wikipedia.org/wiki/False_discovery_rate">False Discovery Rate</a> correction on an ordered list of p-values</p>
-<p class=section>Arguments</p>
-<dl class=attributes>
-	<dt>p_values</dt>
-	<dd>an ordered list of p-values</dd>
-	<dt>dependent (default False)</dt>
-	<dd>use correction for dependent hypotheses</dd>
-	<dt>m (default len(p_values))</dt>
-	<dd>number of hypotheses tested</dd>
-</dl>
-
-<h2>Bonferroni</h2>
-<index name="Bonferroni">
-<p>A function for performing <a href="http://en.wikipedia.org/wiki/Bonferroni_correction">Bonferroni correction</a> on a list of p-values</p>
-<p class=section>Arguments</p>
-<dl class=attributes>
-	<dt>p_values</dt>
-	<dd>a list of p-values</dd>
-	<dt>m (default len(p_values))</dt>
-	<dd>number of hypotheses tested</dd>
-</dl>