Commits

Aleš Erjavec committed 548d118

Porting obiKEGG to use the new REST KEGG API.

Comments (0)

Files changed (10)

_bioinformatics/obiKEGG/__init__.py

 KEGG - Kyoto Encyclopedia of Genes and Genomes
 ==============================================
 
-This is a python module for access to `KEGG`_ using its web services. To use this module you need to have
-`SUDS`_ python library installed (other backends are planed). 
+This is a python module for access to `KEGG`_ using its web services.
+
+To use this module you need to have the `slumber` and `requests` packages
+installed.
 
 .. _`KEGG`: http://www.genome.jp/kegg/
 
-.. _`SUDS`: http://pypi.python.org/pypi/suds/
 
 """
 from __future__ import absolute_import
 
+import os
+import sys
 import urllib2
-import os, sys
+
 from collections import defaultdict
 
 from datetime import datetime
 
 from Orange.utils import lru_cache
+from Orange.utils import progress_bar_milestones
+from Orange.utils import deprecated_keywords, deprecated_attribute, \
+                         deprecated_function_name
+
+from .. import obiProb
 
 from . import databases
 from . import entry
 
 KEGGGenome = databases.Genome
 KEGGGenes = databases.Genes
-KEGGEnzymes = databases.Enzymes
-KEGGReaction = databases.Reactions
-KEGGPathways = databases.Pathways
+KEGGEnzyme = databases.Enzyme
+KEGGReaction = databases.Reaction
+KEGGPathways = databases.Pathway
+KEGGCompound = databases.Compound
 
 KEGGBrite = Brite
 KEGGBriteEntry = BriteEntry
 DEFAULT_CACHE_DIR = conf.params["cache.path"]
 
 
-from .. import obiProb
-from Orange.utils import deprecated_keywords, deprecated_attribute
+class OrganismNotFoundError(Exception):
+    pass
 
-class OrganismNotFoundError(Exception): pass
 
 class Organism(object):
+    """
+    A convenience class for retrieving information regarding an
+    organism in the KEGG Genes database.
+
+    :param org: KEGG organism code (e.g. "hsa", "sce")
+    :type org: str
+
+    """
     def __init__(self, org, genematcher=None):
         self.org_code = self.organism_name_search(org)
         self.genematcher = genematcher
         self.api = api.CachedKeggApi()
-        
+
     @property
     def org(self):
+        """
+        KEGG organism code.
+        """
         return self.org_code
-    
+
     @property
     def genes(self):
+        """
+        A :class:`Genes` database instance for this organism.
+        """
+        # TODO: This should not be a property but a method.
+        # I think it was only put here for backward compatibility with old obiKEGG.
         if not hasattr(self, "_genes"):
             genes = KEGGGenes(self.org_code)
             self._genes = genes
         return self._genes
-    
+
     def gene_aliases(self):
-        return self.genes().gene_aliases()
-    
+        """
+        Return known gene aliases (synonyms in other databases).
+        """
+        return self.genes.gene_aliases()
+
     def pathways(self, with_ids=None):
+        """
+        Return a list of all pathways for this organism.
+        """
         if with_ids is not None:
             return self.api.get_pathways_by_genes(with_ids)
         else:
             return [p.entry_id for p in self.api.list_pathways(self.org_code)]
     
     def list_pathways(self):
+        """
+        List all pathways.
+        """
+        # NOTE: remove/deprecate and use pathways()
         return self.pathways()
     
     def get_linked_pathways(self, pathway_id):
             if l:
                 tabs = l.split("\t")
                 cset = set([tabs[0]])
+
+                if ":" in tabs[0]:
+                    # also add 'identifier' from 'org_code:identifier'
+                    cset.add(tabs[0].split(":", 1)[-1])
+
                 try:
                     rest = tabs[1].split(";")[0]
                     cset |= set(rest.split(", "))
                 except:
-                    pass #do not crash if a line does not conform
+                    pass  # do not crash if a line does not conform
                 out.append(cset)
         return out
 
-    def get_enriched_pathways(self, genes, reference=None, prob=obiProb.Binomial(), callback=None):
-        """ Return a dictionary with enriched pathways ids as keys
-        and (list_of_genes, p_value, num_of_reference_genes) tuples 
+    def get_enriched_pathways(self, genes, reference=None,
+                              prob=obiProb.Binomial(), callback=None):
+        """
+        Return a dictionary with enriched pathways ids as keys
+        and (list_of_genes, p_value, num_of_reference_genes) tuples
         as items.
-        
+
         """
-        allPathways = defaultdict(lambda :[[], 1.0, []])
-        from Orange.orng import orngMisc
-        milestones = orngMisc.progressBarMilestones(len(genes), 100)
+        if reference is None:
+            reference = self.genes.keys()
+        reference = set(reference)
+
+        allPathways = defaultdict(lambda: [[], 1.0, []])
+        milestones = progress_bar_milestones(len(genes), 100)
         pathways_db = KEGGPathways()
-        
+
         pathways_for_gene = []
         for i, gene in enumerate(genes):
             pathways_for_gene.append(self.pathways([gene]))
             if callback and i in milestones:
-                callback(i*50.0/len(genes))
-                
-        # precache for speed 
-        pathways_db.pre_cache([pid for pfg in pathways_for_gene for pid in pfg]) 
+                callback(i * 50.0 / len(genes))
+
+        # pre-cache for speed
+        pathways_db.pre_cache([pid for pfg in pathways_for_gene
+                               for pid in pfg])
         for i, (gene, pathways) in enumerate(zip(genes, pathways_for_gene)):
             for pathway in pathways:
-                if pathways_db.get_entry(pathway).gene: 
+                if pathways_db.get_entry(pathway).gene:
                     allPathways[pathway][0].append(gene)
             if callback and i in milestones:
-                callback(50.0 + i*50.0/len(genes))
-        reference = set(reference if reference is not None else self.genes.keys())
-        
+                callback(50.0 + i * 50.0 / len(genes))
+
         pItems = allPathways.items()
-        
+
         for i, (p_id, entry) in enumerate(pItems):
             pathway = pathways_db.get_entry(p_id)
             entry[2].extend(reference.intersection(pathway.gene or []))
-            entry[1] = prob.p_value(len(entry[0]), len(reference), len(entry[2]), len(genes))
-        return dict([(pid, (genes, p, len(ref))) for pid, (genes, p, ref) in allPathways.items()])
-        
+            entry[1] = prob.p_value(len(entry[0]), len(reference),
+                                    len(entry[2]), len(genes))
+        return dict([(pid, (genes, p, len(ref)))
+                     for pid, (genes, p, ref) in allPathways.items()])
+
     def get_genes_by_enzyme(self, enzyme):
-        enzyme = Enzymes().get_entry(enzyme)
+        enzyme = KEGGEnzyme().get_entry(enzyme)
         return enzyme.genes.get(self.org_code, []) if enzyme.genes else []
-    
+
     def get_genes_by_pathway(self, pathway_id):
         return KEGGPathway(pathway_id).genes()
-    
+
     def get_enzymes_by_pathway(self, pathway_id):
         return KEGGPathway(pathway_id).enzymes()
     
     def get_pathways_by_genes(self, gene_ids):
         return self.api.get_pathways_by_genes(gene_ids)
         gene_ids = set(gene_ids)
-        pathways = [self.genes[id].pathway for id in gene_ids if self.genes[id].pathway]
+        pathways = [self.genes[id].pathway for id in gene_ids
+                    if self.genes[id].pathway]
         pathways = reduce(set.union, pathways, set())
-        return [id for id in pathways if gene_ids.issubset(KEGGPathway(id).genes())] 
-    
+        return [id for id in pathways
+                if gene_ids.issubset(KEGGPathway(id).genes())]
+
     def get_pathways_by_enzymes(self, enzyme_ids):
         enzyme_ids = set(enzyme_ids)
-        pathways = [KEGGEnzymes()[id].pathway for id in enzyme_ids]
-        pathwats = reduce(set.union, pathways, set())
-        return [id for id in pathways if enzyme_ids.issubset(KEGGPathway(id).enzymes())]
-    
+        pathways = [KEGGEnzyme()[id].pathway for id in enzyme_ids]
+        pathways = reduce(set.union, pathways, set())
+        return [id for id in pathways
+                if enzyme_ids.issubset(KEGGPathway(id).enzymes())]
+
     def get_pathways_by_compounds(self, compound_ids):
         compound_ids = set(compound_ids)
-        pathways = [KEGGCompounds()[id].pathway for id in compound_ids]
-        pathwats = reduce(set.union, pathways, set())
-        return [id for id in pathways if compound_ids.issubset(KEGGPathway(id).compounds())]
-    
+        pathways = [KEGGCompound()[id].pathway for id in compound_ids]
+        pathways = reduce(set.union, pathways, set())
+        return [id for id in pathways
+                if compound_ids.issubset(KEGGPathway(id).compounds())]
+
     def get_enzymes_by_compound(self, compound_id):
         return KEGGCompound()[compound_id].enzyme
-    
+
     def get_enzymes_by_gene(self, gene_id):
         return self.genes[gene_id].enzymes
-    
+
     def get_compounds_by_enzyme(self, enzyme_id):
         return self._enzymes_to_compounds.get(enzyme_id)
     
     @deprecated_keywords({"caseSensitive": "case_sensitive"})
     def get_unique_gene_ids(self, genes, case_sensitive=True):
-        """Return a tuple with three elements. The first is a dictionary mapping from unique gene
-        ids to gene names in genes, the second is a list of conflicting gene names and the third is a list
-        of unknown genes.
+        """
+        Return a tuple with three elements. The first is a dictionary
+        mapping from unique gene ids to gene names in genes, the second
+        is a list of conflicting gene names and the third is a list of
+        unknown genes.
+
         """
         unique, conflicting, unknown = {}, [], []
         for gene in genes:
             else:
                 conflicting.append(gene)
         return unique, conflicting, unknown
-    
+
+    @deprecated_function_name
     def get_genes(self):
         return self.genes
-    
+
     @classmethod
     def organism_name_search(cls, name):
         genome = KEGGGenome()
             name = ids.pop(0) if ids else name
             
         try:
-            return genome[name].entry_key
+            return genome[name].organism_code
         except KeyError:
             raise OrganismNotFoundError(name)
         
     def organism_version(cls, name):
         name = cls.organism_name_search(name)
         genome = KEGGGenome()
-        info = genome.api.binfo(name)
+        info = genome.api.info(name)
         return info.release
-    
+
     def _set_genematcher(self, genematcher):
         setattr(self, "_genematcher", genematcher)
         
         if getattr(self, "_genematcher", None) == None:
             from .. import obiGene
             if self.org_code == "ddi":
-                self._genematcher = obiGene.matcher([obiGene.GMKEGG(self.org_code), obiGene.GMDicty(),
-                                                     [obiGene.GMKEGG(self.org_code), obiGene.GMDicty()]])
+                self._genematcher = obiGene.matcher(
+                    [obiGene.GMKEGG(self.org_code), obiGene.GMDicty(),
+                    [obiGene.GMKEGG(self.org_code), obiGene.GMDicty()]]
+                )
             else:
-                self._genematcher = obiGene.matcher([obiGene.GMKEGG(self.org_code)])
+                self._genematcher = obiGene.matcher(
+                    [obiGene.GMKEGG(self.org_code)])
+
             self._genematcher.set_targets(self.genes.keys())
         return self._genematcher
     
     genematcher = property(_get_genematcher, _set_genematcher)
-    
+
+
 KEGGOrganism = Organism
-    
+
+
 def organism_name_search(name):
     return KEGGOrganism.organism_name_search(name)
 
         
         if e.taxid in [taxid,  genome.TAXID_MAP.get(taxid, taxid)]:
             return e.org_code()
-        
+
     return None
 
 def to_taxid(name):
     genome = KEGGGenome()
     if name in genome:
         return genome[name].taxid
-    
+
     keys = genome.search(name)
     if keys:
         return genome[keys[0]].taxid
 def create_gene_sets():
     pass
 
+
 def main():
     KEGGGenome()
     import doctest
     extraglobs = {"api": KeggApi()}
     doctest.testmod(optionflags=doctest.ELLIPSIS, extraglobs=extraglobs)
 
+
 if __name__ == "__main__":
     sys.exit(main())

_bioinformatics/obiKEGG/api.py

 """
 from __future__ import absolute_import
 
+from datetime import datetime
 from contextlib import closing
+from operator import itemgetter
+import warnings
 
 from .service import web_service
-from .types import *
+from .types import OrganismSummary, Definition, BInfo, Link
+
+
+DATABASES = [
+    ("KEGG Pathway", "pathway", "path", None),
+    ("KEGG Brite", "brite", "br", None),
+    ("KEGG Module", "module", "md", "M"),
+    ("KEGG Disease", "disease", "ds", "H"),
+    ("KEGG Drug", "drug", "dr", "D"),
+    ("KEGG Orthology", "orthology", "ko", "K"),
+    ("KEGG Genome", "genome", "genome", "T"),
+    ("KEGG Genomes", "genomes", "gn", "T"),
+    ("KEGG Genes", "genes", None, None),
+    ("KEGG Ligand", "ligand", "ligand", None),
+    ("KEGG Compound", "compound", "cpd", "C"),
+    ("KEGG Glycan", "glycan", "gl", "G"),
+    ("KEGG Reaction", "reaction", "rn", "R"),
+    ("KEGG RPair", "rpair", "rp", "RP"),
+    ("KEGG RClass", "rclass", "rc", "RC"),
+    ("KEGG Enzyme", "enzyme", "ec", "E")
+]
+
+
+def _link_targets(links):
+    return sorted(set(map(itemgetter(1), links)))
+
 
 class KeggApi(object):
-    """ KEGG API """
-    
+    """
+    An abstraction of the KEGG API.
+    """
+
     def __init__(self):
         self.service = web_service()
-        
-    ##################
-    # Meta information
-    ##################
-    
-    def list_databases(self):
-        """ Returns a list of available databases.
-        
-        >>> api.list_databases()
-        [Definition(entry_id='nt',...
-         
+
+    def list_organisms(self):
         """
-        return map(Definition.from_items, self.service.list_databases())
-    
-    def list_organisms(self):
-        """ Return a list of all available organisms
-        
+        Return a list of all available organisms
+
         >>> api.list_organisms()
         [Definition(entry_id='hsa',...
-        
+
         """
-        return map(Definition.from_items, self.service.list_organisms())
-    
+        return map(OrganismSummary.from_str,
+                   self.service.list.organism.get().splitlines())
+
     def list_pathways(self, organism):
-        """ Return a list of all available pathways for `organism`
-        
+        """
+        Return a list of all available pathways for `organism`
+
         >>> api.list_pathways("hsa")
         [Definition(entry_id=',...
-        
+
         """
-        return map(Definition.from_items, self.service.list_pathways(organism))
-        
+        return map(Definition.from_str,
+                   self.service.list.pathway(organism).get().splitlines())
+
+    def list(self, db):
+        """
+        Return a list of all available entries in database `db`.
+        """
+        return map(Definition.from_str,
+                   self.service.list(db).get().splitlines())
+
     #######
     # DBGET
     #######
-     
-    def binfo(self, db):
-        """ Return info for database `db`
-        
-        >>> print api.dbinfo("gb")
-        genbank          GenBank nucleic acid sequence database
-        gb               Release 186.0, Oct 11
-                         National Center for Biotechnology Information
-                         144,458,648 entries, 132,067,413,372 bases
-                         Last update: 11/10/24
-                         <dbget> <fasta> <blast>
-                         
+
+    def info(self, db):
         """
-        result = self.service.binfo(db)
-        if result is not None:
-            return BInfo.from_text(str(result))
-        else:
-            return result
-    
-    def bfind(self, db, keywords):
-        """ Search database 'db' for keywords
+        Return info for database `db`
+
+        >>> print api.info("pathway")
+        BInfo(entry_id='path', definition='KEGG Pathway Database', ...
+
         """
-        result = self.service.bfind(" ".join([db, keywords]))
-        if result is not None:
-            return str(result)
-        else:
-            return result
-    
-    def bget(self, ids):
+        result = self.service.info(db).get()
+        return BInfo.from_text(str(result))
+
+    def find(self, db, keywords):
         """
+        Search database 'db' for keywords.
+        """
+        if isinstance(keywords, basestring):
+            keywords = [keywords]
+
+        return self.service.find(db)("+".join(keywords)).get()
+
+    def get(self, ids):
+        """
+        Retrieve database entries for `ids` list.
         """
         if not isinstance(ids, basestring):
             # Sequence of ids
-            ids = " ".join(ids)
-        result = self.service.bget(ids)
-        if result is not None:
-            return str(result)
+            ids = "+".join(ids)
+
+        return self.service.get(ids).get()
+
+    def conv(self, ids):
+        raise NotImplementedError()
+
+    def link(self, target_db, source_db=None, ids=None):
+        if not (source_db or ids):
+            raise ValueError("One of 'source_db' or 'ids' must be supplied")
+        if source_db and ids:
+            raise ValueError("Only one 'source_db' or 'ids' must be supplied")
+
+        if source_db:
+            result = self.service.link(target_db)(source_db).get()
         else:
-            return result
-    
-    def btit(self, ids):
-        """
-        """
-        if not isinstance(ids, basestring):
-            ids = " ".join(ids)
-            
-        result = self.service.btit(ids)
-        if result is not None:
-            return str(result)
-        else:
-            return result
-    
-    def bconv(self, ids):
-        if not isinstance(ids, basestring):
-            ids = " ".join(ids)
-            
-        result = self.service.bconv(ids)
-        if result is not None:
-            return str(result)
-        else:
-            return result
-    
-    ########
-    # LinkDB
-    ########
-    
-    def get_linkdb_by_entry(self, entry_id, db, offset, limit):
-        links = self.service.get_linkdb_by_entry(entry_id, db, offset, limit)
-        return [LinkDBRelation(**d) for d in \
-                map(dict, links)]
-        
-    def get_linkdb_between_databases(self, from_db, to_db, offset, limit):
-        links = self.service.get_linkdb_between_databases(from_db, to_db, offset, limit)
-        return [LinkDBRelation(**d) for d in \
-                map(dict, links)]
-        
+            result = self.service.link(target_db)("+".join(ids)).get()
+
+        return map(Link._make, map(str.split, result.splitlines()))
+
     def get_genes_by_enzyme(self, enzyme_id, org):
-        return self.service.get_genes_by_enzyme(enzyme_id, org)
-    
-    def get_enzymes_by_gene(self, genes_id):
-        return self.service.get_enzymes_by_gene(genes_id)
-    
+        return _link_targets(self.link(org, ids=[enzyme_id]))
+
+    def get_enzymes_by_gene(self, gene_id):
+        return _link_targets(self.link("ec", ids=[gene_id]))
+
     def get_enzymes_by_compound(self, compound_id):
-        return self.service.get_enzymes_by_compound(compound_id)
-    
+        return _link_targets(self.link("ec", ids=[compound_id]))
+
     def get_enzymes_by_glycan(self, glycan_id):
-        return self.service.get_enzymes_by_glycan(glycan_id)
-    
+        return _link_targets(self.link("ec", ids=[glycan_id]))
+
     def get_enzymes_by_reaction(self, reaction_id):
-        return self.service.get_enzymes_by_reaction(reaction_id)
-    
+        return _link_targets(self.link("ec", ids=[reaction_id]))
+
     def get_compounds_by_enzyme(self, enzyme_id):
-        return self.service.get_compounds_by_enzyme(enzyme_id)
-    
+        return _link_targets(self.link("compound", ids=[enzyme_id]))
+
     def get_compounds_by_reaction(self, reaction_id):
-        return self.service.get_compounds_by_reaction(reaction_id)
-    
+        return _link_targets(self.link("compound", ids=[reaction_id]))
+
     def get_glycans_by_enzyme(self, enzyme_id):
-        return self.service.get_glycans_by_enzyme(enzyme_id)
-    
+        return _link_targets(self.link("gl", ids=[enzyme_id]))
+
     def get_glycans_by_reaction(self, reaction_id):
-        return self.service.get_glycans_by_reaction(reaction_id)
-    
+        return _link_targets(self.link("gl", ids=[reaction_id]))
+
     def get_reactions_by_enzyme(self, enzyme_id):
-        return self.service.get_reactions_by_enzyme(enzyme_id)
-    
+        return _link_targets(self.link("rn", ids=[enzyme_id]))
+
     def get_reactions_by_compound(self, compound_id):
-        return self.service.get_reactions_by_compound(compound_id)
-    
+        return _link_targets(self.link("rn", ids=[compound_id]))
+
     def get_reactions_by_glycan(self, glycan_id):
-        return self.service.get_reactions_by_glycan(glycan_id)
-    
+        return _link_targets(self.link("rn", ids=[glycan_id]))
+
     ######
     # SSDB
     ######
-    
+
+    # No replacement api in the KEGG REST api.
     def get_best_best_neighbors_by_gene(self, genes_id, offset, limit):
-        ssr = self.service.get_best_best_neighbors_by_gene(genes_id, offset, limit)
-        return [SSDBRelation(**d) for d in \
-                map(dict, ssr)]
-    
+        raise NotImplementedError
+
     def get_best_neighbors_by_gene(self, genes_id, offset, limit):
-        ssr = self.service.get_best_neighbors_by_gene(genes_id, offset, limit)
-        return [SSDBRelation(**d) for d in \
-                map(dict, ssr)]
-    
+        raise NotImplementedError
+
     def get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit):
-        ssr = self.service.get_reverse_best_neighbors_by_gene(genes_id, offset, limit)
-        return [SSDBRelation(**d) for d in \
-                map(dict, ssr)]
-    
+        raise NotImplementedError
+
     def get_paralogs_by_gene(self, genes_id, offset, limit):
-        ssr =  self.service.get_paralogs_by_gene(genes_id, offset, limit)
-        return [SSDBRelation(**d) for d in \
-                map(dict, ssr)]
-    
+        raise NotImplementedError
+
     #######
     # Motif
     #######
-    
+
+    # No replacement api in KEGG REST api
     def get_motifs_by_gene(self, genes_id, db):
-        motif = self.service.get_motifs_by_gene(genes_id, db)
-        return [MotifResult(**d) for d in \
-                map(dict, motif)]
-    
+        raise NotImplementedError
+
     def get_genes_by_motifs(self, motif_id_list, offset, limit):
-        genes = self.service.get_genes_by_motifs(motif_id_list, offset, limit)
-        return [Definition(**d) for d in \
-                map(dict, genes)]
-    
+        raise NotImplementedError
+
     ####
     # KO
     ####
-    
+
     def get_ko_by_gene(self, genes_id):
-        return self.service.get_ko_by_gene(genes_id)
-    
+        raise NotImplementedError
+
     def get_ko_by_ko_class(self, ko_class_id):
-        return self.service.get_ko_by_ko_class(ko_class_id)
-    
+        raise NotImplementedError
+
     def get_genes_by_ko_class(self, ko_class_id, org, offset, limit):
-        return self.service.get_genes_by_ko_class(ko_class_id, org, offset, limit)
-    
+        raise NotImplementedError
+
     def get_genes_by_ko(self, ko_id, org):
-        return self.service.get_genes_by_ko(ko_id, org)
-    
+        raise NotImplementedError
+
     #########
     # Pathway
     #########
-    
+
     def mark_pathway_by_objects(self, pathway_id, object_id_list):
-        return self.service.mark_pathway_by_objects(pathway_id, object_id_list)
-    
-    def color_pathway_by_objects(self, pathway_id, object_id_list, fg_color_list, bg_color_list):
-        return self.service.color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
-    
-    def color_pathway_by_elements(self, pathway_id, element_id_list, fg_color_list, bg_color_list):
-        return self.service.color_pathway_by_elements(pathway_id, element_id_list, fg_color_list, bg_color_list)
-    
-    def get_html_of_marked_pathway_by_objects(self, pathway_id, object_id_list):
-        return self.service.get_html_of_marked_pathway_by_objects(pathway_id, object_id_list)
-    
-    def get_html_of_colored_pathway_by_objects(self, pathway_id, object_id_list, fg_color_list, bg_color_list):
-        return self.service.get_html_of_colored_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
-    
-    def get_html_of_colored_pathway_by_elements(self, pathway_id, element_id_list, fg_color_list, bg_color_list):
-        return self.service.get_html_of_colored_pathway_by_elements(pathway_id, element_id_list, fg_color_list, bg_color_list)
-    
+        raise NotImplementedError
+
+    def color_pathway_by_objects(self, pathway_id, object_id_list,
+                                 fg_color_list, bg_color_list):
+        raise NotImplementedError
+
+    def color_pathway_by_elements(self, pathway_id, element_id_list,
+                                  fg_color_list, bg_color_list):
+        raise NotImplementedError
+
+    def get_html_of_marked_pathway_by_objects(self, pathway_id,
+                                              object_id_list):
+        raise NotImplementedError
+
+    def get_html_of_colored_pathway_by_objects(self, pathway_id,
+                                               object_id_list, fg_color_list,
+                                               bg_color_list):
+        raise NotImplementedError
+
+    def get_html_of_colored_pathway_by_elements(self, pathway_id,
+                                                element_id_list, fg_color_list,
+                                                bg_color_list):
+        raise NotImplementedError
+
     def get_references_by_pathway(self, pathway_id):
         return self.service.get_references_by_pathway(pathway_id)
-    
+
     def get_element_relations_by_pathway(self, pathway_id):
         return self.service.get_element_relations_by_pathway(pathway_id)
-    
-    
-    
+
     def get_genes_by_organism(self, organism, offset=None, limit=None):
-        if offset is None and limit is None:
-            offset = 0
-            limit = self.get_number_of_genes_by_organism(organism)
-            
-        return self.service.get_genes_by_organism(organism, offset, limit)
-    
+        if offset is not None:
+            raise NotImplementedError("offset is no longer supported")
+        if limit is not None:
+            raise NotImplementedError("limit is no longer supported.")
+
+        res = self.service.list(organism).get().splitlines()
+        return [r.split(None, 1)[0] for r in res]
+
     def get_number_of_genes_by_organism(self, organism):
-        return self.service.get_number_of_genes_by_organism(organism)
-    
+        raise NotImplementedError
+
     ####################
     # Objects by pathway
     ####################
-    
+
     def get_elements_by_pathway(self, pathway_id):
-        return self.service.get_elements_by_pathway(pathway_id)
-    
+        raise NotImplementedError
+
     def get_genes_by_pathway(self, pathway_id):
-        return self.service.get_genes_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("genes", ids=[pathway_id]))
+
     def get_enzymes_by_pathway(self, pathway_id):
-        return self.service.get_enzymes_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("ec", ids=[pathway_id]))
+
     def get_compounds_by_pathway(self, pathway_id):
-        return self.service.get_compounds_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("compound", ids=[pathway_id]))
+
     def get_drugs_by_pathway(self, pathway_id):
-        return self.service.get_drugs_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("drug", ids=[pathway_id]))
+
     def get_glycans_by_pathway(self, pathway_id):
-        return self.service.get_glycans_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("gl", ids=[pathway_id]))
+
     def get_reactions_by_pathway(self, pathway_id):
-        return self.get_reactions_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("rn", ids=[pathway_id]))
+
     def get_kos_by_pathway(self, pathway_id):
-        return self.service.get_kos_by_pathway(pathway_id)
-    
+        return _link_targets(self.link("ko", ids=[pathway_id]))
+
     #####################
     # Pathways by objects
     #####################
-    
+
+    # These functions returned the intersection of the results.
     def get_pathways_by_genes(self, gene_list):
-        return map(str, self.service.get_pathways_by_genes(gene_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_enzymes(self, enzyme_list):
-        return map(str, self.service.get_pathways_by_enzymes(enzyme_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_compounds(self, compound_list):
-        return map(str, self.service.get_pathways_by_compounds(compound_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_drugs(self, drug_list):
-        return map(str, self.service.get_pathways_by_drugs(drug_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_glycans(self, glycan_list):
-        return map(str, self.service.get_pathways_by_glycans(glycan_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_reactions(self, reaction_list):
-        return map(str, self.service.get_pathways_by_reactions(reaction_list))
-    
+        raise NotImplementedError
+
     def get_pathways_by_kos(self, ko_list):
-        return map(str, self.service.get_pathways_by_kos(ko_list))
-    
+        raise NotImplementedError
+
     ##########################
     # Relations among pathways
     ##########################
-    
+
     def get_linked_pathways(self, pathway_id):
         if not pathway_id.startswith("path:"):
             pathway_id = "path:" + pathway_id
-        return map(str, self.service.get_linked_pathways(pathway_id))
-    
-    
+        return _link_targets(self.link("pathway", ids=[pathway_id]))
+
+
 """
 KEGG api with caching
 """
     
     def set_default_release(self, release):
         self.default_release = release
-        
-    
-    ##################
-    # Meta information
-    ##################
-    
-    @lru_cache() # not persistently cached
-    def list_databases(self):
-        return KeggApi.list_databases(self)
-    
+
     @cached_method
     def list_organisms(self):
         return KeggApi.list_organisms(self)
     @cached_method
     def list_pathways(self, organism):
         return KeggApi.list_pathways(self, organism)
-    
-    #######
-    # DBGET
-    #######
-    
-    @lru_cache() # not persistently cached
-    def binfo(self, db):
-        return KeggApi.binfo(self, db)
-    
+
     @cached_method
-    def bfind(self, db, keywords):
-        return KeggApi.bfind(self, db, keywords)
-    
+    def list(self, db):
+        return KeggApi.list(self, db)
+
+    @lru_cache()  # not persistently cached
+    def info(self, db):
+        return KeggApi.info(self, db)
+
     @cached_method
-    def bget(self, ids):
-        rval = KeggApi.bget(self, ids)
-        return rval
-    
+    def find(self, db, keywords):
+        return KeggApi.find(self, db, keywords)
+
     @cached_method
-    def bget(self, ids):
+    def get(self, ids):
         if not isinstance(ids, basestring):
-            return self._batch_bget(ids)
+            return self._batch_get(ids)
         else:
-            return KeggApi.bget(self, ids)
-        
-    def _batch_bget(self, ids):
-        if len(ids) > 100:
-            raise ValueError("Can batch at most 100 ids at a time.")
-        
-        bget = self.bget
+            return KeggApi.get(self, ids)
+
+    def _batch_get(self, ids):
+        if len(ids) > 10:
+            raise ValueError("Can batch at most 10 ids at a time.")
+
+        get = self.get
         uncached = []
-        with closing(bget.cache_store()) as store:
+        unmatched = set()
+
+        with closing(get.cache_store()) as store:
             # Which ids are already cached
             # TODO: Invalidate entries by release string.
             for id in ids:
-                key = bget.key_from_args((id,))
+                key = get.key_from_args((id,))
                 if key not in store:
                     uncached.append(id)
-                
+
         if uncached:
             # in case there are duplicate ids
             uncached = sorted(set(uncached))
-            rval = KeggApi.bget(self, uncached)
+            rval = KeggApi.get(self, uncached)
+
             if rval is not None:
-                entrys = rval.split("///\n")
+                entries = rval.split("///\n")
             else:
-                entrys = []
-                
-            if entrys and not entrys[-1].strip():
-                # Delete the last newline if present
-                del entrys[-1]
-            
-            if len(entrys) == len(uncached):
-                with closing(bget.cache_store()) as store:
-                    for id, entry in zip(uncached, entrys):
-                        key = bget.key_from_args((id,))
-                        if entry is not None:
-                            entry = entry + "///\n"
-                        store[key] = cache_entry(entry, mtime=datetime.now())
-                        
-            else:
-                # Try to bisect the uncached list
-                if len(uncached) > 1 and len(uncached) - len(entrys) < 4:
-                    split = len(uncached) / 2
-                    self._batch_bget(uncached[:split])
-                    self._batch_bget(uncached[split:])
-                else:
-                    import warnings
-                    warnings.warn("Batch contains invalid ids", UserWarning)
-        
+                entries = []
+
+            if entries and not entries[-1].strip():
+                # Delete the last single newline entry if present
+                del entries[-1]
+
+            if len(entries) != len(uncached):
+                new_uncached, entries = match_by_ids(uncached, entries)
+                unmatched = set(uncached) - set(new_uncached)
+                uncached = new_uncached
+                warnings.warn("Unable to match entries for keys: %s." %
+                              ", ".join(map(repr, unmatched)))
+
+            with closing(get.cache_store()) as store:
+                for id, entry in zip(uncached, entries):
+                    key = get.key_from_args((id,))
+                    if entry is not None:
+                        entry = entry + "///\n"
+                    store[key] = cache_entry(entry, mtime=datetime.now())
+
         # Finally join all the results, but drop all None objects
-        entries = filter(lambda e: e is not None, map(bget, ids))
-        
+        entries = filter(lambda e: e is not None, map(get, ids))
+
         rval = "".join(entries)
         return rval
-    
+
     @cached_method
-    def btit(self, ids):
-        return KeggApi.btit(self, ids)
-    
-    @cached_method
-    def bconv(self, ids):
-        return KeggApi.bconv(self, ids)
-    
+    def conv(self, ids):
+        return KeggApi.conv(self, ids)
+
     ########
     # LinkDB
     ########
-    
-    @cached_method
-    def get_linkdb_by_entry(self, entry_id, db, offset, limit):
-       return KeggApi.get_linkdb_by_entry(self, entry_id, db, offset, limit)
-        
-    @cached_method
-    def get_linkdb_between_databases(self, from_db, to_db, offset, limit):
-        return KeggApi.get_linkdb_between_databases(self, from_db, to_db, offset, limit)
-            
+
     @cached_method
     def get_genes_by_enzyme(self, enzyme_id, org):
         return KeggApi.get_genes_by_enzyme(self, enzyme_id, org)
-    
+
     @cached_method
     def get_enzymes_by_gene(self, genes_id):
         return KeggApi.get_enzymes_by_gene(self, genes_id)
-    
+
     @cached_method
     def get_enzymes_by_compound(self, compound_id):
         return KeggApi.get_enzymes_by_compound(self, compound_id)
-    
+
     @cached_method
     def get_enzymes_by_glycan(self, glycan_id):
         return KeggApi.get_enzymes_by_glycan(self, glycan_id)
-    
+
     @cached_method
     def get_enzymes_by_reaction(self, reaction_id):
         return KeggApi.get_enzymes_by_reaction(self, reaction_id)
-    
+
     @cached_method
     def get_compounds_by_enzyme(self, enzyme_id):
         return KeggApi.get_compounds_by_enzyme(self, enzyme_id)
-    
+
     @cached_method
     def get_compounds_by_reaction(self, reaction_id):
         return KeggApi.get_compounds_by_reaction(self, reaction_id)
-    
+
     @cached_method
     def get_glycans_by_enzyme(self, enzyme_id):
         return KeggApi.get_glycans_by_enzyme(self, enzyme_id)
-    
+
     @cached_method
     def get_glycans_by_reaction(self, reaction_id):
         return KeggApi.get_glycans_by_reaction(self, reaction_id)
-    
+
     @cached_method
     def get_reactions_by_enzyme(self, enzyme_id):
         return KeggApi.get_reactions_by_enzyme(self, enzyme_id)
-    
+
     @cached_method
     def get_reactions_by_compound(self, compound_id):
         return KeggApi.get_reactions_by_compound(self, compound_id)
-    
+
     @cached_method
     def get_reactions_by_glycan(self, glycan_id):
         return KeggApi.get_reactions_by_glycan(self, glycan_id)
-    
+
     ######
     # SSDB
     ######
     #########
     # Pathway
     #########
-    
-    # TODO
-    
-    
-    
+
     @cached_method
     def get_genes_by_organism(self, organism, offset=None, limit=None):
         return KeggApi.get_genes_by_organism(self, organism, offset=offset, limit=limit)
     @cached_method
     def get_kos_by_pathway(self, pathway_id):
         return KeggApi.get_kos_by_pathway(self, pathway_id)
-    
+
+
+def match_by_ids(ids, entries):
+    """
+    Pair each search id in `ids` with the flat text entry describing it.
+
+    The identifier of an entry is the second whitespace separated field
+    of its first line. A search id matches an entry if it is equal to
+    that identifier or, failing that, if the part after its first ':'
+    (the id without the db prefix) is equal to it.
+
+    :param ids: Search ids (strings).
+    :param entries: Entry texts, in any order.
+    :return: A `(matched_ids, matched_entries)` tuple of two parallel
+        lists, ordered by the first occurrence of the id in `ids`. Ids
+        without an entry and entries without an id are left out.
+
+    """
+    def entry_id(entry_text):
+        """
+        Return the identifier from the first line of `entry_text` or
+        `None` if that line has less than two fields.
+        """
+        header = entry_text.split("\n", 1)[0]
+        parts = header.split(None, 2)
+        return parts[1] if len(parts) > 1 else None
+
+    entries_by_id = {}
+    for entry in entries:
+        eid = entry_id(entry)
+        # Entries without a parsable header can not be matched, skip them
+        # instead of failing the whole batch.
+        if eid is not None:
+            entries_by_id[eid] = entry
+
+    # Drop duplicate search ids but keep the order of first occurrence
+    seen = set()
+    search_ids = []
+    for search_id in ids:
+        if search_id not in seen:
+            seen.add(search_id)
+            search_ids.append(search_id)
+
+    matched = {}
+
+    # First match full search ids
+    for search_id in search_ids:
+        if search_id in entries_by_id:
+            matched[search_id] = entries_by_id.pop(search_id)
+
+    # Second pass, split the remaining search ids by ':' to db and
+    # identifier part, match by identifier
+    for search_id in search_ids:
+        if search_id not in matched and ":" in search_id:
+            _, rest = search_id.split(":", 1)
+            if rest in entries_by_id:
+                matched[search_id] = entries_by_id.pop(rest)
+
+    matched_ids = [sid for sid in search_ids if sid in matched]
+    matched_entries = [matched[sid] for sid in matched_ids]
+    return matched_ids, matched_entries

_bioinformatics/obiKEGG/databases.py

 from .entry import fields
 from . import api
 
+
 def iter_take(source_iter, n):
     source_iter = iter(source_iter)
     return [item for _, item in zip(range(n), source_iter)]
 
+
 def batch_iter(source_iter, n):
     source_iter = iter(source_iter)
     while True:
             yield batch
         else:
             break
-        
+
+
 def chain_iter(chains_iter):
     for iter in chains_iter:
         for element in iter:
             yield element
 
+
+# TODO: It should be possible to construct a DBDataBase from a flat text
+# entry file. The precache etc. should be moved into the caching api, which
+# would create a simple file system hierarchy where the flat database is
+# saved (with the db release string), e.g.
+# genes/hsa.dbget
+# genes/hsa.release
+# genes/sce.dbget
+# path.dbget
+# module.dbget
+# ligand/compound.dbget
+
+
 class DBDataBase(object):
-    """ A wrapper for DBGET database.
+    """
+    A wrapper for DBGET database.
+
     """
     # ENTRY_TYPE constructor (type)
     ENTRY_TYPE = entry.DBEntry
-    
-    # Needs to be set in a subclass or object instance 
+
+    # A database name/abbreviation (e.g. path). Needs to be set in a
+    # subclass or object instance's constructor
     DB = None
-    
+
     def __init__(self, **kwargs):
         if not self.DB:
-            raise TypeError("Cannot make an instance of abstract base class %r." \
-                            % type(self).__name__)
-            
+            raise TypeError("Cannot make an instance of abstract base "
+                            "class %r." % type(self).__name__)
+
         self.api = api.CachedKeggApi()
-        self.info = self.api.binfo(self.DB)
+        self.info = self.api.info(self.DB)
         release = self.info.release
         self.api.set_default_release(release)
         self._keys = []
-        
+
     def keys(self):
+        """
+        Return a list of database keys. These are unique kegg identifiers
+        that can be used to query the database.
+
+        """
         return list(self._keys)
-    
+
     def iterkeys(self):
+        """
+        Return an iterator over the `keys`
+        """
         return iter(self._keys)
-    
+
     def items(self):
+        """
+        Return a list of all (key, `ENTRY_TYPE` instance) tuples.
+        """
         return list(zip(self.keys(), self.batch_get(self.keys())))
-    
+
     def iteritems(self):
+        """
+        Return an iterator over the `items`.
+        """
         batch_size = 100
         iterkeys = self.iterkeys()
         return chain_iter(zip(batch, self.batch_get(batch))
                           for batch in batch_iter(iterkeys, batch_size))
-        
-#        return ((key, self.__getitem__(key)) for key in self.iterkeys())
-    
+
     def values(self):
+        """
+        Return a list of all `ENTRY_TYPE` instances.
+        """
         return self.batch_get(self.keys())
-    
+
     def itervalues(self):
+        """
+        Return an iterator over all `ENTRY_TYPE` instances.
+        """
         batch_size = 100
         iterkeys = self.iterkeys()
         return chain_iter(self.batch_get(batch)
                           for batch in batch_iter(iterkeys, batch_size))
-        
-#        return (self.__getitem__(key) for key in self.iterkeys())
-    
+
     def get(self, key, default=None):
+        """
+        Return an `ENTRY_TYPE` instance for the `key`, or `default` if the
+        key is not found.
+
+        """
         try:
             return self.__getitem__(key)
         except KeyError:
             return default
-        
+
     def has_key(self, key):
         return self.__contains__(key)
-    
+
     def __getitem__(self, key):
         e = self.get_entry(key)
         if e is None:
             raise KeyError(key)
         else:
             return e
-    
+
     def __contains__(self, key):
         return key in set(self.keys())
-    
+
     def __len__(self):
         return len(self.keys())
-    
+
     def __iter__(self):
         return iter(self.keys())
-    
+
     def get_text(self, key):
+        """
+        Return the database entry for `key` as plain text.
+        """
         key = self._add_db(key)
-        return self.api.bget([key])
-    
+        return self.api.get([key])
+
     def get_entry(self, key):
+        """
+        Return the database entry for `key` as an instance of `ENTRY_TYPE`.
+        """
         text = self.get_text(key)
         if not text or text == "None":
             return None
         else:
             return self.ENTRY_TYPE(text)
-        
+
     def find(self, name):
-        """ Find ``name`` using BFIND. 
         """
-        res = self.api.bfind(self.DB, name).splitlines()
-        return [r.split(" ", 1)[0] for r in res]    
-        
-    def pre_cache(self, keys=None, batch_size=100, progress_callback=None):
-        """ Retrive all the entries and cache them locally.
+        Find ``name`` using kegg ``find`` api.
+        """
+        res = self.api.find(self.DB, name).splitlines()
+        return [r.split(" ", 1)[0] for r in res]
+
+    def pre_cache(self, keys=None, batch_size=10, progress_callback=None):
+        """
+        Retrieve all the entries and cache them locally.
         """
         # TODO do this in multiple threads
-    
+
         if not isinstance(self.api, api.CachedKeggApi):
-            raise TypeError("Not an an instance of api.CachedKeggApi")
-        
-        if batch_size > 100 or batch_size < 1:
+            raise TypeError("Not an instance of api.CachedKeggApi")
+
+        if batch_size > 10 or batch_size < 1:
             raise ValueError("Invalid batch_size")
-        
+
         if keys is None:
             keys = self.keys()
-            
+
         keys = list(keys)
         start = 0
         while start < len(keys):
             batch = keys[start: start + batch_size]
             batch = map(self._add_db, batch)
-            
-            self.api.bget(batch)
-            
+
+            self.api.get(batch)
+
             if progress_callback:
                 progress_callback(100.0 * start / len(keys))
-                
+
             start += batch_size
-            
+
     def batch_get(self, keys):
-        """ Batch retrieve all entries for keys. This can be
-        significantly faster then getting each entry separately
-        especially if entries are not yet cached.
-        
+        """
+        Batch retrieve all entries for keys. This can be significantly
+        faster than getting each entry separately, especially if entries
+        are not yet cached.
+
+        Returns a list of `ENTRY_TYPE` instances, one for every non empty
+        entry text returned by the api.
+
         """
         entries = []
-        batch_size = 100
+        # The api's `_batch_get` rejects batches of more than 10 ids with
+        # a ValueError, so request at most 10 entries at a time.
+        batch_size = 10
         while start < len(keys):
             batch = keys[start: start + batch_size]
             batch = map(self._add_db, batch)
-            batch_entries = self.api.bget(batch)
+            batch_entries = self.api.get(batch)
             if batch_entries is not None:
                 batch_entries = batch_entries.split("///\n")
-                # Remove possible empty last line  
+                # Remove possible empty last line
                 batch_entries = [e for e in batch_entries if e.strip()]
                 entries.extend(map(self.ENTRY_TYPE, batch_entries))
             start += batch_size
-            
+
         return entries
-            
+
     def _add_db(self, key):
-        """ Prefix the key with '%(DB)s:' string if not already
-        prefixed. 
+        """
+        Prefix the key with '%(DB)s:' string if not already prefixed.
         """
         if not key.startswith(self.DB + ":"):
             return self.DB + ":" + key
         else:
             return key
-        
-    @property
-    def entries(self):
-        return self.values()
-    
+
+
 @entry.entry_decorate
 class GenomeEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("DEFINITION", fields.DBDefinitionField),
-              ("ANNOTATION", fields.DBSimpleField),
-              ("TAXONOMY", fields.DBTaxonomyField),
-              ("DATA_SOURCE", fields.DBSimpleField),
-              ("ORIGINAL_DB", fields.DBSimpleField),
-              ("KEYWORDS", fields.DBSimpleField),
-              ("DISEASE", fields.DBSimpleField),
-              ("COMMENT", fields.DBSimpleField),
-              ("CHROMOSOME", fields.DBFieldWithSubsections),
-              ("STATISTICS", fields.DBSimpleField),
-              ("REFERENCE", fields.DBReference)]
-    
+    """
+    Entry for a KEGG Genome database.
+    """
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("DEFINITION", fields.DBDefinitionField),
+        ("ANNOTATION", fields.DBSimpleField),
+        ("TAXONOMY", fields.DBTaxonomyField),
+        ("DATA_SOURCE", fields.DBSimpleField),
+        ("ORIGINAL_DB", fields.DBSimpleField),
+        ("KEYWORDS", fields.DBSimpleField),
+        ("DISEASE", fields.DBSimpleField),
+        ("COMMENT", fields.DBSimpleField),
+        ("CHROMOSOME", fields.DBFieldWithSubsections),
+        ("PLASMID", fields.DBSimpleField),
+        ("STATISTICS", fields.DBSimpleField),
+        ("REFERENCE", fields.DBReference)
+    ]
+
     MULTIPLE_FIELDS = ["REFERENCE"]
-    
+
     def __init__(self, text):
         entry.DBEntry.__init__(self, text)
-        
+
     @property
-    def entry_key(self):
-        """ Primary entry key used for querying.
-        
-        .. note:: Unlike most of the other entry types this is the
-            first listed 'NAME'.
-            
+    def organism_code(self):
         """
-        
+        A three or four letter KEGG organism code (e.g. 'hsa', 'sce', ...)
+        """
         return self.name.split(",", 1)[0]
 
     @property
     def taxid(self):
+        """
+        Organism NCBI taxonomy id.
+        """
         return self.TAXONOMY.taxid
-            
-    def org_code(self):
-        if self.name is not None:
-            return self.name.split(",")[0]
-        else:
-            return self.entry.split(" ")[0]
-        
+
+#    def org_code(self):
+#        if self.name is not None:
+#            return self.name.split(",")[0]
+#        else:
+#            return self.entry.split(" ")[0]
+
 
 class Genome(DBDataBase):
+    """
+    An interface to the KEGG GENOME database.
+    """
     DB = "genome"
     ENTRY_TYPE = GenomeEntry
-    
+
     # For obiTaxonomy.common_taxids mapping
-    TAXID_MAP = {"562": "511145",   # Escherichia coli K-12 MG1655
-                 "2104": "272634",  # Mycoplasma pneumoniae M129 
-                 "4530": "39947",   # Oryza sativa ssp. japonica cultivar Nipponbare (Japanese rice)
-                 "4932" : "559292", # Saccharomyces cerevisiae S288C
-                 "4896": "284812",  # Schizosaccharomyces pombe 972h-
-                 }
-    
+    TAXID_MAP = {
+        "562": "511145",   # Escherichia coli K-12 MG1655
+        "2104": "272634",  # Mycoplasma pneumoniae M129
+        "4530": "39947",   # Oryza sativa ssp. japonica cultivar Nipponbare (Japanese rice)
+        "4932": "559292",  # Saccharomyces cerevisiae S288C
+        "4896": "284812",  # Schizosaccharomyces pombe 972h-
+    }
+
     def __init__(self):
         DBDataBase.__init__(self)
-        self._keys = [org.entry_id for org in self.api.list_organisms()]
-    
+        self._org_list = self.api.list_organisms()
+        self._keys = [org.entry_id for org in self._org_list]
+
     def _key_to_gn_entry_id(self, key):
         res = self.find(key)
         if len(res) == 0:
             raise ValueError("Not a unique key")
         else:
             return res[0]
-    
+
     @classmethod
     def common_organisms(cls):
         return ['ath', 'bta', 'cel', 'cre', 'dre', 'ddi',
                 'dme', 'eco', 'hsa', 'mmu', 'mpn', 'osa',
                 'pfa', 'rno', 'sce', 'spo', 'zma', 'xla']
-        
+
     @classmethod
     def essential_organisms(cls):
         return ['ddi', 'dme', 'hsa', 'mmu', 'sce']
-    
+
+    def org_code_to_entry_key(self, code):
+        """
+        Return the kegg identifier (T + 5 digit number) of the first
+        organism in the organism list whose organism code ('hsa',
+        'sce', ...) is equal to `code`.
+
+        Raises `ValueError` if no organism has that code.
+
+        """
+        not_found = object()
+        entry_id = next((summary.entry_id for summary in self._org_list
+                         if summary.org_code == code),
+                        not_found)
+        if entry_id is not_found:
+            raise ValueError("Unknown organism code '%s'" % code)
+        return entry_id
+
     def search(self, string, relevance=False):
-        """ Search the genome database for string using ``bfind``.
+        """
+        Search the genome database for string using the kegg ``find`` api.
         """
         if relevance:
             raise NotImplementedError("relevance is no longer supported")
+
         if string in self.TAXID_MAP:
             string = self.TAXID_MAP[string]
-            
-        res = self.api.bfind(self.DB, string)
+
+        res = self.api.find(self.DB, string)
         if not res:
             return []
-        
+
         res = res.splitlines()
         res = [r.split(",", 1)[0] for r in res]
-        res = [r.split(" ", 1)[1] for r in res]
+        res = [r.split(None, 1)[1] for r in res]
         return res
-    
-    
+
+
 @entry.entry_decorate
 class GeneEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("DEFINITION", fields.DBDefinitionField),
-              ("ORGANISM", fields.DBSimpleField),
-              ("ORTHOLOGY", fields.DBSimpleField),
-              ("DRUG_TARGET", fields.DBSimpleField),
-              ("PATHWAY", fields.DBPathway),
-              ("MODULE", fields.DBSimpleField),
-              ("DISEASE", fields.DBSimpleField),
-              ("CLASS", fields.DBSimpleField),
-              ("POSITION", fields.DBSimpleField),
-              ("MOTIF", fields.DBSimpleField),
-              ("DBLINKS", fields.DBDBLinks),
-              ("STRUCTURE", fields.DBSimpleField),
-              ("AASEQ", fields.DBAASeq),
-              ("NTSEQ", fields.DBNTSeq)]
-    
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("DEFINITION", fields.DBDefinitionField),
+        ("ORTHOLOGY", fields.DBSimpleField),
+        ("ORGANISM", fields.DBSimpleField),
+        ("PATHWAY", fields.DBPathway),
+        ("MODULE", fields.DBSimpleField),
+        ("DISEASE", fields.DBSimpleField),
+        ("DRUG_TARGET", fields.DBSimpleField),
+        ("CLASS", fields.DBSimpleField),
+        ("MOTIF", fields.DBSimpleField),
+        ("DBLINKS", fields.DBDBLinks),
+        ("STRUCTURE", fields.DBSimpleField),
+        ("POSITION", fields.DBSimpleField),
+        ("AASEQ", fields.DBAASeq),
+        ("NTSEQ", fields.DBNTSeq)
+    ]
+
     def aliases(self):
-        return [self.entry_key] + (self.name.split(",") if self.name else []) + [link[1][0] for link in self.dblinks.items() if self.dblinks]
+        return [self.entry_key] + \
+               (self.name.split(",") if self.name else []) + \
+               ([link[1][0] for link in self.dblinks.items()]
+                if self.dblinks else [])
 
     @property
     def alt_names(self):
-        """ For backwards compatibility.
+        """
+        For backwards compatibility.
         """
         return self.aliases()
-  
+
+
 class Genes(DBDataBase):
-    DB = None # Needs to be set in __init__ 
+    DB = None  # Needs to be set in __init__
     ENTRY_TYPE = GeneEntry
-    
+
     def __init__(self, org_code):
+        # TODO: Map to org code from kegg id (T + 5 digits)
         self.DB = org_code
         self.org_code = org_code
         DBDataBase.__init__(self)
         self._keys = self.api.get_genes_by_organism(org_code)
-        
+
     def gene_aliases(self):
         aliases = {}
         for entry in self.itervalues():
-            aliases.update(dict.fromkeys(entry.aliases(), self.org_code + ":" + entry.entry_key()))
+            aliases.update(dict.fromkeys(entry.aliases(),
+                                         self.org_code + ":" + entry.entry_key))
         return aliases
-    
+
 
 @entry.entry_decorate
 class CompoundEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("FORMULA", fields.DBSimpleField),
-              ("MASS", fields.DBSimpleField),
-              ("REMARK", fields.DBSimpleField),
-              ("REACTION", fields.DBSimpleField),
-              ("PATHWAY", fields.DBPathway),
-              ("ENZYME", fields.DBSimpleField),
-              ("DBLINKS", fields.DBDBLinks),
-              ("ATOM", fields.DBSimpleField),
-              ("BOND", fields.DBSimpleField)
-              ]
-    
-    
-class Compounds(DBDataBase):
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("FORMULA", fields.DBSimpleField),
+        ("EXACT_MASS", fields.DBSimpleField),
+        ("MOL_WEIGHT", fields.DBSimpleField),
+        ("REMARK", fields.DBSimpleField),
+        ("COMMENT", fields.DBSimpleField),
+        ("REACTION", fields.DBSimpleField),
+        ("PATHWAY", fields.DBPathway),
+        ("ENZYME", fields.DBSimpleField),
+        ("BRITE", fields.DBSimpleField),
+        ("REFERENCE", fields.DBSimpleField),
+        ("DBLINKS", fields.DBDBLinks),
+        ("ATOM", fields.DBSimpleField),
+        ("BOND", fields.DBSimpleField)
+    ]
+
+
+class Compound(DBDataBase):
     DB = "cpd"
     ENTRY_TYPE = CompoundEntry
-    
+
     def __init__(self):
         DBDataBase.__init__(self)
-        self._keys = [] # All keys are not available
+        self._keys = [d.entry_id for d in self.api.list("cpd")]
 
 
-@entry.entry_decorate    
+@entry.entry_decorate
 class ReactionEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("DEFINITION", fields.DBDefinitionField),
-              ("EQUATION", fields.DBSimpleField),
-              ("ENZYME", fields.DBSimpleField)
-              ]
-    
-class Reactions(DBDataBase):
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("DEFINITION", fields.DBDefinitionField),
+        ("EQUATION", fields.DBSimpleField),
+        ("ENZYME", fields.DBSimpleField)
+    ]
+
+
+class Reaction(DBDataBase):
     DB = "rn"
     ENTRY_TYPE = ReactionEntry
-    
+
     def __init__(self):
         DBDataBase.__init__(self)
-        self._keys = [] # All keys are not available
-         
+        self._keys = [d.entry_id for d in self.api.list("rn")]
+
+
 class Brite(DBDataBase):
     DB = "br"
-    
+
+
 class Disease(DBDataBase):
     DB = "ds"
-        
+
+
 class Drug(DBDataBase):
     DB = "dr"
-    
+
+
 @entry.entry_decorate
 class EnzymeEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("CLASS", fields.DBSimpleField),
-              ("SYSNAME", fields.DBSimpleField),
-              ("REACTION", fields.DBSimpleField),
-              ("ALL_REAC", fields.DBSimpleField),
-              ("SUBSTRATE", fields.DBSimpleField),
-              ("PRODUCT", fields.DBSimpleField),
-              ("COMMENT", fields.DBSimpleField),
-              ("REFERENCE", fields.DBReference),
-              ("PATHWAY", fields.DBPathway),
-              ("ORTHOLOGY", fields.DBSimpleField),
-              ("GENES", fields.DBSimpleField),
-              ("DBLINKS", fields.DBDBLinks)
-              ]
-    
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("CLASS", fields.DBSimpleField),
+        ("SYSNAME", fields.DBSimpleField),
+        ("REACTION", fields.DBSimpleField),
+        ("ALL_REAC", fields.DBSimpleField),
+        ("SUBSTRATE", fields.DBSimpleField),
+        ("PRODUCT", fields.DBSimpleField),
+        ("COMMENT", fields.DBSimpleField),
+        ("REFERENCE", fields.DBReference),
+        ("PATHWAY", fields.DBPathway),
+        ("ORTHOLOGY", fields.DBSimpleField),
+        ("GENES", fields.DBSimpleField),
+        ("DBLINKS", fields.DBDBLinks)
+    ]
+
     MULTIPLE_FIELDS = ["REFERENCE"]
-    
-class Enzymes(DBDataBase):
+
+
+class Enzyme(DBDataBase):
     DB = "ec"
     ENTRY_TYPE = EnzymeEntry
-    
-    
+
+    def __init__(self):
+        """
+        Initialize the base database wrapper and take the database keys
+        from the `entry_id` of every item the api lists for "ec".
+        """
+        DBDataBase.__init__(self)
+        listing = self.api.list("ec")
+        self._keys = [item.entry_id for item in listing]
+
+
 @entry.entry_decorate
 class OrthologyEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("CLASS", fields.DBSimpleField),
-              ("DBLINKS", fields.DBDBLinks),
-              ("GENES", fields.DBSimpleField),
-              ]
-    
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("CLASS", fields.DBSimpleField),
+        ("DBLINKS", fields.DBDBLinks),
+        ("GENES", fields.DBSimpleField),
+    ]
+
+
 class Orthology(DBDataBase):
     DB = "ko"
     ENTRY_TYPE = OrthologyEntry
-    
-    
+
+    def __init__(self):
+        """
+        Initialize the base database wrapper and take the database keys
+        from the `entry_id` of every item the api lists for "ko".
+        """
+        DBDataBase.__init__(self)
+        listing = self.api.list("ko")
+        self._keys = [item.entry_id for item in listing]
+
+
 @entry.entry_decorate
 class PathwayEntry(entry.DBEntry):
-    FIELDS = [("ENTRY", fields.DBEntryField),
-              ("NAME", fields.DBNameField),
-              ("DESCRIPTION", fields.DBSimpleField),
-              ("CLASS", fields.DBSimpleField),
-              ("PATHWAY_MAP", fields.DBPathwayMapField),
-              ("DISEASE", fields.DBSimpleField),
-              ("DRUG", fields.DBSimpleField),
-              ("DBLINKS", fields.DBDBLinks),
-              ("ORGANISM", fields.DBSimpleField),
-              ("GENE", fields.DBGeneField),
-              ("ENZYME", fields.DBEnzymeField),
-              ("COMPOUND", fields.DBCompoundField),
-              ("REFERENCE", fields.DBReference),
-              ("REL_PATHWAY", fields.DBSimpleField),
-              ("KO_PATHWAY", fields.DBSimpleField),
-              ]
-    
+    FIELDS = [
+        ("ENTRY", fields.DBEntryField),
+        ("NAME", fields.DBNameField),
+        ("DESCRIPTION", fields.DBSimpleField),
+        ("CLASS", fields.DBSimpleField),
+        ("PATHWAY_MAP", fields.DBPathwayMapField),
+        ("MODULE", fields.DBSimpleField),
+        ("DISEASE", fields.DBSimpleField),
+        ("DRUG", fields.DBSimpleField),
+        ("DBLINKS", fields.DBDBLinks),
+        ("ORGANISM", fields.DBSimpleField),
+        ("GENE", fields.DBGeneField),
+        ("ENZYME", fields.DBEnzymeField),
+        ("COMPOUND", fields.DBCompoundField),
+        ("REFERENCE", fields.DBReference),
+        ("REL_PATHWAY", fields.DBSimpleField),
+        ("KO_PATHWAY", fields.DBSimpleField),
+    ]
+
     MULTIPLE_FIELDS = ["REFERENCE"]
-    
+
     @property
     def gene(self):
         if hasattr(self, "GENE"):
             genes = self.GENE._convert()
         else:
             return None
-        
+
         org = self.organism
         org_prefix = ""
         if org:
             if match:
                 org_prefix = match[0] + ":"
         genes = [org_prefix + g for g in genes]
-        return genes 
-    
-class Pathways(DBDataBase):
+        return genes
+
+
+class Pathway(DBDataBase):
     DB = "path"
     ENTRY_TYPE = PathwayEntry
-    
+
     def __init__(self):
         DBDataBase.__init__(self)
-    
+        self._keys = [d.entry_id for d in self.api.list("path")]

_bioinformatics/obiKEGG/entry/__init__.py

 """
-DBGET entry 
+DBGET entry
 """
 from __future__ import absolute_import
 

_bioinformatics/obiKEGG/entry/parser.py

 """
-A parser for DBGET entries
- 
+A parser for DBGET database entries
+
 """
+from StringIO import StringIO
+
 
 class DBGETEntryParser(object):
-    """ A DBGET entry parser (inspired by ``xml.dom.pulldom``)
+    r"""
+    A DBGET entry parser (inspired by ``xml.dom.pulldom``).
+
+    ::
+
+        >>> stream = StringIO("ENTRY foo\n"
+        ...                   "NAME  foo's name\n"
+        ...                   "  BAR A subsection of 'NAME'\n")
+        ...
+        >>> parser = DBGETEntryParser()