1. biolab
  2. Untitled project
  3. orange-bioinformatics

Commits

Aleš Erjavec  committed b3b664a

Added sphinx rst documentation for obiKEGG.

  • Participants
  • Parent commits 50499d1
  • Branches default

Comments (0)

Files changed (9)

File _bioinformatics/obiKEGG/__init__.py

View file
 KEGG - Kyoto Encyclopedia of Genes and Genomes
 ==============================================
 
-This is a python module for access to `KEGG`_ using its web services.
+:mod:`obiKEGG` is a python module for accessing `KEGG (Kyoto Encyclopedia
+of Genes and Genomes) <http://www.genome.jp/kegg/>`_ using its web services.
 
-To use this module you need to have `slumber` and `requests` package
-installed.
+.. note:: To use this module you need to have the `slumber`_ and `requests`_
+          packages installed.
 
-.. _`KEGG`: http://www.genome.jp/kegg/
+.. _`slumber`: https://pypi.python.org/pypi/slumber/
 
+.. _`requests`: https://pypi.python.org/pypi/requests
+
+
+>>> # Create a KEGG Genes database interface
+>>> genome = KEGGGenome()
+>>> # List all available entry ids
+>>> keys = genome.keys()
+>>> print keys[0]
+T01001
+>>> # Retrieve the entry for the key.
+>>> entry = genome[keys[0]]
+>>> print entry.entry_key
+T01001
+>>> print entry.definition
+Homo sapiens (human)
+>>> print str(entry)
+ENTRY       T01001            Complete  Genome
+NAME        hsa, HUMAN, 9606
+DEFINITION  Homo sapiens (human)
+...
+
+The :class:`Organism` class can be used as a convenient starting point
+for organism specific databases.
+
+>>> organism = Organism("Homo sapiens")  # searches for the organism by name
+>>> print organism.org_code  # prints the KEGG organism code
+hsa
+>>> genes = organism.genes  # get the genes database for the organism
+>>> gene_ids = genes.keys() # KEGG gene identifiers
+>>> entry = genes["hsa:672"]
+>>> print entry.definition
+breast cancer 1, early onset
+>>> print entry  # print the entry in DBGET database format.
+ENTRY       672               CDS       T01001
+NAME        BRCA1, BRCAI, BRCC1, BROVCA1, IRIS, PNCA4, PPP1R53, PSCP, RNF53
+DEFINITION  breast cancer 1, early onset
+...
 
 """
 from __future__ import absolute_import
     A convenience class for retrieving information regarding an
     organism in the KEGG Genes database.
 
-    :param org: KEGGG organism code (e.g. "hsa", "sce")
+    :param org: KEGG organism code (e.g. "hsa", "sce"). Can also be a
+        descriptive name (e.g. 'yeast', "homo sapiens") in which case the
+        organism code will be searched for by using KEGG `find` api.
     :type org: str
 
+    .. seealso::
+
+        :func:`organism_name_search`
+            Search KEGG for an organism code
+
     """
     def __init__(self, org, genematcher=None):
         self.org_code = self.organism_name_search(org)
     @property
     def genes(self):
         """
-        An :class:`Genes` database instance for this organism.
+        A :class:`~.databases.Genes` database instance for this organism.
         """
         # TODO: This should not be a property but a method.
         # I think it was only put here as back compatibility with old obiKEGG.
 
     def list_pathways(self):
         """
-        List all pathways.
+        List all pathways for this organism.
+
+        .. deprecated:: 2.5
+            Use :func:`pathways` instead.
+
         """
         # NOTE: remove/deprecate and use pathways()
         return self.pathways()
 
 
 def organism_name_search(name):
+    """
+    Search for an organism by `name` and return a KEGG organism code.
+    """
     return KEGGOrganism.organism_name_search(name)
 
 
 def pathways(org):
+    """
+    Return a list of all KEGG pathways for a KEGG organism code `org`.
+    """
     return KEGGPathway.list(org)
 
 
 def organisms():
+    """
+    Return a list of all KEGG organisms.
+    """
     return KEGGOrganism.organisms()
 
 
 def from_taxid(taxid):
+    """
+    Return a KEGG organism code for an NCBI Taxonomy id string `taxid`.
+    """
     genome = KEGGGenome()
     res = genome.search(taxid)
     for r in res:
 
 
 def to_taxid(name):
+    """
+    Return an NCBI Taxonomy id for a given KEGG Organism name.
+    """
     genome = KEGGGenome()
     if name in genome:
         return genome[name].taxid

File _bioinformatics/obiKEGG/databases.py

View file
 """
-DBGET database
+DBGET Database Interface
+========================
+
 """
 from __future__ import absolute_import
 
 
 
 def iter_take(source_iter, n):
+    """
+    Return a list of the first `n` items in `source_iter`.
+    """
     source_iter = iter(source_iter)
     return [item for _, item in zip(range(n), source_iter)]
 
 
 def batch_iter(source_iter, n):
+    """
+    Split the `source_iter` into batches of size `n`.
+    """
     source_iter = iter(source_iter)
     while True:
         batch = iter_take(source_iter, n)
 
 class DBDataBase(object):
     """
-    A wrapper for DBGET database.
+    Base class for a DBGET database interface.
 
     """
-    # ENTRY_TYPE constructor (type)
+    #: ENTRY_TYPE constructor (a :class:`~.entry.DBEntry` subclass). This
+    #: should be redefined in subclasses.
     ENTRY_TYPE = entry.DBEntry
 
-    # A database name/abbreviation (e.g. path). Needs to be set in a
-    # subclass or object instance's constructor
+    #: A database name/abbreviation (e.g. 'pathway'). Needs to be set in a
+    #: subclass or object instance's constructor before calling the base
+    #: ``__init__``.
     DB = None
 
     def __init__(self, **kwargs):
 
     def keys(self):
         """
-        Return a list of database keys. These are unique kegg identifiers
+        Return a list of database keys. These are unique KEGG identifiers
         that can be used to query the database.
 
         """
 
     def iterkeys(self):
         """
-        Return an iterator over the `keys`
+        Return an iterator over the `keys`.
         """
         return iter(self._keys)
 
     def items(self):
         """
-        Return a list of all (key, `ENTRY_TYPE` instance) tuples.
+        Return a list of all (key, :obj:`DBDataBase.ENTRY_TYPE` instance)
+        tuples.
+
         """
         return list(zip(self.keys(), self.batch_get(self.keys())))
 
 
     def values(self):
         """
-        Return a list of all `ENTRY_TYPE` instances.
+        Return a list of all :obj:`DBDataBase.ENTRY_TYPE` instances.
         """
         return self.batch_get(self.keys())
 
     def itervalues(self):
         """
-        Return an iterator over all `ENTRY_TYPE` instances.
+        Return an iterator over all :obj:`DBDataBase.ENTRY_TYPE` instances.
         """
         batch_size = 100
         iterkeys = self.iterkeys()
 
     def get(self, key, default=None):
         """
-        Return an `ENTRY_TYPE` instance for the `key`. Raises `KeyError` if
-        not found.
+        Return a :obj:`DBDataBase.ENTRY_TYPE` instance for the `key`.
+        Raises :class:`KeyError` if not found.
 
         """
         try:
 
     def find(self, name):
         """
-        Find ``name`` using kegg ``find`` api.
+        Find `name` using the KEGG `find` api.
         """
         res = self.api.find(self.DB, name).splitlines()
         return [r.split(" ", 1)[0] for r in res]
 
     def pre_cache(self, keys=None, batch_size=10, progress_callback=None):
         """
-        Retrieve all the entries and cache them locally.
+        Retrieve all the entries for `keys` and cache them locally for faster
+        subsequent retrieval. If `keys` is ``None`` then all entries will be
+        retrieved.
+
         """
-        # TODO do this in multiple threads
-
         if not isinstance(self.api, api.CachedKeggApi):
             raise TypeError("Not an instance of api.CachedKeggApi")
 
         """
         return self.TAXONOMY.taxid
 
-#    def org_code(self):
-#        if self.name is not None:
-#            return self.name.split(",")[0]
-#        else:
-#            return self.entry.split(" ")[0]
+    def org_code(self):
+        # for backwards compatibility; return the `organism_code`
+        return self.organism_code
 
 
 class Genome(DBDataBase):
 
 
 class Genes(DBDataBase):
+    """
+    Interface to the KEGG Genes database.
+
+    :param org_code: KEGG organism code (e.g. 'hsa').
+    :type org_code: str
+
+    """
     DB = None  # Needs to be set in __init__
     ENTRY_TYPE = GeneEntry
 

File _bioinformatics/obiKEGG/entry/__init__.py

View file
 from .parser import DBGETEntryParser
 
 
+# TODO: Remove the use of entry_decorate decorator
+# for constructing a DBEntry subclass, make fields
+# properties with __get__ method, and explicit assignment
+# and meaningful docstrings
+
+
 def entry_decorate(cls):
     """
     Decorate the DBEntry subclass with properties for accessing
 
     @property
     def entry_key(self):
-        """ Primary entry key used for querying.
+        """
+        Primary entry key used for identifying the entry.
         """
         return self.entry.split(" ", 1)[0]
 
     def parse(self, text):
+        """
+        Parse `text` string containing a formatted DBGET entry.
+        """
         parser = DBGETEntryParser()
         gen = parser.parse_string(text)
         field_constructors = dict(self.FIELDS)
         return self.format()
 
     def format(self, section_indent=12):
+        """
+        Return a DBGET formatted string representation.
+        """
         return "".join(f.format(section_indent)
                        for f in self.fields)
 

File _bioinformatics/obiKEGG/pathway.py

View file
 """
-KEGG Pathway (from kgml file)
+============
+KEGG Pathway
+============
 
 """
 from __future__ import absolute_import
 
 
 class Pathway(object):
+    """
+    Class representing a KEGG Pathway (parsed from a "kgml" file)
+
+    :param str pathway_id: A KEGG pathway id (e.g. 'path:hsa05130')
+
+    """
     KGML_URL_FORMAT = "http://www.genome.jp/kegg-bin/download?entry={pathway_id}&format=kgml"
 
     def __init__(self, pathway_id, local_cache=None, connection=None):
                                                  "last_modified.sqlite3"))
 
     def _get_kgml(self):
-        """ Return an open kgml file for the pathway.
+        """
+        Return an open kgml file for the pathway.
         """
         from datetime import datetime, timedelta
         valid = False
         return open(local_filename, "rb")
 
     def _get_image_filename(self):
-        """ Return a filename of a local copy of the pathway image
+        """
+        Return a filename of a local copy of the pathway image
         """
         # TODO: keep-alive (using httplib if it supports it)
         # better to move all code to use requests package
         return local_filename
 
     def _local_kgml_filename(self):
-        """ Return the local kgml xml filename for the pathway.
+        """
+        Return the local kgml xml filename for the pathway.
         """
         local_filename = os.path.join(self.local_cache,
                                       self.pathway_id + ".xml")
             self.__dict__.update(dom_element.attributes.items())
             self.graphics = ()
             self.components = []
-            self.graphics = dict(dom_element.getElementsByTagName("graphics")[0].attributes.items())
-            self.components = [node.getAttribute("id") for node in dom_element.getElementsByTagName("component")]
+
+            graphics = dom_element.getElementsByTagName("graphics")[0]
+            self.graphics = dict(graphics.attributes.items())
+
+            components = dom_element.getElementsByTagName("component")
+            self.components = [node.getAttribute("id") for node in components]
 
     class reaction(object):
         def __init__(self, dom_element):
             self.__dict__.update(dom_element.attributes.items())
-            self.substrates = [node.getAttribute("name") for node in dom_element.getElementsByTagName("substrate")]
-            self.products = [node.getAttribute("name") for node in dom_element.getElementsByTagName("product")]
+            self.substrates = [node.getAttribute("name") for node in
+                               dom_element.getElementsByTagName("substrate")]
+            self.products = [node.getAttribute("name") for node in
+                             dom_element.getElementsByTagName("product")]
 
     class relation(object):
         def __init__(self, dom_element):
             self.__dict__.update(dom_element.attributes.items())
-            self.subtypes = [node.attributes.items() for node in dom_element.getElementsByTagName("subtype")]
+            self.subtypes = [node.attributes.items() for node in
+                             dom_element.getElementsByTagName("subtype")]
 
     @cached_method
     def pathway_attributes(self):
 
     @property
     def name(self):
+        """
+        Pathway name/id (e.g. "path:hsa05130")
+        """
         return self.pathway_attributes().get("name")
 
     @property
     def org(self):
+        """
+        Pathway organism code (e.g. 'hsa')
+        """
         return self.pathway_attributes().get("org")
 
     @property
     def number(self):
+        """
+        Pathway number as a string (e.g. '05130')
+        """
         return self.pathway_attributes().get("number")
 
     @property
     def title(self):
+        """
+        Pathway title string.
+        """
         return self.pathway_attributes().get("title")
 
     @property
     def image(self):
+        """
+        URL of the pathway image.
+        """
         return self.pathway_attributes().get("image")
 
     @property
     def link(self):
+        """
+        URL to a pathway on the KEGG web site.
+        """
         return self.pathway_attributes().get("link")
 
     @cached_method
             return []
 
     def __iter__(self):
-        """ Iterate over all elements in the pathway
+        """
+        Iterate over all elements in the pathway.
         """
         return iter(self.all_elements())
 
     def __contains__(self, element):
-        """ Retrurn true if element in the pathway
+        """
+        Return ``True`` if `element` is in the pathway.
         """
         return element in self.all_elements()
 
 
     @cached_method
     def all_elements(self):
-        """ Return all elements
+        """
+        Return all elements
         """
         return reduce(list.__add__,
                       [self.genes(), self.compounds(),
 
     @cached_method
     def genes(self):
-        """ Return all genes on the pathway
+        """
+        Return all genes on the pathway.
         """
         return self._get_entries_by_type("gene")
 
     @cached_method
     def compounds(self):
-        """ Return all compounds on the pathway
+        """
+        Return all compounds on the pathway.
         """
         return self._get_entries_by_type("compound")
 
     @cached_method
     def enzymes(self):
-        """ Return all enzymes on the pathway
+        """
+        Return all enzymes on the pathway.
         """
         return self._get_entries_by_type("enzyme")
 
     @cached_method
     def orthologs(self):
-        """ Return all orthologs on the pathway
+        """
+        Return all orthologs on the pathway.
         """
         return self._get_entries_by_type("ortholog")
 
     @cached_method
     def maps(self):
-        """ Return all linked maps on the pathway
+        """
+        Return all linked maps on the pathway.
         """
         return self._get_entries_by_type("map")
 
     @cached_method
     def groups(self):
-        """ Return all groups on the pathway
+        """
+        Return all groups on the pathway.
         """
         return self._get_entries_by_type("ortholog")
 
     def get_image(self):
-        """ Return an image of the pathway
+        """
+        Return a local filesystem path to an image of the pathway. The image
+        will be downloaded if not already cached.
         """
         return self._get_image_filename()
 
     @classmethod
     def list(cls, organism):
+        """
+        List all pathways for KEGG organism code `organism`.
+        """
         kegg = api.CachedKeggApi()
         return kegg.list_pathways(organism)

File _bioinformatics/obiKEGG/tests/__init__.py

Empty file added.

File _bioinformatics/obiKEGG/tests/test_api.py

View file
+import unittest
+import doctest
+
+from .. import api
+from ... import obiKEGG
+
+
+class TestApi(unittest.TestCase):
+    pass
+
+
+def load_tests(loader, tests, ignore):
+    tests.addTests(doctest.DocTestSuite(obiKEGG,
+                                        optionflags=doctest.ELLIPSIS))
+    return tests

File _bioinformatics/obiKEGG/tests/test_entry.py

View file
+from StringIO import StringIO
+import doctest
+
+import unittest
+
+from ..entry import parser, fields, DBEntry, entry_decorate
+
+
+TEST_ENTRY = """\
+ENTRY       test_id    something else
+NAME        test
+DESCRIPTION This is a test's description.
+            it spans
+            multiple lines
+  SUB       This is a description's sub
+            section
+///
+"""
+
+
+@entry_decorate
+class Entry(DBEntry):
+    pass
+
+
+class TestEntry(unittest.TestCase):
+    def test_entry(self):
+        """
+        Test basic DBEntry class.
+        """
+        entry = Entry(TEST_ENTRY)
+        self.assertEqual(entry.entry_key, "test_id")
+        self.assertEqual(entry.ENTRY.TITLE, "ENTRY")
+
+        self.assertEqual(str(entry), TEST_ENTRY[:-4])
+
+
+class TestParser(unittest.TestCase):
+    def test_parser(self):
+        parse = parser.DBGETEntryParser()
+        stream = StringIO(TEST_ENTRY)
+
+        for event, title, text in parse.parse(stream):
+            pass
+
+
+def load_tests(loader, tests, ignore):
+    tests.addTests(doctest.DocTestSuite(parser))
+    return tests

File docs/rst/index.rst

View file
 .. toctree::
    :maxdepth: 1
 
+   reference/kegg.rst
+
 Installation
 ------------
 

File docs/rst/reference/kegg.rst

View file
+============================================================
+KEGG - Kyoto Encyclopedia of Genes and Genomes (:mod:`kegg`)
+============================================================
+
+
+.. automodule:: Orange.bio.obiKEGG
+   :members:
+   :member-order: bysource
+
+DBEntry (:mod:`entry`)
+----------------------
+
+The :class:`~.entry.DBEntry` represents a DBGET database entry.
+The individual KEGG Database interfaces below provide their own
+specialization for this base class.
+ 
+.. autoclass:: Orange.bio.obiKEGG.entry.DBEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+KEGG Databases interface (:mod:`databases`)
+-------------------------------------------
+
+.. autoclass:: Orange.bio.obiKEGG.databases.DBDataBase
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.GenomeEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Genome
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.GeneEntry
+   :members:
+   :exclude-members:
+      alt_names
+   :member-order: bysource
+   :show-inheritance:
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Genes
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.CompoundEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Compound
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.ReactionEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Reaction
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.EnzymeEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Enzyme
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.PathwayEntry
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+.. autoclass:: Orange.bio.obiKEGG.databases.Pathway
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+
+
+KEGG Pathway (:mod:`pathway`)
+-----------------------------
+
+.. autoclass:: Orange.bio.obiKEGG.pathway.Pathway
+   :members:
+   :exclude-members:
+      entrys
+   :member-order: bysource
+   :show-inheritance:
+
+
+Utilities
+---------
+
+.. autoclass:: Orange.bio.obiKEGG.entry.parser.DBGETEntryParser
+   :members: