Commits

Aleš Erjavec committed af7c562

Added "Gene Network" widget.

  • Participants
  • Parent commits 9480fbe

Comments (0)

Files changed (1)

orangecontrib/bio/widgets/OWGeneNetwork.py

+from collections import namedtuple
+
+from PyQt4.QtCore import QTimer, pyqtSlot as Slot
+
+import Orange.data
+import Orange.network
+import Orange.feature
+
+from Orange.OrangeWidgets import OWWidget
+from Orange.OrangeWidgets import OWGUI
+from Orange.OrangeWidgets import OWItemModels
+from Orange.OrangeWidgets.OWConcurrent import ThreadExecutor, Future
+
+from .. import ppi, taxonomy, gene
+
+NAME = "Gene Network"
+DESCRIPTION = "Extract a gene network for a set of genes."
+ICON = "icons/GeneNetwork.svg"
+
+INPUTS = [("Data", Orange.data.Table, "set_data")]
+OUTPUTS = [("Network", Orange.network.Graph),
+           ("Items", Orange.data.Table)]
+
+
+Source = namedtuple(
+    "Source",
+    ["name", "constructor", "sf_domain", "sf_filename"]
+)
+
+SOURCES = [
+    Source("BioGRID", ppi.BioGRID, "PPI", ppi.BioGRID.SERVER_FILE),
+#     Source("STRING", ppi.STRING, "PPI", ppi.STRING.FILENAME)
+]
+
+
+class OWGeneNetwork(OWWidget.OWWidget):
+    settingsList = ["taxid", "genes_in_row", "network_source",
+                    "include_neighborhood"]
+    contextHandlers = {
+        "": OWWidget.DomainContextHandler(
+            "", ["taxid", "gene_var_index", "genes_in_row"]
+        )
+    }
+
+    def __init__(self, parent=None, signalManager=None, title="Gene Network"):
+        super(OWGeneNetwork, self).__init__(parent, signalManager, title,
+                                            wantMainArea=False)
+
+        self.taxid = "9606"
+        self.gene_var_index = -1
+        self.genes_in_rows = False
+        self.network_source = 0
+        self.include_neighborhood = True
+        self.autocommit = False
+
+        self.loadSettings()
+
+        self.taxids = taxonomy.common_taxids()
+        self.current_taxid_index = self.taxids.index(self.taxid)
+
+        self.db = None
+        self.data = None
+        self.geneinfo = None
+        self._invalidated = False
+
+        box = OWGUI.widgetBox(self.controlArea, "Info")
+        self.info = OWGUI.widgetLabel(box, "No input\n")
+
+        box = OWGUI.widgetBox(self.controlArea, "Network Source")
+        OWGUI.comboBox(
+            box, self, "network_source",
+            items=[s.name for s in SOURCES],
+            callback=self._update_source_db
+        )
+        box = OWGUI.widgetBox(self.controlArea, "Organism")
+        self.organism_cb = OWGUI.comboBox(
+            box, self, "current_taxid_index",
+            items=map(taxonomy.name, self.taxids),
+            callback=self._update_organism
+        )
+        box = OWGUI.widgetBox(self.controlArea, "Genes")
+        self.genes_cb = OWGUI.comboBox(
+            box, self, "gene_var_index", callback=self._update_query_genes
+        )
+        self.varmodel = OWItemModels.VariableListModel()
+        self.genes_cb.setModel(self.varmodel)
+
+        OWGUI.checkBox(
+            box, self, "genes_in_rows",
+            "Use attribute names",
+            callback=self._update_query_genes
+        )
+        box = OWGUI.widgetBox(self.controlArea, "Neighborhood")
+        OWGUI.checkBox(
+            box, self, "include_neighborhood",
+            "Include immediate neighborers",
+            callback=self.invalidate
+        )
+        box = OWGUI.widgetBox(self.controlArea, "Commit")
+#         cb = OWGUI.checkBox(box, self, "autocommit")
+        OWGUI.button(box, self, "Commit", callback=self.commit, default=True)
+
+        self.executor = ThreadExecutor()
+
+#         QTimer.singleShot(0, self.initialize)
+        self._update_source_db()
+        self._update_organism()
+
+    @Slot()
+    def initialize(self):
+        if self.db is None:
+            self.db = ppi.BioGRID()
+
+    def set_data(self, data):
+        self.closeContext()
+        self.data = data
+        if data is not None:
+            self.varmodel[:] = string_variables(data.domain)
+            self.openContext("", data)
+#             self._update_info()
+            self.commit()
+        else:
+            self.varmodel[:] = []
+
+    def set_source_db(self, dbindex):
+        self.network_source = dbindex
+        # by taxid??
+        self.db = SOURCES[self.network_source]()
+        self.invalidate()
+
+    def set_organism(self, index):
+        self.current_taxid_index = index
+        self.taxid = self.taxids[index]
+        self.invalidate()
+
+    def set_gene_var(self, index):
+        self.gene_var_index = index
+        self.invalidate()
+
+    def query_genes(self):
+        if self.gene_var_index >= 0:
+            var = self.varmodel[self.gene_var_index]
+            genes = [unicode(inst[var]) for inst in self.data
+                     if not inst[var].isSpecial()]
+            return list(unique(genes))
+        else:
+            return []
+
+    def invalidate(self):
+        self._invalidated = True
+        if self.autocommit:
+            QTimer.singleShot(0, self._maybe_commit)
+
+    @Slot()
+    def _maybe_commit(self):
+        if self._invalidated:
+            self.commit()
+
+    def commit(self):
+        gene_var = self.varmodel[self.gene_var_index]
+        query_genes = self.query_genes()
+        geneinfo = self.geneinfo.result()
+
+        ppidb = self._db
+        tax_map = self._tax_mapping
+        taxid = tax_map.get(self.taxid, None)
+        if taxid is None:
+            raise ValueError
+
+        # Normalize the names to ppidb primary keys
+        matcher = geneinfo.matcher
+        query_genes = zip(query_genes, map(matcher.umatch, query_genes))
+        synonyms = ppidb_synonym_mapping(ppidb, taxid)
+
+        query_genes = [(query_gene, geneid, synonyms.get(geneid, None))
+                        for query_gene, geneid in query_genes]
+
+        query = [(syn[0], query_gene)
+                 for query_gene, _, syn in query_genes if syn]
+        net = extract_network(ppidb, dict(query), geneinfo,
+                              self.include_neighborhood)
+
+        # Items has the same domain as the input with query instances
+        # corresponding to nodes where applicable (if include_neighborhood
+        # then nodes not contained in the query will have instances with all
+        # unknown values
+
+        data_by_query_name = {str(inst[gene_var]): inst
+                              for inst in self.data}
+        items = Orange.data.Table(self.data.domain)
+        for nodeid, node in sorted(net.nodes(data=True)):
+            if "query_name" in node:
+                items.append(data_by_query_name[node["query_name"]])
+            else:
+                items.append(Orange.data.Instance(self.data.domain))
+        self.send("Network", net)
+        self.send("Items", items)
+        self._invalidated = False
+
+    def fetch_db(self, which):
+        pass
+
+    def _update_source_db(self):
+        source = ppi.BioGRID()
+        orgs = map(str, source.organisms())
+
+        tax_mapping = taxonomy_match(orgs, self.taxids)
+        tax_mapping = {common_tax: db_tax
+                       for db_tax, common_tax in tax_mapping.items()
+                       if common_tax is not None}
+
+        # TODO: Disable the items in Organism combo box that are not
+        # in this list.
+#         model = self.organism_cb.model()
+#         for i, taxid in self.taxids:
+#             enabled = taxid in tax_mapping
+
+        self._db = source
+        self._tax_mapping = tax_mapping
+        self.invalidate()
+
+    def _update_organism(self):
+        self.taxid = self.taxids[self.current_taxid_index]
+        if self.geneinfo is not None:
+            self.geneinfo.cancel()
+
+        self.geneinfo = self.executor.submit(
+            lambda: gene.NCBIGeneInfo(self.taxid)
+        )
+        self.invalidate()
+
+    def _update_query_genes(self):
+        self.invalidate()
+
+
+def unique(seq):
+    seen = set()
+    for el in seq:
+        if el not in seen:
+            seen.add(el)
+            yield el
+
+
+def string_variables(domain):
+    variables = domain.variables + domain.getmetas().values()
+    return [v for v in variables if isinstance(v, Orange.feature.String)]
+
+
+def multimap_inverse(multimap):
+    """
+    Return a multimap inverse relation.
+    """
+    d = {}
+    for key, values in multimap.iteritems():
+        for v in values:
+            d.setdefault(v, []).append(key)
+    return d
+
+
+def ppidb_synonym_mapping(ppidb, taxid):
+    keys = ppidb.ids(taxid)
+    mapping = {key: ppidb.synonyms(key) for key in keys}
+    return multimap_inverse(mapping)
+
+
+def taxonomy_match(query_taxids, target_taxids):
+    taxid_mapping = {}
+    target_taxids = set(target_taxids)
+
+    for taxid in query_taxids:
+        mapped = taxid_map(taxid, target_taxids)
+        taxid_mapping[taxid] = mapped
+
+    return taxid_mapping
+
+
+def taxid_map(query, targets):
+    if query in targets:
+        return query
+
+    lineage = taxonomy.lineage(query)
+    if any(tid in targets for tid in lineage):
+        return set(lineage).intersection(targets).pop()
+    else:
+        return None
+
+from Orange.utils import serverfiles as sf
+
+
+def fetch_ppidb(ppidb, progress=None):
+    sf.localpath_download(
+        ppidb.DOMAIN, ppidb.SERVER_FILE,
+        callback=progress, verbose=True
+    )
+    db = ppidb()
+
+    mapping = taxonomy_match(db.organisms(), taxonomy.common_taxids())
+    return db, mapping
+
+
+def extract_network(ppidb, query, geneinfo, include_neighborhood=True):
+    """
+    include neighborhood
+    """
+    from functools import partial
+    from collections import defaultdict
+    from itertools import count
+
+    if not isinstance(query, dict):
+        query = {name: name for name in query}
+
+    graph = Orange.network.Graph()
+    # node ids in Orange.network.Graph need to be in [0 .. n-1]
+    nodeids = defaultdict(partial(next, count()))
+
+    for key, query_name in query.items():
+        nodeid = nodeids[key]
+        graph.add_node(
+            nodeid,
+            key=key,
+            synonyms=ppidb.synonyms(key),
+            query_name=query_name
+        )
+
+    for key in query:
+        edges = ppidb.edges(key)
+
+        for id1, id2, score in edges:
+            if include_neighborhood or id1 in query and id2 in query:
+                nodeid1 = nodeids[id1]
+                nodeid2 = nodeids[id2]
+                if nodeid1 not in graph:
+                    graph.add_node(
+                        nodeid1, key=id1, synonyms=ppidb.synonyms(id1))
+                if nodeid2 not in graph:
+                    graph.add_node(
+                        nodeid2, key=id1, synonyms=ppidb.synonyms(id2))
+
+                if score is not None:
+                    graph.add_edge(nodeid1, nodeid2, weight=score)
+                else:
+                    graph.add_edge(nodeid1, nodeid2)
+
+    # The "items" should table should contain the query name (if
+    nodedomain = Orange.data.Domain(
+        [Orange.feature.String("Query name"),  # if applicable
+         Orange.feature.String("id"),          # ppidb primary key
+         Orange.feature.String("Synonyms"),    # ppidb synonyms
+         Orange.feature.String("Symbol"),      # ncbi gene name ??
+         Orange.feature.Discrete("source", values=["false", "true"])
+         ],
+        None
+    )
+
+    node_items = sorted(graph.node.items(), key=lambda t: nodeids[t[0]])
+
+    nodeitems = Orange.data.Table(
+        nodedomain,
+        [[str(node.get("query_name", "")),
+          str(node.get("key", "")),
+          str(", ".join(node.get("synonyms", []))),
+          str(node.get("query_name", nodeid)),
+          "true" if "query_name" in node else "false"]
+         for nodeid, node in node_items]
+    )
+    graph.set_items(nodeitems)
+    return graph
+
+
+def main():
+    from PyQt4.QtGui import QApplication
+    app = QApplication([])
+    w = OWGeneNetwork()
+    brown = Orange.data.Table("brown-selected")
+    w.set_data(Orange.data.Table(brown[:5]))
+    w.show()
+    app.exec_()
+    w.saveSettings()
+    return 0
+
+if __name__ == "__main__":
+    main()