1. Andrew Dalke
  2. subgraph_enumeration

Commits

dalke  committed 98f7cbe

Looks like this is the improved code, where I use a Cython extension
for better canonical SMARTS generation

Comments (0)

Files changed (2)

File setup.py Added

View file
  • Ignore whitespace
  • Hide word diff
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+
+import os
+OE_DIR = os.environ["OE_DIR"]
+OE_INCLUDE_DIR = os.path.join(OE_DIR, "toolkits", "include")
+OE_LIB_DIR = os.path.join(OE_DIR, "toolkits", "lib")
+OE_LIBS = ["oechem", "oeplatform", "oesystem"]
+
+setup(
+    cmdclass = {'build_ext': build_ext},
+    ext_modules = [Extension("subgraph_enumeration", ["subgraph_enumeration.pyx"],
+                             language="c++",
+                             include_dirs=[OE_INCLUDE_DIR],
+                             library_dirs=[OE_LIB_DIR],
+                             libraries=OE_LIBS,
+                             ),
+                   ]
+)

File subgraph_enumeration.pyx Added

View file
  • Ignore whitespace
  • Hide word diff
+from libcpp.vector cimport vector
+
+# Minimal declaration of the C++ std::string members this module relies on.
+# Only the members listed here are visible to the Cython code below.
+cdef extern from "string" namespace "std":
+    cdef cppclass string:
+        # Used to hand the finished string back to Python as a C string.
+        char *c_str()
+        # Number of characters; drives the character-by-character scan.
+        int length()
+        # Read access to a single character by position.
+        char operator[](int)
+        # Append one character to the end of the string.
+        void push_back(char)
+
+# Declarations for the subset of the OEChem C++ API (header "oechem.h",
+# namespace OEChem) that this module calls.
+cdef extern from "oechem.h" namespace "OEChem":
+    # Atom handle. make_canonical_smarts only calls SetIsotope; GetIdx and
+    # IsAromatic are referenced solely by the commented-out earlier draft.
+    cdef cppclass OEAtomBase:
+        int GetIdx()
+        int IsAromatic()
+        int SetIsotope(int)
+
+    # Bond handle. make_canonical_smarts only calls SetOrder; the getters
+    # are referenced solely by the commented-out earlier draft.
+    cdef cppclass OEBondBase:
+        int GetBgnIdx()
+        int GetEndIdx()
+        int GetOrder()
+        int SetOrder(int)
+    
+    # Molecule container; atoms and bonds are created through it.
+    cdef cppclass OEGraphMol:
+        OEGraphMol()
+        int NumAtoms()
+        # Create an atom from an integer (called with an atomic number below).
+        OEAtomBase* NewAtom(int)
+        # Create a bond between two atoms with the given integer order.
+        OEBondBase* NewBond(OEAtomBase*, OEAtomBase*, int)
+
+    # Parse a SMILES C string into the molecule (used by num_atoms).
+    cdef int OEParseSmiles(OEGraphMol&, char *)
+    # Write a SMILES string for the molecule into the given std::string
+    # (used by make_canonical_smarts).
+    void OECreateIsoSmiString(string&, OEGraphMol&)
+
+# Pointer alias; make_canonical_smarts uses it as the element type of
+# vector[OEAtomBaseP].
+ctypedef OEAtomBase* OEAtomBaseP
+## def make_canonical_smarts(int max_atom_id, subgraph, atom_info, bond_info):
+##     cdef OEGraphMol new_mol
+##     cdef vector[OEAtomBaseP] new_atoms
+##     cdef atomic_num, isotope
+##     new_atoms.resize(max_atom_id+1)
+##     for atom_idx in subgraph.atoms:
+##         atomic_num, isotope = atom_info[atom_idx]
+##         new_atom = new_mol.NewAtom(atomic_num)
+##         new_atom.SetIsotope(atom.IsAromatic()+1)
+##         new_atoms[atom.GetIdx()] = new_atom
+
+##     for bond in subgraph.bonds:
+##         new_bond = new_mol.NewBond(new_atoms[bond.GetBgnIdx()],
+##                                    new_atoms[bond.GetEndIdx()],
+##                                    bond.GetOrder())
+##         if bond.IsAromatic():
+##             new_bond.SetOrder(1)
+
+
+def make_canonical_smarts(int max_atom_id, subgraph, atom_info, bond_info):
+    """Build a canonical SMARTS-like string for one subgraph.
+
+    A temporary OEGraphMol is assembled from the subgraph, written out
+    with OECreateIsoSmiString, and the resulting SMILES is rewritten by
+    removing the bracketed isotope tags.
+
+    Parameters:
+      max_atom_id -- largest atom index that may occur; the atom lookup
+                     table is sized to max_atom_id + 1 entries.
+      subgraph    -- object with iterable ``atoms`` (atom indices) and
+                     ``bonds`` (bond indices) attributes.
+      atom_info   -- indexable by atom index; each entry unpacks to
+                     (atomic_num, isotope).
+      bond_info   -- indexable by bond index; each entry unpacks to
+                     (bgn_idx, end_idx, order, is_aromatic).
+
+    Returns the rewritten string (converted from a C string).
+
+    Raises:
+      ValueError -- if max_atom_id is below -1, or a bond refers to an
+                    atom that is not part of subgraph.atoms.
+      IndexError -- if an atom index or bond endpoint lies outside
+                    0..max_atom_id.
+    """
+    cdef OEGraphMol new_mol
+    cdef vector[OEAtomBaseP] new_atoms
+    cdef OEAtomBase* new_atom
+    cdef OEBondBase* new_bond
+    cdef int atom_idx, atomic_num, isotope
+    cdef int bgn_idx, end_idx, order, is_aromatic
+    cdef int i, state
+    cdef string smiles
+    cdef string smarts
+
+    # A negative size would be reinterpreted as a huge unsigned value by
+    # vector.resize(); -1 is still accepted because it yields an empty table.
+    if max_atom_id < -1:
+        raise ValueError("invalid max_atom_id: %d" % max_atom_id)
+
+    # resize() value-initializes the new slots, so unused entries are NULL.
+    new_atoms.resize(max_atom_id+1)
+    for atom_idx in subgraph.atoms:
+        # The C++ vector does no bounds checking; reject bad indices here
+        # instead of corrupting memory.
+        if atom_idx < 0 or atom_idx > max_atom_id:
+            raise IndexError("atom index %d is outside the range 0..%d"
+                             % (atom_idx, max_atom_id))
+        atomic_num, isotope = atom_info[atom_idx]
+        new_atom = new_mol.NewAtom(atomic_num)
+        new_atom.SetIsotope(isotope)
+        new_atoms[atom_idx] = new_atom
+
+    for bond_idx in subgraph.bonds:
+        bgn_idx, end_idx, order, is_aromatic = bond_info[bond_idx]
+        if (bgn_idx < 0 or bgn_idx > max_atom_id or
+                end_idx < 0 or end_idx > max_atom_id):
+            raise IndexError("bond %r has an endpoint outside the range 0..%d"
+                             % (bond_idx, max_atom_id))
+        # Both endpoints must have been created by the atom loop above.
+        if new_atoms[bgn_idx] == NULL or new_atoms[end_idx] == NULL:
+            raise ValueError("bond %r refers to an atom that is not in the subgraph"
+                             % (bond_idx,))
+        new_bond = new_mol.NewBond(new_atoms[bgn_idx],
+                                   new_atoms[end_idx],
+                                   order)
+        if is_aromatic:
+            # Aromatic bonds are stored with order 1 in the temporary molecule.
+            new_bond.SetOrder(1)
+
+    OECreateIsoSmiString(smiles, new_mol)
+
+    # Rewrite the SMILES one character at a time.
+    #   0: outside brackets - copy characters, '[' opens a bracket atom
+    #   1: first character after '[' - the isotope tag ('1' or '2')
+    #   2: tag was '1' - copy the next character unchanged
+    #   3: tag was '2' - copy the next character with 32 added
+    #      (ASCII upper -> lower case, assuming an uppercase letter)
+    #   4: unexpected tag - emit '*'
+    #   5: rest of the bracket - copy until ']' closes it
+    # NOTE(review): the tags presumably encode non-aromatic (1) and
+    # aromatic (2) atoms, as in the commented-out draft above - confirm.
+    # NOTE(review): state 4 is never left, so every remaining character
+    # becomes '*'; this looks like a deliberate error marker - confirm.
+    state = 0
+    for i in range(smiles.length()):
+        if state == 0:
+            if smiles[i] == '[':
+                state = 1
+            else:
+                smarts.push_back(smiles[i])
+        elif state == 1:
+            if smiles[i] == '1':
+                state = 2
+            elif smiles[i] == '2':
+                state = 3
+            else:
+                state = 4
+        elif state == 2:
+            smarts.push_back(smiles[i])
+            state = 5
+        elif state == 3:
+            smarts.push_back(smiles[i]+32)
+            state = 5
+        elif state == 4:
+            smarts.push_back('*')
+        elif state == 5:
+            if smiles[i] == ']':
+                state = 0
+            else:
+                smarts.push_back(smiles[i])
+        else:
+            smarts.push_back('*')
+
+    return smarts.c_str()
+
+def num_atoms():
+    """Parse the fixed SMILES "c1ccccc1O" and return the molecule's NumAtoms().
+
+    Takes no arguments; appears to serve as a quick check that the
+    OEChem bindings are callable.
+    """
+    cdef OEGraphMol parsed_mol
+    OEParseSmiles(parsed_mol, "c1ccccc1O")
+    cdef int atom_count = parsed_mol.NumAtoms()
+    return atom_count