Commits

dalke  committed 8fbeea0

Added an option to match the valences

  • Participants
  • Parent commits d349cd2
  • Tags fmcs-1.0b2

Comments (0)

Files changed (1)

     maximize = "bonds"
     atom_compare = "elements"
     bond_compare = "bondtypes"
+    match_valences = False
     ring_matches_ring_only = False
     complete_rings_only = False
 
 
 # Create a TypedMolecule using the element-based typing scheme
 
-def get_typed_molecule(rdmol, atom_typer, bond_typer,
+# TODO: refactor this. It doesn't seem right to pass boolean flags.
+
+def get_typed_molecule(rdmol, atom_typer, bond_typer, match_valences = Default.match_valences,
                        ring_matches_ring_only = Default.ring_matches_ring_only):
     atoms = list(rdmol.GetAtoms())
     atom_smarts_types = atom_typer(atoms)
 
+    # Get the valence information, if requested
+    if match_valences:
+        new_atom_smarts_types = []
+        for (atom, atom_smarts_type) in zip(atoms, atom_smarts_types):
+            valence = atom.GetImplicitValence() + atom.GetExplicitValence()
+            valence_str = "v%d" % valence
+            if "," in atom_smarts_type:
+                atom_smarts_type += ";" + valence_str
+            else:
+                atom_smarts_type += valence_str
+            new_atom_smarts_types.append(atom_smarts_type)
+        atom_smarts_types = new_atom_smarts_types
+        
+
     # Store and reuse the bond information because I use it twice.
     # In a performance test, the times went from 2.0 to 1.4 seconds by doing this.
     bonds = list(rdmol.GetBonds())
     return TypedMolecule(mol, atoms, bonds, atom_smarts_types, bond_smarts_types, canonical_bondtypes)
 
 
-# The input molecules can be either an RDKit Molecule or a 2-element
-# tuple containing the RDKit Molecule and the list of atom classes.
-# Convert them into a list of TypedMolecule instances using the given
-# parameters.
-
-def convert_input_to_typed_molecules(mols, atom_typer, bond_typer, ring_matches_ring_only):
+def convert_input_to_typed_molecules(mols, atom_typer, bond_typer, match_valences, ring_matches_ring_only):
     typed_mols = []
-    atom_type_source = 0
-    for molno, input_mol in enumerate(mols):
-        # Check if the input contains user-defined atom classes or
-        if isinstance(input_mol, tuple):
-            rdmol, atom_classes = input_mol
-        else:
-            rdmol = input_mol
-            atom_classes = None
-
-        if atom_classes is None:
-            if not atom_type_source:
-                atom_type_source = 1
-            elif atom_type_source != 1:
-                raise ValueError("mols[%d] does not specify atom types but previous molecules did" %
-                                 (molno,))
-            typed_mol = get_typed_molecule(rdmol, atom_typer, bond_typer, ring_matches_ring_only)
-        else:
-            raise NotImplementedError("blah blah")
-            if not atom_type_source:
-                atom_type_source = 2
-            elif atom_type_source != 2:
-                raise ValueError("mols[%d] specifies atom types but previous molecules did not" %
-                                 (molno,))
-            _check_atom_classes(molno, rdmol.GetNumAtoms(), atom_classes)
-            typed_mol = get_specified_types(rdmol, atom_types, ring_matches_ring_only)
-                                            
+    for molno, rdmol in enumerate(mols):
+        typed_mol = get_typed_molecule(rdmol, atom_typer, bond_typer,
+                                       match_valences=match_valences, ring_matches_ring_only=ring_matches_ring_only)
         typed_mols.append(typed_mol)
 
     return typed_mols
          maximize = Default.maximize,
          atom_compare = Default.atom_compare,
          bond_compare = Default.bond_compare,
+         match_valences = Default.match_valences,
          ring_matches_ring_only = False,
          complete_rings_only = False,
          timeout=Default.timeout,
 
 
     # Make copies of all of the molecules so I can edit without worrying about the original
-    typed_mols = convert_input_to_typed_molecules(mols, atom_typer, bond_typer, ring_matches_ring_only)
+    typed_mols = convert_input_to_typed_molecules(mols, atom_typer, bond_typer,
+                                                  match_valences = match_valences,
+                                                  ring_matches_ring_only = ring_matches_ring_only)
     bondtype_counts = get_canonical_bondtype_counts(typed_mols)
     fragmented_mols = [remove_unknown_bondtypes(typed_mol, bondtype_counts) for typed_mol in typed_mols]
     timer.mark("end fragment")
                     "identical if and only if their corresponding atom classes are the same. Note "
                     "that '003' and '3' are treated as identical values. (Not used by default)")
 
+parser.add_argument("--match-valences", action="store_true",
+                    help=
+                    "Modify the atom comparison so that two atoms must also have the same total "
+                    "bond order in order to match.")
+
                         
 parser.add_argument("--ring-matches-ring-only", action="store_true",
                     help=
                maximize = args.maximize,
                atom_compare = args.atom_compare,
                bond_compare = args.bond_compare,
+               match_valences = args.match_valences,
                ring_matches_ring_only = args.ring_matches_ring_only,
                complete_rings_only = args.complete_rings_only,
                timeout = args.timeout,