Commits

Andrew Dalke committed 879e6fb

Got the VerboseCachingTargetsMatcher to handle the new threshold option.

  • Participants
  • Parent commits f7b70fe

Comments (0)

Files changed (1)

         return True
 
 class VerboseCachingTargetsMatcher(object):
-    def __init__(self, targets):
+    def __init__(self, targets, required_match_count=None):
         self.targets = targets
+        if required_match_count is None:
+            required_match_count = len(targets)
         self.cache = {}
+        self.required_match_count = required_match_count
+        self._num_allowed_errors = len(targets) - required_match_count
         self.num_lookups = self.num_cached_true = self.num_cached_false = 0
         self.num_search_true = self.num_search_false = self.num_matches = 0
 
+    def shift_targets(self):
+        assert self._num_allowed_errors >= 0, (self.required_match_count, self._num_allowed_errors)
+        if self._num_allowed_errors > 1:
+            self.targets = self.targets[1:]
+            self._num_allowed_errors = len(self.targets) - self.required_match_count
+            
     def __getitem__(self, smarts, missing=object()):
         self.num_lookups += 1
         x = self.cache.get(smarts, missing)
         if len(count_list) >= threshold_count:
             supported_bondtypes.add(bondtype)
             # For better filtering, find the largest count which is in threshold
-            # This can likely be done with:
-            #  count_list.sort(reversed=True)
-            #  max_count = count_list[threshold_count-1]
+            # Keep track of the counts while building the subgraph.
+            # The subgraph can never have more types of a given count.
 
     
     fragmented_mols = [remove_unknown_bondtypes(typed_mol, bondtype_counts) for typed_mol in typed_mols]
             pat = Chem.MolFromSmarts(mcs.smarts)
         for structure in structures:
             atom_indices = structure.GetSubstructMatch(pat)
+            assert atom_indices
             bond_indices = _get_match_bond_indices(pat, structure, atom_indices)
             subgraph = Subgraph(atom_indices, bond_indices)
             if atom_class_tag: