Andrew Dalke committed ce215e2

Fixed various cases in how thresholds interact with the stop heuristics.

Comments (0)

Files changed (1)

     overall_counts = defaultdict(list)
     for typed_mol in typed_mols:
         bondtype_counts = get_counts(typed_mols[0].canonical_bondtypes)
-
         for k,v in bondtype_counts.items():
             overall_counts[k].append(v)
     return overall_counts
 
     def shift_targets(self):
         assert self._num_allowed_errors >= 0, (self.required_match_count, self._num_allowed_errors)
-        if self._num_allowed_errors > 1:
-            self.targets = self.targets[1:]
-            self._num_allowed_errors = len(self.targets) - self.required_match_count
+        self.targets = self.targets[1:]
+        self._num_allowed_errors = len(self.targets) - self.required_match_count
         
     def __missing__(self, smarts):
         num_allowed_errors = self._num_allowed_errors
     sizes = []
     max_num_atoms = fragmented_mols[0].rdmol.GetNumAtoms()
     max_num_bonds = fragmented_mols[0].rdmol.GetNumBonds()
+    ignored_count = 0
     for tiebreaker, (typed_mol, fragmented_mol) in enumerate(zip(typed_mols, fragmented_mols)):
         num_atoms, num_bonds = find_upper_fragment_size_limits(fragmented_mol.rdmol,
                                                                fragmented_mol.rdmol_atoms)
         if num_atoms < min_num_atoms:
-            timer.mark("end select")
-            timer.mark("end fmcs")
-            _update_times(timer, times)
-            return MCSResult(-1, -1, None, True)
-        if num_atoms < max_num_atoms:
-            max_num_atoms = num_atoms
-        if num_bonds < max_num_bonds:
-            max_num_bonds = num_bonds
-        sizes.append( (num_bonds, num_atoms, tiebreaker, typed_mol, fragmented_mol) )
-        
-    if sizes is None:
-        # There was a short-cut exit because one of the molecules didn't have a large enough fragment
+            # This isn't big enough to be in the MCS
+            ignored_count += 1
+            if ignored_count + threshold_count > len(mols):
+                # I might be able to exit because enough of the molecules don't have
+                # a large enough fragment to be part of the MCS
+                timer.mark("end select")
+                timer.mark("end fmcs")
+                _update_times(timer, times)
+                return MCSResult(-1, -1, None, True)
+        else:
+            if num_atoms < max_num_atoms:
+                max_num_atoms = num_atoms
+            if num_bonds < max_num_bonds:
+                max_num_bonds = num_bonds
+            sizes.append( (num_bonds, num_atoms, tiebreaker, typed_mol, fragmented_mol) )
+
+    if len(sizes) < threshold_count:
         timer.mark("end select")
         timer.mark("end fmcs")
         _update_times(timer, times)
         return MCSResult(-1, -1, None, True)
+
     assert min(size[1] for size in sizes) >= min_num_atoms
 
     # Sort so the molecule with the smallest largest fragment (by bonds) comes first.
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.