Commits

Johannes Köster  committed d56cb65

A proper check for periodicity of wildcard values.

  • Participants
  • Parent commits 43581bf

Comments (0)

Files changed (3)

File snakemake/dag.py

 from functools import partial, lru_cache
 from operator import itemgetter, attrgetter
 
-from snakemake.io import IOFile, _IOFile
+from snakemake.io import IOFile, _IOFile, PeriodicityDetector
 from snakemake.jobs import Job, Reason
 from snakemake.exceptions import RuleException, MissingInputException
 from snakemake.exceptions import MissingRuleException, AmbiguousRuleException
 from snakemake.exceptions import CyclicGraphException, MissingOutputException
 from snakemake.exceptions import IncompleteFilesException
+from snakemake.exceptions import PeriodicWildcardError
 from snakemake.exceptions import UnexpectedOutputException
 from snakemake.logging import logger
 
+
 __author__ = "Johannes Köster"
 
 
         self.force_incomplete = force_incomplete
         self.ignore_incomplete = ignore_incomplete
 
+        self.periodic_wildcard_detector = PeriodicityDetector()
+
     def init(self):
         """ Initialise the DAG. """
         for job in map(self.rule2job, self.targetrules):
                     "archive, e.g. by using 'touch'.".format(
                         ", ".join(job.expanded_output)), rule=job.rule)
 
+    def check_periodic_wildcards(self, job):
+        """ Raise an exception if a wildcard of the given job appears to be periodic,
+        indicating a cyclic dependency.
+
+        Every value in ``job.wildcards_dict`` is passed to
+        ``self.periodic_wildcard_detector.is_periodic``; the first value for
+        which a periodic substring is reported aborts the check.
+
+        Args:
+            job: the job whose wildcard values are inspected.
+
+        Raises:
+            PeriodicWildcardError: if a wildcard value is reported as periodic.
+                The exception is attached to ``job.rule``.
+        """
+        for wildcard, value in job.wildcards_dict.items():
+            # is_periodic returns the repeated substring, or None if the
+            # value is not periodic.
+            periodic_substring = self.periodic_wildcard_detector.is_periodic(value)
+            if periodic_substring is not None:
+                # Placeholder order: repeated substring, wildcard name,
+                # complete wildcard value.
+                raise PeriodicWildcardError(
+                    "The value {} in wildcard {} is periodically repeated ({}). "
+                    "This would lead to an infinite recursion. "
+                    "To avoid this, e.g. restrict the wildcards in this rule to certain values.".format(
+                        periodic_substring, wildcard, value), rule=job.rule)
+
     def handle_protected(self, job):
         """ Write-protect output files that are marked with protected(). """
         for f in job.expanded_output:
         cycles = list()
 
         for job in jobs:
+            self.check_periodic_wildcards(job)
             if file in job.input:
                 cycles.append(job)
                 continue
                 producer[file] = self.update(jobs, file=file, visited=visited,
                     skip_until_dynamic=skip_until_dynamic
                         or file in job.dynamic_input)
-            except (MissingInputException, CyclicGraphException) as ex:
+            except (MissingInputException, CyclicGraphException, PeriodicWildcardError) as ex:
                 exceptions[file] = ex
 
         for file, job_ in producer.items():

File snakemake/exceptions.py

             lineno=lineno, snakefile=snakefile)
 
 
+class PeriodicWildcardError(RuleException):
+    """Raised when a wildcard value of a job is detected as periodically repeated."""
+    pass
+
+
 class ProtectedOutputException(IOException):
     def __init__(self, rule, files, include=None, lineno=None, snakefile=None):
         super().__init__("Write-protected output files", rule, files, include,

File snakemake/io.py

 
 class Resources(Namedlist):
     pass
+
+
+##### Wildcard pumping detection #####
+
+class PeriodicityDetector:
+    """Detect strings that end in a consecutively repeated substring."""
+
+    def __init__(self, min_repeat=15, max_repeat=100):
+        """
+        Args:
+            min_repeat (int): The minimum number of consecutive occurrences of
+                a substring at the end of a value for it to count as periodic.
+            max_repeat (int): The maximum number of consecutive occurrences
+                the compiled pattern counts.
+        """
+        # The named group itself matches one occurrence, so the backreference
+        # only has to account for the remaining (n - 1) occurrences.
+        self.regex = re.compile("((?P<value>.+)(?P=value){{{min_repeat},{max_repeat}}})$".format(min_repeat=min_repeat - 1, max_repeat=max_repeat - 1))
+
+    def is_periodic(self, value):
+        """Returns the periodic substring or None if not periodic.
+
+        The returned string is the content of the ``value`` group, i.e. the
+        unit that is repeated at the end of the given value.
+        NOTE: ``.+`` is greedy, so the reported unit may itself consist of
+        several copies of a shorter period.
+        """
+        m = self.regex.search(value) # search for a periodic suffix.
+        if m is not None:
+            return m.group("value")