Commits

Johannes Köster committed a364551 Merge

Merge branch 'master' of bitbucket.org:johanneskoester/snakemake

  • Participants
  • Parent commits 9b660af, 8c742ca

Comments (0)

Files changed (6)

     return resources
 
 if __name__ == "__main__":
-    #import cProfile
-    #cProfile.run('main()', "snakemake.profile")
-    main()
+    import cProfile
+    cProfile.run('main()', "snakemake.profile")
+    #main()
                 reason.derived = False
             return job
 
+        reason = self.reason
+        _needrun = self._needrun
+        dependencies = self.dependencies
+        depending = self.depending
+
         candidates = set(self.jobs)
 
-        queue = list(filter(self.reason, map(needrun, candidates)))
+        queue = list(filter(reason, map(needrun, candidates)))
         visited = set(queue)
         while queue:
             job = queue.pop(0)
-            self._needrun.add(job)
+            _needrun.add(job)
 
-            for job_, files in self.dependencies[job].items():
+            for job_, files in dependencies[job].items():
                 missing_output = job_.missing_output(requested=files)
-                self.reason(job_).missing_output.update(missing_output)
+                reason(job_).missing_output.update(missing_output)
                 if missing_output and not job_ in visited:
                     visited.add(job_)
                     queue.append(job_)
 
-            for job_, files in self.depending[job].items():
+            for job_, files in depending[job].items():
                 if job_ in candidates:
-                    self.reason(job_).updated_input_run.update(files)
+                    reason(job_).updated_input_run.update(files)
                     if not job_ in visited:
                         visited.add(job_)
                         queue.append(job_)
 
-        self._len = len(self._needrun)
+        self._len = len(_needrun)
 
     def update_priority(self):
         """ Update job priorities. """
         dependencies = defaultdict(list)
         # use a set to circumvent multiple jobs for the same file
         # if user specified it twice
+        file2jobs = self.file2jobs
         for file in set(job.input):
             try:
-                for job_ in self.file2jobs(file):
-                    dependencies[file].append(job_)
+                jobs = self.file2jobs(file)
+                dependencies[file].extend(jobs)
             except MissingRuleException as ex:
                 pass
         return dependencies
         return Job(targetrule, self)
 
     def file2jobs(self, targetfile):
-        jobs = list()
-        for rule in self.rules:
-            if rule.is_producer(targetfile):
-                jobs.append(Job(rule, self, targetfile=targetfile))
+        jobs = [Job(rule, self, targetfile=targetfile) for rule in self.rules if rule.is_producer(targetfile)]
         if not jobs:
             raise MissingRuleException(targetfile)
         return jobs
 
     def match(self, target):
         match = self.regex().match(target)
-        if match and len(match.group()) == len(target):
-            return match
-        return None
+        return match if match else None
 
     def __eq__(self, other):
         f = other._file if isinstance(other, _IOFile) else other
 
 
 def regex(filepattern):
-    f = ""
+    f = []
     last = 0
     wildcards = set()
     for match in _wildcard_regex.finditer(filepattern):
-        f += re.escape(filepattern[last:match.start()])
+        f.append(re.escape(filepattern[last:match.start()]))
         wildcard = match.group("name")
         if wildcard in wildcards:
             if match.group("constraint"):
                 raise ValueError("If multiple wildcards of the same name "
                 "appear in a string, eventual constraints have to be defined "
                 "at the first occurence and will be inherited by the others.")
-            f += "(?P={})".format(wildcard)
+            f.append("(?P={})".format(wildcard))
         else:
             wildcards.add(wildcard)
-            f += "(?P<{}>{})".format(
+            f.append("(?P<{}>{})".format(
                 wildcard,
                 match.group("constraint")
-                    if match.group("constraint") else ".+")
+                    if match.group("constraint") else ".+"))
         last = match.end()
-    f += re.escape(filepattern[last:])
-    return f
+    f.append(re.escape(filepattern[last:]))
+    f.append("$") # ensure that the match spans the whole file
+    return "".join(f)
 
 
 def apply_wildcards(pattern, wildcards, fill_missing=False,

snakemake/jobs.py

         self.rule = rule
         self.dag = dag
         self.targetfile = targetfile
-        self._hash = None
         self.wildcards_dict = self.rule.get_wildcards(targetfile)
         self.wildcards = Wildcards(fromdict=self.wildcards_dict)
         self._format_wildcards = (self.wildcards
         for f in self.input:
             if self.ruleio[f] in self.rule.dynamic_input:
                 self.dynamic_input.add(f)
+        self._hash = self.rule.__hash__()
+        if not self.dynamic_output:
+            for o in self.output:
+                self._hash ^= o.__hash__()
 
     @property
     def b64id(self):
     def __gt__(self, other):
         return self.rule.__gt__(other.rule)
 
+
     def __hash__(self):
-        if self._hash is None:
-            self._hash = self.rule.__hash__()
-            if not self.dynamic_output:
-                for o in self.output:
-                        self._hash ^= o.__hash__()
         return self._hash
 
     @staticmethod

snakemake/rules.py

         """
         try:
             for o in self.output:
-                match = o.match(requested_output)
-                if match and len(match.group()) == len(requested_output):
+                if o.match(requested_output):
                     return True
             return False
         except sre_constants.error as ex:

snakemake/scheduler.py

 import operator
 from functools import partial
 from collections import defaultdict
-from itertools import chain
+from itertools import chain, accumulate
 
 from snakemake.executors import DryrunExecutor, TouchExecutor
 from snakemake.executors import ClusterExecutor, CPUExecutor
             len(self.dag), self.finished_jobs / len(self.dag)))
 
 
def cumsum(iterable, zero=(0,)):
    """Return the running sums of *iterable* as a list, prefixed by *zero*.

    With the default prefix the result starts at 0:
    cumsum([1, 2, 3]) == [0, 1, 3, 6].

    *zero* may be any iterable of leading values.  The default is an
    immutable tuple rather than the previous mutable list ([0]): mutable
    default arguments are a Python anti-pattern (one shared object across
    all calls).  Behavior is unchanged — the prefix is only iterated, never
    mutated — so existing callers are unaffected.
    """
    return list(chain(zero, accumulate(iterable)))