Commits

Johannes Köster committed 8c742ca

Various small speed improvements.

  • Participants
  • Parent commits ab644cb

Comments (0)

Files changed (4)

     return resources
 
 if __name__ == "__main__":
-    #import cProfile
-    #cProfile.run('main()', "snakemake.profile")
-    main()
+    import cProfile
+    cProfile.run('main()', "snakemake.profile")
+    #main()
                 reason.derived = False
             return job
 
+        reason = self.reason
+        _needrun = self._needrun
+        dependencies = self.dependencies
+        depending = self.depending
+
         candidates = set(self.jobs)
 
-        queue = list(filter(self.reason, map(needrun, candidates)))
+        queue = list(filter(reason, map(needrun, candidates)))
         visited = set(queue)
         while queue:
             job = queue.pop(0)
-            self._needrun.add(job)
+            _needrun.add(job)
 
-            for job_, files in self.dependencies[job].items():
+            for job_, files in dependencies[job].items():
                 missing_output = job_.missing_output(requested=files)
-                self.reason(job_).missing_output.update(missing_output)
+                reason(job_).missing_output.update(missing_output)
                 if missing_output and not job_ in visited:
                     visited.add(job_)
                     queue.append(job_)
 
-            for job_, files in self.depending[job].items():
+            for job_, files in depending[job].items():
                 if job_ in candidates:
-                    self.reason(job_).updated_input_run.update(files)
+                    reason(job_).updated_input_run.update(files)
                     if not job_ in visited:
                         visited.add(job_)
                         queue.append(job_)
 
-        self._len = len(self._needrun)
+        self._len = len(_needrun)
 
     def update_priority(self):
         """ Update job priorities. """
         dependencies = defaultdict(list)
         # use a set to circumvent multiple jobs for the same file
         # if user specified it twice
+        file2jobs = self.file2jobs
         for file in set(job.input):
             try:
-                for job_ in self.file2jobs(file):
-                    dependencies[file].append(job_)
+                jobs = self.file2jobs(file)
+                dependencies[file].extend(jobs)
             except MissingRuleException as ex:
                 pass
         return dependencies
         return Job(targetrule, self)
 
     def file2jobs(self, targetfile):
-        jobs = list()
-        for rule in self.rules:
-            if rule.is_producer(targetfile):
-                jobs.append(Job(rule, self, targetfile=targetfile))
+        jobs = [Job(rule, self, targetfile=targetfile) for rule in self.rules if rule.is_producer(targetfile)]
         if not jobs:
             raise MissingRuleException(targetfile)
         return jobs
 
     def match(self, target):
         match = self.regex().match(target)
-        if match and len(match.group()) == len(target):
-            return match
-        return None
+        return match if match else None
 
     def __eq__(self, other):
         f = other._file if isinstance(other, _IOFile) else other
 
 
 def regex(filepattern):
-    f = ""
+    f = []
     last = 0
     wildcards = set()
     for match in _wildcard_regex.finditer(filepattern):
-        f += re.escape(filepattern[last:match.start()])
+        f.append(re.escape(filepattern[last:match.start()]))
         wildcard = match.group("name")
         if wildcard in wildcards:
             if match.group("constraint"):
                 raise ValueError("If multiple wildcards of the same name "
                 "appear in a string, eventual constraints have to be defined "
                 "at the first occurence and will be inherited by the others.")
-            f += "(?P={})".format(wildcard)
+            f.append("(?P={})".format(wildcard))
         else:
             wildcards.add(wildcard)
-            f += "(?P<{}>{})".format(
+            f.append("(?P<{}>{})".format(
                 wildcard,
                 match.group("constraint")
-                    if match.group("constraint") else ".+")
+                    if match.group("constraint") else ".+"))
         last = match.end()
-    f += re.escape(filepattern[last:])
-    return f
+    f.append(re.escape(filepattern[last:]))
+    f.append("$") # ensure that the match spans the whole file
+    return "".join(f)
 
 
 def apply_wildcards(pattern, wildcards, fill_missing=False,

snakemake/rules.py

         """
         try:
             for o in self.output:
-                match = o.match(requested_output)
-                if match and len(match.group()) == len(requested_output):
+                if o.match(requested_output):
                     return True
             return False
         except sre_constants.error as ex: