Commits

Johannes Köster  committed f82078f

Added function glob_wildcards

  • Participants
  • Parent commits e14444f

Comments (0)

Files changed (12)

File snakemake/exceptions.py

     linemaps -- a dict of a dict that maps for each snakefile
         the compiled lines to source code lines in the snakefile.
     """
-    #traceback.print_exception(type(ex), ex, ex.__traceback__)
+    traceback.print_exception(type(ex), ex, ex.__traceback__)
     origin = get_exception_origin(ex, linemaps)
     if origin is not None:
         lineno, file = origin

File snakemake/io.py

 import os
 import re
 import stat
-from itertools import product
-from collections import Iterable
+from itertools import product, chain
+from collections import Iterable, namedtuple
 from snakemake.exceptions import MissingOutputException, WorkflowError, WildcardError
 
 __author__ = "Johannes Köster"
                 values = [values]
             yield [(wildcard, value) for value in values]
 
-    expanded = list()
-    for comb in combinator(*flatten(wildcards)):
-        comb = dict(comb)
-        for filepattern in filepatterns:
-            expanded.append(filepattern.format(**comb))
-    return expanded
+    try:
+        return [filepattern.format(**comb) for comb in map(dict, combinator(*flatten(wildcards))) for filepattern in filepatterns]
+    except KeyError as e:
+        raise WildcardError("No values given for wildcard {}.".format(e))
+
+
+def glob_wildcards(pattern):
+    """
+    Glob the values of the wildcards by matching the given pattern to the filesystem.
+
+    The directory part of the pattern that precedes the first wildcard is
+    walked recursively (the current directory if there is no such part), and
+    the path of every file and directory found is matched against the pattern.
+
+    Arguments
+    pattern -- a file pattern containing wildcards, e.g. "test.{id}.txt"
+
+    Returns a named tuple with a list of values for each distinct wildcard,
+    with the fields in order of first appearance in the pattern.
+    """
+    first_wildcard = re.search("{[^{]", pattern)
+    prefix = pattern[:first_wildcard.start()] if first_wildcard else pattern
+    dirname = os.path.dirname(prefix)
+    # Remember whether the pattern itself carried a directory prefix: only
+    # then do walked paths have to keep that prefix verbatim in order to match.
+    implicit_cwd = not dirname
+    if implicit_cwd:
+        dirname = "."
+
+    # A wildcard may occur several times in a pattern, but namedtuple rejects
+    # duplicate field names, so keep only the first occurrence of each name.
+    names = []
+    for match in _wildcard_regex.finditer(pattern):
+        name = match.group('name')
+        if name not in names:
+            names.append(name)
+    Wildcards = namedtuple("Wildcards", names)
+    wildcards = Wildcards(*[list() for name in names])
+
+    compiled = re.compile(regex(pattern))
+    for dirpath, dirnames, filenames in os.walk(dirname):
+        for f in chain(filenames, dirnames):
+            path = os.path.join(dirpath, f)
+            if implicit_cwd:
+                # os.walk(".") reports "./name" and "./sub/name"; the pattern
+                # was written without that prefix, so strip it again.
+                path = os.path.normpath(path)
+            match = compiled.match(path)
+            if match:
+                for name, value in match.groupdict().items():
+                    getattr(wildcards, name).append(value)
+    return wildcards
 
 
 # TODO rewrite Namedlist!

File snakemake/workflow.py

 from snakemake.dag import DAG
 from snakemake.scheduler import JobScheduler
 from snakemake.parser import parse
-from snakemake.io import protected, temp, temporary, expand, dynamic
+from snakemake.io import protected, temp, temporary, expand, dynamic, glob_wildcards
 from snakemake.persistence import Persistence
 
 

File tests/test05/Snakefile

 
 rule compute1:
 	input: '{name}.in'
-	output: inter=['{name}.%s.inter'%c for c in chromosomes]
+	output: inter=expand('{{name}}.{chr}.inter', chr=chromosomes)
 	resources: gpu=1
 	run:
 		assert len(output.inter) > 0

File tests/test_globwildcards/Snakefile

+
+# Collect the values of {id} from the paths matching test.{id}.txt. The
+# trailing comma unpacks the single field of the returned named tuple.
+IDS, = glob_wildcards("test.{id}.txt")
+
+
+# Target rule: request one test.{id}.out for every globbed id.
+rule all:
+	input: expand("test.{id}.out", id=IDS)
+
+# Produce test.{id}.out from test.{id}.txt by touching the output file.
+rule:
+	input: "test.{id}.txt"
+	output: "test.{id}.out"
+	shell: "touch {output}"

File tests/test_globwildcards/expected-results/test.0.out

Empty file added.

File tests/test_globwildcards/expected-results/test.1.out

Empty file added.

File tests/test_globwildcards/expected-results/test.2.out

Empty file added.

File tests/test_globwildcards/test.0.txt

Empty file added.

File tests/test_globwildcards/test.1.txt

Empty file added.

File tests/test_globwildcards/test.2.txt

Empty file added.

File tests/tests.py

 
 def test_subworkflows():
     """Run the test case that dpath("test_subworkflows") resolves to."""
     run(dpath("test_subworkflows"))
+
+def test_globwildcards():
+    """Run the glob_wildcards test case that dpath("test_globwildcards") resolves to."""
+    run(dpath("test_globwildcards"))