Commits

Antoine Pitrou committed 218208b

Add Path.rglob()

Comments (0)

Files changed (4)

 Version 0.6
 ^^^^^^^^^^^
 
-- Add Path.glob()
+- Add Path.glob() and Path.rglob()
 - Add PurePath.match()
 
 Version 0.5
    loop is encountered along the resolution path, :exc:`ValueError` is raised.
 
 
+.. method:: Path.rglob(pattern)
+
+   Like :meth:`glob`, but also match the pattern recursively in all subdirectories::
+
+      >>> sorted(Path().rglob("*.py"))
+      [PosixPath('build/lib/pathlib.py'),
+       PosixPath('docs/conf.py'),
+       PosixPath('pathlib.py'),
+       PosixPath('setup.py'),
+       PosixPath('test_pathlib.py')]
+
+
 .. method:: Path.rmdir()
 
    Remove this directory.  The directory must be empty.
         # A stub for the opener argument to built-in open()
         return self._accessor.open(self, flags, mode)
 
-    def _select_children(self, pattern_parts):
+    def _select_children(self, pattern_parts, recursive):
         # Helper for globbing
+        # XXX symlink loops are not detected (symlinked dirs are followed)
         if not pattern_parts:
             yield self
             return
         if not self.is_dir():
             return
         pat = pattern_parts[0]
-        pattern_parts = pattern_parts[1:]
+        child_parts = pattern_parts[1:]
         if _is_wildcard_pattern(pat):
             cf = self._flavour.casefold
             for name in self._accessor.listdir(self):
                 name = cf(name)
                 if fnmatch.fnmatchcase(name, pat):
                     child_path = self._make_child_relpath(name)
-                    for p in child_path._select_children(pattern_parts):
+                    for p in child_path._select_children(child_parts, False):
+                        yield p
+                elif recursive:
+                    child_path = self._make_child_relpath(name)
+                    for p in child_path._select_children(pattern_parts, recursive):
                         yield p
         else:
             child_path = self._make_child_relpath(pat)
             if child_path.exists():
-                for p in child_path._select_children(pattern_parts):
+                for p in child_path._select_children(child_parts, False):
                     yield p
+            if recursive:
+                for name in self._accessor.listdir(self):
+                    child_path = self._make_child_relpath(name)
+                    for p in child_path._select_children(pattern_parts, recursive):
+                        yield p
 
     # Public API
 
         drv, root, pattern_parts = self._flavour.parse_parts((pattern,))
         if drv or root:
             raise NotImplementedError("Non-relative patterns are unsupported")
-        for p in self._select_children(pattern_parts):
+        for p in self._select_children(pattern_parts, recursive=False):
+            yield p
+
+    def rglob(self, pattern):
+        """Recursively yield all existing files (of any kind, including
+        directories) matching the given pattern, anywhere in this subtree.
+        """
+        pattern = self._flavour.casefold(pattern)
+        drv, root, pattern_parts = self._flavour.parse_parts((pattern,))
+        if drv or root:
+            raise NotImplementedError("Non-relative patterns are unsupported")
+        for p in self._select_children(pattern_parts, recursive=True):
             yield p
 
     def absolute(self):
     #  |-- dirB/
     #  |    |-- fileB
     #       |-- linkD -> "../dirB"
+    #  |-- dirC/
+    #  |    |-- fileC
+    #  |    |-- dirD/
+    #  |    |    |-- fileD
     #  |-- fileA
     #  |-- linkA -> "fileA"
     #  |-- linkB -> "dirB"
         self.addCleanup(support.rmtree, BASE)
         os.mkdir(join('dirA'))
         os.mkdir(join('dirB'))
+        os.mkdir(join('dirC'))
+        os.mkdir(join('dirC', 'dirD'))
         with open(join('fileA'), 'wb') as f:
             f.write(b"this is file A\n")
         with open(join('dirB', 'fileB'), 'wb') as f:
             f.write(b"this is file B\n")
+        with open(join('dirC', 'fileC'), 'wb') as f:
+            f.write(b"this is file C\n")
+        with open(join('dirC', 'dirD', 'fileD'), 'wb') as f:
+            f.write(b"this is file D\n")
         if not symlink_skip_reason:
             if os.name == 'nt':
                 # Workaround for http://bugs.python.org/issue13772
         self.assertIsInstance(p, collections.Iterable)
         it = iter(p)
         paths = set(it)
-        expected = ['dirA', 'dirB', 'fileA']
+        expected = ['dirA', 'dirB', 'dirC', 'fileA']
         if not symlink_skip_reason:
             expected += ['linkA', 'linkB']
         self.assertEqual(paths, { P(BASE, q) for q in expected })
                                            errno.ENOENT, errno.EINVAL))
 
     def test_glob(self):
+        def _check(glob, expected):
+            self.assertEqual(set(glob), { P(BASE, q) for q in expected })
         P = self.cls
         p = P(BASE)
         it = p.glob("fileA")
         self.assertIsInstance(it, collections.Iterator)
-        self.assertEqual(set(it), { P(BASE, "fileA") })
-        it = p.glob("fileB")
-        self.assertEqual(set(it), set())
-        it = p.glob("dir*/file*")
-        self.assertEqual(set(it), { P(BASE, "dirB/fileB") })
-        it = p.glob("*A")
-        expected = ['dirA', 'fileA']
-        if not symlink_skip_reason:
-            expected += ['linkA']
-        self.assertEqual(set(it), { P(BASE, q) for q in expected })
-        it = p.glob("*B/*")
-        expected = ['dirB/fileB']
-        if not symlink_skip_reason:
-            expected += ['dirB/linkD', 'linkB/fileB', 'linkB/linkD']
-        self.assertEqual(set(it), { P(BASE, q) for q in expected })
-        it = p.glob("*/fileB")
-        expected = ['dirB/fileB']
-        if not symlink_skip_reason:
-            expected += ['linkB/fileB']
-        self.assertEqual(set(it), { P(BASE, q) for q in expected })
+        _check(it, ["fileA"])
+        _check(p.glob("fileB"), [])
+        _check(p.glob("dir*/file*"), ["dirB/fileB", "dirC/fileC"])
+        if symlink_skip_reason:
+            _check(p.glob("*A"), ['dirA', 'fileA'])
+        else:
+            _check(p.glob("*A"), ['dirA', 'fileA', 'linkA'])
+        if symlink_skip_reason:
+            _check(p.glob("*B/*"), ['dirB/fileB'])
+        else:
+            _check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD',
+                                    'linkB/fileB', 'linkB/linkD'])
+        if symlink_skip_reason:
+            _check(p.glob("*/fileB"), ['dirB/fileB'])
+        else:
+            _check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB'])
+
+    def test_rglob(self):
+        def _check(glob, expected):
+            self.assertEqual(set(glob), { P(BASE, q) for q in expected })
+        P = self.cls
+        p = P(BASE)
+        it = p.rglob("fileA")
+        self.assertIsInstance(it, collections.Iterator)
+        # XXX cannot test because of symlink loops in the test setup
+        #_check(it, ["fileA"])
+        #_check(p.rglob("fileB"), ["dirB/fileB"])
+        #_check(p.rglob("*/fileA"), [""])
+        #_check(p.rglob("*/fileB"), ["dirB/fileB"])
+        #_check(p.rglob("file*"), ["fileA", "dirB/fileB"])
+        # No symlink loops here
+        p = P(BASE, "dirC")
+        _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
+        _check(p.rglob("*/*"), ["dirC/dirD/fileD"])
 
     def test_glob_dotdot(self):
         # ".." is not special in globs