Éric Araujo avatar Éric Araujo committed edcdef7

Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).

These regex changes fix a number of issues for distutils on Windows:
- #6884: impossible to include a file starting with 'build'
- #9691 and #14004: sdist includes too many files
- #13193: test_filelist failures

This commit replaces the incorrect changes done in 0a94e2f807c7 and
90b30d62caf2 to fix #13193; we were too eager to fix the test failures
and I did not study the code enough before greenlighting patches. This
time we have unit tests from the problems reported by users to be sure
we have the right fix.

Thanks to Nadeem Vawda for his help.

Comments (0)

Files changed (4)

Lib/distutils/filelist.py

 
         Return True if files are found, False otherwise.
         """
+        # XXX docstring lying about what the special chars are?
         files_found = False
         pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
         self.debug_print("include_pattern: applying regex r'%s'" %
     # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
     # and by extension they shouldn't match such "special characters" under
     # any OS.  So change all non-escaped dots in the RE to match any
-    # character except the special characters.
-    # XXX currently the "special characters" are just slash -- i.e. this is
-    # Unix-only.
-    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
-
+    # character except the special characters (currently: just os.sep).
+    sep = os.sep
+    if os.sep == '\\':
+        # we're using a regex to manipulate a regex, so we need
+        # to escape the backslash twice
+        sep = r'\\\\'
+    escaped = r'\1[^%s]' % sep
+    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
     return pattern_re
 
 
     if prefix is not None:
         # ditch end of pattern character
         empty_pattern = glob_to_re('')
-        prefix_re = (glob_to_re(prefix))[:-len(empty_pattern)]
-        # paths should always use / in manifest templates
-        pattern_re = "^%s/.*%s" % (prefix_re, pattern_re)
+        prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
+        sep = os.sep
+        if os.sep == '\\':
+            sep = r'\\'
+        pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
     else:                               # no prefix -- respect anchor flag
         if anchor:
             pattern_re = "^" + pattern_re

Lib/distutils/tests/test_filelist.py

 """Tests for distutils.filelist."""
+import os
 import re
 import unittest
 from distutils import debug
 from test.support import captured_stdout, run_unittest
 from distutils.tests import support
 
+MANIFEST_IN = """\
+include ok
+include xo
+exclude xo
+include foo.tmp
+include buildout.cfg
+global-include *.x
+global-include *.txt
+global-exclude *.tmp
+recursive-include f *.oo
+recursive-exclude global *.x
+graft dir
+prune dir3
+"""
+
+
+def make_local_path(s):
+    """Converts '/' in a string to os.sep"""
+    return s.replace('/', os.sep)
+
 
 class FileListTestCase(support.LoggingSilencer,
                        unittest.TestCase):
         self.clear_logs()
 
     def test_glob_to_re(self):
-        # simple cases
-        self.assertEqual(glob_to_re('foo*'), 'foo[^/]*\\Z(?ms)')
-        self.assertEqual(glob_to_re('foo?'), 'foo[^/]\\Z(?ms)')
-        self.assertEqual(glob_to_re('foo??'), 'foo[^/][^/]\\Z(?ms)')
+        sep = os.sep
+        if os.sep == '\\':
+            sep = re.escape(os.sep)
 
-        # special cases
-        self.assertEqual(glob_to_re(r'foo\\*'), r'foo\\\\[^/]*\Z(?ms)')
-        self.assertEqual(glob_to_re(r'foo\\\*'), r'foo\\\\\\[^/]*\Z(?ms)')
-        self.assertEqual(glob_to_re('foo????'), r'foo[^/][^/][^/][^/]\Z(?ms)')
-        self.assertEqual(glob_to_re(r'foo\\??'), r'foo\\\\[^/][^/]\Z(?ms)')
+        for glob, regex in (
+            # simple cases
+            ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
+            ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
+            ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
+            # special cases
+            (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
+            (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
+            ('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
+            (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
+            regex = regex % {'sep': sep}
+            self.assertEqual(glob_to_re(glob), regex)
+
+    def test_process_template_line(self):
+        # testing  all MANIFEST.in template patterns
+        file_list = FileList()
+        l = make_local_path
+
+        # simulated file list
+        file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt',
+                              'buildout.cfg',
+                              # filelist does not filter out VCS directories,
+                              # it's sdist that does
+                              l('.hg/last-message.txt'),
+                              l('global/one.txt'),
+                              l('global/two.txt'),
+                              l('global/files.x'),
+                              l('global/here.tmp'),
+                              l('f/o/f.oo'),
+                              l('dir/graft-one'),
+                              l('dir/dir2/graft2'),
+                              l('dir3/ok'),
+                              l('dir3/sub/ok.txt'),
+                             ]
+
+        for line in MANIFEST_IN.split('\n'):
+            if line.strip() == '':
+                continue
+            file_list.process_template_line(line)
+
+        wanted = ['ok',
+                  'buildout.cfg',
+                  'four.txt',
+                  l('.hg/last-message.txt'),
+                  l('global/one.txt'),
+                  l('global/two.txt'),
+                  l('f/o/f.oo'),
+                  l('dir/graft-one'),
+                  l('dir/dir2/graft2'),
+                 ]
+
+        self.assertEqual(file_list.files, wanted)
 
     def test_debug_print(self):
         file_list = FileList()
         self.assertEqual(file_list.allfiles, ['a.py', 'b.txt'])
 
     def test_process_template(self):
+        l = make_local_path
         # invalid lines
         file_list = FileList()
         for action in ('include', 'exclude', 'global-include',
 
         # include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+        file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
 
         file_list.process_template_line('include *.py')
         self.assertEqual(file_list.files, ['a.py'])
 
         # exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'b.txt', 'd/c.py']
+        file_list.files = ['a.py', 'b.txt', l('d/c.py')]
 
         file_list.process_template_line('exclude *.py')
-        self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+        self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('exclude *.rb')
-        self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+        self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
         self.assertWarnings()
 
         # global-include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+        file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
 
         file_list.process_template_line('global-include *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('global-include *.rb')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
         self.assertWarnings()
 
         # global-exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'b.txt', 'd/c.py']
+        file_list.files = ['a.py', 'b.txt', l('d/c.py')]
 
         file_list.process_template_line('global-exclude *.py')
         self.assertEqual(file_list.files, ['b.txt'])
 
         # recursive-include
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py'])
+        file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
+                                l('d/d/e.py')])
 
         file_list.process_template_line('recursive-include d *.py')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('recursive-include e *.py')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertWarnings()
 
         # recursive-exclude
         file_list = FileList()
-        file_list.files = ['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py']
+        file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
 
         file_list.process_template_line('recursive-exclude d *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
         self.assertNoWarnings()
 
         file_list.process_template_line('recursive-exclude e *.py')
-        self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+        self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
         self.assertWarnings()
 
         # graft
         file_list = FileList()
-        file_list.set_allfiles(['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py'])
+        file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
+                                l('f/f.py')])
 
         file_list.process_template_line('graft d')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('graft e')
-        self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+        self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
         self.assertWarnings()
 
         # prune
         file_list = FileList()
-        file_list.files = ['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py']
+        file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
 
         file_list.process_template_line('prune d')
-        self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+        self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
         self.assertNoWarnings()
 
         file_list.process_template_line('prune e')
-        self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+        self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
         self.assertWarnings()
 
 

Lib/distutils/tests/test_sdist.py

 from os.path import join
 from textwrap import dedent
 
+try:
+    import zlib
+    ZLIB_SUPPORT = True
+except ImportError:
+    ZLIB_SUPPORT = False
+
 from test.support import captured_stdout, check_warnings, run_unittest
 
 from distutils.command.sdist import sdist, show_formats
 MANIFEST = """\
 # file GENERATED by distutils, do NOT edit
 README
+buildout.cfg
 inroot.txt
 setup.py
 data%(sep)sdata.dt
 somecode%(sep)sdoc.txt
 """
 
-try:
-    import zlib
-    ZLIB_SUPPORT = True
-except ImportError:
-    ZLIB_SUPPORT = False
-
-
 class SDistTestCase(PyPIRCCommandTestCase):
 
     def setUp(self):
         dist_folder = join(self.tmp_dir, 'dist')
         result = os.listdir(dist_folder)
         result.sort()
-        self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'] )
+        self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'])
 
         os.remove(join(dist_folder, 'fake-1.0.tar'))
         os.remove(join(dist_folder, 'fake-1.0.tar.gz'))
         self.write_file((data_dir, 'data.dt'), '#')
         some_dir = join(self.tmp_dir, 'some')
         os.mkdir(some_dir)
+        # make sure VCS directories are pruned (#14004)
+        hg_dir = join(self.tmp_dir, '.hg')
+        os.mkdir(hg_dir)
+        self.write_file((hg_dir, 'last-message.txt'), '#')
+        # a buggy regex used to prevent this from working on windows (#6884)
+        self.write_file((self.tmp_dir, 'buildout.cfg'), '#')
         self.write_file((self.tmp_dir, 'inroot.txt'), '#')
         self.write_file((some_dir, 'file.txt'), '#')
         self.write_file((some_dir, 'other_file.txt'), '#')
 
         dist.data_files = [('data', ['data/data.dt',
+                                     'buildout.cfg',
                                      'inroot.txt',
                                      'notexisting']),
                            'some/file.txt',
             zip_file.close()
 
         # making sure everything was added
-        self.assertEqual(len(content), 11)
+        self.assertEqual(len(content), 12)
 
         # checking the MANIFEST
         f = open(join(self.tmp_dir, 'MANIFEST'))
         try:
             manifest = f.read()
-            self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
         finally:
             f.close()
+        self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
 
     @unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run')
     def test_metadata_check_option(self):
 Library
 -------
 
+- Issue #6884: Fix long-standing bugs with MANIFEST.in parsing in distutils
+  on Windows.
+
 - Issue #8033: sqlite3: Fix 64-bit integer handling in user functions
   on 32-bit architectures. Initial patch by Philippe Devalkeneer.
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.