Commits

Stefan H. Holek committed 5db4bb0 Merge

Merge tarek/distribute.

Comments (0)

Files changed (5)

setuptools/command/egg_info.py

         if item.endswith('\r'):     # Fix older sdists built on Windows
             item = item[:-1]
         path = convert_path(item)
-        if os.path.exists(path):
-            self.files.append(path)
+        try:
+            if os.path.exists(path):
+                self.files.append(path)
+            else:
+                log.warn("%r not found -- skipping", path)
+        except UnicodeEncodeError:
+            log.warn("%r not %s encodable -- skipping", path,
+                sys.getfilesystemencoding())
 
 
 
         by 'add_defaults()' and 'read_template()') to the manifest file
         named by 'self.manifest'.
         """
+        # The manifest must be UTF-8 encodable. See #303.
+        if sys.version_info >= (3,):
+            files = []
+            for file in self.filelist.files:
+                try:
+                    file.encode("utf-8")
+                except UnicodeEncodeError:
+                    log.warn("'%s' not UTF-8 encodable -- skipping" % file)
+                else:
+                    files.append(file)
+            self.filelist.files = files
+
         files = self.filelist.files
         if os.sep!='/':
             files = [f.replace(os.sep,'/') for f in files]
     """
     contents = "\n".join(contents)
     if sys.version_info >= (3,):
-        contents = contents.encode("utf-8", "surrogateescape")
+        contents = contents.encode("utf-8")
     f = open(filename, "wb")        # always write POSIX-style manifest
     f.write(contents)
     f.close()

setuptools/command/sdist.py

         manifest = open(self.manifest, 'rbU')
         for line in manifest:
             if sys.version_info >= (3,):
-                line = line.decode('UTF-8', 'surrogateescape')
+                try:
+                    line = line.decode('UTF-8')
+                except UnicodeDecodeError:
+                    log.warn("%r not UTF-8 decodable -- skipping" % line)
+                    continue
             # ignore comments and blank lines
             line = line.strip()
             if line.startswith('#') or not line:

setuptools/command/upload_docs.py

 except ImportError:
     from setuptools.command.upload import upload
 
-if sys.version_info >= (3,):
+if sys.version_info >= (3, 1):
     errors = 'surrogateescape'
 else:
     errors = 'strict'

setuptools/package_index.py

 
     # Double scheme does not raise on Mac OS X as revealed by a
     # failing test. We would expect "nonnumeric port". Refs #20.
-    if sys.platform == 'darwin':
-        if netloc.endswith(':'):
-            raise httplib.InvalidURL("nonnumeric port: ''")
+    if netloc.endswith(':'):
+        raise httplib.InvalidURL("nonnumeric port: ''")
 
     if scheme in ('http', 'https'):
         auth, host = urllib2.splituser(netloc)

setuptools/tests/test_sdist.py

 import unittest
 import urllib
 import unicodedata
-import posixpath
 from StringIO import StringIO
 
 
     sys.stdout, sys.stderr = old_stdout, old_stderr
 
 
-# Fake byte literals to shut up Python <= 2.5
+# Fake byte literals for Python <= 2.5
 def b(s, encoding='utf-8'):
     if sys.version_info >= (3,):
         return s.encode(encoding)
     return s
 
 
-# HFS Plus returns decomposed UTF-8
+# Convert to POSIX path
+def posix(path):
+    if sys.version_info >= (3,) and not isinstance(path, unicode):
+        return path.replace(os.sep.encode('ascii'), b('/'))
+    else:
+        return path.replace(os.sep, '/')
+
+
+# HFS Plus uses decomposed UTF-8
 def decompose(path):
     if isinstance(path, unicode):
         return unicodedata.normalize('NFD', path)
     return path
 
 
-# HFS Plus quotes unknown bytes like so: %F6
-def hfs_quote(path):
-    if isinstance(path, unicode):
-        raise TypeError('bytes are required')
-    try:
-        u = path.decode('utf-8')
-    except UnicodeDecodeError:
-        path = urllib.quote(path) # Not UTF-8
-    else:
-        if sys.version_info >= (3,):
-            path = u
-    return path
-
-
 class TestSdistTest(unittest.TestCase):
 
     def setUp(self):
         os.mkdir('sdist_test.egg-info')
 
         # UTF-8 filename
-        filename = posixpath.join('sdist_test', 'smörbröd.py')
+        filename = os.path.join('sdist_test', 'smörbröd.py')
 
         # Add UTF-8 filename and write manifest
         quiet()
 
         # The manifest should be UTF-8 encoded
         try:
-            u = contents.decode('UTF-8')
+            u_contents = contents.decode('UTF-8')
         except UnicodeDecodeError, e:
             self.fail(e)
 
         # The manifest should contain the UTF-8 filename
         if sys.version_info >= (3,):
-            self.assertTrue(filename in u)
+            self.assertTrue(posix(filename) in u_contents)
         else:
-            self.assertTrue(filename in contents)
+            self.assertTrue(posix(filename) in contents)
 
-    def test_manifest_is_written_with_surrogateescape_error_handler(self):
-        # Test for #303.
-        dist = Distribution(SETUP_ATTRS)
-        dist.script_name = 'setup.py'
-        mm = manifest_maker(dist)
-        mm.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt')
-        os.mkdir('sdist_test.egg-info')
+    # Python 3 only
+    if sys.version_info >= (3,):
 
-        # Latin-1 filename
-        filename = posixpath.join(b('sdist_test'), LATIN1_FILENAME)
+        def test_write_manifest_allows_utf8_filenames(self):
+            # Test for #303.
+            dist = Distribution(SETUP_ATTRS)
+            dist.script_name = 'setup.py'
+            mm = manifest_maker(dist)
+            mm.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt')
+            os.mkdir('sdist_test.egg-info')
 
-        # Add filename with surrogates and write manifest
-        quiet()
-        try:
-            mm.run()
-            if sys.version_info >= (3,):
-                u = filename.decode('utf-8', 'surrogateescape')
-                mm.filelist.files.append(u)
-            else:
-                mm.filelist.files.append(filename)
-            mm.write_manifest()
-        finally:
-            unquiet()
+            # UTF-8 filename
+            filename = os.path.join(b('sdist_test'), b('smörbröd.py'))
 
-        manifest = open(mm.manifest, 'rbU')
-        contents = manifest.read()
-        manifest.close()
+            # Add filename and write manifest
+            quiet()
+            try:
+                mm.run()
+                u_filename = filename.decode('utf-8')
+                mm.filelist.files.append(u_filename)
+                # Re-write manifest
+                mm.write_manifest()
+            finally:
+                unquiet()
 
-        # The manifest should contain the Latin-1 filename
-        self.assertTrue(filename in contents)
+            manifest = open(mm.manifest, 'rbU')
+            contents = manifest.read()
+            manifest.close()
+
+            # The manifest should be UTF-8 encoded
+            try:
+                contents.decode('UTF-8')
+            except UnicodeDecodeError, e:
+                self.fail(e)
+
+            # The manifest should contain the UTF-8 filename
+            self.assertTrue(posix(filename) in contents)
+
+            # The filelist should have been updated as well
+            self.assertTrue(u_filename in mm.filelist.files)
+
+        def test_write_manifest_skips_non_utf8_filenames(self):
+            # Test for #303.
+            dist = Distribution(SETUP_ATTRS)
+            dist.script_name = 'setup.py'
+            mm = manifest_maker(dist)
+            mm.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt')
+            os.mkdir('sdist_test.egg-info')
+
+            # Latin-1 filename
+            filename = os.path.join(b('sdist_test'), LATIN1_FILENAME)
+
+            # Add filename with surrogates and write manifest
+            quiet()
+            try:
+                mm.run()
+                u_filename = filename.decode('utf-8', 'surrogateescape')
+                mm.filelist.files.append(u_filename)
+                # Re-write manifest
+                mm.write_manifest()
+            finally:
+                unquiet()
+
+            manifest = open(mm.manifest, 'rbU')
+            contents = manifest.read()
+            manifest.close()
+
+            # The manifest should be UTF-8 encoded
+            try:
+                contents.decode('UTF-8')
+            except UnicodeDecodeError, e:
+                self.fail(e)
+
+            # The Latin-1 filename should have been skipped
+            self.assertFalse(posix(filename) in contents)
+
+            # The filelist should have been updated as well
+            self.assertFalse(u_filename in mm.filelist.files)
 
     def test_manifest_is_read_with_utf8_encoding(self):
         # Test for #303.
         cmd = sdist(dist)
         cmd.ensure_finalized()
 
-        # UTF-8 filename
-        filename = os.path.join('sdist_test', 'smörbröd.py')
-        open(filename, 'w').close()
-
+        # Create manifest
         quiet()
         try:
             cmd.run()
         finally:
             unquiet()
 
+        # Add UTF-8 filename to manifest
+        filename = os.path.join(b('sdist_test'), b('smörbröd.py'))
+        cmd.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt')
+        manifest = open(cmd.manifest, 'ab')
+        manifest.write(b('\n')+filename)
+        manifest.close()
+
+        # The file must exist to be included in the filelist
+        open(filename, 'w').close()
+
+        # Re-read manifest
+        cmd.filelist.files = []
+        quiet()
+        try:
+            cmd.read_manifest()
+        finally:
+            unquiet()
+
         # The filelist should contain the UTF-8 filename
-        if sys.platform == 'darwin':
-            filename = decompose(filename)
+        if sys.version_info >= (3,):
+            filename = filename.decode('utf-8')
         self.assertTrue(filename in cmd.filelist.files)
 
-    def test_manifest_is_read_with_surrogateescape_error_handler(self):
-        # Test for #303.
+    # Python 3 only
+    if sys.version_info >= (3,):
 
-        # This is hard to test on HFS Plus because it quotes unknown
-        # bytes (see previous test). Furthermore, egg_info.FileList
-        # only appends filenames that os.path.exist.
+        def test_read_manifest_skips_non_utf8_filenames(self):
+            # Test for #303.
+            dist = Distribution(SETUP_ATTRS)
+            dist.script_name = 'setup.py'
+            cmd = sdist(dist)
+            cmd.ensure_finalized()
 
-        # We therefore write the manifest file by hand and check whether
-        # read_manifest produces a UnicodeDecodeError.
-        dist = Distribution(SETUP_ATTRS)
-        dist.script_name = 'setup.py'
-        cmd = sdist(dist)
-        cmd.ensure_finalized()
+            # Create manifest
+            quiet()
+            try:
+                cmd.run()
+            finally:
+                unquiet()
 
-        filename = os.path.join(b('sdist_test'), LATIN1_FILENAME)
-
-        quiet()
-        try:
-            cmd.run()
             # Add Latin-1 filename to manifest
+            filename = os.path.join(b('sdist_test'), LATIN1_FILENAME)
             cmd.manifest = os.path.join('sdist_test.egg-info', 'SOURCES.txt')
             manifest = open(cmd.manifest, 'ab')
-            manifest.write(filename+b('\n'))
+            manifest.write(b('\n')+filename)
             manifest.close()
+
+            # The file must exist to be included in the filelist
+            open(filename, 'w').close()
+
             # Re-read manifest
+            cmd.filelist.files = []
+            quiet()
             try:
-                cmd.read_manifest()
-            except UnicodeDecodeError, e:
-                self.fail(e)
-        finally:
-            unquiet()
+                try:
+                    cmd.read_manifest()
+                except UnicodeDecodeError, e:
+                    self.fail(e)
+            finally:
+                unquiet()
+
+            # The Latin-1 filename should have been skipped
+            filename = filename.decode('latin-1')
+            self.assertFalse(filename in cmd.filelist.files)
 
     def test_sdist_with_utf8_encoded_filename(self):
         # Test for #303.
         finally:
             unquiet()
 
-        # The filelist should contain the UTF-8 filename
-        # (in one representation or other)
-        if sys.version_info >= (3,):
-            filename = filename.decode(sys.getfilesystemencoding(), 'surrogateescape')
         if sys.platform == 'darwin':
             filename = decompose(filename)
-        self.assertTrue(filename in cmd.filelist.files)
+
+        if sys.version_info >= (3,):
+            if sys.platform == 'win32':
+                # Python 3 mangles the UTF-8 filename
+                filename = filename.decode('cp1252')
+                self.assertTrue(filename in cmd.filelist.files)
+            else:
+                filename = filename.decode('utf-8')
+                self.assertTrue(filename in cmd.filelist.files)
+        else:
+            self.assertTrue(filename in cmd.filelist.files)
 
     def test_sdist_with_latin1_encoded_filename(self):
         # Test for #303.
         finally:
             unquiet()
 
-        # The filelist should contain the Latin-1 filename
-        # (in one representation or other)
-        if sys.platform == 'darwin':
-            filename = hfs_quote(filename)
-        elif sys.version_info >= (3,):
-            filename = filename.decode(sys.getfilesystemencoding(), 'surrogateescape')
-        self.assertTrue(filename in cmd.filelist.files)
-
-    def test_decompose(self):
-        self.assertNotEqual('smörbröd.py', decompose('smörbröd.py'))
-
         if sys.version_info >= (3,):
-            self.assertEqual(len('smörbröd.py'), 11)
-            self.assertEqual(len(decompose('smörbröd.py')), 13)
+            filename = filename.decode('latin-1')
+            if sys.platform == 'win32':
+                # Latin-1 is similar to Windows-1252
+                self.assertTrue(filename in cmd.filelist.files)
+            else:
+                # The Latin-1 filename should have been skipped
+                self.assertFalse(filename in cmd.filelist.files)
         else:
-            self.assertEqual(len('smörbröd.py'), 13)
-            self.assertEqual(len(decompose('smörbröd.py')), 15)
-
-    def test_hfs_quote(self):
-        self.assertEqual(hfs_quote(LATIN1_FILENAME), 'sm%F6rbr%F6d.py')
-
-        # Bytes are required
-        if sys.version_info >= (3,):
-            self.assertRaises(TypeError, hfs_quote, 'smörbröd.py')
-        else:
-            self.assertRaises(TypeError, hfs_quote, 'smörbröd.py'.decode('utf-8'))
+            # No conversion takes place under Python 2 and the file
+            # is included. We shall keep it that way for BBB.
+            self.assertTrue(filename in cmd.filelist.files)
 
 
 def test_suite():