Commits

Antoine Pitrou committed e39c980

Improve Python 2.7 compatibility with unicode path names.

Comments (0)

Files changed (5)

 In development (unreleased)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-- Issue #21: fix TypeError under 2.7 when using new division.
+- Make pathlib usable under Python 2.7 with unicode pathnames (pure-ASCII
+  pathnames only, though).
+- Issue #21: fix TypeError under Python 2.7 when using new division.
 - Add tox support for easier testing.
 
 Version 0.97
     intern = intern
 except NameError:
     intern = sys.intern
+try:
+    basestring = basestring
+except NameError:
+    basestring = str
 
 supports_symlinks = True
 try:
 # Internals
 #
 
+_py2 = sys.version_info < (3,)
+_py2_fs_encoding = 'ascii'
+
+def _py2_fsencode(parts):
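+    # Python 2 only: minimal unicode support. Unicode parts are encoded
+    # with _py2_fs_encoding; all other parts are passed through unchanged.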
+    return [part.encode(_py2_fs_encoding) if isinstance(part, unicode)
+            else part for part in parts]
+
 def _is_wildcard_pattern(pat):
     # Whether this pattern needs actual matching using fnmatch, or can
     # be looked up directly as a file.
         self.join = self.sep.join
 
     def parse_parts(self, parts):
+        if _py2:
+            parts = _py2_fsencode(parts)
         parsed = []
         sep = self.sep
         altsep = self.altsep
         for a in args:
             if isinstance(a, PurePath):
                 parts += a._parts
-            elif isinstance(a, str):
-                # Assuming a str
+            elif isinstance(a, basestring):
                 parts.append(a)
             else:
                 raise TypeError(
         if altsep:
             actual = f([x.replace('/', altsep) for x in arg])
             self.assertEqual(actual, expected)
+        drv, root, parts = actual
+        # Results must be native str: neither bytes (py3) nor unicode (py2)
+        self.assertIsInstance(drv, str)
+        self.assertIsInstance(root, str)
+        for p in parts:
+            self.assertIsInstance(p, str)
 
     def test_parse_parts_common(self):
         check = self._check_parse_parts
 
     def _check_str(self, expected, args):
         p = self.cls(*args)
-        self.assertEqual(str(p), expected.replace('/', self.sep))
+        s = str(p)
+        self.assertEqual(s, expected.replace('/', self.sep))
+        self.assertIsInstance(s, str)
 
     def test_str_common(self):
         # Canonicalized paths roundtrip
         P = self.cls
         self.assertEqual(bytes(P('a/b')), b'a' + sep + b'b')
 
-    @with_fsencode
     def test_as_uri_common(self):
         P = self.cls
         with self.assertRaises(ValueError):
             p = self.cls(pathstr)
             clsname = p.__class__.__name__
             r = repr(p)
+            self.assertIsInstance(r, str)
             # The repr() is in the form ClassName("forward-slashes path")
             self.assertTrue(r.startswith(clsname + '('), r)
             self.assertTrue(r.endswith(')'), r)
         p = P('a/b')
         parts = p.parts
         self.assertEqual(parts, ('a', 'b'))
+        for part in parts:
+            self.assertIsInstance(part, str)
         # The object gets reused
         self.assertIs(parts, p.parts)
         # When the path is absolute, the anchor is a separate part
         self.assertEqual(P('/a'), P('///a'))
         self.assertNotEqual(P('/a'), P('//a'))
 
-    @with_fsencode
     def test_as_uri(self):
         P = self.cls
         self.assertEqual(P('/').as_uri(), 'file:///')
 
     @with_fsencode
     def test_as_uri(self):
-        from urllib.parse import quote_from_bytes
         P = self.cls
         with self.assertRaises(ValueError):
             P('/a/b').as_uri()
         self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
 
 
+def main():
+    unittest.main(__name__)
+
+
 if __name__ == "__main__":
-    unittest.main()
+    main()

test_pathlib_with_py2_unicode_literals.py

+
+import __future__
+import os
+import sys
+import types
+
+
+def compile_source_file(source_file, flags):
+    with open(source_file, "r") as f:
+        source = f.read()
+    return compile(source, os.path.basename(source_file), 'exec', flags)
+
+
+if __name__ == "__main__":
+    # Compile and run test_pathlib.py as if
+    # "from __future__ import unicode_literals" had been added at the top.
+    flags = __future__.CO_FUTURE_UNICODE_LITERALS
+    code = compile_source_file("test_pathlib.py", flags)
+    mod = types.ModuleType('test_pathlib')
+    mod.__file__ = "test_pathlib.py"
+    sys.modules[mod.__name__] = mod
+    eval(code, mod.__dict__)
+    mod.main()
 commands =
     {envpython} -bb test_pathlib.py {posargs}
     {envpython} -Qnew -bb test_pathlib.py {posargs}
+    {envpython} -bb test_pathlib_with_py2_unicode_literals.py {posargs}
 
 [testenv:docs]
 basepython = python