1. Armin Rigo
  2. cpython-withatomic

Commits

Benjamin Peterson  committed 58a7c50

#5391 make mmap work exclusively with bytes

  • Participants
  • Parent commits 7ee1cef
  • Branches default

Comments (0)

Files changed (4)

File Doc/library/mmap.rst

View file
    :synopsis: Interface to memory-mapped files for Unix and Windows.
 
 
-Memory-mapped file objects behave like both strings and like file objects.
-Unlike normal string objects, however, these are mutable.  You can use mmap
-objects in most places where strings are expected; for example, you can use
-the :mod:`re` module to search through a memory-mapped file.  Since they're
-mutable, you can change a single character by doing ``obj[index] = 'a'``, or
-change a substring by assigning to a slice: ``obj[i1:i2] = '...'``.  You can
-also read and write data starting at the current file position, and
+Memory-mapped file objects behave both like :class:`bytes` and like file
+objects. Unlike normal :class:`bytes` objects, however, these are mutable.
+You can use mmap objects in most places where :class:`bytes` are expected; for
+example, you can use the :mod:`re` module to search through a memory-mapped file.
+Since they're mutable, you can change a single byte by doing ``obj[index] = 97``,
+or change a subsequence by assigning to a slice: ``obj[i1:i2] = b'...'``.
+You can also read and write data starting at the current file position, and
 :meth:`seek` through the file to different positions.
 
 A memory-mapped file is created by the :class:`mmap` constructor, which is
 
       # write a simple example file
       with open("hello.txt", "wb") as f:
-          f.write("Hello Python!\n")
+          f.write(b"Hello Python!\n")
 
       with open("hello.txt", "r+b") as f:
           # memory-map the file, size 0 means whole file
           map = mmap.mmap(f.fileno(), 0)
           # read content via standard file methods
-          print(map.readline())  # prints "Hello Python!"
+          print(map.readline())  # prints b"Hello Python!\n"
           # read content via slice notation
-          print(map[:5])  # prints "Hello"
+          print(map[:5])  # prints b"Hello"
           # update content using slice notation;
           # note that new content must have same size
-          map[6:] = " world!\n"
+          map[6:] = b" world!\n"
           # ... and read again using standard file methods
           map.seek(0)
-          print(map.readline())  # prints "Hello  world!"
+          print(map.readline())  # prints b"Hello  world!\n"
           # close the map
           map.close()
 
       import os
 
       map = mmap.mmap(-1, 13)
-      map.write("Hello world!")
+      map.write(b"Hello world!")
 
       pid = os.fork()
 
       result in an exception being raised.
 
 
-   .. method:: find(string[, start[, end]])
+   .. method:: find(sub[, start[, end]])
 
-      Returns the lowest index in the object where the substring *string* is
-      found, such that *string* is contained in the range [*start*, *end*].
+      Returns the lowest index in the object where the subsequence *sub* is
+      found, such that *sub* is contained in the range [*start*, *end*].
       Optional arguments *start* and *end* are interpreted as in slice notation.
       Returns ``-1`` on failure.
 
 
    .. method:: read(num)
 
-      Return a string containing up to *num* bytes starting from the current
-      file position; the file position is updated to point after the bytes that
-      were returned.
+      Return a :class:`bytes` containing up to *num* bytes starting from the
+      current file position; the file position is updated to point after the
+      bytes that were returned.
 
 
    .. method:: read_byte()
 
-      Returns a string of length 1 containing the character at the current file
-      position, and advances the file position by 1.
+      Returns a byte at the current file position as an integer, and advances
+      the file position by 1.
 
 
    .. method:: readline()
       throw a :exc:`TypeError` exception.
 
 
-   .. method:: rfind(string[, start[, end]])
+   .. method:: rfind(sub[, start[, end]])
 
-      Returns the highest index in the object where the substring *string* is
-      found, such that *string* is contained in the range [*start*, *end*].
+      Returns the highest index in the object where the subsequence *sub* is
+      found, such that *sub* is contained in the range [*start*, *end*].
       Optional arguments *start* and *end* are interpreted as in slice notation.
       Returns ``-1`` on failure.
 
       Returns the current position of the file pointer.
 
 
-   .. method:: write(string)
+   .. method:: write(bytes)
 
-      Write the bytes in *string* into memory at the current position of the
+      Write the bytes in *bytes* into memory at the current position of the
       file pointer; the file position is updated to point after the bytes that
       were written. If the mmap was created with :const:`ACCESS_READ`, then
       writing to it will throw a :exc:`TypeError` exception.
 
    .. method:: write_byte(byte)
 
-      Write the single-character string *byte* into memory at the current
+      Write the integer *byte* into memory at the current
       position of the file pointer; the file position is advanced by ``1``. If
       the mmap was created with :const:`ACCESS_READ`, then writing to it will
       throw a :exc:`TypeError` exception.

File Lib/test/test_mmap.py

View file
         # Simple sanity checks
 
         tp = str(type(m))  # SF bug 128713:  segfaulted on Linux
-        self.assertEqual(m.find('foo'), PAGESIZE)
+        self.assertEqual(m.find(b'foo'), PAGESIZE)
 
         self.assertEqual(len(m), 2*PAGESIZE)
 
 
     def test_find_end(self):
         # test the new 'end' parameter works as expected
-        f = open(TESTFN, 'w+')
-        data = 'one two ones'
+        f = open(TESTFN, 'wb+')
+        data = b'one two ones'
         n = len(data)
         f.write(data)
         f.flush()
         m = mmap.mmap(f.fileno(), n)
         f.close()
 
-        self.assertEqual(m.find('one'), 0)
-        self.assertEqual(m.find('ones'), 8)
-        self.assertEqual(m.find('one', 0, -1), 0)
-        self.assertEqual(m.find('one', 1), 8)
-        self.assertEqual(m.find('one', 1, -1), 8)
-        self.assertEqual(m.find('one', 1, -2), -1)
+        self.assertEqual(m.find(b'one'), 0)
+        self.assertEqual(m.find(b'ones'), 8)
+        self.assertEqual(m.find(b'one', 0, -1), 0)
+        self.assertEqual(m.find(b'one', 1), 8)
+        self.assertEqual(m.find(b'one', 1, -1), 8)
+        self.assertEqual(m.find(b'one', 1, -2), -1)
 
 
     def test_rfind(self):
         # test the new 'end' parameter works as expected
-        f = open(TESTFN, 'w+')
-        data = 'one two ones'
+        f = open(TESTFN, 'wb+')
+        data = b'one two ones'
         n = len(data)
         f.write(data)
         f.flush()
         m = mmap.mmap(f.fileno(), n)
         f.close()
 
-        self.assertEqual(m.rfind('one'), 8)
-        self.assertEqual(m.rfind('one '), 0)
-        self.assertEqual(m.rfind('one', 0, -1), 8)
-        self.assertEqual(m.rfind('one', 0, -2), 0)
-        self.assertEqual(m.rfind('one', 1, -1), 8)
-        self.assertEqual(m.rfind('one', 1, -2), -1)
+        self.assertEqual(m.rfind(b'one'), 8)
+        self.assertEqual(m.rfind(b'one '), 0)
+        self.assertEqual(m.rfind(b'one', 0, -1), 8)
+        self.assertEqual(m.rfind(b'one', 0, -2), 0)
+        self.assertEqual(m.rfind(b'one', 1, -1), 8)
+        self.assertEqual(m.rfind(b'one', 1, -2), -1)
 
 
     def test_double_close(self):
         # Test write_byte()
         for i in range(len(data)):
             self.assertEquals(m.tell(), i)
-            m.write_byte(data[i:i+1])
+            m.write_byte(data[i])
             self.assertEquals(m.tell(), i+1)
-        self.assertRaises(ValueError, m.write_byte, b"x")
+        self.assertRaises(ValueError, m.write_byte, b"x"[0])
         self.assertEquals(m[:], data)
         # Test read_byte()
         m.seek(0)
         for i in range(len(data)):
             self.assertEquals(m.tell(), i)
-            # XXX: Disable this test for now because it's not clear
-            # which type of object m.read_byte returns. Currently, it
-            # returns 1-length str (unicode).
-            if 0:
-                self.assertEquals(m.read_byte(), data[i:i+1])
-            else:
-                m.read_byte()
+            self.assertEquals(m.read_byte(), data[i])
             self.assertEquals(m.tell(), i+1)
         self.assertRaises(ValueError, m.read_byte)
         # Test read()

File Misc/NEWS

View file
 
 Core and Builtins
 -----------------
+
 - Implement PEP 378, Format Specifier for Thousands Separator, for
   integers.
 
 Extension Modules
 -----------------
 
+- Issue #5391: mmap now deals exclusively with bytes.
+
 - Issue #5463: In struct module, remove deprecated overflow wrapping
   when packing an integer: struct.pack('=L', -1) now raises
   struct.error instead of returning b'\xff\xff\xff\xff'.  The

File Modules/mmapmodule.c

View file
 	if (self->pos < self->size) {
 	        char value = self->data[self->pos];
 		self->pos += 1;
-		return Py_BuildValue("c", value);
+		return Py_BuildValue("b", value);
 	} else {
 		PyErr_SetString(PyExc_ValueError, "read byte out of range");
 		return NULL;
 	Py_ssize_t len;
 
 	CHECK_VALID(NULL);
-	if (!PyArg_ParseTuple(args, reverse ? "s#|nn:rfind" : "s#|nn:find",
+	if (!PyArg_ParseTuple(args, reverse ? "y#|nn:rfind" : "y#|nn:find",
 			      &needle, &len, &start, &end)) {
 		return NULL;
 	} else {
 	char *data;
 
 	CHECK_VALID(NULL);
-	if (!PyArg_ParseTuple(args, "s#:write", &data, &length))
+	if (!PyArg_ParseTuple(args, "y#:write", &data, &length))
 		return(NULL);
 
 	if (!is_writable(self))
 	char value;
 
 	CHECK_VALID(NULL);
-	if (!PyArg_ParseTuple(args, "c:write_byte", &value))
+	if (!PyArg_ParseTuple(args, "b:write_byte", &value))
 		return(NULL);
 
 	if (!is_writable(self))