Commits

Walter Dörwald  committed 962800e

Backport checkin:
Reset internal buffers when seek() is called. This fixes SF bug #1156259.

  • Participants
  • Parent commits dfa097c
  • Branches 2.4

Comments (0)

Files changed (3)

File Lib/codecs.py

             from decoding errors.
 
         """
-        pass
+        self.bytebuffer = ""
+        self.charbuffer = u""
+        self.atcr = False
+
+    def seek(self, offset, whence):
+        """ Set the input stream's current position.
+
+            Resets the codec buffers used for keeping state.
+        """
+        self.reset()
+        self.stream.seek(offset, whence)
 
     def next(self):
 

File Lib/encodings/utf_16.py

 
 class StreamReader(codecs.StreamReader):
 
+    def reset(self):
+        codecs.StreamReader.reset(self)
+        try:
+            del self.decode
+        except AttributeError:
+            pass
+
     def decode(self, input, errors='strict'):
         (object, consumed, byteorder) = \
             codecs.utf_16_ex_decode(input, errors, 0, False)

File Lib/test/test_codecs.py

             return s
 
 class ReadTest(unittest.TestCase):
+    def test_seek(self):
+        # all codecs should be able to encode these
+        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
+        encoding = self.encoding
+        reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
+        for t in xrange(5):
+            # Test that calling seek resets the internal codec state and buffers
+            reader.seek(0, 0)
+            line = reader.readline()
+            self.assertEqual(s[:len(line)], line)
+
     def check_partial(self, input, partialresults):
         # get a StreamReader for the encoding and feed the bytestring version
         # of input to the reader byte by byte. Read everything available from