Commits

Antonio Cuni committed 4193517 Draft

The _csv reader now expects unicode strings as input, not byte strings

Comments (0)

Files changed (2)

pypy/module/_csv/interp_reader.py

-from pypy.rlib.rstring import StringBuilder
+from pypy.rlib.rstring import UnicodeBuilder
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.gateway import NoneNotWrapped, unwrap_spec
                             break
                 raise
             self.line_num += 1
-            line = space.str_w(w_line)
+            line = space.unicode_w(w_line)
             for c in line:
-                if c == '\0':
+                if c == u'\0':
                     raise self.error("line contains NULL byte")
 
                 if state == START_RECORD:
-                    if c == '\n' or c == '\r':
+                    if c == u'\n' or c == u'\r':
                         state = EAT_CRNL
                         continue
                     # normal character - handle as START_FIELD
                     # fall-through to the next case
 
                 if state == START_FIELD:
-                    field_builder = StringBuilder(64)
+                    field_builder = UnicodeBuilder(64)
                     # expecting field
-                    if c == '\n' or c == '\r':
+                    if c == u'\n' or c == u'\r':
                         # save empty field
                         self.save_field(field_builder)
                         state = EAT_CRNL
                     elif c == dialect.escapechar:
                         # possible escaped character
                         state = ESCAPED_CHAR
-                    elif c == ' ' and dialect.skipinitialspace:
+                    elif c == u' ' and dialect.skipinitialspace:
                         # ignore space at start of field
                         pass
                     elif c == dialect.delimiter:
 
                 elif state == IN_FIELD:
                     # in unquoted field
-                    if c == '\n' or c == '\r':
+                    if c == u'\n' or c == u'\r':
                         # end of line
                         self.save_field(field_builder)
                         state = EAT_CRNL
                         # save field - wait for new field
                         self.save_field(field_builder)
                         state = START_FIELD
-                    elif c == '\n' or c == '\r':
+                    elif c == u'\n' or c == u'\r':
                         # end of line
                         self.save_field(field_builder)
                         state = EAT_CRNL
                             dialect.delimiter, dialect.quotechar))
 
                 elif state == EAT_CRNL:
-                    if not (c == '\n' or c == '\r'):
+                    if not (c == u'\n' or c == u'\r'):
                         raise self.error("new-line character seen in unquoted "
                                         "field - do you need to open the file "
                                         "in universal-newline mode?")
                 self.save_field(field_builder)
                 break
             elif state == ESCAPED_CHAR:
-                self.add_char(field_builder, '\n')
+                self.add_char(field_builder, u'\n')
                 state = IN_FIELD
             elif state == IN_QUOTED_FIELD:
                 pass
             elif state == ESCAPE_IN_QUOTED_FIELD:
-                self.add_char(field_builder, '\n')
+                self.add_char(field_builder, u'\n')
                 state = IN_QUOTED_FIELD
             elif state == START_FIELD:
                 # save empty field
-                field_builder = StringBuilder(1)
+                field_builder = UnicodeBuilder(1)
                 self.save_field(field_builder)
                 break
             else:
         dialect = interp_attrproperty_w('dialect', W_Reader),
         line_num = interp_attrproperty('line_num', W_Reader),
         __iter__ = interp2app(W_Reader.iter_w),
-        next = interp2app(W_Reader.next_w),
+        __next__ = interp2app(W_Reader.next_w),
         __doc__ = """CSV reader
 
 Reader objects are responsible for reading and parsing tabular data

pypy/module/_csv/test/test_reader.py

+from __future__ import unicode_literals
 from pypy.conftest import gettestobjspace
 
 
     def test_simple_reader(self):
         self._read_test(['foo:bar\n'], [['foo', 'bar']], delimiter=':')
 
+    def test_cannot_read_bytes(self):
+        import _csv
+        reader = _csv.reader([b'foo'])
+        raises(TypeError, "next(reader)")
+
     def test_read_oddinputs(self):
         self._read_test([], [])
         self._read_test([''], [[]])
         import _csv as csv
         r = csv.reader(['line,1', 'line,2', 'line,3'])
         assert r.line_num == 0
-        r.next()
+        next(r)
         assert r.line_num == 1
-        r.next()
+        next(r)
         assert r.line_num == 2
-        r.next()
+        next(r)
         assert r.line_num == 3
-        raises(StopIteration, r.next)
+        raises(StopIteration, "next(r)")
         assert r.line_num == 3
 
     def test_dubious_quote(self):