Armin Rigo committed 3f01d4d

Copy the CPython-style error messages more closely

Comments (0)

Files changed (1)

rpython/rlib/runicode.py

             # about the pos anymore and we just ignore the value
             if not charsleft:
                 # there's only the start byte and nothing else
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+1)
                 result.append(r)
                     (ordch1 == 0xe0 and ordch2 < 0xa0)):
                     # or (ordch1 == 0xed and ordch2 > 0x9f)
                     # second byte invalid, take the first and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+1)
                     result.append(r)
                     continue
                 else:
                     # second byte valid, but third byte missing
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+2)
                     result.append(r)
                     (ordch1 == 0xf0 and ordch2 < 0x90) or
                     (ordch1 == 0xf4 and ordch2 > 0x8f)):
                     # second byte invalid, take the first and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+1)
                     result.append(r)
                     continue
                 elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2:   # 0b10
                     # third byte invalid, take the first two and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+2)
                     result.append(r)
                     continue
                 else:
                     # there's only 1 or 2 valid cb, but the others are missing
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+charsleft+1)
                     result.append(r)
                     break
 
         if n == 0:
-            r, pos = errorhandler(errors, 'utf-8',
+            r, pos = errorhandler(errors, 'utf8',
                                   'invalid start byte',
                                   s, pos, pos+1)
             result.append(r)
         elif n == 2:
             ordch2 = ord(s[pos+1])
             if ordch2>>6 != 0x2:   # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 # surrogates shouldn't be valid UTF-8!
                 or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
                 ):
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
             elif ordch3>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
             if (ordch2>>6 != 0x2 or     # 0b10
                 (ordch1 == 0xf0 and ordch2 < 0x90) or
                 (ordch1 == 0xf4 and ordch2 > 0x8f)):
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
             elif ordch3>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
                 continue
             elif ordch4>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+3)
                 result.append(r)
                             _encodeUCS4(result, ch3)
                             continue
                     if not allow_surrogates:
-                        ru, rs, pos = errorhandler(errors, 'utf-8',
+                        ru, rs, pos = errorhandler(errors, 'utf8',
                                                    'surrogates not allowed',
                                                    s, pos-1, pos)
                         if rs is not None:
                             if ord(ch) < 0x80:
                                 result.append(chr(ord(ch)))
                             else:
-                                errorhandler('strict', 'utf-8',
+                                errorhandler('strict', 'utf8',
                                              'surrogates not allowed',
                                              s, pos-1, pos)
                         continue
         if len(s) - pos < 2:
             if not final:
                 break
-            r, pos = errorhandler(errors, 'utf-16', "truncated data",
+            r, pos = errorhandler(errors, 'utf16', "truncated data",
                                   s, pos, len(s))
             result.append(r)
             if len(s) - pos < 2:
             if not final:
                 break
             errmsg = "unexpected end of data"
-            r, pos = errorhandler(errors, 'utf-16', errmsg, s, pos - 2, len(s))
+            r, pos = errorhandler(errors, 'utf16', errmsg, s, pos - 2, len(s))
             result.append(r)
             if len(s) - pos < 2:
                 break
                                            (ch2 & 0x3FF)) + 0x10000))
                 continue
             else:
-                r, pos = errorhandler(errors, 'utf-16',
+                r, pos = errorhandler(errors, 'utf16',
                                       "illegal UTF-16 surrogate",
                                       s, pos - 4, pos - 2)
                 result.append(r)
         else:
-            r, pos = errorhandler(errors, 'utf-16',
+            r, pos = errorhandler(errors, 'utf16',
                                   "illegal encoding",
                                   s, pos - 2, pos)
             result.append(r)
         if len(s) - pos < 4:
             if not final:
                 break
-            r, pos = errorhandler(errors, 'utf-32', "truncated data",
+            r, pos = errorhandler(errors, 'utf32', "truncated data",
                                   s, pos, len(s))
             result.append(r)
             if len(s) - pos < 4:
         ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
               (ord(s[pos + iorder[1]]) << 8)  | ord(s[pos + iorder[0]]))
         if ch >= 0x110000:
-            r, pos = errorhandler(errors, 'utf-32', "codepoint not in range(0x110000)",
+            r, pos = errorhandler(errors, 'utf32', "codepoint not in range(0x110000)",
                                   s, pos, len(s))
             result.append(r)
             continue
                     if base64bits >= 6:
                         # We've seen at least one base-64 character
                         msg = "partial character in shift sequence"
-                        res, pos = errorhandler(errors, 'utf-7',
+                        res, pos = errorhandler(errors, 'utf7',
                                                 msg, s, pos-1, pos)
                         result.append(res)
                         continue
                         # Some bits remain; they should be zero
                         if base64buffer != 0:
                             msg = "non-zero padding bits in shift sequence"
-                            res, pos = errorhandler(errors, 'utf-7',
+                            res, pos = errorhandler(errors, 'utf7',
                                                     msg, s, pos-1, pos)
                             result.append(res)
                             continue
         else:
             pos += 1
             msg = "unexpected special character"
-            res, pos = errorhandler(errors, 'utf-7', msg, s, pos-1, pos)
+            res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos)
             result.append(res)
 
     # end of string
             base64bits >= 6 or
             (base64bits > 0 and base64buffer != 0)):
             msg = "unterminated shift sequence"
-            res, pos = errorhandler(errors, 'utf-7', msg, s, shiftOutStartPos, pos)
+            res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos)
             result.append(res)
     elif inShift:
         pos = shiftOutStartPos # back off output
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.