Serhiy Storchaka avatar Serhiy Storchaka committed 44a4f92

Issue #16688: Fix backreferences making case-insensitive regexes fail on non-ASCII strings.
Patch by Matthew Barnett.

Comments (0)

Files changed (4)

Lib/test/test_re.py

         self.assertEqual(r, s)
         self.assertEqual(n, size + 1)
 
+    def test_bug_16688(self):
+        # Issue 16688: Backreferences make case-insensitive regex fail on
+        # non-ASCII strings.
+        self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
+        self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
 Nick Barnes
 Quentin Barnes
 David Barnett
+Matthew Barnett
 Richard Barran
 Cesar Eduardo Barros
 Des Barry
 Library
 -------
 
+- Issue #16688: Fix backreferences making case-insensitive regexes fail on
+  non-ASCII strings. Patch by Matthew Barnett.
+
 - Issue #16485: Fix file descriptor not being closed if file header patching
   fails on closing of aifc file.
 
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
     Py_ssize_t i;
 
     /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
+    if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
         return 0;
 
     /* check known prefix */
         /* <INFO> <1=skip> <2=flags> <3=min> ... */
         if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
             TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
+                   (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
             RETURN_FAILURE;
         }
         ctx->pattern += ctx->pattern[1] + 1;
                         RETURN_FAILURE;
                     while (p < e) {
                         if (ctx->ptr >= end ||
-                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
+                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
+                            state->lower(SRE_CHARGET(state, p, 0)))
                             RETURN_FAILURE;
-                        p++;
+                        p += state->charsize;
                         ctx->ptr += state->charsize;
                     }
                 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.