Commits

Armin Rigo committed bca4299

"Fix" (??) _sre.getlower(x) so that it returns x unchanged when x >= 256
and SRE_FLAG_LOCALE is passed. That's obscure...

Comments (0)

Files changed (2)

pypy/rlib/rsre/rsre_char.py

 
 
 def getlower(char_ord, flags):
-    if flags & SRE_FLAG_UNICODE:
+    if flags & SRE_FLAG_LOCALE:
+        if char_ord < 256:      # cheating!  Well, CPython does too.
+            char_ord = tolower(char_ord)
+        return char_ord
+    elif flags & SRE_FLAG_UNICODE:
         assert unicodedb is not None
         char_ord = unicodedb.tolower(char_ord)
-    elif flags & SRE_FLAG_LOCALE:
-        return tolower(char_ord)
     else:
         if int_between(ord('A'), char_ord, ord('Z') + 1):   # ASCII lower
             char_ord += ord('a') - ord('A')

pypy/rlib/rsre/test/test_char.py

     assert rsre_char.getlower(ord('2'), SRE_FLAG_UNICODE) == ord('2')
     assert rsre_char.getlower(10, SRE_FLAG_UNICODE) == 10
     assert rsre_char.getlower(UPPER_PI, SRE_FLAG_UNICODE) == LOWER_PI
+    #
+    # xxx the following cases are like CPython's.  They are obscure.
+    # (iko) that's a nice way to say "broken"
+    assert rsre_char.getlower(UPPER_PI, SRE_FLAG_LOCALE) == UPPER_PI
+    assert rsre_char.getlower(UPPER_PI, SRE_FLAG_LOCALE | SRE_FLAG_UNICODE) \
+                                                         == UPPER_PI
 
 def test_is_word():
     assert rsre_char.is_word(ord('A'))