Armin Rigo committed fe62c82

Test and fix for issue1132.

Comments (0)

Files changed (3)

pypy/module/unicodedata/generate_unicodedb.py

     print >> outfile, 'version = %r' % version
     print >> outfile
 
-    cjk_end = 0x9FA5
-    if version >= "4.1":
-        cjk_end = 0x9FBB
+    if version < "4.1":
+        cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
+                        " 0x4E00 <= code <= 0x9FA5 or"
+                        " 0x20000 <= code <= 0x2A6D6)")
+    elif version < "5":    # don't know the exact limit
+        cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
+                        " 0x4E00 <= code <= 0x9FBB or"
+                        " 0x20000 <= code <= 0x2A6D6)")
+    else:
+        cjk_interval = ("(0x3400 <= code <= 0x4DB5 or"
+                        " 0x4E00 <= code <= 0x9FCB or"
+                        " 0x20000 <= code <= 0x2A6D6 or"
+                        " 0x2A700 <= code <= 0x2B734)")
 
     write_character_names(outfile, table, base_mod)
 
         if not ('0' <= c <= '9' or 'A' <= c <= 'F'):
             raise KeyError
     code = int(cjk_code, 16)
-    if (0x3400 <= code <= 0x4DB5 or
-        0x4E00 <= code <= 0x%X or
-        0x20000 <= code <= 0x2A6D6):
+    if %s:
         return code
     raise KeyError
 
                 raise
 
 def name(code):
-    if (0x3400 <= code <= 0x4DB5 or
-        0x4E00 <= code <= 0x%X or
-        0x20000 <= code <= 0x2A6D6):
+    if %s:
         return "CJK UNIFIED IDEOGRAPH-" + hex(code)[2:].upper()
     if 0xAC00 <= code <= 0xD7A3:
         # vl_code, t_code = divmod(code - 0xAC00, len(_hangul_T))
                 return base_mod.lookup_charcode(code)
             else:
                 raise
-''' % (cjk_end, cjk_end)
+''' % (cjk_interval, cjk_interval)
 
     # Categories
     writeDbRecord(outfile, table)

pypy/module/unicodedata/test/test_unicodedata.py

         import unicodedata
         cases = ((0x3400, 0x4DB5),
                  (0x4E00, 0x9FA5))
-        if unicodedata.unidata_version >= "4.1":
+        if unicodedata.unidata_version >= "5":    # don't know the exact limit
+            cases = ((0x3400, 0x4DB5),
+                     (0x4E00, 0x9FCB),
+                     (0x20000, 0x2A6D6),
+                     (0x2A700, 0x2B734))
+        elif unicodedata.unidata_version >= "4.1":
             cases = ((0x3400, 0x4DB5),
                      (0x4E00, 0x9FBB),
                      (0x20000, 0x2A6D6))

pypy/module/unicodedata/unicodedb_5_2_0.py

         if not ('0' <= c <= '9' or 'A' <= c <= 'F'):
             raise KeyError
     code = int(cjk_code, 16)
-    if (0x3400 <= code <= 0x4DB5 or
-        0x4E00 <= code <= 0x9FBB or
-        0x20000 <= code <= 0x2A6D6):
+    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FCB or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734):
         return code
     raise KeyError
 
                 raise
 
 def name(code):
-    if (0x3400 <= code <= 0x4DB5 or
-        0x4E00 <= code <= 0x9FBB or
-        0x20000 <= code <= 0x2A6D6):
+    if (0x3400 <= code <= 0x4DB5 or 0x4E00 <= code <= 0x9FCB or 0x20000 <= code <= 0x2A6D6 or 0x2A700 <= code <= 0x2B734):
         return "CJK UNIFIED IDEOGRAPH-" + hex(code)[2:].upper()
     if 0xAC00 <= code <= 0xD7A3:
         # vl_code, t_code = divmod(code - 0xAC00, len(_hangul_T))
         v_code = vl_code % len(_hangul_V)
         return ("HANGUL SYLLABLE " + _hangul_L[l_code] +
                 _hangul_V[v_code] + _hangul_T[t_code])
-    
+
     if not base_mod:
         return lookup_charcode(code)
     else:
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.