Source

woocode / py / pinyin-ascii.py

Full commit
# -*- encoding:utf-8 -*-

# Lookup table: pinyin initial letter -> inclusive (start, end) range of
# GBK code values.  The values are in the form computed by pinyin_to_ascii:
# lead_byte * 256 + trail_byte - 65536, hence all negative.
#
# The ranges are contiguous, so only each range's first code is listed and
# every end is derived as "next start - 1".  There are no entries for
# 'i', 'u' or 'v'.
# NOTE(review): presumably this covers only the pinyin-ordered region of
# GB2312; characters outside it match no range -- confirm.
_gbk_initials = 'abcdefghjklmnopqrstwxyz'
_gbk_range_starts = (
    -20319,  # a
    -20283,  # b
    -19775,  # c
    -19218,  # d
    -18710,  # e
    -18526,  # f
    -18239,  # g
    -17922,  # h
    -17417,  # j
    -16474,  # k
    -16212,  # l
    -15640,  # m
    -15165,  # n
    -14922,  # o
    -14914,  # p
    -14630,  # q
    -14149,  # r
    -14090,  # s
    -13118,  # t
    -12838,  # w
    -12556,  # x
    -11847,  # y
    -11055,  # z
)
# The last range ('z') has no successor, so its end is given explicitly.
_gbk_range_ends = tuple(
    start - 1 for start in _gbk_range_starts[1:]
) + (-10247,)

ascii_gbk_maps = dict(
    zip(_gbk_initials, zip(_gbk_range_starts, _gbk_range_ends))
)

def belongs_to(value, range_start, range_end):
    """Tell whether *value* lies in the closed interval [range_start, range_end].

    Both end points count as inside the range.  Always returns a plain
    ``True`` or ``False``.
    """
    return bool(range_start <= value <= range_end)

def pinyin_to_ascii(unicode_str):
    """Return the pinyin initial letter for the first character of a string.

    Only the first character of *unicode_str* is examined.  It is encoded
    as GBK and then:

    * a single-byte (ASCII) character is returned unchanged;
    * a double-byte character is turned into the code value
      ``lead_byte * 256 + trail_byte - 65536`` and looked up in
      ``ascii_gbk_maps``; the key of the matching range is returned.

    Returns:
        A one-character string, or ``''`` when the input is empty or the
        character's code falls outside every range in ``ascii_gbk_maps``.

    Raises:
        UnicodeEncodeError: if the first character has no GBK encoding.
    """
    # Encode just the first character so that later characters can neither
    # be mistaken for the trail byte nor make the encoding step fail.
    first_char = unicode_str[:1]
    if not first_char:
        return ''

    # bytearray yields integers on both Python 2 and Python 3, unlike
    # indexing the encoded string directly.
    gbk_bytes = bytearray(first_char.encode('gbk'))
    if len(gbk_bytes) == 1:
        # A one-byte GBK encoding means plain ASCII, so converting to the
        # native str type cannot fail.
        return str(first_char)

    value = gbk_bytes[0] * 256 + gbk_bytes[1] - 65536
    for letter, bounds in ascii_gbk_maps.items():
        if belongs_to(value, *bounds):
            return letter
    return ''

def test():
    """Smoke test: print the initial computed for one sample character.

    For this sample the printed letter is expected to be ``d``.
    """
    sample = u'帝'
    # A parenthesised single-argument print gives the same output whether
    # it is parsed as a Python 2 statement or a Python 3 function call.
    print(pinyin_to_ascii(sample))

# Run the smoke test only when executed as a script, so that importing this
# module for its helpers does not print anything.
if __name__ == '__main__':
    test()