Skip to content

Instantly share code, notes, and snippets.

@toyowata
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save toyowata/c2fb53b01509a6fbc539 to your computer and use it in GitHub Desktop.
Save toyowata/c2fb53b01509a6fbc539 to your computer and use it in GitHub Desktop.
Generate UTF-8 to Kuten code conversion table - UTF-8の有効ビット位置だけ取り出し、区点コード変換テーブルを再生成する
#!/usr/bin/env python
#encoding=utf-8
# Generate UTF-8 to Kuten code conversion table
#
# You can find the UTF-8 table here:
# http://ash.jp/code/unitbl21.htm
# (first ku, last ku, base byte offset) for each contiguous ku range that has
# an address mapping.  Glyphs are 32 bytes each, so every base must be a
# multiple of 32 and consecutive regions must abut:
#   0x0AA40 + 2965 * 32 == 0x21CE0   (2965 = JIS level-1 kanji, ku 16..47)
#   0x21CE0 + 3390 * 32 == 0x3C4A0   (3390 = JIS level-2 kanji, ku 48..84)
# The original script used 0x21CDF for the third region, which is off by one
# byte (unaligned and inconsistent with the neighbouring bases).
# NOTE(review): bases presumably follow the GT20L16J1Y font ROM layout --
# confirm against the datasheet.
_ADDRESS_REGIONS = (
    (1, 15, 0x00000),
    (16, 47, 0x0AA40),
    (48, 84, 0x21CE0),
    (85, 85, 0x3C4A0),
    (88, 89, 0x3D060),
)


def _kuten_to_address(ku, ten):
    """Return the byte offset for a ku/ten pair, or 0 if it is unmapped."""
    if not 1 <= ten <= 94:
        return 0
    for first_ku, last_ku, base in _ADDRESS_REGIONS:
        if first_ku <= ku <= last_ku:
            # 94 cells per ku row, 32 bytes per glyph.
            return ((ku - first_ku) * 94 + (ten - 1)) * 32 + base
    return 0


def print_expanded_address(table):
    """Print a C array of glyph byte offsets, one per table entry.

    Args:
        table: dict mapping a lookup key to a kuten value encoded as
            ``ku * 256 + ten``.

    Entries are emitted in ascending key order so the array lines up with
    the other generated arrays.  Kuten values outside every known region
    are emitted as address 0.
    """
    print("const uint32_t GT20L16Y1J_address_table[] = {")
    for _key, value in sorted(table.items()):
        ku = (value & 0xFF00) >> 8
        ten = value & 0x00FF
        # Single-argument print(...) gives identical output on Python 2 and 3.
        print("0x%x ," % _kuten_to_address(ku, ten))
    print("};")
    print("")
def print_kuten_value(table):
    """Print a C array of the table's kuten values in ascending key order.

    Args:
        table: dict mapping a lookup key to a kuten value encoded as
            ``ku * 256 + ten``.

    Each value is written in hexadecimal on its own line, followed by a
    closing brace and a blank line.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # produces the same bytes as the original Python 2 print statements.
    print("const uint16_t utf8_value[] = {")
    for _key, value in sorted(table.items()):
        print("0x%x ," % value)
    print("};")
    print("")
def build_utf8_table(lines):
    """Build the lookup table from fixed-width conversion-table lines.

    Args:
        lines: iterable of byte strings.  Columns 0-1 hold the decimal ku,
            columns 3-4 the decimal ten, and columns 21-26 the UTF-8
            encoding as six hex digits.

    Returns:
        dict mapping the 16 payload bits of each three-byte UTF-8 sequence
        to ``ku * 256 + ten``.  Lines whose UTF-8 field does not start with
        ``E`` (including blank or short lines) are skipped.
    """
    table = {}
    for line in lines:
        utf8_hex = line[21:21 + 6]
        # startswith() is safe on an empty slice, unlike indexing [0].
        if not utf8_hex.startswith(b"E"):
            continue
        ku = int(line[0:0 + 2], 10)
        ten = int(line[3:3 + 2], 10)
        encoded = int(utf8_hex, 16)
        # extract valid bits: 0b1110xxxx10xxxxxx10xxxxxx
        # valid from 0xE08080 to 0xEFBFBF
        key = (((encoded & 0x000F0000) >> 4)
               | ((encoded & 0x00003F00) >> 2)
               | (encoded & 0x0000003F))
        table[key] = (ku * 256) + ten
    return table


if __name__ == "__main__":
    # Binary mode: only ASCII columns are parsed, so any non-ASCII text
    # later on the line cannot raise decode errors or shift the offsets.
    # The with-block also guarantees the file is closed (the original
    # referenced f.close without calling it).
    with open('utf8_table.txt', 'rb') as f:
        table = build_utf8_table(f)

    print("const uint16_t utf8_key[] = {")
    for key in sorted(table):
        print("%d ," % key)
    print("};")
    print("")
    print_kuten_value(table)
    print_expanded_address(table)
@toyowata
Copy link
Author

元になった変換テーブルは、ここ。
http://ash.jp/code/unitbl21.htm

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment