Skip to content

Instantly share code, notes, and snippets.

@jaseg
Created January 5, 2016 11:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaseg/dc287041874566d436e0 to your computer and use it in GitHub Desktop.
Save jaseg/dc287041874566d436e0 to your computer and use it in GitHub Desktop.
Fooling around with furigana on a terminal.
#!/usr/bin/env python3
import unicodedata, textwrap
# Hand-laid-out mock-up: the katakana reading in grey (256-colour index 250)
# on the upper line, the kanji in the default colour on the line below.
print('\033[38;5;250m ニホンゴ\n\033[0m日本語')
print()

# The same idea using "CSI Ps \" sequences (this looks like ECMA-48 PTX,
# PARALLEL TEXTS: 1 = principal text, 2 = supplementary text, 0 = end).
# I could not find any terminal that actually renders this: urxvt simply
# ignores the escapes, which is kind of sane, while gnome-terminal et al.
# print them literally with a replacement character in place of ESC, which
# looks like garbage.
print('foo\033[1\\bar\033[2\\baz\033[0\\fnord')
print()
# Code-point table: full-width Japanese punctuation / katakana -> the
# corresponding half-width form (values cover U+FF61..U+FF9F).
# Voiced kana are absent on purpose: callers are expected to decompose them
# into base kana + voicing mark first.
FW_TO_HW_MAP = {
    # 、 。 「 」
    0x3001: 0xFF64, 0x3002: 0xFF61, 0x300C: 0xFF62, 0x300D: 0xFF63,
    # spacing dakuten / handakuten
    0x309B: 0xFF9E, 0x309C: 0xFF9F,
    # ァ ア ィ イ ゥ ウ ェ エ ォ オ
    0x30A1: 0xFF67, 0x30A2: 0xFF71, 0x30A3: 0xFF68, 0x30A4: 0xFF72,
    0x30A5: 0xFF69, 0x30A6: 0xFF73, 0x30A7: 0xFF6A, 0x30A8: 0xFF74,
    0x30A9: 0xFF6B, 0x30AA: 0xFF75,
    # カ キ ク ケ コ
    0x30AB: 0xFF76, 0x30AD: 0xFF77, 0x30AF: 0xFF78, 0x30B1: 0xFF79,
    0x30B3: 0xFF7A,
    # サ シ ス セ ソ
    0x30B5: 0xFF7B, 0x30B7: 0xFF7C, 0x30B9: 0xFF7D, 0x30BB: 0xFF7E,
    0x30BD: 0xFF7F,
    # タ チ ッ ツ テ ト
    0x30BF: 0xFF80, 0x30C1: 0xFF81, 0x30C3: 0xFF6F, 0x30C4: 0xFF82,
    0x30C6: 0xFF83, 0x30C8: 0xFF84,
    # ナ ニ ヌ ネ ノ
    0x30CA: 0xFF85, 0x30CB: 0xFF86, 0x30CC: 0xFF87, 0x30CD: 0xFF88,
    0x30CE: 0xFF89,
    # ハ ヒ フ ヘ ホ
    0x30CF: 0xFF8A, 0x30D2: 0xFF8B, 0x30D5: 0xFF8C, 0x30D8: 0xFF8D,
    0x30DB: 0xFF8E,
    # マ ミ ム メ モ
    0x30DE: 0xFF8F, 0x30DF: 0xFF90, 0x30E0: 0xFF91, 0x30E1: 0xFF92,
    0x30E2: 0xFF93,
    # ャ ヤ ュ ユ ョ ヨ
    0x30E3: 0xFF6C, 0x30E4: 0xFF94, 0x30E5: 0xFF6D, 0x30E6: 0xFF95,
    0x30E7: 0xFF6E, 0x30E8: 0xFF96,
    # ラ リ ル レ ロ
    0x30E9: 0xFF97, 0x30EA: 0xFF98, 0x30EB: 0xFF99, 0x30EC: 0xFF9A,
    0x30ED: 0xFF9B,
    # ワ ヲ ン
    0x30EF: 0xFF9C, 0x30F2: 0xFF66, 0x30F3: 0xFF9D,
    # ・ ー
    0x30FB: 0xFF65, 0x30FC: 0xFF70,
}
def print_furi(text, furi):
    """Print ``text`` with the reading ``furi`` on the line directly above it.

    ``furi`` is NFD-normalized, which splits voiced kana into a base kana
    plus a combining (han)dakuten.  Hiragana are then shifted to katakana,
    and every code point that has an entry in ``FW_TO_HW_MAP`` is replaced
    by its half-width form.  Anything without an entry (Latin text, kana
    that have no half-width form such as ヶ) is passed through unchanged,
    in its NFD form.

    The reading is written in 256-colour index 220, followed by ``text`` in
    the default colour and one blank line.  Returns ``None``.
    """
    def halfwidth(cp):
        # After NFD the voicing marks are combining characters; half-width
        # kana use the spacing half-width marks instead.
        if cp == 0x3099:  # combining dakuten
            return '\uFF9E'
        if cp == 0x309A:  # combining handakuten
            return '\uFF9F'
        # Hiragana sit exactly 0x60 below their katakana counterparts.
        kata = cp + 0x60 if 0x3041 <= cp <= 0x3096 else cp
        # .get() rather than indexing: kana with no half-width form
        # (ヮ ヰ ヱ ヵ ヶ and their hiragana) would raise KeyError otherwise.
        # Looking up every code point, not only the katakana letters, also
        # lets ー ・ 、 。 「 」 pick up their table entries.
        return chr(FW_TO_HW_MAP.get(kata, cp))

    reading = ''.join(
        halfwidth(ord(ch)) for ch in unicodedata.normalize('NFD', furi)
    )
    print('\033[38;5;220m{furi}\n\033[0m{text}'.format(furi=reading, text=text),
          end='\n\n')
# Demo: the same word annotated with a hiragana, a katakana and a romaji reading.
for reading in ('にほんご', 'ニホンゴ', 'nihongo'):
    print_furi('日本語', reading)
#!/usr/bin/env python3
import unicodedata, textwrap
# Hand-laid-out mock-up: the katakana reading in grey (256-colour index 250)
# on the upper line, the kanji in the default colour on the line below.
print('\033[38;5;250m ニホンゴ\n\033[0m日本語')
print()

# The same idea using "CSI Ps \" sequences (this looks like ECMA-48 PTX,
# PARALLEL TEXTS: 1 = principal text, 2 = supplementary text, 0 = end).
# No terminal tried so far actually renders this: urxvt simply ignores the
# escapes, while gnome-terminal et al. print them literally with a
# replacement character in place of ESC.
print('foo\033[1\\bar\033[2\\baz\033[0\\fnord')
print()
# Code-point table: full-width Japanese punctuation / katakana -> the
# corresponding half-width form (values cover U+FF61..U+FF9F).
# Voiced kana are absent on purpose: callers are expected to decompose them
# into base kana + voicing mark first.
FW_TO_HW_MAP = {
    # 、 。 「 」
    0x3001: 0xFF64, 0x3002: 0xFF61, 0x300C: 0xFF62, 0x300D: 0xFF63,
    # spacing dakuten / handakuten
    0x309B: 0xFF9E, 0x309C: 0xFF9F,
    # ァ ア ィ イ ゥ ウ ェ エ ォ オ
    0x30A1: 0xFF67, 0x30A2: 0xFF71, 0x30A3: 0xFF68, 0x30A4: 0xFF72,
    0x30A5: 0xFF69, 0x30A6: 0xFF73, 0x30A7: 0xFF6A, 0x30A8: 0xFF74,
    0x30A9: 0xFF6B, 0x30AA: 0xFF75,
    # カ キ ク ケ コ
    0x30AB: 0xFF76, 0x30AD: 0xFF77, 0x30AF: 0xFF78, 0x30B1: 0xFF79,
    0x30B3: 0xFF7A,
    # サ シ ス セ ソ
    0x30B5: 0xFF7B, 0x30B7: 0xFF7C, 0x30B9: 0xFF7D, 0x30BB: 0xFF7E,
    0x30BD: 0xFF7F,
    # タ チ ッ ツ テ ト
    0x30BF: 0xFF80, 0x30C1: 0xFF81, 0x30C3: 0xFF6F, 0x30C4: 0xFF82,
    0x30C6: 0xFF83, 0x30C8: 0xFF84,
    # ナ ニ ヌ ネ ノ
    0x30CA: 0xFF85, 0x30CB: 0xFF86, 0x30CC: 0xFF87, 0x30CD: 0xFF88,
    0x30CE: 0xFF89,
    # ハ ヒ フ ヘ ホ
    0x30CF: 0xFF8A, 0x30D2: 0xFF8B, 0x30D5: 0xFF8C, 0x30D8: 0xFF8D,
    0x30DB: 0xFF8E,
    # マ ミ ム メ モ
    0x30DE: 0xFF8F, 0x30DF: 0xFF90, 0x30E0: 0xFF91, 0x30E1: 0xFF92,
    0x30E2: 0xFF93,
    # ャ ヤ ュ ユ ョ ヨ
    0x30E3: 0xFF6C, 0x30E4: 0xFF94, 0x30E5: 0xFF6D, 0x30E6: 0xFF95,
    0x30E7: 0xFF6E, 0x30E8: 0xFF96,
    # ラ リ ル レ ロ
    0x30E9: 0xFF97, 0x30EA: 0xFF98, 0x30EB: 0xFF99, 0x30EC: 0xFF9A,
    0x30ED: 0xFF9B,
    # ワ ヲ ン
    0x30EF: 0xFF9C, 0x30F2: 0xFF66, 0x30F3: 0xFF9D,
    # ・ ー
    0x30FB: 0xFF65, 0x30FC: 0xFF70,
}
def print_furi(text, furi):
    """Print ``text`` with the reading ``furi`` on the line directly above it.

    ``furi`` is NFD-normalized, which splits voiced kana into a base kana
    plus a combining (han)dakuten.  Hiragana are then shifted to katakana,
    and every code point that has an entry in ``FW_TO_HW_MAP`` is replaced
    by its half-width form.  Anything without an entry (Latin text, kana
    that have no half-width form such as ヶ) is passed through unchanged,
    in its NFD form.

    The reading is written in 256-colour index 220, followed by ``text`` in
    the default colour and one blank line.  Returns ``None``.
    """
    def halfwidth(cp):
        # After NFD the voicing marks are combining characters; half-width
        # kana use the spacing half-width marks instead.
        if cp == 0x3099:  # combining dakuten
            return '\uFF9E'
        if cp == 0x309A:  # combining handakuten
            return '\uFF9F'
        # Hiragana sit exactly 0x60 below their katakana counterparts.
        kata = cp + 0x60 if 0x3041 <= cp <= 0x3096 else cp
        # .get() rather than indexing: kana with no half-width form
        # (ヮ ヰ ヱ ヵ ヶ and their hiragana) would raise KeyError otherwise.
        # Looking up every code point, not only the katakana letters, also
        # lets ー ・ 、 。 「 」 pick up their table entries.
        return chr(FW_TO_HW_MAP.get(kata, cp))

    reading = ''.join(
        halfwidth(ord(ch)) for ch in unicodedata.normalize('NFD', furi)
    )
    print('\033[38;5;220m{furi}\n\033[0m{text}'.format(furi=reading, text=text),
          end='\n\n')
# Demo: the same word annotated with a hiragana, a katakana and a romaji reading.
for reading in ('にほんご', 'ニホンゴ', 'nihongo'):
    print_furi('日本語', reading)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment