Skip to content

Instantly share code, notes, and snippets.

@daviehh
Forked from pixelcort/furigana.py
Last active July 17, 2021 01:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daviehh/fbb0f46e8c49f7cf3d2fc5574a925854 to your computer and use it in GitHub Desktop.
Save daviehh/fbb0f46e8c49f7cf3d2fc5574a925854 to your computer and use it in GitHub Desktop.
Add furigana (HTML ruby annotations) to a kanji–hiragana pair.
import re
def f_nx(x):
    """Return True when *x* is anything other than the empty string.

    Used as a filter predicate to drop the empty pieces that ``re.split``
    produces at the edges of a string.
    """
    return x != ""
def rt_pattern(kanji, hira):
    """Wrap *kanji* in an HTML ``<ruby>`` element annotated with *hira*.

    Args:
        kanji: The base text to annotate.
        hira: The reading placed inside the ``<rt>`` element.

    Returns:
        The ruby markup as a string. Both values are interpolated as-is
        (no HTML escaping), each surrounded by single spaces.
    """
    return f"<ruby> {kanji} <rt> {hira} </rt></ruby>"
def to_furigana(kanji, hira):
    """Annotate the non-kana parts of *kanji* with readings taken from *hira*.

    *kanji* is split into alternating runs of kana (U+3040-U+30FF, i.e. the
    Hiragana and Katakana blocks) and non-kana characters. A regular
    expression is then built in which every kana run must appear literally
    and every non-kana run becomes a capture group of kana; searching *hira*
    with it yields the reading for each non-kana run.

    Example: ``to_furigana("食べる", "たべる")`` returns
    ``"<ruby> 食 <rt> た </rt></ruby>べる"``.

    Args:
        kanji: The text as written, possibly mixing kanji and kana.
        hira: The kana reading of the whole text.

    Returns:
        The text with each non-kana run replaced by ``rt_pattern(run,
        reading)`` and kana runs left untouched, stripped of surrounding
        whitespace.

    Raises:
        ValueError: If the kana runs of *kanji* cannot be located in *hira*,
            so no reading can be aligned.
    """
    kana_range = "\u3040-\u30ff"
    kana_run = re.compile(f"([{kana_range}]+)")
    # "*" rather than "+": a non-kana run is allowed to align with an
    # empty reading, exactly as the alignment has always behaved.
    reading_slot = f"([{kana_range}]*)"

    # The capturing split keeps the kana runs; empty edge pieces are dropped.
    segments = [part for part in kana_run.split(kanji) if part]

    # One capture group per segment, so groups line up 1:1 with segments.
    pattern = "".join(
        f"({re.escape(seg)})" if kana_run.search(seg) else reading_slot
        for seg in segments
    )

    match = re.search(pattern, hira)
    if match is None:
        raise ValueError(
            f"reading {hira!r} cannot be aligned with text {kanji!r}"
        )

    # Kana runs match themselves and pass through; anything else gets ruby.
    marked = [
        reading if reading == seg else rt_pattern(seg, reading)
        for seg, reading in zip(segments, match.groups())
    ]
    return "".join(marked).strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment