Skip to content

Instantly share code, notes, and snippets.

@thegamecracks
Created May 31, 2024 03:12
Show Gist options
  • Save thegamecracks/b6d4fc83101cc2f1305d9154683eeb48 to your computer and use it in GitHub Desktop.
Save thegamecracks/b6d4fc83101cc2f1305d9154683eeb48 to your computer and use it in GitHub Desktop.
Personal practice for pronouncing numerals in Japanese
import difflib
import math
import random
# Romaji readings for the single digits 1-9, keyed by digit value.
# 0 has no entry: number_to_romaji() returns "zero" for a bare 0 and simply
# omits zero digits inside larger numbers.
DIGITS = {
    1: "ichi",
    2: "ni",
    3: "san",
    4: "yon",
    5: "go",
    6: "roku",
    7: "nana",
    8: "hachi",
    9: "kyuu",  # long "u" (kyuu), not "kyou"
}
# Romaji readings for 100-900, keyed by the hundreds digit.
# Spelled out as a full table because several entries change sound instead of
# being a plain digit + "hyaku" (sanbyaku, roppyaku, happyaku).
HUNDREDS = {
    1: "hyaku",
    2: "nihyaku",
    3: "sanbyaku",
    4: "yonhyaku",
    5: "gohyaku",
    6: "roppyaku",
    7: "nanahyaku",
    8: "happyaku",
    9: "kyuuhyaku",  # long "u" (kyuu), not "kyou"
}
# Thousands readings that are not simply digit + "sen": a bare 1000 drops the
# leading "ichi", and 3000 / 8000 change sound.
_IRREGULAR_THOUSANDS = {
    1: "sen",
    3: "sanzen",
    8: "hassen",
}


def number_to_romaji(n: int) -> str:
    """Return the romaji reading of the integer *n*.

    Words are separated by single spaces.  A count is fused directly onto
    its unit ("yonsen", "juu niman", "ichioku"), and a tens digit is joined
    to "juu" with a hyphen ("ni-juu").  Examples::

        number_to_romaji(0)      -> "zero"
        number_to_romaji(26)     -> "ni-juu roku"
        number_to_romaji(4200)   -> "yonsen nihyaku"
        number_to_romaji(-3000)  -> "mainasu sanzen"
        number_to_romaji(10**8)  -> "ichioku"

    Negative numbers are prefixed with "mainasu".

    Values of 10**12 and above are not rendered correctly yet: the "chou"
    unit is still missing, so the count in front of "oku" itself contains
    "man".
    """
    if n == 0:
        return "zero"

    parts: list[str] = []
    if n < 0:
        parts.append("mainasu")
        n = abs(n)

    # TODO: 1 trillion -> itchou

    # Large units group by four digits.  The count in front of each unit is
    # itself read as a number and glued straight onto the unit name.
    oku, n = divmod(n, 10**8)
    if oku > 0:
        parts.append(number_to_romaji(oku) + "oku")

    ten_thousands, n = divmod(n, 10**4)
    if ten_thousands > 0:
        parts.append(number_to_romaji(ten_thousands) + "man")

    thousands, n = divmod(n, 1000)
    if thousands in _IRREGULAR_THOUSANDS:
        parts.append(_IRREGULAR_THOUSANDS[thousands])
    elif thousands > 0:
        parts.append(DIGITS[thousands] + "sen")

    hundreds, n = divmod(n, 100)
    if hundreds > 0:
        parts.append(HUNDREDS[hundreds])

    tens, n = divmod(n, 10)
    if tens == 1:
        # A bare ten is just "juu", without a leading "ichi".
        parts.append("juu")
    elif tens > 1:
        parts.append(DIGITS[tens] + "-juu")

    # Only 1-9 have an entry, so a zero ones digit adds nothing.
    if n in DIGITS:
        parts.append(DIGITS[n])

    return " ".join(parts)
def main() -> None:
    """Run an endless interactive quiz on reading numbers aloud in romaji.

    Each round prints a random number as a prompt, reads the typed reading
    from stdin, and compares it with number_to_romaji().  A correct answer
    prints a check mark and raises the difficulty; a wrong one prints the
    expected reading and, if anything was typed, a caret line underneath
    marking the differing characters.

    The quiz ends quietly when stdin reaches end-of-file (for example
    Ctrl-D).  Any other unexpected exception dumps the local variables as a
    debugging aid and is then re-raised.
    """
    try:
        difficulty = 100
        while True:
            # Pick some leading digits, then pad them with a random number
            # of trailing zeros so that larger units get practised as well.
            digits = random.randrange(difficulty)
            exp = int(random.uniform(0, math.log10(difficulty) + 1))
            n = digits * 10 ** exp
            expected = number_to_romaji(n)
            try:
                answer = input(f"{n}: ").lower().strip()
            except EOFError:
                # End of input is a normal way to stop practising, not a
                # failure worth a locals dump and traceback.  Finish the
                # pending prompt line before leaving.
                print()
                return
            underline = underline_diff(expected, answer)
            # An all-blank underline means no character was flagged.
            if underline.isspace():
                print("✅")
                difficulty += 10
            else:
                print("❌", expected)
                if answer:
                    print(" ", underline)
    except Exception:
        # Show the state of the failing round before propagating the error.
        print(locals())
        raise
def underline_diff(before: str, after: str) -> str:
    """Return a row of carets marking where *after* deviates from *before*.

    The result lines up column-for-column with *before*: a "^" sits under
    every non-whitespace character of *before* that was changed or is
    missing in *after*, and at the position where *after* contains extra
    text.  All other columns are spaces.

    The result is ``len(before)`` characters long, except when *after* has
    extra text past the end of *before*; then one more column is appended to
    hold that caret.  When the two strings match (ignoring whitespace that is
    merely missing from *after*), the result contains no caret at all.
    """
    # One spare column past the end of `before` so that text appended at the
    # very end of `after` still has somewhere to be marked.
    marks = [" "] * (len(before) + 1)

    matcher = difflib.SequenceMatcher(None, before, after, autojunk=False)
    for tag, i1, i2, _j1, _j2 in matcher.get_opcodes():
        if tag == "equal":
            continue
        if tag == "insert":
            # Extra text has no character of its own in `before`; mark the
            # column it was inserted in front of.  This mark is always kept,
            # even on a whitespace column, so an insertion is never lost.
            marks[i1] = "^"
            continue
        # "replace" / "delete": mark the affected characters of `before`,
        # but skip underlining whitespace.
        for i in range(i1, i2):
            if not before[i].isspace():
                marks[i] = "^"

    # Drop the spare column again unless a trailing insertion used it.
    if marks[-1] == " ":
        marks.pop()
    return "".join(marks)
# Script entry point: run the quiz only when executed directly.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is swallowed here so interrupting the quiz prints no traceback.
        pass
$ python numerals.py
740: nanahyaku yon-juu
4200: yonsan nibyaku
❌ yonsen nihyaku
      ^    ^
26: ni-juu
❌ ni-juu roku
         ^^^^
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment