Created
May 31, 2024 03:12
-
-
Save thegamecracks/b6d4fc83101cc2f1305d9154683eeb48 to your computer and use it in GitHub Desktop.
Personal practice for pronouncing numerals in Japanese
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib | |
import math | |
import random | |
# Romanised readings of the digits 1-9 as used inside larger numbers.
DIGITS: dict[int, str] = {
    1: "ichi",
    2: "ni",
    3: "san",
    4: "yon",
    5: "go",
    6: "roku",
    7: "nana",
    8: "hachi",
    9: "kyuu",  # きゅう; "kyou" would be きょう ("today")
}

# Readings of 100-900, keyed by the hundreds digit. These are spelled out in
# full because several undergo sound changes (sanbyaku, roppyaku, happyaku)
# and cannot be built as digit + "hyaku".
HUNDREDS: dict[int, str] = {
    1: "hyaku",
    2: "nihyaku",
    3: "sanbyaku",
    4: "yonhyaku",
    5: "gohyaku",
    6: "roppyaku",
    7: "nanahyaku",
    8: "happyaku",
    9: "kyuuhyaku",
}
def number_to_romaji(n: int) -> str:
    """Spell out an integer as romanised Japanese.

    Words are separated by single spaces. A multiplier is fused to its unit
    ("yonsen", "niman", "ichioku") and multiples of ten are hyphenated
    ("ni-juu"), e.g. 4200 -> "yonsen nihyaku" and 26 -> "ni-juu roku".

    Args:
        n: The integer to spell out. Negative values are prefixed with
            "mainasu"; zero is "zero".

    Returns:
        The romanised reading of ``n``.

    Note:
        Units are supported up to chou (10**12). For values of 10**16 and
        above the chou multiplier is itself spelled out recursively
        (e.g. "ichimanchou") rather than using larger unit names.
    """
    if n == 0:
        return "zero"

    parts: list[str] = []
    if n < 0:
        parts.append("mainasu")
        n = -n

    # Myriad-based units, largest first. Each multiplier is read as a number
    # in its own right and then fused with the unit name.
    for value, unit in ((10**12, "chou"), (10**8, "oku"), (10**4, "man")):
        count, n = divmod(n, value)
        if not count:
            continue
        reading = number_to_romaji(count)
        if unit == "chou":
            # A multiplier ending in 1, 8 or 10 contracts before "chou":
            # itchou, hatchou, jutchou.
            for ending, contracted in (
                ("ichi", "it"),
                ("hachi", "hat"),
                ("juu", "jut"),
            ):
                if reading.endswith(ending):
                    reading = reading[: -len(ending)] + contracted
                    break
        parts.append(reading + unit)

    thousands, n = divmod(n, 1000)
    if thousands == 1:
        # A bare thousand is just "sen", without "ichi".
        parts.append("sen")
    elif thousands > 1:
        # 3000 and 8000 undergo sound changes; the rest are digit + "sen".
        irregular_thousands = {3: "sanzen", 8: "hassen"}
        if thousands in irregular_thousands:
            parts.append(irregular_thousands[thousands])
        else:
            parts.append(DIGITS[thousands] + "sen")

    hundreds, n = divmod(n, 100)
    if hundreds > 0:
        parts.append(HUNDREDS[hundreds])

    tens, n = divmod(n, 10)
    if tens == 1:
        # A bare ten is just "juu", without "ichi".
        parts.append("juu")
    elif tens > 1:
        parts.append(DIGITS[tens] + "-juu")

    # n is now the ones digit (0-9); zero contributes no word.
    if n > 0:
        parts.append(DIGITS[n])

    return " ".join(parts)
def main() -> None:
    """Run an endless interactive quiz on reading numbers aloud in romaji.

    Each round picks a random number, prompts with it, and compares the
    typed answer (lower-cased and stripped) against ``number_to_romaji``.
    The loop never exits on its own.

    Raises:
        Exception: Any exception raised inside the loop is re-raised after
            the function's local variables have been printed.
    """
    try:
        # Upper bound for the random digits; grows as answers are correct.
        difficulty = 100
        while True:
            digits = random.randrange(difficulty)
            # Random power of ten to scale by; int() truncates the float
            # drawn from [0, log10(difficulty) + 1].
            exp = int(random.uniform(0, math.log10(difficulty) + 1))
            n = digits * 10 ** exp
            expected = number_to_romaji(n)
            answer = input(f"{n}: ").lower().strip()
            underline = underline_diff(expected, answer)
            # An underline with no markers means nothing was flagged.
            if underline.isspace():
                print("✅")
                difficulty += 10
            else:
                print("❌", expected)
                # Only show the marker line when something was typed.
                if answer:
                    print(" ", underline)
    except Exception:
        # Dump the current round's state to help debugging, then propagate.
        print(locals())
        raise
def underline_diff(before: str, after: str) -> str:
    """Build a marker line pointing at where ``after`` deviates from ``before``.

    The result is meant to be printed directly beneath ``before``: it holds
    a ``^`` under every character of ``before`` that was changed or removed,
    and under the character in front of which extra text was inserted.
    Everything else is a space.

    Whitespace characters of ``before`` are not underlined, so a purely
    whitespace difference at such a position leaves no marker. If ``after``
    put non-blank text there instead, the position is still flagged so the
    mismatch is never reported as a blank line.

    Args:
        before: The reference string.
        after: The string being compared against the reference.

    Returns:
        A string of spaces and ``^`` with the same length as ``before``, or
        one character longer when non-blank text was appended at the very
        end of ``after``.
    """
    # One extra slot past the end of `before` so that text appended at the
    # very end of `after` has somewhere to be flagged.
    marks = [False] * (len(before) + 1)

    matcher = difflib.SequenceMatcher(None, before, after, autojunk=False)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue

        # An insertion covers no characters of `before`; point at the
        # character it was inserted in front of instead.
        stop = i1 + 1 if tag == "insert" else i2
        visible = [
            i
            for i in range(i1, min(stop, len(before)))
            if not before[i].isspace()
        ]
        if visible:
            for i in visible:
                marks[i] = True
        elif after[j1:j2].strip():
            # Nothing underlinable here (whitespace or end of string), but
            # the other side has real text: flag the position anyway.
            marks[i1] = True

    # Drop the extra slot unless a trailing insertion actually used it.
    if not marks[-1]:
        marks.pop()
    return "".join("^" if marked else " " for marked in marks)
if __name__ == "__main__":
    # Run the quiz when executed as a script. KeyboardInterrupt (Ctrl-C) is
    # swallowed here so that it does not propagate as a traceback.
    try:
        main()
    except KeyboardInterrupt:
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python numerals.py | |
740: nanahyaku yon-juu | |
✅ | |
4200: yonsan nibyaku | |
❌ yonsen nihyaku | |
^ ^ | |
26: ni-juu | |
❌ ni-juu roku | |
^^^^ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment