Last active
February 1, 2022 09:11
-
-
Save fgiobergia/395fb9b4727790e0a9a2d24d936526f1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Licensed under the GPLv3.
Flavio Giobergia
https://gist.github.com/fgiobergia/395fb9b4727790e0a9a2d24d936526f1
""" | |
def _merge(a: str, b: str, elisione: bool = True, spacing: bool = False, use_e : bool = False) -> str: | |
"""Utility function to merge two strings, following a bunch of rules, | |
as explained in the documentation for int2ita | |
""" | |
if not b: | |
return a | |
if b[0] in "aeiou" and elisione: | |
# avoids "ottantaotto", "ventiuno" | |
a = a[:-1] | |
if use_e: | |
return f"{a} e {b}" | |
elif spacing: | |
return f'{a} {b}' | |
return a + b | |
# Lookup tables live at module level so they are built once instead of on
# every (recursive) call of int2ita.
_UNITS = {
    0: "zero",
    1: "uno",
    2: "due",
    3: "tre",
    4: "quattro",
    5: "cinque",
    6: "sei",
    7: "sette",
    8: "otto",
    9: "nove",
}
_TEENS = {
    11: "undici",
    12: "dodici",
    13: "tredici",
    14: "quattordici",
    15: "quindici",
    16: "sedici",
    17: "diciassette",
    18: "diciotto",
    19: "diciannove",
}
_TENS = {
    1: "dieci",
    2: "venti",
    3: "trenta",
    4: "quaranta",
    5: "cinquanta",
    6: "sessanta",
    7: "settanta",
    8: "ottanta",
    9: "novanta",
}
# (scale, wording for exactly one unit of the scale, plural noun).
# These magnitudes are always written as separate words.
_LARGE_SCALES = (
    (1_000_000, "un milione", "milioni"),
    (1_000_000_000, "un miliardo", "miliardi"),
)


def int2ita(n: int, spacing: bool = False, use_e: bool = False, elisione: bool = True) -> str:
    """Convert a number n into its italian string representation (e.g. 42 => quarantadue).

    Parameters
    ----------
    n : int
        the number to be converted. Magnitudes below 10**12 are supported;
        negative numbers are rendered as "meno <magnitude>" (the sign word is
        always followed by a space, regardless of `spacing`).
    spacing : bool
        whether each "section" of the number should be separated by a space (default False).
        Example (n = 1234):
            * `spacing=True`: mille due cento trenta quattro
            * `spacing=False`: milleduecentotrentaquattro
    use_e : bool
        whether to concatenate separate portions of the numbers with an 'e' (and) (default False).
        Only applies around the thousands, millions and billions groups.
        Example (n = 1234567):
            * `use_e=True` (`spacing=False`): un milione e duecentotrentaquattromila e cinquecentosessantasette
            * `use_e=False` (`spacing=False`): un milione duecentotrentaquattromilacinquecentosessantasette
    elisione : bool
        whether the final vowel of the tens is dropped before a unit starting
        with a vowel (default True).
        Example (n = 31):
            * `elisione=True` (`spacing=False`): trentuno   <= expected behavior in the Italian language
            * `elisione=True` (`spacing=True`): trent uno   <= odd, but consistent
            * `elisione=False` (`spacing=False`): trentauno <= odd, but consistent
            * `elisione=False` (`spacing=True`): trenta uno <= possibly useful for some language models

    Returns
    -------
    str
        the italian wording of `n`, or the literal string "UNSUPPORTED" when
        the magnitude of `n` is 10**12 or more.
    """
    if n < 0:
        # Previously a negative value fell through to the units table and
        # raised an opaque KeyError; spell the magnitude and prefix the sign.
        magnitude = int2ita(-n, spacing, use_e, elisione)
        if magnitude == "UNSUPPORTED":
            return magnitude
        return f"meno {magnitude}"

    if n < 10:
        return _UNITS[n]

    if n < 100:
        tens, units = divmod(n, 10)
        if units == 0:
            return _TENS[tens]
        if tens == 1:
            # 11-19 are irregular and have their own table.
            return _TEENS[n]
        return _merge(_TENS[tens], _UNITS[units], elisione, spacing)

    gap = " " if spacing else ""

    if n < 1000:
        hundreds, rest = divmod(n, 100)
        # 100 is plain "cento"; 200-900 are "<digit>cento".
        head = f"{_UNITS[hundreds]}{gap}cento" if hundreds > 1 else "cento"
        tail = int2ita(rest, spacing, use_e, elisione) if rest > 0 else ""
        # No elision after "cento" (e.g. "centouno", "centootto").
        return _merge(head, tail, False, spacing)

    if n < 1_000_000:
        thousands, rest = divmod(n, 1000)
        if thousands > 1:
            head = f"{int2ita(thousands, spacing, use_e, elisione)}{gap}mila"
        else:
            # Exactly one thousand has its own word.
            head = "mille"
        tail = int2ita(rest, spacing, use_e, elisione) if rest > 0 else ""
        return _merge(head, tail, False, spacing, use_e)

    for scale, singular, plural in _LARGE_SCALES:
        if n < scale * 1000:
            count, rest = divmod(n, scale)
            if count == 1:
                head = singular
            else:
                head = f"{int2ita(count, spacing, use_e, elisione)} {plural}"
            tail = int2ita(rest, spacing, use_e, elisione) if rest > 0 else ""
            # These groups are always space-separated, whatever `spacing` says.
            return _merge(head, tail, False, True, use_e)

    return "UNSUPPORTED"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment