/to_and_from_english.py Secret
Created
January 14, 2025 20:06
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
# Every number word the parser understands, mapped to its integer value.
ALL = {
    'zero': 0,
    # Scale words ("hundred" and the powers of one thousand).
    'hundred': 10 ** 2,
    'thousand': 10 ** 3,
    'million': 10 ** 6,
    'billion': 10 ** 9,
    'trillion': 10 ** 12,
    'quadrillion': 10 ** 15,
    'quintillion': 10 ** 18,
    'sextillion': 10 ** 21,
    'septillion': 10 ** 24,
    'octillion': 10 ** 27,
    'nonillion': 10 ** 30,
    'decillion': 10 ** 33,
    'undecillion': 10 ** 36,
    # Units and teens.
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14, 'fifteen': 15,
    'sixteen': 16, 'seventeen': 17, 'eighteen': 18, 'nineteen': 19,
    # Tens.
    'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50,
    'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90,
}

# Words that multiply the value in front of them instead of being added to it.
MULTIPLIER = {
    'hundred', 'thousand', 'million', 'billion', 'trillion', 'quadrillion',
    'quintillion', 'sextillion', 'septillion', 'octillion', 'nonillion',
    'decillion', 'undecillion',
}

# Multipliers that also close a "thousands group" ("... thousand", "... million").
# "hundred" is the only multiplier that does not end a group, so the set is
# derived from MULTIPLIER rather than repeated by hand.
GROUPS = MULTIPLIER - {'hundred'}
# One parsed token: its integer value, whether it is a plain value ('val') or
# a multiplier ('mul'), and whether it closes a thousands group.
_Word = namedtuple('Word', ['val', 'type', 'group'])

# Punctuation that may be glued to a number word ("thousand,", "twenty-one.").
# Mapping it to spaces lets str.split() produce clean tokens.
_PUNCTUATION_TO_SPACE = str.maketrans(dict.fromkeys('-,.;:!?', ' '))


def _merge_first_pair(words, right_type):
    """Merge, in place, the first 'val' word directly followed by a
    `right_type` word; return True when a merge happened.

    A following 'val' is added ("twenty" "one"); a following 'mul' is
    multiplied ("five" "million").
    """
    for i in range(len(words) - 1):
        left, right = words[i], words[i + 1]
        if left.type == 'val' and right.type == right_type:
            if right_type == 'val':
                merged = left.val + right.val
            else:
                merged = left.val * right.val
            words[i:i + 2] = [_Word(merged, 'val', False)]
            return True
    return False


def _reduce_words(words):
    """Collapse a sequence of words to one integer, or raise ValueError."""
    words = list(words)  # work on a copy; never mutate the caller's list
    # On every pass an addition is preferred over a multiplication, so
    # "three hundred twenty one" becomes 3, 100, 21 before 3 * 100 happens.
    while _merge_first_pair(words, 'val') or _merge_first_pair(words, 'mul'):
        pass
    if len(words) != 1:
        raise ValueError("Unable to convert")
    return words[0].val


def from_english(val):
    """Convert an English number phrase such as "two hundred three" to an int.

    Parameters:
        val: the phrase to parse.  A list of already-tokenized words (objects
            with `val`, `type` and `group` attributes) is also accepted and
            is reduced directly, without tokenizing or grouping.

    Returns:
        The integer the phrase spells out.  A leading "negative" or "minus"
        negates the result.

    Raises:
        ValueError: if the words cannot be reduced to a single number (this
            includes input with no recognized number words at all).

    Case is ignored, hyphens and common punctuation separate words, and words
    that are not number words (for example "and") are skipped.
    """
    if isinstance(val, list):
        return _reduce_words(val)

    tokens = val.lower().translate(_PUNCTUATION_TO_SPACE).split()

    # Honor a leading sign word; the same words anywhere else are skipped
    # like any other unknown word.
    sign = 1
    if tokens and tokens[0] in ('negative', 'minus'):
        sign = -1
        tokens = tokens[1:]

    words = []
    for token in tokens:
        if token in MULTIPLIER:
            # A multiplier (hundred, million, ...); the ones in GROUPS also
            # mark the end of a thousands group.
            words.append(_Word(ALL[token], 'mul', token in GROUPS))
        elif token in ALL:
            words.append(_Word(ALL[token], 'val', False))
        # Anything else (like "and") is deliberately ignored.

    # Reduce each thousands group on its own: a group ends at a group word
    # or at the end of the input.
    group_totals = []
    current = []
    for word in words:
        current.append(word)
        if word.group:
            group_totals.append(_Word(_reduce_words(current), 'val', False))
            current = []
    if current:
        group_totals.append(_Word(_reduce_words(current), 'val', False))

    # The group totals are all plain values, so reducing them adds them up.
    return sign * _reduce_words(group_totals)
# Words for 1-19, which are never split into tens and units.
_SMALL_WORDS = {
    1: "one", 2: "two", 3: "three", 4: "four", 5: "five",
    6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten",
    11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen", 15: "fifteen",
    16: "sixteen", 17: "seventeen", 18: "eighteen", 19: "nineteen",
}

# Words for the multiples of ten from 20 to 90.
_TENS_WORDS = {
    20: "twenty", 30: "thirty", 40: "forty", 50: "fifty",
    60: "sixty", 70: "seventy", 80: "eighty", 90: "ninety",
}

# (word, power of ten) pairs, largest first.  "hundred" is listed with the
# thousands words so one loop handles both.
_SCALE_WORDS = (
    ("undecillion", 36), ("decillion", 33), ("nonillion", 30), ("octillion", 27),
    ("septillion", 24), ("sextillion", 21), ("quintillion", 18), ("quadrillion", 15),
    ("trillion", 12), ("billion", 9), ("million", 6), ("thousand", 3), ("hundred", 2),
)


def to_english(value, level=0):
    """Spell out an integer in English words.

    Parameters:
        value: the number to convert.  Negative numbers are prefixed with
            "negative".  Floats are accepted only when they are whole numbers.
        level: recursion depth.  Only the outermost call (level 0)
            capitalizes the first letter of the result.

    Returns:
        The phrase, e.g. "Two hundred three thousand twenty-one" or "Zero".

    Raises:
        ValueError: if `value` is not a whole number.
    """
    # A fractional value would otherwise be truncated silently (25.5 ->
    # "twenty") or produce an empty phrase, so reject it up front.
    if value % 1 != 0:
        raise ValueError("to_english requires a whole number")

    parts = []
    if value < 0:
        value = -value
        parts.append("negative")

    # Peel off each scale, largest first, spelling the count of that scale
    # by calling ourselves (a count above 999 is spelled recursively too).
    for word, exponent in _SCALE_WORDS:
        scale = 10 ** exponent
        if value >= scale:
            parts.append(to_english(value // scale, level + 1))
            parts.append(word)
            value %= scale

    # What remains is below one hundred.
    if value in _SMALL_WORDS:
        parts.append(_SMALL_WORDS[value])
    elif value:
        # 20-99: a tens word, hyphenated with the unit when there is one.
        ones = value % 10
        tens_word = _TENS_WORDS[value - ones]
        parts.append(tens_word + "-" + _SMALL_WORDS[ones] if ones else tens_word)
    elif not parts:
        # Nothing was emitted at all, so the input itself was zero.
        parts.append("zero")

    text = " ".join(parts)
    if level == 0:
        text = text[0].upper() + text[1:]
    return text
# Demo: parse a phrase into a number, then spell that number out again.
test = "Two hundred and three thousand twenty-one"
print(f"Test: {test}")

as_number = from_english(test)
print(f"Converted: {as_number}")

as_english = to_english(as_number)
print(f"Back again: {as_english}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment