# seligman/to_and_from_english.py Secret

## to_and_from_english.py
from collections import namedtuple

# Scale words for successive powers of one thousand, in ascending order:
# the word at position k (counting from 1) stands for 1000 ** k.
_THOUSAND_POWERS = (
    'thousand', 'million', 'billion', 'trillion', 'quadrillion',
    'quintillion', 'sextillion', 'septillion', 'octillion', 'nonillion',
    'decillion', 'undecillion',
)

# Scale words that mark the end of a thousands group while parsing.
GROUPS = set(_THOUSAND_POWERS)

# Words that multiply the value before them. "hundred" multiplies too, but
# it does not end a thousands group, so it is the only non-GROUPS member.
MULTIPLIER = {'hundred'} | GROUPS

# Every recognized number word mapped to its integer value.
ALL = {'zero': 0, 'hundred': 10 ** 2}
ALL.update(
    (word, 1000 ** power)
    for power, word in enumerate(_THOUSAND_POWERS, start=1)
)
ALL.update({
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,
    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,
    'nineteen': 19, 'twenty': 20,
    'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60,
    'seventy': 70, 'eighty': 80, 'ninety': 90,
})

# Parsing token: ``val`` is the numeric value, ``type`` is 'val' for a plain
# number or 'mul' for a multiplier ("hundred", "million", ...), and ``group``
# is True for multipliers that end a thousands group.
_Word = namedtuple('Word', ['val', 'type', 'group'])

# A leading word from this set negates the parsed number.
_SIGN_WORDS = frozenset({'negative', 'minus'})


def _tokenize(text):
    """Split *text* into lowercase tokens, treating '-' and ',' as spaces."""
    return text.replace('-', ' ').replace(',', ' ').lower().split()


def _find_pair(words, second_type):
    """Return the index of the first 'val' word followed by a *second_type* word."""
    for i, (left, right) in enumerate(zip(words, words[1:])):
        if left.type == 'val' and right.type == second_type:
            return i
    return None


def _collapse(words):
    """Reduce a sequence of word tokens to a single integer.

    Repeatedly merges the first adjacent pair that can be combined: two
    plain values are added ("twenty" "one" -> 21); only when no such pair
    exists is a value multiplied by the multiplier that follows it
    ("five" "million" -> 5000000).

    Raises:
        ValueError: if the tokens do not reduce to exactly one value.
    """
    # Work on a copy so a caller-supplied list is left untouched.
    words = list(words)
    while True:
        pos = _find_pair(words, 'val')
        if pos is not None:
            merged = words[pos].val + words[pos + 1].val
        else:
            pos = _find_pair(words, 'mul')
            if pos is None:
                # Nothing left to combine
                break
            merged = words[pos].val * words[pos + 1].val
        words[pos:pos + 2] = [_Word(merged, 'val', False)]

    if len(words) != 1:
        raise ValueError("Unable to convert")
    return words[0].val


def from_english(val):
    """Convert an English number phrase to an integer.

    Hyphens and commas are treated as word separators and matching is
    case-insensitive. Words that are not number words (for example "and")
    are ignored. A leading "negative" or "minus" negates the result, so the
    "negative ..." form produced by ``to_english`` converts back with its
    sign.

    Args:
        val: The phrase to parse, e.g. "Two hundred and three thousand
            twenty-one". A list of already-built word tokens (objects with
            ``val`` and ``type`` attributes) is also accepted and is reduced
            directly.

    Returns:
        The integer value of the phrase.

    Raises:
        ValueError: if the phrase holds no number words or cannot be reduced
            to a single value. ValueError is a subclass of Exception, so
            callers catching Exception keep working.
    """
    if isinstance(val, list):
        # Already tokenized: just reduce it
        return _collapse(val)

    tokens = _tokenize(val)
    negative = bool(tokens) and tokens[0] in _SIGN_WORDS
    if negative:
        tokens = tokens[1:]

    words = []
    for token in tokens:
        if token in MULTIPLIER:
            # Multipliers in GROUPS (thousand and up) also end a group
            words.append(_Word(ALL[token], 'mul', token in GROUPS))
        elif token in ALL:
            words.append(_Word(ALL[token], 'val', False))
        # Anything else (like "and") is not a number word and is skipped

    # Cut the tokens after each group-ending multiplier and reduce every
    # group on its own, so "two hundred three thousand" becomes 203000
    # before the trailing "twenty one" is added to it.
    group_totals = []
    current = []
    for word in words:
        current.append(word)
        if word.group:
            group_totals.append(_Word(_collapse(current), 'val', False))
            current = []
    if current:
        group_totals.append(_Word(_collapse(current), 'val', False))

    total = _collapse(group_totals)
    return -total if negative else total

def to_english(value, level=0):
    """Spell out a whole number in English words.

    Args:
        value: The number to spell out. A value below zero is written with
            a leading "negative".
        level: Recursion depth. The function calls itself with level + 1 to
            spell the count in front of each scale word; only a level-0
            call capitalizes the first letter of the result.

    Returns:
        The words for the number, e.g. 203021 gives
        "Two hundred three thousand twenty-one".
    """
    ones_and_teens = {
        1: "one", 2: "two", 3: "three", 4: "four", 5: "five",
        6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten",
        11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen",
        15: "fifteen", 16: "sixteen", 17: "seventeen", 18: "eighteen",
        19: "nineteen",
    }

    decades = {
        20: "twenty", 30: "thirty", 40: "forty", 50: "fifty",
        60: "sixty", 70: "seventy", 80: "eighty", 90: "ninety",
    }

    # Largest first. "hundred" rides along as if it were one more scale.
    scales = (
        ('undecillion', 36), ('decillion', 33), ('nonillion', 30),
        ("octillion", 27), ("septillion", 24), ("sextillion", 21),
        ("quintillion", 18), ("quadrillion", 15), ("trillion", 12),
        ("billion", 9), ("million", 6), ("thousand", 3), ("hundred", 2),
    )

    pieces = []
    if value < 0:
        value = -value
        pieces.append("negative")

    if value in ones_and_teens:
        # One through nineteen has a word of its own
        pieces.append(ones_and_teens[value])
    else:
        for name, exponent in scales:
            unit = 10 ** exponent
            if value >= unit:
                # Spell how many of this unit there are, then keep going
                # with whatever is left over
                count, value = divmod(value, unit)
                pieces.append(to_english(count, level + 1) + " " + name)

        if value == 0:
            # Only say "zero" when nothing else has been said
            if not pieces:
                pieces.append("zero")
        elif value in ones_and_teens:
            pieces.append(ones_and_teens[value])
        else:
            ones = value % 10
            decade = value - ones
            if decade in decades:
                if ones in ones_and_teens:
                    # The "twenty-one" style cases
                    pieces.append(decades[decade] + "-" + ones_and_teens[ones])
                else:
                    pieces.append(decades[decade])

    text = " ".join(pieces).strip()
    if level == 0:
        text = text[0].upper() + text[1:]
    return text


# Round-trip demonstration: parse an English phrase into a number, then spell
# that number back out. These statements sit at module top level, so they run
# and print whenever this file is executed or imported.
# NOTE(review): consider moving this under an `if __name__ == "__main__":`
# guard - confirm nothing relies on the import-time output first.
test = "Two hundred and three thousand twenty-one"
print(f"Test: {test}")
# "and" is not a number word, so from_english skips over it
as_number = from_english(test)
print(f"Converted: {as_number}")
as_english = to_english(as_number)
print(f"Back again: {as_english}")
	from collections import namedtuple

	# Scale words for successive powers of one thousand, in ascending order:
	# the word at position k (counting from 1) stands for 1000 ** k.
	_THOUSAND_POWERS = (
	    'thousand', 'million', 'billion', 'trillion', 'quadrillion',
	    'quintillion', 'sextillion', 'septillion', 'octillion', 'nonillion',
	    'decillion', 'undecillion',
	)

	# Scale words that mark the end of a thousands group while parsing.
	GROUPS = set(_THOUSAND_POWERS)

	# Words that multiply the value before them. "hundred" multiplies too, but
	# it does not end a thousands group, so it is the only non-GROUPS member.
	MULTIPLIER = {'hundred'} | GROUPS

	# Every recognized number word mapped to its integer value.
	ALL = {'zero': 0, 'hundred': 10 ** 2}
	ALL.update(
	    (word, 1000 ** power)
	    for power, word in enumerate(_THOUSAND_POWERS, start=1)
	)
	ALL.update({
	    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
	    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
	    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,
	    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,
	    'nineteen': 19, 'twenty': 20,
	    'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60,
	    'seventy': 70, 'eighty': 80, 'ninety': 90,
	})

	# Parsing token: ``val`` is the numeric value, ``type`` is 'val' for a plain
	# number or 'mul' for a multiplier ("hundred", "million", ...), and ``group``
	# is True for multipliers that end a thousands group.
	_Word = namedtuple('Word', ['val', 'type', 'group'])

	# A leading word from this set negates the parsed number.
	_SIGN_WORDS = frozenset({'negative', 'minus'})


	def _tokenize(text):
	    """Split *text* into lowercase tokens, treating '-' and ',' as spaces."""
	    return text.replace('-', ' ').replace(',', ' ').lower().split()


	def _find_pair(words, second_type):
	    """Return the index of the first 'val' word followed by a *second_type* word."""
	    for i, (left, right) in enumerate(zip(words, words[1:])):
	        if left.type == 'val' and right.type == second_type:
	            return i
	    return None


	def _collapse(words):
	    """Reduce a sequence of word tokens to a single integer.

	    Repeatedly merges the first adjacent pair that can be combined: two
	    plain values are added ("twenty" "one" -> 21); only when no such pair
	    exists is a value multiplied by the multiplier that follows it
	    ("five" "million" -> 5000000).

	    Raises:
	        ValueError: if the tokens do not reduce to exactly one value.
	    """
	    # Work on a copy so a caller-supplied list is left untouched.
	    words = list(words)
	    while True:
	        pos = _find_pair(words, 'val')
	        if pos is not None:
	            merged = words[pos].val + words[pos + 1].val
	        else:
	            pos = _find_pair(words, 'mul')
	            if pos is None:
	                # Nothing left to combine
	                break
	            merged = words[pos].val * words[pos + 1].val
	        words[pos:pos + 2] = [_Word(merged, 'val', False)]

	    if len(words) != 1:
	        raise ValueError("Unable to convert")
	    return words[0].val


	def from_english(val):
	    """Convert an English number phrase to an integer.

	    Hyphens and commas are treated as word separators and matching is
	    case-insensitive. Words that are not number words (for example "and")
	    are ignored. A leading "negative" or "minus" negates the result, so the
	    "negative ..." form produced by ``to_english`` converts back with its
	    sign.

	    Args:
	        val: The phrase to parse, e.g. "Two hundred and three thousand
	            twenty-one". A list of already-built word tokens (objects with
	            ``val`` and ``type`` attributes) is also accepted and is reduced
	            directly.

	    Returns:
	        The integer value of the phrase.

	    Raises:
	        ValueError: if the phrase holds no number words or cannot be reduced
	            to a single value. ValueError is a subclass of Exception, so
	            callers catching Exception keep working.
	    """
	    if isinstance(val, list):
	        # Already tokenized: just reduce it
	        return _collapse(val)

	    tokens = _tokenize(val)
	    negative = bool(tokens) and tokens[0] in _SIGN_WORDS
	    if negative:
	        tokens = tokens[1:]

	    words = []
	    for token in tokens:
	        if token in MULTIPLIER:
	            # Multipliers in GROUPS (thousand and up) also end a group
	            words.append(_Word(ALL[token], 'mul', token in GROUPS))
	        elif token in ALL:
	            words.append(_Word(ALL[token], 'val', False))
	        # Anything else (like "and") is not a number word and is skipped

	    # Cut the tokens after each group-ending multiplier and reduce every
	    # group on its own, so "two hundred three thousand" becomes 203000
	    # before the trailing "twenty one" is added to it.
	    group_totals = []
	    current = []
	    for word in words:
	        current.append(word)
	        if word.group:
	            group_totals.append(_Word(_collapse(current), 'val', False))
	            current = []
	    if current:
	        group_totals.append(_Word(_collapse(current), 'val', False))

	    total = _collapse(group_totals)
	    return -total if negative else total

	def to_english(value, level=0):
	    """Spell out a whole number in English words.

	    Args:
	        value: The number to spell out. A value below zero is written with
	            a leading "negative".
	        level: Recursion depth. The function calls itself with level + 1 to
	            spell the count in front of each scale word; only a level-0
	            call capitalizes the first letter of the result.

	    Returns:
	        The words for the number, e.g. 203021 gives
	        "Two hundred three thousand twenty-one".
	    """
	    ones_and_teens = {
	        1: "one", 2: "two", 3: "three", 4: "four", 5: "five",
	        6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten",
	        11: "eleven", 12: "twelve", 13: "thirteen", 14: "fourteen",
	        15: "fifteen", 16: "sixteen", 17: "seventeen", 18: "eighteen",
	        19: "nineteen",
	    }

	    decades = {
	        20: "twenty", 30: "thirty", 40: "forty", 50: "fifty",
	        60: "sixty", 70: "seventy", 80: "eighty", 90: "ninety",
	    }

	    # Largest first. "hundred" rides along as if it were one more scale.
	    scales = (
	        ('undecillion', 36), ('decillion', 33), ('nonillion', 30),
	        ("octillion", 27), ("septillion", 24), ("sextillion", 21),
	        ("quintillion", 18), ("quadrillion", 15), ("trillion", 12),
	        ("billion", 9), ("million", 6), ("thousand", 3), ("hundred", 2),
	    )

	    pieces = []
	    if value < 0:
	        value = -value
	        pieces.append("negative")

	    if value in ones_and_teens:
	        # One through nineteen has a word of its own
	        pieces.append(ones_and_teens[value])
	    else:
	        for name, exponent in scales:
	            unit = 10 ** exponent
	            if value >= unit:
	                # Spell how many of this unit there are, then keep going
	                # with whatever is left over
	                count, value = divmod(value, unit)
	                pieces.append(to_english(count, level + 1) + " " + name)

	        if value == 0:
	            # Only say "zero" when nothing else has been said
	            if not pieces:
	                pieces.append("zero")
	        elif value in ones_and_teens:
	            pieces.append(ones_and_teens[value])
	        else:
	            ones = value % 10
	            decade = value - ones
	            if decade in decades:
	                if ones in ones_and_teens:
	                    # The "twenty-one" style cases
	                    pieces.append(decades[decade] + "-" + ones_and_teens[ones])
	                else:
	                    pieces.append(decades[decade])

	    text = " ".join(pieces).strip()
	    if level == 0:
	        text = text[0].upper() + text[1:]
	    return text


	# Round-trip demonstration: parse an English phrase into a number, then
	# spell that number back out, printing each step.
	# NOTE(review): this repeats the demo at L170-175 with a leading tab on
	# every line - looks like a paste artifact; confirm whether it is needed.
	test = "Two hundred and three thousand twenty-one"
	print(f"Test: {test}")
	as_number = from_english(test)
	print(f"Converted: {as_number}")
	as_english = to_english(as_number)
	print(f"Back again: {as_english}")