# rntz/numbers.py

## numbers.py
from talon import Context, Module, actions
from typing import List, Optional, Union, Iterator

# Talon module (used below to declare the number capture) and context.
mod = Module()
ctx = Context()

# Spoken vocabulary. Each list is in ascending numeric order, so a word's
# position in its list determines its value in the tables below.
digits = "zero one two three four five six seven eight nine".split()
teens = "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen".split()
tens = "ten twenty thirty forty fifty sixty seventy eighty ninety".split()
scales = "hundred thousand million billion trillion quadrillion quintillion sextillion septillion octillion nonillion decillion".split()

# word -> value tables, one per vocabulary class.
digits_map = dict(zip(digits, range(len(digits))))
digits_map["oh"] = 0  # spoken alias for zero (note: not added to `digits`)
teens_map = dict(zip(teens, range(11, 11 + len(teens))))
tens_map = {word: 10 * rank for rank, word in enumerate(tens, start=1)}
# "hundred" is the odd one out; every later scale is the next power of 1000.
scales_map = {
    "hundred": 100,
    **{word: 1000 ** power for power, word in enumerate(scales[1:], start=1)},
}

# Combined lookup. Insertion order (digits, "oh", teens, tens, scales) also
# fixes the order of the alternatives in `number_word`.
numbers_map = {**digits_map, **teens_map, **tens_map, **scales_map}

# Alternation over every known number word, for embedding in capture rules.
number_word = "(" + "|".join(numbers_map) + ")"

def scan_small_numbers(l: List[str]) -> Iterator[Union[str,int]]:
    """
    Takes a list of number words, yields a generator of mixed numbers & strings.
    Translates small number terms (<100) into corresponding numbers.
    Smashes digits onto tens words, eg. ["twenty", "one"] -> [21].
    But note that "ten" and "zero" are excluded, ie:
      ["ten", "three"] -> [10, 3]
      ["fifty", "zero"] -> [50, 0]
    Does nothing to "scale words" (hundred, thousand, million, etc).
    The filler word "and" is dropped, eg. ["one", "hundred", "and", "two"]
    -> [1, "hundred", 2].

    The input list is not modified. Raises KeyError for any other word that
    is not a key of numbers_map.
    """
    # Work on a private, reversed copy so that repeated pop() visits the words
    # in spoken order without consuming the caller's list. "and" is admitted by
    # the capture rule but has no numeric value, so it is filtered out here
    # instead of failing the numbers_map lookup below.
    pending = [word for word in reversed(l) if word != "and"]
    while pending:
        word = pending.pop()
        fuses_with_next = (
            pending
            and word in tens and word != "ten"
            and pending[-1] in digits and pending[-1] != "zero"
        )
        if fuses_with_next:
            # tens word + following non-zero digit, eg. "twenty" "one" -> 21
            yield numbers_map[word] + numbers_map[pending.pop()]
        elif word in scales_map:
            # scale words pass through untouched for parse_scale to handle
            yield word
        else:
            yield numbers_map[word]

def split_list(value, l: list):
    """Cut ``l`` into sub-lists at every element equal to ``value``.

    The separators themselves are dropped. The result always contains one more
    piece than there are separators, so leading, trailing or adjacent
    separators produce empty pieces, eg.

        split_list("x", [1, "x", 2, 3, "x"]) -> [[1], [2, 3], []]
    """
    pieces = []
    cursor = 0
    # count() and index() use the same equality test, so index() is guaranteed
    # to find exactly this many separators.
    for _ in range(l.count(value)):
        hit = l.index(value, cursor)
        pieces.append(l[cursor:hit])
        cursor = hit + 1
    # Whatever follows the last separator (possibly nothing).
    pieces.append(l[cursor:])
    return pieces

def parse_scale(scale: str, l: List[Union[str,int]]) -> List[Union[str,int]]:
    """Collapses every occurrence of one scale word in a mixed list of numbers
    & strings. Each occurrence is matched against the pattern

        <multiplier> <scale> <remainder>

    and replaced by the single number multiplier * scale + remainder, where
    <scale> is a scale word ("hundred", "thousand", "million", ...). The
    multiplier is the number directly before the scale word (1 if there is
    none, or if it is 0 or not a number). The remainder is built from the
    numbers directly after it, concatenated as digits while the result stays
    shorter than the scale value itself. For example:

        parse_scale("hundred", [1, "hundred", 2]) -> [102]
        parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]

    Lower-magnitude scales are assumed to be parsed already: call
    parse_scale("hundred") before parse_scale("thousand"), and so on.
    Returns a new list; raises KeyError if scale is not in scales_map.
    """
    unit = scales_map[scale]
    # A remainder must have strictly fewer digits than the scale value has
    # (at most two after "hundred", three after "thousand", ...).
    digit_limit = len(str(unit))

    # segments[0] is everything parsed so far; segments[1] is whatever followed
    # the next unprocessed scale word. Occurrences are folded left to right.
    segments = split_list(scale, l)
    while len(segments) > 1:
        head, tail = segments[0], segments[1]

        # (1) The multiplier is the last item before the scale word, provided
        # it is a non-zero number; otherwise it stays put and 1 is implied.
        if head and isinstance(head[-1], int) and head[-1] != 0:
            multiplier = head.pop()
        else:
            multiplier = 1

        # (2) Absorb leading numbers from the right-hand side, eg. in
        # [1, "thousand", 1, 26] the 1 and the 26 are glued into 126, giving
        # 1,126. Stop at the first non-number or once another item would make
        # the remainder as long as the scale value.
        remainder_text = ""
        taken = 0
        for item in tail:
            if not isinstance(item, int):
                break
            if len(remainder_text) + len(str(item)) >= digit_limit:
                break
            remainder_text += str(item)
            taken += 1
        remainder = int(remainder_text) if remainder_text else 0

        # (3) Splice the combined number between what came before and what was
        # not absorbed, and merge the two segments into one.
        merged = head + [multiplier * unit + remainder] + tail[taken:]
        segments[:2] = [merged]

    assert len(segments) == 1
    return segments[0]

@mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
def new_number(m) -> str:
    """Turn a spoken number phrase into a string of digits."""
    print("!!!!!!!!!! PHRASE: " + str(m))
    # First pass: words -> small numbers, with scale words left as strings.
    parts = list(scan_small_numbers(list(m)))
    print("!!!!!!!!!! SCANNED: " + repr(parts))
    # Second pass: fold scale words in the order of `scales` ("hundred" first),
    # since parse_scale expects lower scales to be resolved already.
    for scale_word in scales:
        parts = parse_scale(scale_word, parts)
    print("!!!!!!!!!! PARSED: " + repr(parts))
    # Whatever numbers remain side by side are written out back to back.
    return "".join(map(str, parts))

def test():
    """Print the scanned form of a sample phrase, then its "hundred" pass."""
    # TODO: more tests!
    phrase = "one hundred twenty three thousand four hundred fifty six"
    scanned = list(scan_small_numbers(phrase.split()))
    print(scanned)
    print(parse_scale("hundred", scanned))
	from talon import Context, Module, actions
	from typing import List, Optional, Union, Iterator

	# Talon module (used below to declare the number capture) and context.
	mod = Module()
	ctx = Context()

	# Spoken vocabulary. Each list is in ascending numeric order, so a word's
	# position in its list determines its value in the tables below.
	digits = "zero one two three four five six seven eight nine".split()
	teens = "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen".split()
	tens = "ten twenty thirty forty fifty sixty seventy eighty ninety".split()
	scales = "hundred thousand million billion trillion quadrillion quintillion sextillion septillion octillion nonillion decillion".split()

	# word -> value tables, one per vocabulary class.
	digits_map = {n: i for i, n in enumerate(digits)}
	digits_map["oh"] = 0  # spoken alias for zero (note: not added to `digits`)
	teens_map = {n: i + 11 for i, n in enumerate(teens)}
	tens_map = {n: 10 * (i + 1) for i, n in enumerate(tens)}
	# "hundred" is the odd one out; every later scale is the next power of 1000.
	scales_map = {"hundred": 100}
	scales_map.update({n: 10 ** (3 * (i+1)) for i, n in enumerate(scales[1:])})

	# Combined lookup over every vocabulary class.
	numbers_map = digits_map.copy()
	numbers_map.update(teens_map)
	numbers_map.update(tens_map)
	numbers_map.update(scales_map)

	# Alternation over every known number word, for embedding in capture rules.
	# The separator is a plain "|"; a backslash-escaped pipe would put literal
	# backslashes into the rule text.
	number_word = "(" + "|".join(numbers_map.keys()) + ")"

	def scan_small_numbers(l: List[str]) -> Iterator[Union[str,int]]:
	    """
	    Takes a list of number words, yields a generator of mixed numbers & strings.
	    Translates small number terms (<100) into corresponding numbers.
	    Smashes digits onto tens words, eg. ["twenty", "one"] -> [21].
	    But note that "ten" and "zero" are excluded, ie:
	      ["ten", "three"] -> [10, 3]
	      ["fifty", "zero"] -> [50, 0]
	    Does nothing to "scale words" (hundred, thousand, million, etc).
	    The filler word "and" is dropped, eg. ["one", "hundred", "and", "two"]
	    -> [1, "hundred", 2].

	    The input list is not modified. Raises KeyError for any other word that
	    is not a key of numbers_map.
	    """
	    # Work on a private, reversed copy so that repeated pop() visits the
	    # words in spoken order without consuming the caller's list. "and" is
	    # admitted by the capture rule but has no numeric value, so it is
	    # filtered out here instead of failing the numbers_map lookup below.
	    pending = [word for word in reversed(l) if word != "and"]
	    while pending:
	        word = pending.pop()
	        fuses_with_next = (
	            pending
	            and word in tens and word != "ten"
	            and pending[-1] in digits and pending[-1] != "zero"
	        )
	        if fuses_with_next:
	            # tens word + following non-zero digit, eg. "twenty" "one" -> 21
	            yield numbers_map[word] + numbers_map[pending.pop()]
	        elif word in scales_map:
	            # scale words pass through untouched for parse_scale to handle
	            yield word
	        else:
	            yield numbers_map[word]

	def split_list(value, l: list):
	    """Splits a list by occurrences of a given value.

	    The separators themselves are dropped. The result always contains one
	    more piece than there are separators, so leading, trailing or adjacent
	    separators produce empty pieces, eg.

	        split_list("x", [1, "x", 2, 3, "x"]) -> [[1], [2, 3], []]
	    """
	    result = []
	    start = 0
	    while True:
	        try:
	            i = l.index(value, start)
	        except ValueError:
	            # no further separator: fall through to emit the final piece
	            break
	        result.append(l[start:i])
	        start = i + 1
	    result.append(l[start:])
	    return result

	def parse_scale(scale: str, l: List[Union[str,int]]) -> List[Union[str,int]]:
	    """Parses a list of mixed numbers & strings for occurrences of the following
	    pattern:

	        <multiplier> <scale> <remainder>

	    where <scale> is a scale word like "hundred", "thousand", "million", etc and
	    multiplier and remainder are numbers or strings of numbers of the
	    appropriate size. For example:

	        parse_scale("hundred", [1, "hundred", 2]) -> [102]
	        parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]

	    We assume that all scales of lower magnitude have already been parsed; don't
	    call parse_scale("thousand") until you've called parse_scale("hundred").
	    Returns a new list; raises KeyError if scale is not in scales_map.
	    """
	    scale_value = scales_map[scale]
	    # A remainder must have strictly fewer digits than the scale value has.
	    scale_digits = len(str(scale_value))

	    # Split the list on the desired scale word, then parse from left to right.
	    splits = split_list(scale, l)
	    while 1 < len(splits):
	        left, right = splits[0:2]

	        # (1) figure out the multiplier by looking to the left of the scale word
	        try:
	            before = left.pop()
	        except IndexError:
	            before = 1
	        else:
	            # A zero or a non-number is not a multiplier: put it back and
	            # treat the scale word as "one <scale>".
	            if before == 0 or not isinstance(before, int):
	                left.append(before)
	                before = 1

	        # (2) absorb numbers to the right, eg. in [1, "thousand", 1, 26], "1
	        # thousand" absorbs ["1", "26"] to make 1,126. We pull numbers off
	        # `right` until we fill up the desired number of digits.
	        after = ""
	        while right and isinstance(right[0], int):
	            candidate = after + str(right[0])
	            if len(candidate) >= scale_digits:
	                break
	            after = candidate
	            right.pop(0)
	        after = int(after) if after else 0

	        # (3) Push the parsed number into place, close the gap in splits, and
	        # continue parsing.
	        left.append(before * scale_value + after)
	        left.extend(right)
	        splits[0:2] = [left]

	    assert 1 == len(splits)
	    return splits[0]

	@mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
	def new_number(m) -> str:
	    """parse a number"""
	    print("!!!!!!!!!! PHRASE: " + str(m))
	    # First pass: words -> small numbers, with scale words left as strings.
	    l = list(scan_small_numbers(list(m)))
	    print("!!!!!!!!!! SCANNED: " + repr(l))
	    # Second pass: fold scale words in the order of `scales` ("hundred"
	    # first), since parse_scale expects lower scales to be resolved already.
	    for scale in scales:
	        l = parse_scale(scale, l)
	    print("!!!!!!!!!! PARSED: " + repr(l))
	    # Whatever numbers remain side by side are written out back to back.
	    return "".join(str(n) for n in l)

	def test():
	    """Print the scanned form of a sample phrase, then its "hundred" pass."""
	    # TODO: more tests!
	    l = "one hundred twenty three thousand four hundred fifty six".split()
	    l = list(scan_small_numbers(l))
	    print(l)
	    print(parse_scale("hundred", l))