Created
November 9, 2020 23:14
-
-
Save rntz/77c2254ebe3dc3983abb236e377e78bf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from talon import Context, Module, actions | |
from typing import List, Optional, Union, Iterator | |
# Talon module object; the `new_number` capture defined later in this file is
# registered on it through the `@mod.capture` decorator.
mod = Module()
# Talon context object. Nothing in the visible part of this file refers to it.
ctx = Context()
# Spoken vocabulary, grouped by the role each word plays inside a number.
digits = "zero one two three four five six seven eight nine".split()
teens = "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen".split()
tens = "ten twenty thirty forty fifty sixty seventy eighty ninety".split()
scales = "hundred thousand million billion trillion quadrillion quintillion sextillion septillion octillion nonillion decillion".split()

# Word -> value lookup tables, one per vocabulary group.
digits_map = {word: value for value, word in enumerate(digits)}
digits_map["oh"] = 0  # spoken alias for zero; deliberately absent from `digits`
teens_map = {word: value for value, word in enumerate(teens, start=11)}
tens_map = {word: 10 * step for step, word in enumerate(tens, start=1)}
# "hundred" is the odd one out; every later scale word is a power of 1000.
scales_map = {
    "hundred": 100,
    **{word: 1000 ** power for power, word in enumerate(scales[1:], start=1)},
}

# Every recognised word in one table (digits, "oh", teens, tens, scales, in
# that insertion order).
numbers_map = {**digits_map, **teens_map, **tens_map, **scales_map}

# Alternation of every known word, for splicing into a capture rule.
number_word = f"({'|'.join(numbers_map)})"
def scan_small_numbers(l: List[str]) -> Iterator[Union[str, int]]:
    """
    Scan a list of number words, yielding a mix of numbers and strings.

    Translates small number terms (<100) into corresponding numbers.
    Smashes digits onto tens words, eg. ["twenty", "one"] -> [21].
    But note that "ten" and "zero" are excluded, ie:
      ["ten", "three"] -> [10, 3]
      ["fifty", "zero"] -> [50, 0]
    Does nothing to "scale words" (hundred, thousand, million, etc); those
    are yielded unchanged as strings.

    The input list is only read, never modified. Raises KeyError for a word
    that is not a key of numbers_map.
    """
    # Work on a private snapshot so the caller's list is left intact.
    words = list(l)
    total = len(words)
    position = 0
    while position < total:
        word = words[position]
        position += 1
        follower = words[position] if position < total else None
        # Membership is tested against the `digits` list rather than
        # digits_map, so the alias "oh" is never smashed onto a tens word.
        smashable = (
            word in tens
            and word != "ten"
            and follower in digits
            and follower != "zero"
        )
        if smashable:
            position += 1  # the digit word is consumed together with the tens word
            yield numbers_map[word] + numbers_map[follower]
        elif word in scales_map:
            yield word
        else:
            yield numbers_map[word]
def split_list(value, l: list):
    """Break `l` into the runs of items that lie between occurrences of `value`.

    The separators themselves are dropped. The result always holds one more
    piece than there are separators, so leading, trailing or adjacent
    separators produce empty pieces. Every piece is a new list.
    """
    pieces = [[]]
    for item in l:
        # Identity first, then equality: the same match test list.index uses.
        if item is value or item == value:
            pieces.append([])
        else:
            pieces[-1].append(item)
    return pieces
def parse_scale(scale: str, l: List[Union[str,int]]) -> List[Union[str,int]]:
    """Collapse every occurrence of one scale word in a mixed number/string list.

    Each occurrence of the pattern

      <multiplier> <scale> <remainder>

    is replaced by a single number, where <scale> is a scale word such as
    "hundred", "thousand" or "million", and multiplier and remainder are
    numbers (or runs of numbers) of the appropriate size. For example:

      parse_scale("hundred", [1, "hundred", 2]) -> [102]
      parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]

    Scales of lower magnitude are assumed to have been handled already: do
    not call parse_scale("thousand") before parse_scale("hundred").

    Returns a new list; `l` itself is not modified.
    """
    unit = scales_map[scale]
    # Absorbed remainder digits must stay strictly shorter than this.
    width_limit = len(str(unit))

    # Cut the input at each scale word, then fold the pieces together from
    # left to right into the first piece.
    segments = split_list(scale, l)
    merged = segments[0]
    for tail in segments[1:]:
        # (1) Multiplier: the item just before the scale word, provided it is
        # a non-zero int. Anything else stays where it is and 1 is used.
        multiplier = 1
        if merged:
            candidate = merged[-1]
            if isinstance(candidate, int) and candidate != 0:
                multiplier = merged.pop()

        # (2) Remainder: glue the decimal text of the leading ints after the
        # scale word together, eg. in [1, "thousand", 1, 26] the thousand
        # absorbs 1 and 26 to make 1,126. Stop at the first non-int, or as
        # soon as the glued text would reach the width limit.
        glued = ""
        consumed = 0
        for item in tail:
            if not isinstance(item, int):
                break
            trial = glued + str(item)
            if len(trial) >= width_limit:
                break
            glued = trial
            consumed += 1
        remainder = int(glued) if glued else 0

        # (3) Emit the combined number, then carry over whatever was not
        # absorbed so later occurrences can use it.
        merged.append(multiplier * unit + remainder)
        merged.extend(tail[consumed:])
    return merged
@mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
def new_number(m) -> str:
    """Parse a spoken number phrase into a string of decimal digits.

    The words of `m` are scanned into small numbers and scale words, each
    scale in `scales` is then folded in order (smallest first), and whatever
    pieces remain are concatenated as text. Progress is printed at each
    stage for debugging.
    """
    print("!!!!!!!!!! PHRASE: " + str(m))
    # The rule allows a literal "and" between groups of number words. It has
    # no numeric value and is not a key of numbers_map, so it must be dropped
    # here; otherwise scan_small_numbers raises KeyError on it.
    # NOTE(review): assumes iterating `m` yields the matched words as
    # strings, as the original list(m) call already did. Confirm.
    words = [word for word in m if word != "and"]
    l = list(scan_small_numbers(words))
    print("!!!!!!!!!! SCANNED: " + repr(l))
    for scale in scales:
        l = parse_scale(scale, l)
    print("!!!!!!!!!! PARSED: " + repr(l))
    return "".join(str(n) for n in l)
def test():
    """Manual smoke check: print the scanned words, then the "hundred" pass."""
    # TODO: more tests!
    phrase = "one hundred twenty three thousand four hundred fifty six"
    scanned = list(scan_small_numbers(phrase.split()))
    print(scanned)
    hundreds_folded = parse_scale("hundred", scanned)
    print(hundreds_folded)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment