Skip to content

Instantly share code, notes, and snippets.

@rntz
Created November 9, 2020 23:14
Show Gist options
  • Save rntz/77c2254ebe3dc3983abb236e377e78bf to your computer and use it in GitHub Desktop.
Save rntz/77c2254ebe3dc3983abb236e377e78bf to your computer and use it in GitHub Desktop.
from talon import Context, Module, actions
from typing import List, Optional, Union, Iterator
# Talon registration objects. `mod` is the Module whose `capture` decorator
# registers the spoken-number capture defined further down in this file.
mod = Module()
# NOTE(review): `ctx` is not referenced anywhere in the visible code —
# presumably kept for context-specific overrides; confirm before removing.
ctx = Context()
# Spoken vocabulary, grouped by the role each word plays when parsing.
digits = [
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
]
teens = [
    "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen",
]
tens = [
    "ten", "twenty", "thirty", "forty", "fifty",
    "sixty", "seventy", "eighty", "ninety",
]
scales = [
    "hundred", "thousand", "million", "billion", "trillion",
    "quadrillion", "quintillion", "sextillion", "septillion",
    "octillion", "nonillion", "decillion",
]

# Word -> value tables. "oh" is accepted as a spoken alias for 0; it lives
# only in the maps, not in the `digits` list.
digits_map = dict(zip(digits, range(len(digits))))
digits_map["oh"] = 0
teens_map = dict(zip(teens, range(11, 11 + len(teens))))
tens_map = dict(zip(tens, range(10, 10 * (len(tens) + 1), 10)))
# "hundred" is the only scale that is not a power of 1000.
scales_map = {
    "hundred": 100,
    **{word: 1000 ** power for power, word in enumerate(scales[1:], start=1)},
}

# Every recognised word, in insertion order: digits, "oh", teens, tens, scales.
numbers_map = {**digits_map, **teens_map, **tens_map, **scales_map}

# Alternation of all number words, used to build the capture rule.
number_word = "(" + "|".join(numbers_map) + ")"
def scan_small_numbers(l: List[str]) -> Iterator[Union[str,int]]:
    """
    Takes a list of number words, yields a generator of mixed numbers & strings.
    Translates small number terms (<100) into corresponding numbers.
    Smashes digits onto tens words, eg. ["twenty", "one"] -> [21].
    But note that "ten" and "zero" are excluded, ie:
      ["ten", "three"] -> [10, 3]
      ["fifty", "zero"] -> [50, 0]
    Does nothing to "scale words" (hundred, thousand, million, etc).

    The filler word "and" is skipped, eg. ["hundred", "and", "five"] yields
    "hundred", 5. The caller's list is left untouched.

    Raises KeyError for any other word that is not in numbers_map.
    """
    # Work on a reversed private copy so that repeated pop() visits the words
    # in spoken order without reversing/consuming the caller's list. "and" is
    # dropped here because the capture rule allows it between word groups but
    # it has no entry in numbers_map and carries no numeric value.
    l = [x for x in reversed(l) if x != "and"]
    while l:
        n = l.pop()
        # A tens word (other than "ten") followed by a non-zero digit word
        # fuses into a single two-digit number. "oh" is not in `digits`, so
        # it never fuses either.
        if l and n in tens and l[-1] in digits and n != "ten" and l[-1] != "zero":
            d = l.pop()
            yield numbers_map[n] + numbers_map[d]
        elif n not in scales_map:
            yield numbers_map[n]
        else:
            # Scale words are passed through as strings for parse_scale.
            yield n
def split_list(value, l: list):
    """Break `l` into sub-lists at every element equal to `value`.

    The separators themselves are dropped. The result always holds one more
    piece than there are separators, so an empty input gives `[[]]` and
    adjacent separators give empty pieces.
    """
    pieces = []
    piece_start = 0
    for position, item in enumerate(l):
        # Same matching rule as list.index: identity first, then equality.
        if item is value or item == value:
            pieces.append(l[piece_start:position])
            piece_start = position + 1
    # Whatever follows the last separator (possibly nothing).
    pieces.append(l[piece_start:])
    return pieces
def parse_scale(scale: str, l: List[Union[str,int]]) -> List[Union[str,int]]:
    """Collapse every occurrence of one scale word in a mixed token list.

    Looks for the shape
      <multiplier> <scale> <remainder>
    where <scale> is the given scale word ("hundred", "thousand", ...), and
    replaces it with a single integer. For example:
      parse_scale("hundred", [1, "hundred", 2]) -> [102]
      parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]

    Lower scales must already have been collapsed: run "hundred" before
    "thousand", "thousand" before "million", and so on.

    Returns a new list; `l` itself is not modified. Raises KeyError if
    `scale` is not a key of scales_map.
    """
    unit = scales_map[scale]
    unit_width = len(str(unit))

    # Cut the tokens at each scale word, then fold the pieces together from
    # left to right, emitting one number per cut.
    segments = split_list(scale, l)
    merged = segments[0]
    for tail in segments[1:]:
        # (1) Multiplier: the token just before the scale word, provided it
        # is a non-zero int. Otherwise it stays put and the multiplier is 1.
        multiplier = 1
        if merged:
            candidate = merged[-1]
            if isinstance(candidate, int) and candidate != 0:
                multiplier = merged.pop()

        # (2) Remainder: glue the decimal digits of the ints that follow the
        # scale word, stopping before the glued text would reach the width
        # of the scale itself (eg. "thousand" takes at most three digits).
        glued = ""
        consumed = 0
        for token in tail:
            if not isinstance(token, int):
                break
            widened = glued + str(token)
            if len(widened) >= unit_width:
                break
            glued = widened
            consumed += 1
        remainder = int(glued) if glued else 0

        # (3) Emit the combined number, then carry over whatever was not
        # absorbed so the next occurrence can use it as its multiplier.
        merged.append(multiplier * unit + remainder)
        merged.extend(tail[consumed:])
    return merged
@mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
def new_number(m) -> str:
    """Parse a spoken number phrase into a string of digits."""
    print("!!!!!!!!!! PHRASE: " + str(m))
    # First pass: turn digit/teen/tens words into ints, leave scale words.
    tokens = [*scan_small_numbers(list(m))]
    print("!!!!!!!!!! SCANNED: " + repr(tokens))
    # Second pass: collapse scale words, smallest scale first.
    for scale_word in scales:
        tokens = parse_scale(scale_word, tokens)
    print("!!!!!!!!!! PARSED: " + repr(tokens))
    # Whatever numbers remain are written side by side.
    return "".join(map(str, tokens))
def test():
    """Print the scan and the "hundred" pass for one sample phrase."""
    # TODO: more tests!
    phrase = "one hundred twenty three thousand four hundred fifty six"
    scanned = list(scan_small_numbers(phrase.split()))
    print(scanned)
    print(parse_scale("hundred", scanned))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment