Created
July 24, 2020 08:02
-
-
Save ChenyangGao/c98cbae611d0dd5c2800853ea65f9e17 to your computer and use it in GitHub Desktop.
English numerals to Arabic numerals
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from types import MappingProxyType

__all__ = ['str2num']
# Read-only lookup of the "base" numeral words: every value from 1 to 19
# plus the multiples of ten from 20 to 90.  'zero' is intentionally absent.
NUMBase = MappingProxyType({
    'one': 1,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9,
    'ten': 10,
    'eleven': 11,
    'twelve': 12,
    'thirteen': 13,
    'fourteen': 14,
    'fifteen': 15,
    'sixteen': 16,
    'seventeen': 17,
    'eighteen': 18,
    'nineteen': 19,
    'twenty': 20,
    'thirty': 30,
    'forty': 40,
    'fifty': 50,
    'sixty': 60,
    'seventy': 70,
    'eighty': 80,
    'ninety': 90,
})
# Read-only lookup of the multiplier ("unit") words and their magnitudes.
NUMUnit = MappingProxyType({
    'hundred': 100,
    'thousand': 1_000,
    'million': 1_000_000,
    'billion': 1_000_000_000,
    'trillion': 1_000_000_000_000,
})
def nty_n(s):
    """Return the value of a hyphenated tens-ones numeral such as 'forty-two'.

    The word before the hyphen must be a multiple of ten from 'twenty' to
    'ninety' and the word after it must be one of 'one' .. 'nine'.

    Raises:
        ValueError: if *s* does not consist of exactly two hyphen-separated
            words, either word is not in NUMBase, or the pair is not a
            valid tens-ones combination (e.g. 'ten-one', 'twenty-twenty').
            The offending string is the exception argument.
    """
    try:
        tens_word, ones_word = s.split('-')
        tens = NUMBase[tens_word]
        ones = NUMBase[ones_word]
    except (KeyError, ValueError) as exc:
        raise ValueError(s) from exc
    # NUMBase values >= 20 are exactly the multiples of ten, and values <= 9
    # are exactly the single digits, so these two bounds are sufficient.
    if tens < 20 or ones > 9:
        raise ValueError(s)
    return tens + ones
def str2num(numstr: str) -> int:
    """Convert an English numeral phrase to the integer it spells.

    The phrase is case-insensitive and split on whitespace.  Each word is
    one of:

    * 'negative' / 'minus' -- only allowed as the very first word;
    * 'and'                -- only allowed directly after a unit word and
                              must be followed by a base word;
    * a unit word          -- a key of NUMUnit ('hundred', 'thousand', ...);
    * a base word          -- a key of NUMBase, or a hyphenated compound
                              accepted by nty_n (e.g. 'twenty-one').

    Examples: 'one hundred and twenty-three' -> 123,
    'minus two thousand five' -> -2005, 'zero' -> 0.

    Raises:
        ValueError: if the phrase is empty, contains an unknown word, or
            the words appear in an order that is not accepted.
    """
    numstr = numstr.strip().lower()
    if numstr == 'zero':
        return 0

    unitstack = []    # magnitudes of the groups closed so far (top is smallest)
    valstack = []     # value of each closed group, parallel to unitstack
    prev = None       # kind of the previous word: None/'negative'/'and'/'unit'/'base'
    pending = 0       # last base value, not yet multiplied by a unit
    negative = False

    for word in numstr.split():
        if word in ('negative', 'minus'):
            if prev is not None:
                raise ValueError("type 'negative' must be the first")
            negative = True
            prev = 'negative'
        elif word == 'and':
            if prev != 'unit':
                raise ValueError("type 'and' must follow type 'unit'")
            prev = 'and'
        elif word in NUMUnit:
            if prev is None or prev == 'negative':
                raise ValueError("type 'unit' cannot be the first or follow type 'negative'")
            if prev == 'and':
                # Nothing fresh to multiply here; without this check a stale
                # value from an earlier group would be reused silently.
                raise ValueError("type 'unit' cannot follow type 'and'")
            unit = NUMUnit[word]
            if prev == 'unit':
                if unitstack and unit <= unitstack[-1]:
                    raise ValueError("two consecutive type 'unit', the back must be greater than the front")
                group = 0
            else:
                # prev == 'base': the unit scales the base value just read.
                group = pending
            # A larger unit absorbs every smaller group already closed,
            # e.g. 'one hundred twenty thousand' -> (100 + 20) * 1000.
            while unitstack and unit > unitstack[-1]:
                unitstack.pop()
                group += valstack.pop()
            if unitstack and unit == unitstack[-1]:
                raise ValueError("adjacent equivalent type 'unit' is not allowed")
            unitstack.append(unit)
            valstack.append(group * unit)
            prev = 'unit'
        else:
            if prev == 'base':
                raise ValueError("consecutive type 'base' is not allowed")
            # Anything that is not a plain base word must be a hyphenated
            # compound; nty_n raises ValueError for unknown words.
            pending = NUMBase[word] if word in NUMBase else nty_n(word)
            prev = 'base'

    if prev == 'and':
        raise ValueError("type 'and' cannot be the last")
    if prev == 'base':
        # Trailing base value that no unit consumed.
        valstack.append(pending)
    if not valstack:
        # Empty input, or a lone 'negative' / 'minus'.
        raise ValueError('What did I do?')
    total = sum(valstack)
    return -total if negative else total
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
JavaScript Version