Last active
May 19, 2021 23:55
-
-
Save theyorubayesian/bbefc37733f84f56c9eee3ab15affd16 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Currency symbol -> spoken name used in the rewritten text.
CURRENCY_LIST = {"£": "pounds", "$": "dollars", "€": "euros"}

# Magnitude suffix (lower-cased) -> spoken multiplier.
_MAGNITUDES = {
    "k": "thousand",
    "m": "million",
    "mn": "million",
    "b": "billion",
    "bn": "billion",
}

# Raw symbols (for str.strip) and the same symbols as an escaped regex
# character class, so adding a regex-special symbol to CURRENCY_LIST is safe.
_SYMBOLS = "".join(CURRENCY_LIST)
_SYMBOL_CLASS = "[" + "".join(re.escape(s) for s in CURRENCY_LIST) + "]"

# Digits with any number of comma-separated groups and an optional fraction.
_NUMBER = r"\d+(?:,\d+)*(?:\.\d+)?"
# Two-letter suffixes are listed first so "bn"/"mn" win over "b"/"m".
_MAGNITUDE = r"(?i:bn|mn|[kmb])"

# Three capture groups, in this order:
#   1. symbol(s) before the amount        e.g. "$2.50", "$1B"
#   2. symbol(s) after the amount         e.g. "25k$"
#   3. symbol(s) with no adjacent amount  e.g. "$"
# In the prefixed form a magnitude letter only counts when it is not the start
# of a longer word, so "$20bucks" is not read as "20 billion".
CURRENCY_PATTERN = (
    rf"({_SYMBOL_CLASS}+{_NUMBER}(?:{_MAGNITUDE}(?![A-Za-z]))?)"
    rf"|({_NUMBER}{_MAGNITUDE}?{_SYMBOL_CLASS}+)"
    rf"|({_SYMBOL_CLASS}+)"
)

# Compiled once at import time instead of on every call.
_CURRENCY_RE = re.compile(CURRENCY_PATTERN)


def _spell_out_amount(match: "re.Match[str]") -> str:
    """Turn one CURRENCY_PATTERN match into '<amount> [<magnitude>] <currency>'."""
    token = match.group(0)
    # When several symbols are adjacent, the first one decides the currency.
    symbol = next(ch for ch in token if ch in CURRENCY_LIST)
    amount = token.strip(_SYMBOLS)
    # What is left is digits/commas/dot plus an optional magnitude suffix.
    number = amount.rstrip("kKmMbBnN")
    suffix = amount[len(number):].lower()
    words = number.replace(",", "")
    if suffix:
        words = f"{words} {_MAGNITUDES[suffix]}"
    # A bare symbol has an empty amount and therefore yields " <currency>".
    return f"{words} {CURRENCY_LIST[symbol]}"


def _currency_to_text(text: str) -> str:
    """Replace currency amounts in *text* with a spelled-out form.

    Thousands separators are dropped, a trailing magnitude suffix
    (k, m, mn, b, bn; case-insensitive) is expanded to a word, and the
    currency symbol is replaced by its name from CURRENCY_LIST placed after
    the amount, e.g. "$1B" -> "1 billion dollars", "25k$" ->
    "25 thousand dollars".

    Each match is rewritten in place, so one amount that is a prefix of
    another ("$1" and "$10") cannot corrupt the other.

    Args:
        text: Input text, possibly containing currency amounts.

    Returns:
        The text with every recognised currency amount rewritten; text
        without any currency symbol is returned unchanged.
    """
    return _CURRENCY_RE.sub(_spell_out_amount, text)
# Demo: print the converted form of a few sample sentences when this file is
# run as a script. The guard must test the module's __name__ variable; the
# quoted literal '__name__' never equals '__main__', so the demo never ran.
if __name__ == '__main__':
    sentences = [
        "and the super cheap add on of great muffins too! large + muffin $2.50, that's awesome!",
        # Without the trailing comma, this sentence and the next one are
        # silently concatenated into a single list element.
        "You wouldn't worry so much if you had $1B, you know?",
        "How about you pay 25k$ for it?",
        "My tuition was $100,000.50. I paid through my nose.",
    ]
    for sent in sentences:
        print(_currency_to_text(sent))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment