Skip to content

Instantly share code, notes, and snippets.

@theyorubayesian
Last active May 19, 2021 23:55
Show Gist options
  • Save theyorubayesian/bbefc37733f84f56c9eee3ab15affd16 to your computer and use it in GitHub Desktop.
Save theyorubayesian/bbefc37733f84f56c9eee3ab15affd16 to your computer and use it in GitHub Desktop.
import re
# Maps each recognised currency symbol to the plural word that replaces it.
CURRENCY_LIST = {"£": "pounds", "$": "dollars", "€": "euros"}

# Magnitude abbreviations that may directly follow an amount (matched
# case-insensitively) and the words they expand to.
_MAGNITUDES = {
    "k": "thousand",
    "m": "million",
    "mn": "million",
    "b": "billion",
    "bn": "billion",
}

# One or more currency symbols. Symbols are escaped so that adding one that
# is special inside a character class (e.g. "]" or "^") cannot break the
# pattern.
_SYMBOL_RUN = "[" + "".join(re.escape(symbol) for symbol in CURRENCY_LIST) + "]+"

# Digits with any number of comma groups, optional decimals, and at most one
# magnitude abbreviation. The negative lookahead stops the first letter of an
# adjacent word from being read as an abbreviation (e.g. "$20bucks").
_AMOUNT = r"\d+(?:,\d+)*(?:\.\d+)?(?:(?i:bn|mn|[kmb])(?![A-Za-z]))?"

# Three capture groups, tried in this order: symbol(s) before the amount,
# symbol(s) after the amount, and symbol(s) on their own.
CURRENCY_PATTERN = (
    fr"({_SYMBOL_RUN}{_AMOUNT})|({_AMOUNT}{_SYMBOL_RUN})|({_SYMBOL_RUN})"
)

# Compiled once at import time instead of on every call.
_CURRENCY_RE = re.compile(CURRENCY_PATTERN)


def _amount_to_words(amount: str) -> str:
    """Drop thousands separators and expand a trailing magnitude abbreviation.

    ``"100,000.50"`` becomes ``"100000.50"`` and ``"25K"`` becomes
    ``"25 thousand"``. An empty string is returned unchanged.
    """
    digits = amount.replace(",", "")
    # The numeric part never contains letters, so stripping the abbreviation
    # letters from the right isolates it.
    number = digits.rstrip("kmbnKMBN")
    suffix = digits[len(number):].lower()
    if not suffix:
        return number
    return f"{number} {_MAGNITUDES[suffix]}"


def _spell_out(match: "re.Match[str]") -> str:
    """Return the replacement text for one ``CURRENCY_PATTERN`` match."""
    token = match.group(0)
    symbols = "".join(CURRENCY_LIST)
    if token[0] in CURRENCY_LIST:
        # Symbol(s) first, e.g. "$2.50" or a lone "$"; the first symbol wins.
        symbol = token[0]
        amount = token.lstrip(symbols)
    else:
        # Amount first, e.g. "25k$"; the first symbol after the amount wins.
        amount = token.rstrip(symbols)
        symbol = token[len(amount)]
    # A lone symbol has an empty amount, so it becomes " dollars" with a
    # leading space, which keeps "US$" readable as "US dollars".
    return f"{_amount_to_words(amount)} {CURRENCY_LIST[symbol]}"


def _currency_to_text(text: str) -> str:
    """Rewrite currency amounts in ``text`` as words.

    Every amount attached to a symbol from ``CURRENCY_LIST`` (before or after
    the digits) is replaced by the number, with commas removed and any
    ``k``/``m``/``mn``/``b``/``bn`` abbreviation expanded, followed by the
    currency name, e.g. ``"$1B"`` -> ``"1 billion dollars"``.

    Args:
        text: The sentence to normalise.

    Returns:
        ``text`` with each currency expression replaced; unchanged when it
        contains none.
    """
    # Substituting match-by-match (rather than str.replace on the whole text)
    # guarantees a short match such as "$1" cannot corrupt a longer one such
    # as "$10" elsewhere in the sentence.
    return _CURRENCY_RE.sub(_spell_out, text)
# Demo: print a few sample sentences after currency normalisation.
# The guard compares the module's __name__ variable (not the string literal
# '__name__'), so the demo actually runs when the file is executed directly.
if __name__ == "__main__":
    sentences = [
        "and the super cheap add on of great muffins too! large + muffin $2.50, that's awesome!",
        # The comma after this entry keeps it separate from the next one;
        # adjacent string literals would otherwise be concatenated.
        "You wouldn't worry so much if you had $1B, you know?",
        "How about you pay 25k$ for it?",
        "My tuition was $100,000.50. I paid through my nose.",
    ]
    for sent in sentences:
        print(_currency_to_text(sent))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment