Skip to content

Instantly share code, notes, and snippets.

@sidharthms
Created February 1, 2018 20:46
Show Gist options
  • Save sidharthms/07557018bfc4638995b15300f11b6d08 to your computer and use it in GitHub Desktop.
Save sidharthms/07557018bfc4638995b15300f11b6d08 to your computer and use it in GitHub Desktop.
# Replace the standard-library `import re` with the third-party `regex` module:
import regex as re
# Matches a dimension expression in one of two forms:
#   * group 1 -- two or more numbers separated by "x", each optionally
#     followed by a lowercase unit, e.g. "10 in x 4.5 in x 2 in";
#   * group 8 -- a numeric range, "<group 9> to <group 11>", e.g. "5 to 10".
# No flags are set, so matching is case-sensitive (lowercase "x", "to" and
# unit letters only).
re_dim = re.compile(r'(((\d+(\.\d+)?)\s*[a-z]*\s*x\s*)+(\d+(\.\d+)?)(\s*[a-z]+)?)|((\d+(\.\d+)?)\s*to\s*(\d+(\.\d+)?))')

# A single integer or decimal number, written the same way as inside re_dim.
_re_number = re.compile(r'\d+(?:\.\d+)?')


def dims_clean(text):
    """Return the dimension expressions found in *text* in normalised form.

    Every match of ``re_dim`` is reduced to its numbers only: an "x"
    expression such as ``"10 in x 4.5 in"`` becomes ``"10x4.5"`` and a range
    such as ``"5 to 10"`` becomes ``"5to10"``. The normalised matches are
    joined with single spaces in the order they occur.

    Only the ``group``/``finditer``/``findall`` API is used, so the function
    behaves the same whether ``re`` is the standard library module or the
    ``regex`` package.

    Args:
        text: The text to scan. Any non-``str`` value (e.g. ``None`` or a
            float) is treated as having no dimensions.

    Returns:
        The space-separated normalised dimensions, or ``''`` when *text* is
        not a string or contains no match.
    """
    if not isinstance(text, str):
        return ''

    found = []
    for m in re_dim.finditer(text):
        product = m.group(1)
        if product is not None:
            # Units, "x" and whitespace contain no digits, so the numbers in
            # the matched span are exactly the dimensions, in order.
            found.append('x'.join(_re_number.findall(product)))
        else:
            found.append('to'.join((m.group(9), m.group(11))))
    return ' '.join(found)
# Derive the normalised dimension string for every product description.
tableA['extracted_dimensions'] = tableA['product_long_description'].apply(dims_clean)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment