Skip to content

Instantly share code, notes, and snippets.

@ftfarias
Last active June 16, 2017 19:49
Show Gist options
  • Save ftfarias/79ab57c54e01d89e41dd573f36853f4c to your computer and use it in GitHub Desktop.
Save ftfarias/79ab57c54e01d89e41dd573f36853f4c to your computer and use it in GitHub Desktop.
import re
# Example: pull the words out of a sentence, dropping punctuation.
# The character class keeps apostrophes so contractions stay in one piece.
DATA = "Hey, you - what are you doing here!?"
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(re.findall(r"[\w']+", DATA))
# Prints ['Hey', 'you', 'what', 'are', 'you', 'doing', 'here']
>>> re.split(r'\W+', 'Words, words, words.')
['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
# Treat underscores and equals signs as word separators. This runs before the
# placeholder substitutions below, so the underscores inside the inserted
# _TAMANHO_ / _URL_ tokens are not touched.
text = text.replace('_', ' ')
text = text.replace('=', ' ')
# Collapse dimensions such as "10x20", "10 x 20 x 30" or "10cm x 20 mm" into a
# single _TAMANHO_ token. One unit-aware pattern covers every form: applying a
# unit-less pattern first would consume only "10 x 20" out of "10 x 20cm" or
# "10 x 20 x 30" and leave the remainder behind as stray text.
# The trailing unit requires a word boundary so that "10x20 cmake" keeps the
# word "cmake" intact.
text = re.sub(
    r'[0-9]{1,3}\s?(?:cm|mm)?\s?x\s?[0-9]{1,3}'
    r'(?:\s?(?:cm|mm)?\s?x\s?[0-9]{1,3})?'
    r'(?:\s?(?:cm|mm)\b)?',
    ' _TAMANHO_ ',
    text
)
# Replace "www."-prefixed URLs (scheme optional) with a _URL_ token. The dot
# after "www" is escaped so it only matches a literal "."; unescaped, it would
# match any character and turn words that merely start with "www" into _URL_.
# NOTE: "$-_" inside the character class is a range (ASCII "$" through "_");
# it is what allows "/", ":" and "?" in the URL, so it is kept as written.
text = re.sub(
    r'(http[s]?://)?www\.([a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    ' _URL_ ',
    text
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment