Skip to content

Instantly share code, notes, and snippets.

@djds23
Last active August 29, 2015 13:56
Show Gist options
  • Save djds23/9216747 to your computer and use it in GitHub Desktop.
Save djds23/9216747 to your computer and use it in GitHub Desktop.
Parses text to find email addresses; accepts either a text file path or raw interactive input.
import re
import sys
# Email regex taken from django/django
# https://github.com/django/django/blob/master/django/core/validators.py#L137
#
# The pattern is anchored with ^ ... $, so it validates one whole candidate
# string rather than searching inside longer text. Matching is
# case-insensitive (re.IGNORECASE), which is why only A-Z appears below.
email_re = re.compile(
    # Local part, form 1: dot-atom -- runs of atom characters separated by
    # single dots.
    r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"
    # Local part, form 2: quoted-string. A backslash may escape any character
    # in \001-\177 except LF (\012) and CR (\015). The upper bound of the
    # first escape range must be written \011 (with the backslash); a bare
    # "011" would turn the range into \001-'0' and let LF/CR through.
    r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"'
    # Domain: one or more dot-terminated labels of 1-63 characters, then a
    # 2-6 letter top-level domain and an optional trailing dot.
    r')@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?$',
    re.IGNORECASE)
def parse_emails(string):
    """Return the whitespace-delimited tokens of *string* that are emails.

    The text is split on whitespace and each token is tested against the
    module-level ``email_re`` pattern. Because that pattern is anchored at
    both ends, a token only counts when the whole token is an address; a
    token with other punctuation attached (e.g. a trailing comma) is not
    reported.

    Args:
        string: Text to scan.

    Returns:
        A list of the matching tokens in their original order (duplicates
        are kept); an empty list when nothing matches.
    """
    # A comprehension yields a real list on both Python 2 and Python 3,
    # unlike map()/filter(), which are lazy one-shot iterators on Python 3.
    return [word for word in string.split() if email_re.match(word)]
if __name__ == '__main__':
    # Command-line entry point.
    #   * With a file path as the first argument: scan that file and print
    #     one list containing every email found.
    #   * Without an argument (or with an empty one): prompt repeatedly and
    #     print the emails found in each line the user enters.
    filename = sys.argv[1] if len(sys.argv) > 1 else ''

    if filename:
        bucket = []
        # Plain 'r' is enough: tokens are split on any whitespace, so the
        # newline convention of the file does not affect the result.
        with open(filename, 'r') as f:
            for line in f:
                bucket.extend(parse_emails(line))
        print(bucket)
    else:
        # raw_input() exists only on Python 2; input() is its Python 3
        # equivalent.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        print('Please use Ctrl-C to quit')
        try:
            while True:
                from_user = read_line('paste text here to extract emails: ')
                # list() keeps the printed output a list even if
                # parse_emails hands back a lazy iterable.
                print(list(parse_emails(from_user)))
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C is the advertised way out (and EOF means stdin is
            # exhausted): finish the prompt line and exit without a traceback.
            print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment