Skip to content

Instantly share code, notes, and snippets.

@reverie
Created January 27, 2019 22:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reverie/de28f63fd1dae92cb3f87307ffdcf1e5 to your computer and use it in GitHub Desktop.
Save reverie/de28f63fd1dae92cb3f87307ffdcf1e5 to your computer and use it in GitHub Desktop.
def extract_first_email(s):
    """
    Split the first address off the front of an address-list string.

    Assumes `s` is a list of email addresses split by (mixed) spaces or
    commas. An address may begin with a double-quoted display name, which is
    allowed to contain separators and '@' characters.

    Returns (first_email, rest) where first_email + rest == s,
    or (None, None) if we don't think this starts with an email address
    (no '@' at all, an unterminated quote, or no '@' after a quoted name).
    """
    # Local import keeps the function usable even when the module does not
    # import `re` at the top level.
    import re

    first_quote = s.find('"')
    first_at = s.find('@')
    if first_at == -1:
        return (None, None)
    if first_quote != -1 and first_quote < first_at:
        # The '@' found above may sit inside the quoted display name, so
        # restart the search from the closing quote.
        second_quote = s.find('"', first_quote + 1)
        if second_quote == -1:
            return (None, None)
        first_at = s.find('@', second_quote)
        if first_at == -1:
            # Nothing address-like follows the quoted name. Bail out here so
            # that -1 is never used as a slice index/offset below.
            return (None, None)
    # The address ends at the first space or comma after its '@'.
    next_separator = re.search('[ ,]', s[first_at:])
    if not next_separator:
        return (s, '')
    first_email_end = first_at + next_separator.start()
    return (s[:first_email_end], s[first_email_end:])
def split_email_line_by_spaces_or_commas(s):
    """
    Break one line of space/comma separated addresses into its pieces.

    Returns a pair of ([maybe_valid_emails], [invalid_emails]) in s.
    Addresses are peeled off the front one at a time; as soon as the
    remainder cannot be parsed, that whole remainder is recorded as a single
    invalid entry and processing of the line stops.
    """
    separators = ' ,'
    accepted, rejected = [], []
    remaining = s.strip(separators)
    while remaining:
        candidate, tail = extract_first_email(remaining)
        if candidate is not None:
            accepted.append(candidate.strip(separators))
            remaining = tail.strip(separators)
            continue
        # Unparseable remainder: keep it verbatim and give up on this line.
        rejected.append(remaining)
        break
    return accepted, rejected
def lenient_email_extractor(text):
    """
    Extract email addresses from free-form, multi-line text.

    Each line is split on (mixed) spaces and commas, and every candidate is
    then parsed with email.utils.parseaddr.

    Returns a pair (address_pairs, invalid_addresses),
    where address_pairs is of the kind returned by email.utils.parseaddr
    and invalid_addresses is a list of the raw strings that could not be
    split or parsed.

    Test case:
    '''
    16@example.com, "Andrew, Esq." <17@example.com>,
    "Mr. Bob Ross" <foo@example.com> test@foo.bar,23432@example.com,
    2235233432@example.com,dsfjkadls@example.com "Full Name with quotes and <weird@chars.com>" <weird@example.com>
    sdflkadsjfkdalfds@example.com
    1@example.com 2@example.com 3@example.com
    '''
    (note: spaces, commas, blank lines)
    """
    from email.utils import parseaddr

    address_pairs = []
    invalid_addresses = []
    for line in text.strip().splitlines():
        emails, invalid = split_email_line_by_spaces_or_commas(line)
        invalid_addresses.extend(invalid)
        for candidate in emails:
            name, real = parseaddr(candidate)
            if name == real == '':
                # parseaddr reports a parse failure as a pair of empty strings.
                invalid_addresses.append(candidate)
            else:
                address_pairs.append((name, real))
    # The documented result; without this return the function yields None.
    return address_pairs, invalid_addresses
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment