Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
def extract_first_email(s):
    """Split the first email address off the front of `s`.

    Assumes `s` is a list of email addresses separated by (mixed) spaces
    or commas. An address may be preceded by a double-quoted display
    name, which is allowed to contain separators and `@` characters.

    Returns `(first_email, rest)` where `first_email + rest == s`,
    or `(None, None)` if `s` does not look like it starts with an email
    address (no `@`, an unterminated quote, or no `@` after the quoted
    display name).
    """
    first_quote = s.find('"')
    first_at = s.find('@')
    if first_at == -1:
        return (None, None)

    if first_quote != -1 and first_quote < first_at:
        # The '@' we found may sit inside a quoted display name; skip past
        # the closing quote and look for the address's own '@' after it.
        second_quote = s.find('"', first_quote + 1)
        if second_quote == -1:
            return (None, None)
        first_at = s.find('@', second_quote)
        if first_at == -1:
            # Quoted text with no address following it. Without this guard
            # the -1 would be used as a slice index further down.
            return (None, None)

    # The address ends at the first space or comma after its '@'.
    separator_positions = [
        pos
        for pos in (s.find(' ', first_at), s.find(',', first_at))
        if pos != -1
    ]
    if not separator_positions:
        return (s, '')

    first_email_end = min(separator_positions)
    return (s[:first_email_end], s[first_email_end:])
def split_email_line_by_spaces_or_commas(s):
    """Split one line of text into candidate email addresses.

    Repeatedly peels the leading address off `s` with
    `extract_first_email`, trimming surrounding spaces and commas.

    Returns a pair `([maybe_valid_emails], [invalid_emails])`. When the
    remaining text no longer looks like an address, that whole remainder
    is recorded as a single invalid entry and splitting stops.
    """
    emails = []
    invalid = []
    s = s.strip(' ,')
    while s:
        first, rest = extract_first_email(s)
        if first is None:
            # Nothing address-like is left; keep the remainder for
            # reporting and stop, otherwise the loop would never end.
            invalid.append(s)
            break
        emails.append(first.strip(' ,'))
        s = rest.strip(' ,')
    return emails, invalid
def lenient_email_extractor(text):
    """Extract email addresses from loosely formatted multi-line text.

    Each line is split with `split_email_line_by_spaces_or_commas`, and
    every candidate is then parsed with `email.utils.parseaddr`.

    Returns a pair `(address_pairs, invalid_addresses)`, where
    `address_pairs` holds `(name, address)` tuples of the kind returned
    by `email.utils.parseaddr`, and `invalid_addresses` holds the raw
    fragments that could not be split or parsed.

    Example input (note the mixed spaces, commas and blank lines):

        alice@example.com, "Andrew, Esq." <andrew@example.com>,
        "Mr. Bob Ross" <bob@example.com>,,, carol@example.com

        "Full Name with quotes and <odd@example.com>" <real@example.com>
    """
    from email.utils import parseaddr

    address_pairs = []
    invalid_addresses = []
    for line in text.strip().splitlines():
        emails, invalid = split_email_line_by_spaces_or_commas(line)
        invalid_addresses.extend(invalid)
        for candidate in emails:
            name, real = parseaddr(candidate)
            if name == real == '':
                # parseaddr signals failure with a pair of empty strings.
                invalid_addresses.append(candidate)
            else:
                address_pairs.append((name, real))
    return address_pairs, invalid_addresses
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment