Skip to content

Instantly share code, notes, and snippets.

@cy-xu
Last active August 27, 2021 00:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cy-xu/0030e4030c15dcd7df0ce267ec79bd24 to your computer and use it in GitHub Desktop.
Save cy-xu/0030e4030c15dcd7df0ce267ec79bd24 to your computer and use it in GitHub Desktop.
Email addresses clean up - remove nonsense from a long list of emails
""" Remove nonsense from a long list of emails
hello.xu@gmail.com

Reads candidate addresses, one per line, from ``raw_emails_file``; keeps the
lines that contain an '@'; and writes the cleaned, lower-cased addresses, one
per line, to ``clean_email_file``.  Each saved address is also printed, and a
summary line with the total count is printed at the end.
"""
raw_emails_file = './email_addresses.txt'
clean_email_file = './clean_email_addresses.txt'


def _clean_address(raw_email):
    """Return the cleaned address contained in one input line, or None.

    Cleaning steps:
      * lines without an '@' are rejected;
      * if the line contains '<', only the text between that '<' and the
        next '>' is kept (e.g. ``Jane Doe <Jane@X.org>,`` -> ``jane@x.org``);
        when the closing '>' is missing, everything after '<' is kept;
      * all whitespace and commas are removed and the result is lower-cased.

    None is returned when the cleaned text no longer contains an '@'.
    """
    if '@' not in raw_email:
        return None

    left_pos = raw_email.find('<')
    if left_pos != -1:
        # Search for '>' only after the '<' so that an earlier stray '>'
        # cannot produce an empty slice; a missing '>' must not chop the
        # final character (find() returns -1 in that case).
        right_pos = raw_email.find('>', left_pos + 1)
        if right_pos == -1:
            right_pos = len(raw_email)
        raw_email = raw_email[left_pos + 1:right_pos]

    # split()/join drops every kind of whitespace (spaces, tabs, '\r', '\n'),
    # so the caller can append exactly one newline.
    valid_address = ''.join(raw_email.split()).replace(',', '').lower()

    # The '@' seen earlier may have been outside the <...> part.
    if '@' not in valid_address:
        return None
    return valid_address


valid_counter = 0

# The input is opened first so that a missing input file fails before the
# output file is created or truncated; both handles are closed by ``with``
# even when an error occurs midway.
with open(raw_emails_file, 'r') as raw_emails, \
        open(clean_email_file, 'w') as clean_emails:
    for raw_email in raw_emails:
        valid_address = _clean_address(raw_email)
        # skip empty/invalid lines
        if valid_address is None:
            continue

        # write valid address to new line
        clean_emails.write(valid_address + '\n')
        print(valid_address)
        valid_counter += 1

print(f'a total of {valid_counter} valid emails saved to {clean_email_file}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment