Skip to content

Instantly share code, notes, and snippets.

@dataday
Created July 9, 2017 14:50
Show Gist options
  • Save dataday/e6afe1e1acbd8b54ecc00ef2328c4b77 to your computer and use it in GitHub Desktop.
Save dataday/e6afe1e1acbd8b54ecc00ef2328c4b77 to your computer and use it in GitHub Desktop.
Generates a list of email fields from a list of names.
import re
from collections import Counter
# PEP 8 formatted code
class GenerateEmailFields:
    """Generates a list of email fields from a list of names.

    Each name is turned into an address of the form
    'last_name.first_name_initial(occurrence_count)@company.com' and paired
    with the name it was derived from as 'name <email_address>'.
    """

    version = '1.0.0'
    description = 'Generates a list of email fields from a list of names.'
    # Class-level default only; every instance gets its own list in __init__.
    names_list = []
    # Basic name pattern: first name, optional middle name(s), last name.
    # Raw string so the regex escapes are not treated as string escapes.
    name_pattern = r'^(\w+)([\w\s]+)?\s([\w\-]+)$'
    # Result string formats
    email_field = '{0} <{1}>'
    email_prefix = '{0}.{1}'
    # Permitted email pattern: Not used but normally would be for results validation!
    email_pattern = ''
    # Hard coded values but should normally be derived dynamically
    email_suffix = '{0}.com'
    email_domain = 'company.com'

    def __init__(self, names, company):
        """Assign names and company.

        Args:
            names (str): List of names delimited by ';'. Whitespace around
                each name is ignored and empty entries are dropped.
            company (str): Company name.

        Raises:
            ValueError: If `names` holds no usable name or `company` is empty.
        """
        # Split on the bare delimiter and strip, so a trailing ';' or a
        # missing space after it does not produce an unparseable entry.
        parsed_names = []
        for name in (names or '').split(';'):
            name = name.strip()
            if name:
                parsed_names.append(name)
        if not parsed_names:
            raise ValueError('Error: Missing names input')
        self.names_list = parsed_names

        if not company:
            raise ValueError('Error: Missing company input')
        self.email_domain = self.email_suffix.format(company)

    def get_results(self):
        """Get email fields from a list of names.

        Email address format:
        'last_name.first_name_initial(occurrence_count)@company.domain'.

        Returns:
            obj:`list` of :obj:`str`: Qualified email fields,
            e.g., '[name <email_address>, ...]'. Empty when there are no names.
        """
        if not self.names_list:
            return []
        email_address_list = self.create_email_address_list(self.names_list)
        return self.create_email_address_fields(
            self.names_list,
            email_address_list
        )

    def create_email_address_list(self, names_list):
        """Creates a list of email addresses from a list of names.

        Matching groups from `self.name_pattern`:
            Group 0 - Entire name
            Group 1 - First name
            Group 2 - Middle name (optional, ignored)
            Group 3 - Last name (may contain hyphens)

        A name that does not match the pattern falls back to the placeholder
        prefix from `create_email_address_prefix`, so the returned list always
        has one address per input name, in the same order.

        Args:
            names_list :obj:`list` of :obj:`str`: List of names.

        Returns:
            obj:`list` of :obj:`str`: A list of email addresses.
        """
        email_prefixes = []
        for value in names_list:
            name_matches = re.search(self.name_pattern, value)
            if name_matches:
                first_name = name_matches.group(1)
                last_name = name_matches.group(3)
            else:
                # re.search returns None for an unparseable name; use the
                # placeholder rather than failing on `.group`.
                first_name = last_name = None
            email_prefixes.append(
                self.create_email_address_prefix(first_name, last_name)
            )
        if not email_prefixes:
            return []
        # Uniquify once, after every prefix is known. Detecting the "last
        # iteration" by value breaks when the final name is a duplicate.
        return self.create_uniquified_email_address_list(email_prefixes)

    def create_email_address_fields(self, names_list, email_address_list):
        """Creates a list of email address fields from names and email addresses.

        Args:
            names_list :obj:`list` of :obj:`str`: List of names.
            email_address_list :obj:`list` of :obj:`str`: List of email
                addresses, positionally matching `names_list`.

        Returns:
            obj:`list` of :obj:`str`: List of email fields. Empty when either
            input is empty; surplus entries of the longer list are ignored.
        """
        if not names_list or not email_address_list:
            return []
        return [
            self.email_field.format(name, email_address)
            for name, email_address in zip(names_list, email_address_list)
        ]

    def create_uniquified_email_address_list(self, email_prefixes):
        """Appends a numeric to distinguish duplicates and adds the domain.

        Prefixes that occur more than once are numbered 1..n in order of
        appearance; prefixes that occur once are left unnumbered. The input
        list is not modified.

        Reference:
            https://stackoverflow.com/questions/30650474/python-rename-duplicates-in-list-with-progressive-numbers-without-sorting-list

        Args:
            email_prefixes :obj:`list` of :obj:`str`: Email prefix list.

        Returns:
            obj:`list` of :obj:`str`: Email address list.
        """
        total_counts = Counter(email_prefixes)
        seen_counts = Counter()
        email_address_list = []
        for email_prefix in email_prefixes:
            # Only number prefixes that occur more than once
            if total_counts[email_prefix] > 1:
                seen_counts[email_prefix] += 1
                email_prefix += str(seen_counts[email_prefix])
            # Assign the domain to every prefix, duplicate or not.
            email_address_list.append(
                format_email_address(email_prefix, self.email_domain)
            )
        return email_address_list

    def create_email_address_prefix(self, first_name, last_name):
        """Produces a formatted email prefix from first and last name values.

        Args:
            first_name (str): First name.
            last_name (str): Last name; hyphens are removed.

        Returns:
            str: Email prefix 'last_name.first_name_initial', e.g., 'Bloggs.J',
            or 'nobody.a' when either name is missing.
        """
        if first_name and last_name:
            return self.email_prefix.format(
                last_name.replace('-', ''),
                first_name[0]
            )
        return 'nobody.a'
################################################################################################################
# Please Note: These can probably be referenced from within a common utilities library
################################################################################################################
def is_list_at_last_index(values_list, value):
    """Determines if the last index of a list is currently being processed.

    The position of `value` is found with `list.index`, i.e. its FIRST
    occurrence. When the final element also appears earlier in the list this
    returns False even while the final element is being processed, so do not
    rely on it for lists that may contain duplicates.

    Args:
        values_list :obj:`list` of :obj:`str`: List of values.
        value (str): Current value.

    Returns:
        bool: True when the first occurrence of `value` is the final element
        of `values_list`. False otherwise, including when the list is empty
        or does not contain `value`.
    """
    try:
        return values_list.index(value) == len(values_list) - 1
    except ValueError:
        # list.index raises when the value is absent (or the list is empty);
        # the contract of this helper is a plain bool.
        return False
def format_email_address(email_prefix, email_suffix):
    """Joins an email prefix and suffix into a lower-cased email address.

    Args:
        email_prefix (str): Part placed before the '@'.
        email_suffix (str): Part placed after the '@'.

    Returns:
        str: '<email_prefix>@<email_suffix>' converted to lower case.
    """
    parts = (email_prefix, email_suffix)
    address = '{0}@{1}'.format(*parts)
    return address.lower()
# ^_^...
if __name__ == '__main__':
    # Demo run: build the email fields for a sample list of names.
    email_fields = GenerateEmailFields(
        'John Doe; Peter Parker; Mary Jane Watson-Parker; James Doe; John Elvis Doe; Jane Doe; Penny Parker',
        'Example'
    )
    # Format as string, delimited by '; '.
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('; '.join(email_fields.get_results()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment