Created
July 9, 2017 14:50
-
-
Save dataday/e6afe1e1acbd8b54ecc00ef2328c4b77 to your computer and use it in GitHub Desktop.
Generates a list of email fields from a list of names.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import Counter | |
# PEP 8 formatted code
class GenerateEmailFields(object):
    """Generates a list of email fields from a list of names.

    Each name is turned into an address of the form
    'last_name.first_name_initial(occurrence_count)@company.com' and paired
    with the original name as 'name <email_address>'.
    """

    version = '1.0.0'
    description = 'Generates a list of email fields from a list of names.'

    # Class-level placeholder only; __init__ assigns a per-instance list.
    names_list = []

    # Basic name pattern: first name, optional middle name(s), last name.
    # Raw string so the backslashes reach the regex engine untouched.
    name_pattern = r'^(\w+)([\w\s]+)?\s([\w\-]+)$'

    # Result string formats
    email_field = '{0} <{1}>'
    email_prefix = '{0}.{1}'

    # Permitted email pattern: Not used but normally would be for results validation!
    email_pattern = ''

    # Hard coded values but should normally be derived dynamically
    email_suffix = '{0}.com'
    email_domain = 'company.com'

    def __init__(self, names, company):
        """Assign names and company.

        Args:
            names (str): List of names delimited by ';'. Whitespace around
                each name is ignored and empty entries are dropped.
            company (str): Company name.

        Raises:
            ValueError: If `names` is empty or holds no usable name, or if
                `company` is empty.
        """
        if not names:
            raise ValueError('Error: Missing names input')
        if not company:
            raise ValueError('Error: Missing company input')

        # Split on the bare ';' and strip, so 'a;b', 'a; b' and a trailing
        # ';' are all accepted instead of producing unparseable names.
        stripped_names = [name.strip() for name in names.split(';')]
        self.names_list = [name for name in stripped_names if name]
        if not self.names_list:
            raise ValueError('Error: Missing names input')

        self.email_domain = self.email_suffix.format(company)

    def get_results(self):
        """Get email fields from a list of names.

        Email address format:
        'last_name.first_name_initial(occurrence_count)@company.domain'.

        Returns:
            obj:`list` of :obj:`str`: Qualified email fields, e.g.,
            '[name <email_address>, ...]'. Empty when there are no names.

        Raises:
            ValueError: If a name does not match `self.name_pattern`.
        """
        if not self.names_list:
            return []

        email_address_list = self.create_email_address_list(self.names_list)
        return self.create_email_address_fields(
            self.names_list,
            email_address_list
        )

    def create_email_address_list(self, names_list):
        """Creates a list of email addresses from a list of names.

        Matching groups from `self.name_pattern`
            Group 0 - Entire name
            Group 1 - First name
            Group 2 - Middle name (optional, ignored)
            Group 3 - Last name (may contain hyphens)

        Args:
            names_list :obj:`list` of :obj:`str`: List of names.

        Returns:
            obj:`list` of :obj:`str`: One email address per name, in the
            same order as `names_list`.

        Raises:
            ValueError: If a name does not match `self.name_pattern`.
        """
        email_prefixes = []
        for value in names_list:
            name_matches = re.search(self.name_pattern, value)
            if name_matches is None:
                # re.search returns None on no match; fail with the
                # offending name rather than an AttributeError on None.
                raise ValueError(
                    'Error: Unable to parse name {0!r}'.format(value)
                )
            # Always append exactly one prefix per name so the result stays
            # index-aligned with names_list.
            email_prefixes.append(
                self.create_email_address_prefix(
                    name_matches.group(1),
                    name_matches.group(3)
                )
            )

        # Uniquify once, after every name has been processed. Doing this
        # after the loop avoids having to detect the "last" element, which
        # is unreliable when the same name appears more than once.
        return self.create_uniquified_email_address_list(email_prefixes)

    def create_email_address_fields(self, names_list, email_address_list):
        """Creates a list of email address fields from names and addresses.

        Args:
            names_list :obj:`list` of :obj:`str`: List of names.
            email_address_list :obj:`list` of :obj:`str`: List of email
                addresses, index-aligned with `names_list`.

        Returns:
            obj:`list` of :obj:`str`: List of email fields, empty if either
            input is empty.

        Raises:
            IndexError: If there are fewer email addresses than names.
        """
        if not names_list or not email_address_list:
            return []
        if len(email_address_list) < len(names_list):
            raise IndexError('Error: Fewer email addresses than names')

        return [
            self.email_field.format(name, email_address)
            for name, email_address in zip(names_list, email_address_list)
        ]

    def create_uniquified_email_address_list(self, email_prefixes):
        """Appends a numeric to distinguish duplicates and adds the domain.

        Prefixes that occur more than once are numbered 1..n in order of
        appearance; prefixes that occur once are left unnumbered. Duplicates
        are detected case-insensitively because the final address is
        lower-cased. The input list is not modified.

        Reference:
            https://stackoverflow.com/questions/30650474/python-rename-duplicates-in-list-with-progressive-numbers-without-sorting-list

        Args:
            email_prefixes :obj:`list` of :obj:`str`: Email prefix list.

        Returns:
            obj:`list` of :obj:`str`: Email address list, in input order.
        """
        # Compare in lower case: 'Doe.J' and 'doe.j' end up as the same
        # address and therefore must count as duplicates of each other.
        lowered_prefixes = [prefix.lower() for prefix in email_prefixes]
        prefix_totals = Counter(lowered_prefixes)
        prefix_seen = Counter()

        email_address_list = []
        for prefix in lowered_prefixes:
            if prefix_totals[prefix] > 1:
                # Running occurrence number for this duplicated prefix.
                prefix_seen[prefix] += 1
                prefix = '{0}{1}'.format(prefix, prefix_seen[prefix])
            # Assign the company domain to every prefix, duplicate or not.
            email_address_list.append(
                format_email_address(prefix, self.email_domain)
            )
        return email_address_list

    def create_email_address_prefix(self, first_name, last_name):
        """Produces a formatted email prefix from first and last name values.

        Args:
            first_name (str): First name.
            last_name (str): Last name.

        Returns:
            str: Email prefix 'last_name.first_name_initial' with hyphens
            removed from the last name, e.g., 'Bloggs.J'; 'nobody.a' when
            either name is missing.
        """
        if not first_name or not last_name:
            return 'nobody.a'

        return self.email_prefix.format(
            last_name.replace('-', ''),
            first_name[0]
        )
################################################################################################################
# Please note: these helpers could probably be moved into a common utilities library.
################################################################################################################
def is_list_at_last_index(values_list, value, index=None):
    """Determines if the last index of a list is currently being processed.

    Args:
        values_list :obj:`list` of :obj:`str`: List of values.
        value (str): Current value.
        index (int, optional): Position of the current value, e.g., from
            `enumerate`. When given, it alone decides the result and `value`
            is not consulted; pass it whenever the list may hold duplicates.

    Returns:
        bool: True if the current position is the last index, False
        otherwise (including for an empty list or a value not in the list).

    Note:
        Without `index`, the position is looked up with `list.index`, which
        finds the first occurrence only. A value that also appears earlier
        in the list is therefore never reported as being at the last index.
    """
    if not values_list:
        return False

    last_index = len(values_list) - 1
    if index is not None:
        return index == last_index

    try:
        return values_list.index(value) == last_index
    except ValueError:
        # The value is not in the list, so it cannot be at the last index.
        return False
def format_email_address(email_prefix, email_suffix):
    """Joins an email prefix and suffix into a lower-cased email address.

    Args:
        email_prefix (str): Part before the '@', e.g., 'doe.j1'.
        email_suffix (str): Part after the '@', e.g., 'example.com'.

    Returns:
        str: Lower-cased address in the form 'email_prefix@email_suffix'.
    """
    address_template = '{0}@{1}'
    email_address = address_template.format(email_prefix, email_suffix)
    # Lower-case the whole address, prefix and suffix alike.
    return email_address.lower()
# ^_^... | |
if __name__ == '__main__':
    # Demo run: build email fields for a sample list of names at 'Example'.
    email_fields = GenerateEmailFields(
        'John Doe; Peter Parker; Mary Jane Watson-Parker; James Doe; John Elvis Doe; Jane Doe; Penny Parker',
        'Example'
    )
    # Format as string, delimited by '; '.
    # A parenthesised single argument prints identically on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print('; '.join(email_fields.get_results()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment