Created
December 19, 2018 23:05
-
-
Save pmart123/f6ff30721690413cd8c1cf5d5fecd9de to your computer and use it in GitHub Desktop.
Example code for cleaning contact names before CRM entry
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import probablepeople as pp | |
# A raw contact record: the first-name and last-name fields as entered.
ContactName = namedtuple('ContactName', ['first', 'last'])

# Example inputs: all-caps names, generational suffixes (JR, SR) and
# comma-separated trailing credentials in the last-name field.
sample_names = [
    ContactName(first='BOB', last='SMITH, JR'),
    ContactName(first='Robert P.', last='Kardashian, CFA, CAIA'),
    ContactName(first='Jimmy', last='Fallon, CIMA'),
    ContactName(first='TOM', last='ARNOLD SR'),
]
def process_name(first_name, last_name):
    """Split a raw first/last name pair into cleaned name components.

    Trailing commas are stripped from ``last_name``, designations found by
    ``find_designations`` are removed from it, and the remainder is
    formatted with ``format_last_name``. An all-uppercase ``first_name``
    is title-cased. The combined name is then parsed with probablepeople
    to look for a middle name or middle initial, which is cut out of the
    first name when found.

    Returns:
        A dict with the keys ``first_name``, ``middle_name`` (``None`` when
        no middle name was detected), ``last_name`` and ``designations``.
    """
    cleaned_last = last_name.rstrip(',')
    designations = find_designations(cleaned_last)
    last = format_last_name(remove_designations(cleaned_last, designations))

    first = first_name.title() if first_name.isupper() else first_name

    middle = None
    for token, label in pp.parse(first + ' ' + last):
        if label == 'FirstInitial':
            # Keep first names that start with an initial as one first
            # name, i.e. "R Michael", so stop looking for a middle name.
            break
        if label in ('MiddleName', 'MiddleInitial'):
            middle = token
            break

    if middle:
        first = first.replace(' ' + middle, '')

    return {
        'first_name': first,
        'middle_name': middle,
        'last_name': last,
        'designations': designations,
    }
def format_last_name(last_name):
    """Return ``last_name`` with normalized casing and suffix punctuation.

    The name is tokenized with probablepeople. Tokens labelled
    ``SuffixGenerational`` that are a form of "jr"/"sr" get a trailing
    period and title case (e.g. "JR" -> "Jr."); other generational
    suffixes are kept as written. Every other token loses its trailing
    commas and is title-cased when it is entirely uppercase. The tokens
    are re-joined with single spaces.
    """
    short_suffixes = ('jr', 'sr', 'jr.', 'sr.')
    formatted = []
    for parts in pp.parse(last_name):
        token, label = parts[0], parts[1]
        if label != 'SuffixGenerational':
            token = token.rstrip(',')
            if token.isupper():
                token = token.title()
        elif token.lower() in short_suffixes:
            if not token.endswith('.'):
                token = token + '.'
            token = token.title()
        formatted.append(token)
    return ' '.join(formatted)
def remove_designations(text, designations):
    """Return ``text`` without the comma-separated designation segments.

    ``text`` is treated as a comma-delimited list whose first segment is
    the name itself. Any later segment whose space-stripped content equals
    one of ``designations`` is dropped; all other segments keep their
    original spacing.

    Whole segments are compared instead of doing a substring replace, so a
    short designation (e.g. "C") cannot clip a longer one that starts the
    same way (e.g. "CFA"), and designations are removed regardless of how
    many spaces follow the comma.

    Args:
        text: The raw last-name string, e.g. ``"Kardashian, CFA, CAIA"``.
        designations: Iterable of designation strings to remove.

    Returns:
        The text with matching segments (and their leading commas) removed.
    """
    unwanted = set(designations)
    if not unwanted:
        return text
    # The segment before the first comma is the name and is always kept.
    head, *trailing = text.split(',')
    kept = [seg for seg in trailing if seg.strip(' ') not in unwanted]
    return ','.join([head] + kept)
def find_designations(text):
    """Return the designations listed after the first comma in ``text``.

    Everything after the first comma is split on commas and stripped of
    surrounding spaces. Segments that ``is_generational_suffix`` recognizes
    (e.g. "JR") are left out, as are blank segments produced by doubled or
    trailing commas, since an empty string is not a designation.

    Args:
        text: The raw last-name string, e.g. ``"Kardashian, CFA, CAIA"``.

    Returns:
        A list of designation strings in their original order; empty when
        ``text`` contains no comma.
    """
    _, comma, trailing = text.partition(',')
    if not comma:
        return []
    segments = (segment.strip(' ') for segment in trailing.split(','))
    return [
        segment for segment in segments
        if segment and not is_generational_suffix(segment)
    ]
def is_generational_suffix(suffix):
    """Return True when ``suffix`` parses as a generational suffix.

    The string is parsed with probablepeople and the label of its first
    token is compared against ``'SuffixGenerational'``. Blank input, or
    input for which the parser yields no tokens, is reported as False
    rather than indexing into an empty result.
    """
    if not suffix.strip():
        return False
    parsed = pp.parse(suffix)
    return bool(parsed) and parsed[0][1] == 'SuffixGenerational'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment