Skip to content

Instantly share code, notes, and snippets.

@pmart123
Created December 19, 2018 23:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pmart123/f6ff30721690413cd8c1cf5d5fecd9de to your computer and use it in GitHub Desktop.
Save pmart123/f6ff30721690413cd8c1cf5d5fecd9de to your computer and use it in GitHub Desktop.
Example code for cleaning contact names for CRM entry
from collections import namedtuple
import probablepeople as pp
# Raw contact name as it arrives: two free-text fields, first and last.
ContactName = namedtuple('ContactName', 'first last')

# Sample inputs: all-caps names, a middle initial inside the first-name
# field, and last names that carry suffixes and/or designations.
sample_names = [
    ContactName(first='BOB', last='SMITH, JR'),
    ContactName(first='Robert P.', last='Kardashian, CFA, CAIA'),
    ContactName(first='Jimmy', last='Fallon, CIMA'),
    ContactName(first='TOM', last='ARNOLD SR'),
]
def process_name(first_name, last_name):
    """Split a raw contact name into first, middle, last and designations.

    Args:
        first_name: Raw first-name field; it may also hold a middle name
            or initial (e.g. ``'Robert P.'``).
        last_name: Raw last-name field; it may carry comma-separated
            suffixes and designations (e.g. ``'SMITH, JR'`` or
            ``'Kardashian, CFA, CAIA'``).

    Returns:
        A dict with the keys ``'first_name'``, ``'middle_name'`` (``None``
        when no middle name is detected), ``'last_name'`` and
        ``'designations'`` (a list of strings).
    """
    # Drop surrounding whitespace and any trailing commas/spaces. Stripping
    # only ',' would leave 'SMITH, ' untouched, and the blank piece after
    # the comma would then reach the designation scan.
    last_name = last_name.strip().rstrip(', ')
    first_name = first_name.strip()

    designations = find_designations(last_name)
    last = format_last_name(remove_designations(last_name, designations))

    # All-caps input is title-cased; mixed-case input is kept as typed.
    first = first_name.title() if first_name.isupper() else first_name

    # Parse the combined name so a middle name/initial sitting in the
    # first-name field can be spotted. Default to "no middle name" so the
    # variable is bound even when the parser yields no tokens.
    middle = None
    for token, label in pp.parse(first + ' ' + last):
        if label == 'FirstInitial':
            # Keep first names that start with an initial as one first
            # name, i.e. "R Michael".
            break
        if label in ('MiddleName', 'MiddleInitial'):
            middle = token
            break

    if middle:
        # Take the detected middle part back out of the first name.
        first = first.replace(' ' + middle, '')

    return {
        'first_name': first,
        'middle_name': middle,
        'last_name': last,
        'designations': designations,
    }
def format_last_name(last_name):
    """Normalize the casing and suffix punctuation of a last name.

    The name is run through the ``pp`` parser and rebuilt token by token:

    * a token labelled ``'SuffixGenerational'`` whose text is ``jr``/``sr``
      (any case, with or without a final period) becomes ``'Jr.'``/``'Sr.'``;
      other generational suffixes keep their casing;
    * any other token is title-cased only when it is entirely upper case.

    Args:
        last_name: Last name text, designations already removed.

    Returns:
        The rebuilt last name with its tokens joined by single spaces.
    """
    pieces = []
    for token, label in pp.parse(last_name):
        # A token may come back with a trailing comma; strip it for suffix
        # tokens as well, otherwise 'JR,' would miss the jr/sr check below
        # and keep its comma.
        piece = token.rstrip(',')
        if label == 'SuffixGenerational':
            if piece.lower() in ('jr', 'sr', 'jr.', 'sr.'):
                if not piece.endswith('.'):
                    piece += '.'
                piece = piece.title()
        elif piece.isupper():
            piece = piece.title()
        pieces.append(piece)
    return ' '.join(pieces)
def remove_designations(text, designations):
    """Remove the given designations from a comma-separated name string.

    ``text`` is split on commas. The part before the first comma is always
    kept; every later piece is dropped when its whitespace-stripped text
    equals one of ``designations``. Pieces are compared whole, so removing
    ``'CIM'`` never damages ``'CIMA'``, and the amount of whitespace after
    a comma does not matter.

    Args:
        text: Name text such as ``'Kardashian, CFA, CAIA'``.
        designations: Iterable of designation strings to remove. A
            designation that itself contains a comma cannot match.

    Returns:
        ``text`` without the matching pieces and the commas that
        introduced them; unchanged when nothing matches.
    """
    unwanted = {designation.strip() for designation in designations}
    if not unwanted:
        return text
    head, *tail = text.split(',')
    kept = [piece for piece in tail if piece.strip() not in unwanted]
    return ','.join([head, *kept])
def find_designations(text):
    """Return the designations listed after the first comma in ``text``.

    Everything after the first comma is split on commas and each piece is
    stripped of surrounding whitespace. Blank pieces are ignored, and so
    are pieces that ``is_generational_suffix`` accepts.

    Args:
        text: Name text such as ``'Kardashian, CFA, CAIA'``.

    Returns:
        A list of designation strings in their original order; empty when
        ``text`` contains no comma.
    """
    _, comma, tail = text.partition(',')
    if not comma:
        return []
    pieces = (piece.strip() for piece in tail.split(','))
    # Skip blank pieces (from ',,' or a trailing ', '): they are not
    # designations and must not be handed to the suffix check.
    return [
        piece for piece in pieces
        if piece and not is_generational_suffix(piece)
    ]
def is_generational_suffix(suffix):
    """Tell whether the parser labels ``suffix`` as a generational suffix.

    Only the label of the first parsed token is checked.

    Args:
        suffix: Candidate text, e.g. ``'JR'``.

    Returns:
        ``True`` when the first token's label is ``'SuffixGenerational'``;
        ``False`` otherwise, including when parsing yields no tokens.
    """
    parsed = pp.parse(suffix)
    # Guard the index: with no tokens, parsed[0] would raise IndexError.
    return bool(parsed) and parsed[0][1] == 'SuffixGenerational'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment