Skip to content

Instantly share code, notes, and snippets.

@atmb4u
Created March 9, 2017 17:32
Show Gist options
  • Save atmb4u/f37a87ec3e03de76ed6869905eed9568 to your computer and use it in GitHub Desktop.
Save atmb4u/f37a87ec3e03de76ed6869905eed9568 to your computer and use it in GitHub Desktop.
Determine whether two different strings (names) are similar enough to be the same person's name.
import re, itertools
def similarish(string1, string2):
    """
    Decide whether two name strings plausibly refer to the same person.

    Both inputs are normalised the same way: every character other than an
    ASCII letter, a digit or a space is removed, the result is lower-cased
    and then split on whitespace into tokens. Every token of one name is
    compared with every token of the other; a pair matches when the tokens
    are equal or one is a substring of the other (so "d" matches "doe" and
    "bart" matches "bartholomew"). The names count as similar when the
    number of matching pairs is at least the token count of the shorter name.

    Input
        string1 : a person's full/partial name
        string2 : same person's full/partial name, or a different formatting
                  of string1
    Output
        Boolean
        True - if the names are similar
        False - if the names are different, or if either name has no usable
                token (empty string, or only punctuation/whitespace)
    Examples
        similarish("Anoop Thomas Mathew", "anoop mathew") # True
        similarish("John Doe", "John D.") # True
        similarish("Bart Lorang", "Bartholomew Lorang") # True
        similarish("Sum Ting Wong", "Bart Lorang") # False
        similarish("Joana Doe", "John Doe") # False
        similarish("", "John Doe") # False
    Uses only constructs available in both Python 2.7 and Python 3.
    """
    # Spell the letter ranges out explicitly: the single range "A-z" would
    # also keep the ASCII characters that sit between "Z" and "a"
    # ([ \ ] ^ _ `), leaving stray punctuation inside the tokens.
    # Note that non-ASCII letters are stripped by this pattern as well.
    non_name_chars = r'[^A-Za-z0-9 ]'
    tokens1 = re.sub(non_name_chars, '', string1).lower().split()
    tokens2 = re.sub(non_name_chars, '', string2).lower().split()

    # Without this guard the threshold below would be 0 and an empty name
    # would be reported as similar to every other name.
    if not tokens1 or not tokens2:
        return False

    # Equality is already covered by the substring tests.
    # NOTE: this counts matching *pairs*, so a single token that matches
    # several tokens of the other name contributes more than once.
    matches = sum(
        1
        for token1, token2 in itertools.product(tokens1, tokens2)
        if token1 in token2 or token2 in token1
    )
    return matches >= min(len(tokens1), len(tokens2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment