Last active
March 2, 2019 11:49
-
-
Save talfco/243778afc7fc1b523bc239df92f06550 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def __calculate_name_matching(self, row):
    """Fill the three name-matching columns of *row* and return it.

    The value under ``row['Name']`` is cleaned in two passes driven by the
    instance configuration: every entry of ``twitterNameCleaner`` is removed
    from the name, then every entry of ``twitterNamesExpander`` has its
    ``'abbreviation'`` text replaced by its ``'name'`` text.  The result is
    passed through ``normalize_unicode_to_ascii`` and ``sort_words`` and
    finally through ``double_metaphone``.

    Writes to *row* (in place):
        col_match1: the normalized, word-sorted name.
        col_match2: element 0 of the ``double_metaphone`` result.
        col_match3: element 1 of the ``double_metaphone`` result.

    NOTE(review): *row* is presumably a pandas row (Series) or dict whose
    ``'Name'`` value is a ``str`` -- confirm against the caller.
    """
    cleaned = row['Name']

    # Pass 1: drop every configured noise fragment from the name.
    for fragment in self.__cfg['twitterNameCleaner']:
        cleaned = cleaned.replace(fragment, '')

    # Pass 2: replace configured abbreviations with their expansions.
    for rule in self.__cfg['twitterNamesExpander']:
        short_form = rule.get('abbreviation')
        long_form = rule.get('name')
        cleaned = cleaned.replace(short_form, long_form)

    ascii_name = normalize_unicode_to_ascii(cleaned)
    match_key = sort_words(ascii_name)

    # Only the first two elements of the result are used; presumably the
    # primary and secondary phonetic codes -- verify against the helper.
    codes = double_metaphone(match_key)

    row['col_match1'] = match_key
    row['col_match2'] = codes[0]
    row['col_match3'] = codes[1]
    return row
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment