Created
December 6, 2019 16:26
-
-
Save morkapronczay/49593dcb260213233fffb9d08376c111 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gender_guesser.detector as gender | |
import pandas as pd | |
# Instantiate the gender detector once at module level so it can be reused.
d = gender.Detector()
# This function adds a guessed-gender column for a specific name column.
def guess_col_gender(col, suff='_gender', df=None, d=None):
    """Add a guessed-gender column derived from the first names in ``df[col]``.

    For every string value in ``df[col]`` the text before the first space is
    taken as the first name and passed to ``d.get_gender``. The results are
    stored in a new column named ``f'{col}{suff}'``.

    Args:
        col: Name of the column holding the names.
        suff: Suffix appended to ``col`` to build the new column's name.
        df: DataFrame to annotate; it is modified in place. When omitted,
            the module-level ``df`` is looked up at call time.
        d: Object exposing ``get_gender(first_name)``. When omitted, the
            module-level ``d`` is looked up at call time.

    Returns:
        The same ``df`` object that was passed in, with the new column added.

    Raises:
        NameError: If ``df`` or ``d`` is omitted and the module has no
            global variable of that name.
    """
    # Defaults are resolved here rather than in the signature: a signature
    # default such as ``df=df`` is evaluated once, when the ``def`` runs, so
    # it fails if the global does not exist yet and goes stale if the global
    # is rebound later.
    module_globals = globals()
    if df is None:
        if 'df' not in module_globals:
            raise NameError(
                "guess_col_gender: no 'df' passed and no module-level 'df' defined"
            )
        df = module_globals['df']
    if d is None:
        if 'd' not in module_globals:
            raise NameError(
                "guess_col_gender: no 'd' passed and no module-level 'd' defined"
            )
        d = module_globals['d']

    # Extract the first name (text before the first space) and guess its
    # gender. Non-string cells (e.g. missing values) have no ``split`` and
    # would crash, so they get ``None`` instead of a guess.
    genders = [
        d.get_gender(value.split(' ')[0]) if isinstance(value, str) else None
        for value in df[col].tolist()
    ]

    # Store the guesses as the ``<col><suff>`` column on the caller's frame.
    df[f'{col}{suff}'] = genders
    return df
# Guess the gender of each composer from the first name in the 'composer'
# column; the result is stored in a new 'composer_gender' column.
df_g = guess_col_gender('composer', suff='_gender', df=df, d=d)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment