Last active
January 14, 2019 19:31
-
-
Save kassuts/f6f6375b964a25b3e98e97a2cc6d4071 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def genderize_api_connect(first_name):
    """Query the genderize.io API for a gender prediction of *first_name*.

    Args:
        first_name: The first name to look up.

    Returns:
        The decoded JSON body of the API response as a dict when the service
        answers with HTTP 200.  An empty dict is returned when the service
        answers with any other status code, so callers can treat the lookup
        as "no prediction available" instead of crashing.

    Raises:
        Whatever ``requests.get`` raises on transport errors, and
        ``ValueError`` if a 200 response does not contain valid JSON.
    """
    import logging

    # This is a plain function, so there is no ``self`` to take a logger
    # from; use a module-level logger instead.
    logger = logging.getLogger(__name__)

    # Let requests build the query string so that names containing spaces,
    # '&', '#' or non-ASCII characters are encoded correctly.
    response = requests.get('https://api.genderize.io/', params={'name': first_name})

    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently disable the status check.
    if response.status_code != 200:
        logger.error("There was an error contacting the API service")
        return {}

    return json.loads(response.content)
def author_gender(all_authors, threshold=0.6):
    """Add a ``'gender'`` entry to every author record that lacks one.

    Args:
        all_authors: Mapping of author id to an author dict.  Each author
            dict is read for ``'first_name'`` (and ``'doi'`` for logging)
            and is updated in place.
        threshold: A prediction is only accepted when the probability
            reported by the API is strictly greater than this value.

    Returns:
        The same ``all_authors`` mapping, after in-place enrichment.

    Behaviour per author:
        * already has ``'gender'``: left untouched, no API call is made;
        * has no ``'first_name'``: logged and marked ``"Unknown"``;
        * API response has no usable gender/probability: ``"Unknown"``;
        * probability above *threshold*: capitalized predicted gender;
        * probability at or below *threshold*: no ``'gender'`` is set.
    """
    import logging

    # Plain function: there is no ``self`` in scope, so use a module logger.
    logger = logging.getLogger(__name__)

    for key, value in all_authors.items():
        # Authors that were already enriched are skipped to avoid
        # unnecessary API calls.
        if 'gender' in value:
            continue

        # Read the name before touching the API helper so that a missing
        # name is handled on its own and never mistaken for an API problem.
        try:
            first_name = value['first_name']
        except KeyError:
            # ``.get`` keeps the handler itself from raising when the
            # record also lacks a 'doi'.
            logger.error(
                'Name of Author unknown for author_id {} in Research Paper with doi link {}'.format(
                    key, value.get('doi')))
            value['gender'] = "Unknown"
            continue

        # Tolerate helpers that signal failure with None or an empty dict.
        result = genderize_api_connect(first_name) or {}
        gender = result.get('gender')
        probability = result.get('probability')

        if gender is None or probability is None:
            # No gender was returned by the API (missing or null fields).
            value['gender'] = "Unknown"
        elif probability > threshold:
            # Store the accepted prediction in capitalized form.
            value['gender'] = gender.capitalize()

    return all_authors
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment