Last active
January 20, 2021 00:28
-
-
Save NbtKmy/4f04a25761389d38fd3760322c337414 to your computer and use it in GitHub Desktop.
Checking persons in the SNF list with the GND API (lobid-gnd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

import pandas as pd
import requests
# Endpoint of the lobid-gnd search API; query parameters are passed separately
# so that requests can URL-encode names containing spaces, '&', umlauts, etc.
GND_SEARCH_URL = 'https://lobid.org/gnd/search'

# Maps the `collection.id` of a GND `sameAs` entry to the output column that
# receives the entry's `id`.
SAME_AS_COLUMNS = {
    'http://www.wikidata.org/entity/Q54919': 'viaf',
    'https://orcid.org': 'orcid_gnd',
}

# Seconds to wait for a single GND request before giving up on that row.
REQUEST_TIMEOUT = 30


def extract_identifiers(person):
    """Return the external identifiers listed in a GND person record.

    Args:
        person: one element of the ``member`` list of a lobid-gnd response.

    Returns:
        A dict mapping output column names (``'viaf'``, ``'orcid_gnd'``) to the
        identifier found in the record's ``sameAs`` list. Columns without a
        matching entry are absent; if several entries share a collection the
        last one wins.
    """
    found = {}
    for entry in person.get('sameAs', []):
        collection_id = entry.get('collection', {}).get('id')
        column = SAME_AS_COLUMNS.get(collection_id)
        if column is not None and 'id' in entry:
            found[column] = entry['id']
    return found


def has_affiliation(person, institute):
    """Return True if any affiliation label of *person* equals *institute*."""
    return any(
        affiliation.get('label') == institute
        for affiliation in person.get('affiliation', [])
    )


def apply_gnd_result(frame, index, institute, gnd_dat):
    """Write the identifiers found in one GND search response into *frame*.

    Args:
        frame: DataFrame holding the ``gnd``, ``viaf``, ``orcid_gnd``,
            ``ambiguity`` and ``Institute Name`` columns; modified in place.
        index: row label of the person the response belongs to.
        institute: institute name of that person, used to pick the right
            record when the search returned several hits.
        gnd_dat: decoded JSON body of the lobid-gnd search response.
    """
    if not gnd_dat:
        return
    total = gnd_dat.get('totalItems', 0)
    members = gnd_dat.get('member', [])

    if total > 1:
        # Several candidates: only accept records whose affiliation matches
        # the institute given in the SNF list.
        for person in members:
            if not has_affiliation(person, institute):
                continue
            frame.at[index, 'gnd'] = person['id']
            for column, identifier in extract_identifiers(person).items():
                frame.at[index, column] = identifier
    elif total == 1 and members:
        # Exactly one candidate: accept it without an affiliation check and
        # flag the row with ambiguity 50.
        person = members[0]
        frame.at[index, 'gnd'] = person['id']
        frame.at[index, 'ambiguity'] = 50
        labels = [
            affiliation['label']
            for affiliation in person.get('affiliation', [])
            if 'label' in affiliation
        ]
        if labels:
            # NOTE(review): the labels are appended to the existing institute
            # name without a separator, exactly as the original statement
            # did; confirm whether a delimiter is wanted.
            current = frame.at[index, 'Institute Name']
            base = '' if pd.isna(current) else str(current)
            frame.at[index, 'Institute Name'] = base + ''.join(labels)
        for column, identifier in extract_identifiers(person).items():
            frame.at[index, column] = identifier


def main():
    """Look up the first 100 persons of ``snf_person.csv`` in the GND.

    Reads ``snf_person.csv``, queries lobid-gnd once per person and writes the
    enriched rows to ``snf_checkedGND.csv``.
    """
    df = pd.read_csv('snf_person.csv', sep=',')
    # The string 'NaN' is the placeholder for "nothing found".
    df['viaf'] = 'NaN'
    df['orcid_gnd'] = 'NaN'
    df['gnd'] = 'NaN'
    df['ambiguity'] = 0
    # Work on an independent copy so the .at assignments below are written to
    # df1 itself and not to a temporary view of df.
    df1 = df[:100].copy()

    # One session for all rows reuses the HTTP connection between requests.
    with requests.Session() as session:
        for index, row in df1.iterrows():
            # Columns are addressed by position: 0 = last name,
            # 1 = first name, 3 = institute name.
            name = f'{row.iloc[0]}, {row.iloc[1]}'
            institute = row.iloc[3]
            try:
                response = session.get(
                    GND_SEARCH_URL,
                    params={
                        'q': name,
                        'filter': 'type:Person',
                        'format': 'json',
                    },
                    timeout=REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                gnd_dat = response.json()
            except (requests.RequestException, ValueError) as err:
                # A single failed lookup must not discard the whole batch.
                print(f'GND lookup failed for {name!r}: {err}', file=sys.stderr)
                continue
            apply_gnd_result(df1, index, institute, gnd_dat)

    df1.to_csv('snf_checkedGND.csv')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I cleaned the original SNF table "P3_PersonExport.csv" with the shell command "sed -e 's/";"/","/g' ./P3_PersonExport.csv > ./snf_person.csv", which replaces the semicolon field separators with commas.
This code runs very slowly, which is why I have checked only the first 100 records.
The result is here: https://drive.google.com/file/d/1aHuTTTH2S1l1WV1kDam353CAM4vU_IGR/view?usp=sharing