Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Jason-Gush/bcbab1c3c55e5684251ad3b8ee04eded to your computer and use it in GitHub Desktop.
Save Jason-Gush/bcbab1c3c55e5684251ad3b8ee04eded to your computer and use it in GitHub Desktop.
Using the NZ ORCID Hub's API to compile ORCID works
# Python3.6
# Use an access token from the Hub to retrieve (all) users
# With the list of users, use the ORCID API proxy to retrieve their work summaries
# Done handle 401 responses by using a public read directly from ORCID
# Done change from json out to tsv
# Done make UTF-8 safe, much easier than I thought but seems to slow operation
# Done: use time.sleep to throttle requests and avoid overloading ORCID
# (was 8 requests/sec, now at most 20 requests/sec to be safe for V2 and V3)
# From the work summaries, call each work individually to get the needed metadata missing from the summary
# Write row at time to allow recovery from error and add skip_orcid to allow resume at specific ORCID ID
# Use public API for bulk of work reads, only handing Trusted Party and Private calls back to the Hub's API
import requests
import time
import json
def fetch_hub_access_token(app_id, secret, service):
    """Request a client-credentials access token from the Hub.

    Posts the client id and secret to ``https://<service>/oauth/token`` and
    returns the ``access_token`` field of the JSON reply.

    Args:
        app_id: client id sent as ``client_id``.
        secret: client secret sent as ``client_secret``.
        service: host name of the Hub instance to authenticate against.

    Returns:
        The access token taken from the response body.

    Raises:
        Exception: if the token endpoint answers with any status other than 200.
    """
    token_url = 'https://' + service + '/oauth/token'
    credentials = {
        'client_id': app_id,
        'client_secret': secret,
        'grant_type': 'client_credentials',
    }
    reply = requests.post(url=token_url, data=credentials)
    if reply.status_code != 200:
        raise Exception('Error: {0} {1}'.format(reply.status_code, reply.reason))
    return reply.json()['access_token']
# Read the Hub client credentials and ORCID settings from the local config file.
with open('Hub API credentials.json') as config_file:
    API_credentials = json.load(config_file)

client_id = API_credentials["client_id"]
client_secret = API_credentials["client_secret"]
environment = API_credentials["service"]
# The Hub token is requested before the environment name is validated below.
access_token = fetch_hub_access_token(client_id, client_secret, environment)
public_orcid_access_token = API_credentials["public_access_token"]
public_headers = {
    'accept': 'application/json',
    'authorization': 'Bearer ' + public_orcid_access_token,
}
ORCID_API_version = API_credentials['orcid_version']

# For each supported Hub host: (Hub API base, Hub ORCID-proxy base, ORCID public API base).
_service_endpoints = {
    "test.orcidhub.org.nz": (
        'https://test.orcidhub.org.nz/api/v1/',
        'https://test.orcidhub.org.nz/orcid/api/',
        'https://pub.sandbox.orcid.org/',
    ),
    "orcidhub.org.nz": (
        'https://orcidhub.org.nz/api/v1/',
        'https://orcidhub.org.nz/orcid/api/',
        'https://pub.orcid.org/',
    ),
}
_service_key = environment.lower()
if _service_key not in _service_endpoints:
    raise Exception('Invalid config environment: specify the service as either test.orcidhub.org.nz or orcidhub.org.nz')
_hub_base, _member_base, _public_base = _service_endpoints[_service_key]
hub_url = _hub_base
member_orcid_url = _member_base + ORCID_API_version + '/'
member_headers = {'accept': 'application/json', 'authorization': 'Bearer ' + access_token}
pub_orcid_url = _public_base + ORCID_API_version + '/'
def fetch_hub_users():
    """Retrieve every user record from the Hub's paginated ``users`` endpoint.

    Pages are requested in order, starting at page 1, until a page comes back
    with no user records. Every record on each page is kept; iterating to
    ``count - 1`` would silently drop the last user of every page.

    Returns:
        list: the user records from all pages, in the order received.

    Raises:
        Exception: on a 401 reply (token expired/revoked) or on any other
            non-200 reply.
    """
    headers = {'accept': 'application/json', 'authorization': 'Bearer ' + access_token}
    body = []
    page = 1
    while True:
        response = requests.get(hub_url + 'users', headers=headers, params={'page': page})
        if response.status_code == 401:
            raise Exception('Access token expired/revoked get a new one')
        if response.status_code != 200:
            raise Exception('Error: {0} {1}'.format(response.status_code, response.reason))
        # Parse the payload once per page and use its real length rather than a
        # header value. Assumes the endpoint returns a JSON array of users and
        # an empty array past the last page - TODO confirm against the Hub API.
        page_users = response.json()
        if not page_users:
            break
        body.extend(page_users)
        page += 1
    return body
def fetch_works_list(orcid):
    """Fetch the works summary list for one ORCID iD.

    The Hub's ORCID proxy is tried first with the member headers. If that does
    not return 200, the failure is printed and the same list is read from the
    ORCID public API instead, after a short pause to limit the request rate.

    Args:
        orcid: the ORCID iD whose works are requested.

    Returns:
        tuple: ``(response, status)`` where ``status`` is the status code of
        the response actually returned. After a fallback it therefore reflects
        the public read, so a successful fallback is not discarded by callers
        that test ``status``.
    """
    response = requests.get(member_orcid_url + orcid + '/works', headers=member_headers)
    status = response.status_code
    if status != 200:
        print(orcid, status, response.reason)
        # Pause before the public request to keep within the request-rate budget.
        time.sleep(0.05)
        response = requests.get(pub_orcid_url + orcid + '/works', headers=public_headers)
        status = response.status_code
    return response, status
def fetch_work(orcid, putcode, visibility):
    """Fetch a single work record for an ORCID iD.

    Works whose visibility is exactly ``"public"`` are read from the ORCID
    public API after a short pause; any other visibility is read through the
    Hub's ORCID proxy with the member headers.

    Args:
        orcid: the ORCID iD that owns the work.
        putcode: the work's put-code (converted with ``str`` for the URL).
        visibility: visibility value taken from the work summary.

    Returns:
        The ``requests`` response for the work record.
    """
    work_path = orcid + '/work/' + str(putcode)
    if visibility == "public":
        # Throttle public reads only.
        time.sleep(0.05)
        return requests.get(pub_orcid_url + work_path, headers=public_headers)
    return requests.get(member_orcid_url + work_path, headers=member_headers)
def fetch_orcid_works(skip_orcid=""):
    """Build one summary row per work for the users returned by the Hub.

    For each user with a confirmed ORCID iD the works list is fetched and the
    first work summary of every group is reduced to a flat dict. Users without
    a confirmed iD, and users with no works, get a single placeholder row.
    Users whose works list could not be fetched (status other than 200) are
    printed and produce no row.

    Args:
        skip_orcid: when non-empty, users are ignored up to and including the
            one whose ORCID iD equals this value; collation starts with the
            following user.

    Returns:
        list: summary dicts, in the order the users and their works were seen.
    """
    summaries = []
    hub_users = fetch_hub_users()
    if not isinstance(hub_users, list):
        return summaries

    def placeholder_row(user, status, note):
        # Row for a user with nothing to report: every work field is left blank.
        return {'email': user['email'], 'orcid': user['orcid'], 'status': status,
                'put-code': '', 'source': '', 'title': '', 'external-id-type': '',
                'external-id-value': '', 'type': '', 'publication-date-year': '',
                'visibility': '', 'note': note}

    collating = skip_orcid == ""
    for user in hub_users:
        if not collating:
            # Still before the resume point; the matching user itself is skipped too.
            if user['orcid'] == skip_orcid:
                collating = True
            continue

        if not (user['confirmed'] and user['orcid']):
            summaries.append(placeholder_row(user, '', 'ORCID iD not confirmed by Hub'))
            continue

        response, status = fetch_works_list(user['orcid'])
        if status != 200:
            print(user['orcid'], str(status), response.reason)
            continue

        print(user['orcid'])
        groups = response.json()['group']
        if not groups:
            summaries.append(placeholder_row(user, status, 'No visible works'))
            continue

        for group in groups:
            # Only the first summary of each group is used.
            summary = group['work-summary'][0]
            putcode = summary['put-code']
            source = summary['source']['source-name']
            if source:
                source = source['value']

            try:
                title = summary['title']['title']['value']
            except (IndexError, TypeError):
                title = "No Title"

            external_id_type = ""
            external_id_value = ""
            try:
                first_id = summary['external-ids']['external-id'][0]
                if len(first_id['external-id-value']) > 0:
                    external_id_value = first_id['external-id-value']
                    external_id_type = first_id['external-id-type']
            except (IndexError, TypeError):
                external_id_value = ""
                external_id_type = ""

            pub_type = summary['type']
            try:
                pub_year = summary['publication-date']['year']['value']
            except (IndexError, TypeError):
                pub_year = ""
            visibility = summary['visibility']

            summaries.append({'email': user['email'], 'orcid': user['orcid'],
                              'status': status, 'put-code': putcode, 'source': source,
                              'title': title, 'external-id-type': external_id_type,
                              'external-id-value': external_id_value, 'type': pub_type,
                              'publication-date-year': pub_year, 'visibility': visibility,
                              'note': 'OK'})
    return summaries
def fetch_work_details(skip_orcid=""):
    """Fetch the full record for each summarised work and write all rows to a TSV file.

    A heading row is written to ``organisation_works.tsv`` (UTF-8), followed by
    one row per summary returned by ``fetch_orcid_works``. Summaries that carry
    a put-code and a 200 status are enriched with fields read from the
    individual work record; every other summary is written with those extra
    columns blank. A field that is missing or null in the work record leaves
    its column at the default instead of aborting the export.

    The file is managed by a ``with`` block so it is closed, and the rows
    written so far are kept, even if a request or lookup raises part-way.

    Args:
        skip_orcid: passed through to ``fetch_orcid_works``.

    Returns:
        int: the number of data rows written (the heading row is not counted).
    """
    row_headings = 'Email\tORCID iD\tResponse\tPut-Code\tVisibility\tSource\tTitle\tJournal Title\tShort Description' \
                   '\tCitation Type\tCitation Value\tExternal ID Type\tExternal ID Value\tExternal ID URL' \
                   '\tExternal ID Relationship\tType\tPublication Year\tURL\tLanguage Code\tCountry\tNote\n'
    # Missing keys, short lists and null (None) containers are all treated as "field absent".
    absent = (LookupError, TypeError)
    rows_written = 0
    with open('organisation_works.tsv', 'w', encoding='utf-8') as outfile:
        outfile.write(row_headings)
        pub_summaries = fetch_orcid_works(skip_orcid)
        for record in pub_summaries:
            orcid = record['orcid']
            putcode = record['put-code']
            external_id_value = record['external-id-value']
            external_id_url = ""
            external_id_relationship = ""
            short_description = ""
            journal_title = ""
            citation_type = ""
            citation_value = ""
            url = ""
            language_code = ""
            country = ""
            if putcode and record['status'] == 200:
                print(orcid + "/work/" + str(putcode))
                response = fetch_work(orcid, putcode, record['visibility'])
                work = response.json()
                try:
                    short_description = work['short-description']
                except absent:
                    pass
                try:
                    journal_title = work['journal-title']['value']
                except absent:
                    pass
                try:
                    citation = work['citation']
                    if citation:
                        citation_type = citation['citation-type']
                        raw_citation = citation['citation-value']
                        if raw_citation:
                            # Newlines inside a citation would break the one-row-per-work layout.
                            citation_value = raw_citation.replace('\n', "").strip()
                except absent:
                    pass
                try:
                    external_id_url = work['external-ids']['external-id'][0]['external-id-url']['value']
                except absent:
                    pass
                if external_id_value:
                    try:
                        external_id_relationship = \
                            work['external-ids']['external-id'][0]['external-id-relationship']
                    except absent:
                        pass
                try:
                    url = work['url']['value']
                except absent:
                    pass
                try:
                    language_code = work['language-code']
                except absent:
                    pass
                try:
                    country = work['country']['value']
                except absent:
                    pass
            # Explicit column order matching row_headings, independent of dict ordering.
            columns = (
                record['email'], orcid, record['status'], putcode, record['visibility'],
                record['source'], record['title'], journal_title, short_description,
                citation_type, citation_value, record['external-id-type'],
                external_id_value, external_id_url, external_id_relationship,
                record['type'], record['publication-date-year'], url, language_code,
                country, record['note'],
            )
            outfile.write('\t'.join(str(value) for value in columns) + '\n')
            rows_written += 1
    return rows_written
def write_works(skip_orcid=""):
    """Run the export and return a message reporting how many rows were written.

    Args:
        skip_orcid: passed through to ``fetch_work_details``.

    Returns:
        str: the row count followed by ``' rows written to file'``.
    """
    return '{0} rows written to file'.format(fetch_work_details(skip_orcid))
# Run the full export with no resume point and print the resulting row-count message.
print(write_works(""))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment