Skip to content

Instantly share code, notes, and snippets.

@telatin
Created March 22, 2021 10:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save telatin/8c1cfdd6b84d6b59fd7891f5ba1af9b9 to your computer and use it in GitHub Desktop.
Save telatin/8c1cfdd6b84d6b59fd7891f5ba1af9b9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Parses a file listing GtR person URLs (one per line; lines starting
# with '#' are skipped) and prints a tab-separated table with the
# details of each person's projects
import json
import sys
import traceback
import urllib3
import xmltodict
from pprint import pprint
from IPython import embed
from urllib.parse import unquote
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    Takes the same positional and keyword arguments as the built-in
    ``print``; ``file`` is fixed to ``sys.stderr`` and must not be passed.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def getxml(url):
    """Download *url* and return its XML body parsed with xmltodict.

    Returns the parsed document, or ``None`` when the response body cannot
    be parsed (the traceback is reported on stderr). Errors raised by the
    HTTP request itself are not caught here and propagate to the caller.
    """
    http = urllib3.PoolManager()
    response = http.request('GET', url)
    try:
        return xmltodict.parse(response.data)
    except Exception:
        # Return an explicit None: falling through to a 'return data' with
        # nothing bound would raise UnboundLocalError right after logging.
        eprint("Failed to parse xml from response (%s)" % traceback.format_exc())
        return None
# The file listing the URLs to query is a required first argument.
if len(sys.argv) < 2:
    # sys.exit(<str>) writes the message to stderr and exits with status 1.
    # quit() is a helper injected by the 'site' module for interactive use
    # and would end the script silently with a success status.
    sys.exit("Usage: <script> FILE_WITH_URLS")
def get_pi(data):
    """Return "<first name> <surname>" of the principal investigator.

    *data* is the parsed ``gtr:personRole`` node: a single mapping, or a
    list of mappings when the project lists several people. Each person's
    ``gtr:roles`` / ``gtr:role`` may likewise be one mapping or a list.

    Returns an empty string (and reports "PI not found" on stderr) when no
    entry carries the PRINCIPAL_INVESTIGATOR role.
    """
    # xmltodict yields a dict for a single element and a list for repeated
    # ones; normalise so both shapes go through the same loop.
    people = data if isinstance(data, list) else [data]
    for person in people:
        try:
            roles = person['gtr:roles']['gtr:role']
            if not isinstance(roles, list):
                roles = [roles]
            if any(role['gtr:name'] == 'PRINCIPAL_INVESTIGATOR' for role in roles):
                return person['gtr:firstName'] + ' ' + person['gtr:surname']
        except (KeyError, TypeError):
            # Incomplete or unexpectedly shaped entry: try the next person
            continue
    eprint("PI not found")
    return ""
def get_user(data):
    """Extract the person's identity from a parsed person document.

    Reads ``data['gtr:personOverview']['gtr:person']``, which looks like::

        {
          "@url": "https://gtr.ukri.org:443/person/FF0FC8ED-2B76-440A-9E29-12790BAE28BA",
          "gtr:id": "FF0FC8ED-2B76-440A-9E29-12790BAE28BA",
          "gtr:firstName": "John",
          "gtr:otherNames": "William",
          "gtr:surname": "Moreau"
        }

    Returns a 4-tuple ``(id, surname, first name, other names)``; other
    names is "" when the field is absent or empty. If the person node
    cannot be read, the problem is reported on stderr and four empty
    strings are returned.
    """
    try:
        person = data['gtr:personOverview']['gtr:person']
        return (
            person['gtr:id'],
            person['gtr:surname'],
            person['gtr:firstName'],
            # 'or ""' also covers a present-but-empty element (None)
            person.get('gtr:otherNames') or "",
        )
    except (KeyError, TypeError, AttributeError) as e:
        eprint(f"Unable to retrieve USER details: <{e}>")
        # Same arity as the success path so callers can always unpack 4 values
        return ('', '', '', '')
def _as_list(node):
    """Return *node* as a list: None -> [], a list unchanged, else [node]."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def _project_overviews(rawdata):
    """Return the gtr:projectOverview entries of a parsed person document ([] if none)."""
    try:
        results = rawdata['gtr:personOverview']['gtr:projectSearchResult']['gtr:results']
    except (KeyError, TypeError):
        return []
    if not isinstance(results, dict):
        return []
    # A single project is parsed as a dict, several as a list
    return _as_list(results.get('gtr:projectOverview'))


# Read the URL list, dropping blank lines and '#' comment lines so they are
# neither fetched nor counted in the progress total.
with open(sys.argv[1], 'r') as f:
    urls = [line.strip() for line in f]
urls = [u for u in urls if u and not u.startswith('#')]

print("AuthorUrl\tuserId\tuserName\tuserSurname\tuserOther\tprojectCount\tPI\tprojectId\tprojectTitle\tgrantStart\tgrantEnd\tgrantPounds\tincomeType")
totUrls = len(urls)
for cnt, url in enumerate(urls, start=1):
    # One unreachable or unparsable URL should not abort the whole batch
    try:
        rawdata = getxml(url)
    except Exception as e:
        eprint(f"{cnt}/{totUrls}\tunable to fetch {url}: <{e}>")
        continue
    eprint(f"{cnt}/{totUrls}\tparsing {url}")
    if rawdata is None:
        eprint(f"{cnt}/{totUrls}\tno data for {url}, skipping")
        continue

    # get_user returns (id, surname, first name, other names) -- note that
    # the surname comes before the first name. Pad so that a shorter tuple
    # still unpacks into four fields.
    user = tuple(get_user(rawdata)) + ('', '', '', '')
    userId, userSurname, userName, userOther = user[:4]

    # Iterate through projects
    pCount = 0
    for overview in _project_overviews(rawdata):
        p = overview.get('gtr:projectComposition') if isinstance(overview, dict) else None
        if not isinstance(p, dict):
            eprint(f"Skipping malformed project entry for {url}")
            continue
        pCount += 1

        # Star when inferred from user URL and not from project PI name
        PI = userName + ' ' + userSurname + '*'
        roles = p.get('gtr:personRoles')
        if roles is not None:
            PI = get_pi(roles.get('gtr:personRole')) or ""

        # Project ID and title; each field falls back independently
        project = p.get('gtr:project') or {}
        projectTitle = project.get('gtr:title', '<Unknown>')
        projectId = project.get('gtr:id', '<Unknown>')

        # Start, end, amount and income type of the grant
        fund = project.get('gtr:fund') or {}
        grantStart = fund.get('gtr:start', '<Unknown>')
        grantEnd = fund.get('gtr:end', '<Unknown>')
        grantPounds = fund.get('gtr:valuePounds', 0)
        incomeType = fund.get('gtr:type', '<Unknown>')

        print(f"{url}\t{userId}\t{userName}\t{userSurname}\t{userOther}\t{pCount}\t{PI}\t{projectId}\t{projectTitle}\t{grantStart}\t{grantEnd}\t{grantPounds}\t{incomeType}")
"""
Sample project record (as JSON): STUDENTSHIP -- gtr:personRoles is null
{
"gtr:leadResearchOrganisation": {
"@url": "https://gtr.ukri.org:443/organisation/D1774113-D5D2-4B7C-A412-66A90FE4B96F",
"gtr:id": "D1774113-D5D2-4B7C-A412-66A90FE4B96F",
"gtr:name": "University of Cambridge"
},
"gtr:personRoles": null,
"gtr:project": {
"@url": "https://gtr.ukri.org:443/projects?ref=studentship-2407790",
"gtr:id": "44405E8A-CEB9-4281-ABAE-1F2B3C6904F1",
"gtr:title": "Developing therapeutic monoclonal antibodies for ESKAPE pathogens",
"gtr:grantCategory": "Studentship",
"gtr:fund": {
"gtr:end": "2024-03-31",
"gtr:funder": {
"@url": "https://gtr.ukri.org:443/organisation/C008C651-F5B0-4859-A334-5F574AB6B57C",
"gtr:id": "C008C651-F5B0-4859-A334-5F574AB6B57C",
"gtr:name": "MRC"
},
"gtr:start": "2020-10-01",
"gtr:type": "INCOME_ACTUAL",
"gtr:valuePounds": "0"
},
"gtr:healthCategories": null,
"gtr:researchActivities": null,
"gtr:researchSubjects": null,
"gtr:researchTopics": null,
"gtr:rcukProgrammes": null
}
}
"""
"""
Sample project record (as JSON): research grant -- gtr:personRoles names the PI
{
"gtr:leadResearchOrganisation": {
"@url": "https://gtr.ukri.org:443/organisation/6676402F-8287-464E-9141-7E4118B331E7",
"gtr:id": "6676402F-8287-464E-9141-7E4118B331E7",
"gtr:name": "Natural Environment Research Council"
},
"gtr:personRoles": {
"gtr:personRole": {
"@url": "https://gtr.ukri.org:443/person/BB406388-0395-4344-98DD-645C13AEBD7B",
"gtr:id": "BB406388-0395-4344-98DD-645C13AEBD7B",
"gtr:firstName": "Andrew",
"gtr:surname": "Singer",
"gtr:roles": {
"gtr:role": {
"gtr:name": "PRINCIPAL_INVESTIGATOR"
}
}
}
},
"gtr:project": {
"@url": "https://gtr.ukri.org:443/projects?ref=G0902420",
"gtr:id": "8363ED05-28F4-4500-9BAD-F2E166F692CA",
"gtr:title": "Environmental Aetiology of Diarrhoeagenic Pathogens in Children in a Developing Country Setting",
"gtr:grantCategory": "Research Grant",
"gtr:fund": {
"gtr:end": "2011-03-31",
"gtr:funder": {
"@url": "https://gtr.ukri.org:443/organisation/C008C651-F5B0-4859-A334-5F574AB6B57C",
"gtr:id": "C008C651-F5B0-4859-A334-5F574AB6B57C",
"gtr:name": "MRC"
},
"gtr:start": "2010-05-01",
"gtr:type": "INCOME_ACTUAL",
"gtr:valuePounds": "39864"
},
"gtr:healthCategories": null,
"gtr:researchActivities": null,
"gtr:researchSubjects": null,
"gtr:researchTopics": null,
"gtr:rcukProgrammes": null
}
}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment