Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Python code to query the NIH Reporter API
#!/usr/bin/env python3
import requests
import argparse
import re
from collections import defaultdict
'''
Query the Federal Reporter API (e.g. NIH, VA, FDA)
Example:
./query-nih-reporter.py -s gonadotropin -v --terms
One param:
https://api.federalreporter.nih.gov/v1/Projects/search?query=projectNumber:5R01NR014792-03
Multiple params:
https://api.federalreporter.nih.gov/v1/Projects/search?query=text:machine+learning$textFields:title,abstract
See the documentation at https://api.federalreporter.nih.gov/
'''
# Command-line interface. Parsing happens at import time and the result is
# kept in the module-level ``args`` that main() reads.
parser = argparse.ArgumentParser()
parser.add_argument('--verbose', '-v', action='store_true', help="Verbose")
parser.add_argument('--querytype', default='search', help="Search type")
parser.add_argument('--agency', default='NIH', help="Agency")
parser.add_argument('--projects', '-p', dest='projects',
                    help="Project numbers, comma-delimited")
parser.add_argument('--searchtext', '-s', dest='search',
                    help="Search string using API")
parser.add_argument('--fields', '-f', dest='fields',
                    default='title,abstract', help="Fields to search using API")
parser.add_argument('--fuzzy', help="Do a fuzzy text search on 'terms'")
parser.add_argument('--terms', action='store_true',
                    help="Print out numbers of 'terms'")
# No short alias here: '-f' already belongs to --fields, and registering the
# same option string twice makes argparse raise "conflicting option string"
# before any argument is parsed.
parser.add_argument('--filter',
                    help="Do a case-insensitive string filter on records, comma-delimited")
args = parser.parse_args()

# Root of the Projects endpoint; the query type (e.g. 'search') is appended.
baseurl = 'https://api.federalreporter.nih.gov/v1/Projects/'
def main():
    """Run the queries and reports selected on the command line.

    The steps always run in the same order -- project lookup, text search,
    string filter, fuzzy term filter, term counts -- and a step is skipped
    when its option was not supplied.
    """
    reporter = ReporterQuery(args.verbose, args.querytype, args.agency)

    # (option value, action) pairs, listed in execution order.
    steps = (
        # Find grants using one or more project numbers
        (args.projects, lambda: reporter.queryByProject(args.projects)),
        # Find grants by string search (default fields: title, abstract)
        (args.search, lambda: reporter.queryByText(args.search, args.fields)),
        # Filter grants
        (args.filter, lambda: reporter.doStringFilter(args.filter)),
        (args.fuzzy, lambda: reporter.doFuzzyTermFilter(args.fuzzy)),
        # Count all the terms and print them
        (args.terms, lambda: reporter.analyzeTerms()),
    )
    for requested, run in steps:
        if requested:
            run()


class ReporterQuery:
    """Query the Federal RePORTER Projects endpoint and report on the results.

    The query methods store the grant records (dicts taken from the
    response's ``items`` list) in ``self.grants``; the filter and analysis
    methods then print findings about those records to stdout.
    """

    # Seconds to wait on the API. Without a timeout a stalled connection
    # would block the script indefinitely.
    timeout = 30

    def __init__(self, verbose, querytype, agency):
        """Remember the options and build the endpoint URL.

        verbose   -- when true, query methods print what they found
        querytype -- path segment appended to the module-level ``baseurl``
        agency    -- agency value sent with every query
        """
        self.verbose = verbose
        self.agency = agency
        self.url = baseurl + querytype
        self.grants = []

    @staticmethod
    def _split_csv(text):
        """Split a comma-delimited string, trimming blanks and dropping empties."""
        return [part.strip() for part in text.split(',') if part.strip()]

    @staticmethod
    def _terms(grant):
        """Return the grant's ';'-separated terms, or [] when it has none."""
        terms = grant.get('terms')
        return terms.split(';') if terms else []

    def _fetch(self, paramarr):
        """Send one query built from ``paramarr`` and return the decoded JSON.

        Raises ``requests.HTTPError`` on a non-2xx response so that a server
        error is reported as such instead of as a JSON or key error later.
        """
        response = requests.get(
            self.url,
            params={'query': self.createQuery(paramarr)},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def queryByProject(self, projects):
        """Look up each comma-delimited project number and collect the hits.

        The first item of each non-empty response is appended to
        ``self.grants``; project numbers with no record are skipped.
        """
        for project in self._split_csv(projects):
            result = self._fetch(
                [['projectNumber', project], ['agency', self.agency]])
            if result['totalCount'] == 0:
                if self.verbose:
                    print("No record for project number {}".format(project))
                continue
            if self.verbose:
                print("Found record for project number '{}'".format(project))
            self.grants.append(result['items'][0])

    def queryByText(self, str, fields):
        """Search ``fields`` for the text ``str`` and store the returned items.

        On a non-empty result ``self.grants`` is replaced by the response's
        items; on an empty result it is left unchanged.
        """
        # NOTE: the parameter name shadows the builtin ``str``; it is kept so
        # existing keyword callers continue to work.
        result = self._fetch(
            [['text', str], ['textFields', fields], ['agency', self.agency]])
        if result['totalCount'] == 0:
            if self.verbose:
                print("No records for search string '{}'".format(str))
            return
        if self.verbose:
            print("Found {0} records for search string '{1}'".format(
                result['totalCount'], str))
        self.grants = list(result['items'])

    def createQuery(self, paramarray):
        """Join [name, value] pairs into the API's ``name:value$name:value`` form."""
        # Unconventional $ sign and : delimiters
        return '$'.join([':'.join(x) for x in paramarray])

    def doStringFilter(self, filters):
        """Print the project numbers whose abstract contains any filter word.

        ``filters`` is comma-delimited; each entry is matched
        case-insensitively as a whole word. Nothing is printed when no grant
        matches.
        """
        projects = set()
        for word in self._split_csv(filters):
            # Filter string with word boundaries, compiled once per filter
            pattern = re.compile(
                r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
            for grant in self.grants:
                # A record may carry no abstract; treat that as empty text
                if pattern.search(grant.get('abstract') or ''):
                    projects.add(grant['projectNumber'])
        if projects:
            print("Filter:{0}\tProjects:{1}".format(
                filters, projects))

    def doFuzzyTermFilter(self, searchstr):
        """Print every grant term whose fuzzy ratio to ``searchstr`` exceeds 80."""
        # Imported here so the optional dependency is only needed for --fuzzy
        from fuzzywuzzy import fuzz
        for grant in self.grants:
            for term in self._terms(grant):
                if fuzz.ratio(term, searchstr) > 80:
                    print("Term:{0}\tProject:{1}".format(
                        term, grant['projectNumber']))

    def analyzeTerms(self):
        """Print ``count<TAB>term`` for each distinct term across all grants."""
        counts = defaultdict(int)
        for grant in self.grants:
            for term in self._terms(grant):
                counts[term] += 1
        for term, count in counts.items():
            print("{0}\t{1}".format(count, term))


if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment