Skip to content

Instantly share code, notes, and snippets.

@sbamin
Last active December 6, 2021 22:27
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sbamin/c74d74c67163c08267da616bbead9f37 to your computer and use it in GitHub Desktop.
Save sbamin/c74d74c67163c08267da616bbead9f37 to your computer and use it in GitHub Desktop.
Use NIH RePORTER API to fetch grant records.
#!/bin/bash
## Use NIH RePORTER API to fetch grant records
## @sbamin
# usage
# show_help: write the usage text (dependencies, options, an example and
# known limitations) to stdout. The example line embeds the basename of the
# running script ($0 with any leading directory stripped).
show_help() {
  local script_name="${0##*/}"

  # One argument per output line; printf repeats the '%s\n' format for each.
  printf '%s\n' \
    'Use NIH RePORTER API to fetch grant records.' \
    'This script depends on following:' \
    'NIH RePORTER API: https://api.federalreporter.nih.gov' \
    'date command - may differ between mac (unix) and other linux systems.' \
    'http (require installation): https://httpie.org' \
    'jq (stand-alone binary) at https://stedolan.github.io/jq/' \
    'bat (stand-alone binary) at https://github.com/sharkdp/bat' \
    'Options:' \
    '-h display this help and exit' \
    '-p Required if -q is missing: PI Name(s), e.g., LAST,F or LAST1,F1;LAST2,F2' \
    '-q Required if -p is missing: Text string, e.g., "cancer biology"' \
    '-s Optional: Search operator for text query (AND|OR; Default: AND)' \
    '-y Optional: Fiscal year in YYYY or "YYYY,YYYY" format (Default: current, prev one year)' \
    '-a Optional: Funding agency (Default: "NIH,DOD,HHS")' \
    '-i Optional: ? Row offset to start reading records since max 50 records allowed per query (Default: 1)' \
    "Example: ${script_name} -p \"Kaelin,W\" -q \"cancer VHL\" -s OR -y \"2017,2018,2019\" -a \"NCI,NINDS\"" \
    'Limitations: Only outputs max 50 records per query ? regardless of -i option.'
}
## Called with no arguments at all: print usage and exit with failure.
if [[ $# == 0 ]]; then
  show_help
  exit 1
fi

## Parse command-line flags into the global option variables:
##   -p PINAME, -q QUERY, -y FY, -s QUERYOP, -a AGENCY, -i ROWID
## -h prints usage and exits 0; an invalid flag prints usage to stderr and
## exits 1.
while getopts "p:q:y:s:a:i:h" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    p) PINAME=$OPTARG ;;
    q) QUERY=$OPTARG ;;
    y) FY=$OPTARG ;;
    s) QUERYOP=$OPTARG ;;
    a) AGENCY=$OPTARG ;;
    i) ROWID=$OPTARG ;;
    '?')
      ## The optstring has no leading ':', so getopts has already reported
      ## the offending flag. The two commands must be separate statements:
      ## written as `show_help >&2 exit 1`, "exit 1" becomes arguments to
      ## show_help and the script keeps running after a bad option.
      show_help >&2
      exit 1
      ;;
  esac
done
#### default fiscal years ####
## YRS holds the default fiscal-year list: "<current year>,<previous year>".
## The previous year is derived arithmetically from a single `date +%Y` call.
## `date +%Y` behaves the same on GNU (Linux) and BSD (macOS) date, whereas
## the relative-date flags (`--date="1 year ago"` vs `-v -1y`) do not, so no
## OS detection is required and the result is valid on any platform.
current_year="$(date +%Y)"
YRS="${current_year},$(( current_year - 1 ))"
## Fill in a default for every option that is unset or empty.
## "NONE" is the sentinel for "this search field was not supplied".
: "${PINAME:=NONE}"
: "${QUERY:=NONE}"
## Fiscal years fall back to YRS (current and previous year: YYYY,YYYY).
: "${FY:=$YRS}"
: "${QUERYOP:=AND}"
: "${AGENCY:=NIH,DOD,HHS}"
## The API returns at most 50 records per query; ROWID is the row offset
## from which records start being read.
: "${ROWID:=1}"
## Run the search and print one tab-separated row per matching project.
## replace bat with cat if you do not want to install bat
## http and jq are required though
## NOTE(review): confirm that the api.federalreporter.nih.gov endpoint used
## below is still being served before relying on this script.

## At least one of -p / -q must have been supplied ("NONE" = not supplied).
if [[ "${PINAME}" == "NONE" && "${QUERY}" == "NONE" ]]; then
  echo "Invalid or missing -p and -q. One of the two arguments is required." >&2
  show_help >&2
  exit 1
fi

## Assemble the search criteria in the order the API query is written:
## [piName] fy agency [text textOperator] sortBy offset
criteria=()
if [[ "${PINAME}" != "NONE" ]]; then
  criteria+=("piName:${PINAME}")
fi
criteria+=("fy:${FY}" "agency:${AGENCY}")
if [[ "${QUERY}" != "NONE" ]]; then
  criteria+=("text:${QUERY}" "textOperator:${QUERYOP}")
fi
criteria+=("sortBy:fy" "offset:${ROWID}")

## Criteria are separated by a literal '$'. IFS is changed only inside the
## command substitution's subshell, so the rest of the script is unaffected.
search_query="$(IFS='$'; printf '%s' "${criteria[*]}")"

## jq filter: one tab-separated line per item, ending with the project URL.
row_filter='.items[] | "\(.contactPi)\t\(.orgName)\t\(.fy)\t\(.ic)\t\(.projectNumber)\t\(.totalCostAmount)\t\(.budgetStartDate)\t\(.budgetEndDate)\t\(.projectStartDate)\t\(.projectEndDate)\t\(.title)\thttps://projectreporter.nih.gov/project_info_description.cfm?aid=\(.nihApplId)"'

## Header always includes URL, since every row ends with the project URL.
printf 'PI\tORG\tFY\tIC\tTYPE\t$$\tSTART\tEND\tGRANT_SINCE\tGRANT_ENDS\tTITLE\tURL\n'
http -b "https://api.federalreporter.nih.gov/v1/projects/search?query=${search_query}" \
  | jq -r "${row_filter}" \
  | bat
#_end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment