Skip to content

Instantly share code, notes, and snippets.

@bendichter
Created October 19, 2022 11:32
Show Gist options
  • Save bendichter/41a81a23ece06c74c868a470cc630370 to your computer and use it in GitHub Desktop.
Save bendichter/41a81a23ece06c74c868a470cc630370 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "a228faf2",
"metadata": {},
"source": [
"This notebook uses the NIH RePORTER API to get the project IDs of neuroscience projects and the PMIDs of neuroscience papers. The BRAIN FOA list is used to find projects and papers funded by the BRAIN Initiative."
]
},
{
"cell_type": "markdown",
"id": "6e901831",
"metadata": {},
"source": [
"# 1. All NIH-funded neuroscience papers"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5e0fbb6e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████| 24/24 [00:42<00:00, 1.78s/it]\n"
]
}
],
"source": [
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm, trange\n",
"\n",
"def paginate_query(endpoint, query, step_size=500):\n",
"    \"\"\"Fetch every result of a paginated search by POSTing successive pages.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    endpoint : str\n",
"        URL that the JSON payload is POSTed to.\n",
"    query : dict\n",
"        Search payload. It is copied for each request, so the caller's dict\n",
"        is left unmodified.\n",
"    step_size : int, default 500\n",
"        Page size, sent as ``limit``; ``offset`` advances by this amount.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        The ``results`` entries of all pages, concatenated in request order.\n",
"\n",
"    Raises\n",
"    ------\n",
"    requests.HTTPError\n",
"        If any page request comes back with an error status.\n",
"    \"\"\"\n",
"\n",
"    def fetch_page(offset):\n",
"        # Build a fresh payload per request instead of mutating the caller's query.\n",
"        payload = {**query, \"offset\": offset, \"limit\": step_size}\n",
"        response = requests.post(endpoint, json=payload)\n",
"        # Fail loudly on HTTP errors rather than with a KeyError further down.\n",
"        response.raise_for_status()\n",
"        return response.json()\n",
"\n",
"    first_page = fetch_page(0)\n",
"    total = first_page[\"meta\"][\"total\"]\n",
"    results = list(first_page[\"results\"])\n",
"\n",
"    # The first page is already in hand; remaining pages start at step_size.\n",
"    for offset in trange(step_size, total, step_size):\n",
"        results.extend(fetch_page(offset)[\"results\"])\n",
"\n",
"    return results\n",
"\n",
"def clean_project_num(x):\n",
"    \"\"\"Reduce a full project number string to its core part.\n",
"\n",
"    Removes one leading digit when present (presumably the application type\n",
"    code -- confirm) and everything from the first ``-`` onward, e.g.\n",
"    ``1R01NS120819-01`` -> ``R01NS120819``. Input that does not start with a\n",
"    digit keeps its first character, so an already-clean number is returned\n",
"    unchanged. Surrounding whitespace is ignored.\n",
"    \"\"\"\n",
"    x = x.strip()\n",
"    # Slice-based check so an empty string does not raise IndexError.\n",
"    if x[:1].isdigit():\n",
"        x = x[1:]\n",
"    return x.split('-')[0]\n",
"\n",
" \n",
"# Project search: projects of the listed agencies whose start date falls\n",
"# inside the window below, newest first.\n",
"endpoint = \"https://api.reporter.nih.gov/v2/projects/search/\"\n",
"query = dict(\n",
"    criteria=dict(\n",
"        agencies=[\"NINDS\", \"NIMH\"],\n",
"        project_start_date=dict(\n",
"            from_date=\"2019-09-30T12:09:00Z\",\n",
"            to_date=\"2022-10-30T12:09:00Z\",\n",
"        ),\n",
"    ),\n",
"    # Fields deliberately not requested: ApplId, SubprojectId, Organization,\n",
"    # ContactPiName, AllText, FullStudySection.\n",
"    include_fields=[\n",
"        \"FiscalYear\",\n",
"        \"ProjectNum\",\n",
"        \"OrgCountry\",\n",
"        \"ProjectNumSplit\",\n",
"        \"ProjectStartDate\",\n",
"        \"ProjectEndDate\",\n",
"    ],\n",
"    sort_field=\"project_start_date\",\n",
"    sort_order=\"desc\",\n",
")\n",
"\n",
"results = paginate_query(endpoint, query)\n",
"\n",
"# Collect into a set first so repeated core project numbers appear only once.\n",
"unique_project_numbers = set()\n",
"for project in results:\n",
"    unique_project_numbers.add(clean_project_num(project[\"project_num\"]))\n",
"project_numbers = list(unique_project_numbers)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b0c6b974",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7029"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(project_numbers)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b228085f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/8 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 12%|█████▋ | 1/8 [00:01<00:13, 1.92s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 25%|███████████▎ | 2/8 [00:04<00:12, 2.08s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 38%|████████████████▉ | 3/8 [00:06<00:10, 2.13s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 50%|██████████████████████▌ | 4/8 [00:08<00:08, 2.09s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 62%|████████████████████████████▏ | 5/8 [00:10<00:06, 2.06s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 75%|█████████████████████████████████▊ | 6/8 [00:12<00:04, 2.03s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 88%|███████████████████████████████████████▍ | 7/8 [00:14<00:02, 2.07s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"100%|█████████████████████████████████████████████| 8/8 [00:15<00:00, 1.89s/it]\n"
]
},
{
"data": {
"text/plain": [
"16055"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"endpoint = \"https://api.reporter.nih.gov/v2/publications/search/\"\n",
"\n",
"# Payload template; the project-number filter is filled in per batch below.\n",
"# NOTE(review): \"string\" for sortField/sortOrder looks like a placeholder\n",
"# value -- confirm whether the API needs or simply ignores these keys.\n",
"pub_query = {\n",
"    \"criteria\": {\"core_project_nums\": []},\n",
"    \"sortField\": \"string\",\n",
"    \"sortOrder\": \"string\",\n",
"}\n",
"\n",
"# Query in batches of 1000 project numbers, each with a trailing '*' appended.\n",
"results = []\n",
"for start in trange(0, len(project_numbers), 1000):\n",
"    batch = project_numbers[start:(start + 1000)]\n",
"    pub_query[\"criteria\"].update(core_project_nums=[num + '*' for num in batch])\n",
"    results.extend(paginate_query(endpoint, pub_query, 8000))\n",
"\n",
"# A set keeps each PMID once even when several batches return it.\n",
"nih_pmids = {record[\"pmid\"] for record in results}\n",
"len(nih_pmids)"
]
},
{
"cell_type": "markdown",
"id": "f9119ec5",
"metadata": {},
"source": [
"# 2. All BRAIN-funded papers"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "98e34452",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
" 0%| | 0/2 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1135\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"0it [00:00, ?it/s]\u001b[A\n",
" 50%|██████████████████████▌ | 1/2 [00:02<00:02, 2.93s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"100%|█████████████████████████████████████████████| 2/2 [00:03<00:00, 1.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5501\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"\n",
"# Funded-awards spreadsheet, looked up in the current user's Downloads folder\n",
"# rather than under one specific user's absolute home path.\n",
"fpath = Path.home() / \"Downloads\" / \"funded_awards-2022-10-18T11-36-02.xlsx\"\n",
"\n",
"df = pd.read_excel(fpath)\n",
"\n",
"def clean1(x):\n",
"    \"\"\"Return ``x`` unchanged unless it contains ``>``.\n",
"\n",
"    When ``>`` is present, take the text between the first and second ``>``\n",
"    and drop its last four characters (presumably the tail of a closing\n",
"    markup tag -- confirm against the spreadsheet contents).\n",
"    \"\"\"\n",
"    if \">\" in x:\n",
"        after_first_marker = x.split(\">\")[1]\n",
"        return after_first_marker[:-4]\n",
"    return x\n",
"\n",
"# Keep only rows whose \"Project Number\" is not a float (presumably empty\n",
"# cells read in as NaN -- confirm).\n",
"keep_row = df[\"Project Number\"].apply(lambda value: not isinstance(value, float))\n",
"df = df[keep_row]\n",
"\n",
"# Unwrap any markup with clean1, then reduce each entry with clean_project_num.\n",
"project_numbers = df[\"Project Number\"].apply(clean1).apply(clean_project_num)\n",
"\n",
"print(len(project_numbers))\n",
"\n",
"endpoint = \"https://api.reporter.nih.gov/v2/publications/search/\"\n",
"\n",
"# pub_query is the payload dict created in an earlier cell; only its\n",
"# project-number filter is replaced for each batch of 1000.\n",
"results = []\n",
"for start in trange(0, len(project_numbers), 1000):\n",
"    batch = project_numbers[start:(start + 1000)]\n",
"    pub_query[\"criteria\"].update(core_project_nums=[num + '*' for num in batch])\n",
"    results.extend(paginate_query(endpoint, pub_query, 8000))\n",
"\n",
"brain_pmids = set()\n",
"for record in results:\n",
"    brain_pmids.add(record[\"pmid\"])\n",
"print(len(brain_pmids))\n"
]
},
{
"cell_type": "markdown",
"id": "c7a05305",
"metadata": {},
"source": [
"# 3. Recent BRAIN-funded neurophys projects"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b6de19bf",
"metadata": {},
"outputs": [],
"source": [
"data=\"\"\"1R01NS120819-01\n",
"1R01NS123663-01\n",
"1R01NS123916-01\n",
"1R01NS122742-01\n",
"1U01NS122082-01\n",
"1R01NS120594-01\n",
"1U01NS122040-01\n",
"1R01NS121773-01\n",
"1R01NS121913-01\n",
"1R01NS123424-01\n",
"1U01NS123658-01\n",
"1R01NS120850-01\n",
"1R34NS123819-01\n",
"1R01NS123912-01\n",
"1R01NS124592-01\n",
"1R01NS124017-01\n",
"1R34NS122272-01\n",
"1R01NS123665-01\n",
"1R34NS121898-01\n",
"1U01NS122124-01\n",
"1R34NS123913-01\n",
"1R01NS123887-01\n",
"1R01NS124564-01\n",
"1U01NS120824-01\n",
"1R01NS120832-01\n",
"1UF1MH128337-01\n",
"1R21EY033080-01\n",
"1R01NS120289-01A1\n",
"1R01NS123899-01\n",
"1R01NS121874-01\n",
"1R01NS123681-01\n",
"1R01NS123890-01\n",
"1R01NS123778-01\n",
"1R01NS121772-01\n",
"1R01NS121904-01\n",
"1R01NS121764-01\n",
"1U19NS123719-01\n",
"1R01NS123918-01\n",
"1R34NS121875-01\n",
"1U19NS123716-01\n",
"1R01NS112183-01A1\n",
"1U01NS122123-01\n",
"1UG3MH126864-01\n",
"1R01NS124590-01\n",
"1U01NS120822-01\n",
"1R01NS121911-01\n",
"1R34NS121873-01\n",
"1R01NS120828-01\n",
"1R01NS121918-01\n",
"1R01NS123903-01\n",
"1R01NS123842-01\n",
"1R01NS121919-01\n",
"1U01NS120820-01\n",
"1R34NS123876-01\n",
"1U01NS123668-01\n",
"1R01NS121776-01\n",
"1R34NS121766-01\n",
"1R01NS120851-01A1\"\"\"\n",
"\n",
"# Skip blank lines: an empty entry would otherwise become \"\" here and be\n",
"# sent as a bare '*' pattern once the wildcard is appended in the next cell.\n",
"project_numbers = [\n",
"    clean_project_num(line)\n",
"    for line in map(str.strip, data.splitlines())\n",
"    if line\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "848de072",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"\n",
"# Go through paginate_query (defined in the first code cell) so that results\n",
"# beyond the first 8000 are fetched too instead of being silently dropped by\n",
"# a single request.\n",
"# NOTE(review): \"string\" for sortField/sortOrder looks like a placeholder\n",
"# value -- confirm whether the API needs or simply ignores these keys.\n",
"query = dict(\n",
"    criteria=dict(core_project_nums=[x + '*' for x in project_numbers]),\n",
"    sortField=\"string\",\n",
"    sortOrder=\"string\",\n",
")\n",
"neurophys_results = paginate_query(\n",
"    \"https://api.reporter.nih.gov/v2/publications/search/\", query, 8000\n",
")\n",
"results_pmids = {result[\"pmid\"] for result in neurophys_results}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fc111967",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(results_pmids)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment