Skip to content

Instantly share code, notes, and snippets.

@philippemiron
Last active July 4, 2020 13:18
Show Gist options
  • Save philippemiron/c9f5a79c55358d7976f538c0314094d5 to your computer and use it in GitHub Desktop.
Save philippemiron/c9f5a79c55358d7976f538c0314094d5 to your computer and use it in GitHub Desktop.
NSF historical awards exploration
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring NSF Historical Awards \n",
"- https://www.nsf.gov/awardsearch/download.jsp"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.ticker import MaxNLocator\n",
"import os\n",
"import xml.etree.ElementTree as ET "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def parse_xml(xmlfile):\n",
"    \"\"\"Extract the fields of interest from one NSF award XML file.\n",
"\n",
"    The top-level children of <Award> are located by tag name rather than\n",
"    by position. Fixed positions break as soon as an award has, e.g., a\n",
"    second <Investigator> in front of <Institution>: the institution slot\n",
"    then holds an investigator's first name.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    xmlfile : str or file object\n",
"        Path to (or open handle on) a single award XML file.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        [title, date, amount, gtype, dabv, dname, divabv, divname, name,\n",
"        institution]. `amount` is a float, the other fields are strings.\n",
"        A field that cannot be read is None without affecting the others;\n",
"        an unreadable or malformed file yields ten None values instead of\n",
"        raising.\n",
"    \"\"\"\n",
"    def text_at(parent, tag, *path):\n",
"        # Text of the node reached from the first `tag` child of `parent` by\n",
"        # following the positional indices in `path`; None if a step is missing.\n",
"        node = parent.find(tag)\n",
"        try:\n",
"            for index in path:\n",
"                node = node[index]\n",
"        except (IndexError, TypeError):\n",
"            return None\n",
"        return None if node is None else node.text\n",
"\n",
"    try:\n",
"        root = ET.parse(xmlfile).getroot()\n",
"    except (ET.ParseError, OSError):\n",
"        # best effort: a broken file becomes an empty row the caller can drop\n",
"        return [None] * 10\n",
"\n",
"    award = root if root.tag == 'Award' else root.find('Award')\n",
"    if award is None:\n",
"        return [None] * 10\n",
"\n",
"    title = text_at(award, 'AwardTitle')\n",
"    date = text_at(award, 'AwardEffectiveDate')\n",
"\n",
"    try:\n",
"        amount = float(text_at(award, 'AwardTotalIntnAmount'))\n",
"    except (TypeError, ValueError):\n",
"        # element missing, empty or not numeric\n",
"        amount = None\n",
"\n",
"    # the first child of <AwardInstrument> holds the grant type\n",
"    gtype = text_at(award, 'AwardInstrument', 0)\n",
"\n",
"    # <Organization>: child 1 is the directorate, child 2 the division;\n",
"    # each one carries (abbreviation, long name) as its first two children\n",
"    dabv = text_at(award, 'Organization', 1, 0)\n",
"    dname = text_at(award, 'Organization', 1, 1)\n",
"    divabv = text_at(award, 'Organization', 2, 0)\n",
"    divname = text_at(award, 'Organization', 2, 1)\n",
"\n",
"    # first listed <Investigator>: children 0 and 1 are first and last name\n",
"    first_name = text_at(award, 'Investigator', 0)\n",
"    last_name = text_at(award, 'Investigator', 1)\n",
"    if first_name is None and last_name is None:\n",
"        name = None\n",
"    else:\n",
"        name = '%s %s' % (first_name, last_name)\n",
"\n",
"    # the first child of <Institution> is its name\n",
"    institution = text_at(award, 'Institution', 0)\n",
"\n",
"    return [title, date, amount, gtype, dabv, dname, divabv, divname, name, institution]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Column labels for the awards DataFrame, in the order of the values\n",
"# returned by parse_xml\n",
"columns_name = 'title date amount gtype dabv dname divabv divname name institution'.split()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2010/\n",
"2011/\n",
"2012/\n",
"2013/\n",
"2014/\n",
"2015/\n",
"2016/\n",
"2017/\n",
"2018/\n",
"2019/\n",
"2020/\n"
]
}
],
"source": [
"# Parse every award file of the yearly folders 2010/ ... 2020/ into one row each\n",
"data = []\n",
"for year in range(2010, 2021):\n",
"    folder = '%s/' % year\n",
"    print(folder)\n",
"    # os.listdir returns entries in arbitrary order; sorting makes the row\n",
"    # order (and therefore the DataFrame index) reproducible between runs\n",
"    for filename in sorted(os.listdir(folder)):\n",
"        # ignore anything that is not an award XML file\n",
"        if not filename.lower().endswith('.xml'):\n",
"            continue\n",
"        data.append(parse_xml(os.path.join(folder, filename)))\n",
"\n",
"df = pd.DataFrame(data, columns=columns_name)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the awards for which an investigator name is available;\n",
"# rows without a name usually have no institution either\n",
"df = df[df['name'].notna()]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>date</th>\n",
" <th>amount</th>\n",
" <th>gtype</th>\n",
" <th>dabv</th>\n",
" <th>dname</th>\n",
" <th>divabv</th>\n",
" <th>divname</th>\n",
" <th>name</th>\n",
" <th>institution</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>LTREB: Female settlement patterns and social ...</td>\n",
" <td>03/15/2011</td>\n",
" <td>339900.0</td>\n",
" <td>Continuing grant</td>\n",
" <td>BIO</td>\n",
" <td>Direct For Biological Sciences</td>\n",
" <td>IOS</td>\n",
" <td>Division Of Integrative Organismal Systems</td>\n",
" <td>Anne Pusey</td>\n",
" <td>Ian</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SBIR Phase II: Patent End-To-End (PE2E) Exami...</td>\n",
" <td>03/15/2011</td>\n",
" <td>500000.0</td>\n",
" <td>Standard Grant</td>\n",
" <td>ENG</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>IIP</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Rocky Kahn</td>\n",
" <td>Team Patent LLC</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>COLLABORATIVE RESEARCH: Latitudinal variation ...</td>\n",
" <td>03/01/2011</td>\n",
" <td>327000.0</td>\n",
" <td>Standard Grant</td>\n",
" <td>BIO</td>\n",
" <td>Direct For Biological Sciences</td>\n",
" <td>DEB</td>\n",
" <td>Division Of Environmental Biology</td>\n",
" <td>James Cronin</td>\n",
" <td>Louisiana State University</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Workshop: SFF Symposium Student Support 2010;...</td>\n",
" <td>05/01/2010</td>\n",
" <td>15000.0</td>\n",
" <td>Standard Grant</td>\n",
" <td>ENG</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>CMMI</td>\n",
" <td>Div Of Civil, Mechanical, &amp; Manufact Inn</td>\n",
" <td>David Bourell</td>\n",
" <td>University of Texas at Austin</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>EAGER: Backbone Selenoamides as Minimal Chromo...</td>\n",
" <td>07/15/2010</td>\n",
" <td>250000.0</td>\n",
" <td>Standard Grant</td>\n",
" <td>MPS</td>\n",
" <td>Direct For Mathematical &amp; Physical Scien</td>\n",
" <td>CHE</td>\n",
" <td>Division Of Chemistry</td>\n",
" <td>Ernest Petersson</td>\n",
" <td>University of Pennsylvania</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title date amount \\\n",
"0 LTREB: Female settlement patterns and social ... 03/15/2011 339900.0 \n",
"1 SBIR Phase II: Patent End-To-End (PE2E) Exami... 03/15/2011 500000.0 \n",
"4 COLLABORATIVE RESEARCH: Latitudinal variation ... 03/01/2011 327000.0 \n",
"6 Workshop: SFF Symposium Student Support 2010;... 05/01/2010 15000.0 \n",
"8 EAGER: Backbone Selenoamides as Minimal Chromo... 07/15/2010 250000.0 \n",
"\n",
" gtype dabv dname divabv \\\n",
"0 Continuing grant BIO Direct For Biological Sciences IOS \n",
"1 Standard Grant ENG Directorate For Engineering IIP \n",
"4 Standard Grant BIO Direct For Biological Sciences DEB \n",
"6 Standard Grant ENG Directorate For Engineering CMMI \n",
"8 Standard Grant MPS Direct For Mathematical & Physical Scien CHE \n",
"\n",
" divname name \\\n",
"0 Division Of Integrative Organismal Systems Anne Pusey \n",
"1 Div Of Industrial Innovation & Partnersh Rocky Kahn \n",
"4 Division Of Environmental Biology James Cronin \n",
"6 Div Of Civil, Mechanical, & Manufact Inn David Bourell \n",
"8 Division Of Chemistry Ernest Petersson \n",
"\n",
" institution \n",
"0 Ian \n",
"1 Team Patent LLC \n",
"4 Louisiana State University \n",
"6 University of Texas at Austin \n",
"8 University of Pennsylvania "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five awards\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# one group of awards per investigator name\n",
"groups = df.groupby(by='name')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"At least one person got 27 proposals accepted during the last 10 years.\n"
]
}
],
"source": [
"# Number of funded proposals per PI, in group (sorted name) order.\n",
"# size() counts the rows of each group directly instead of building a\n",
"# DataFrame per PI just to take its length.\n",
"nb_proposal = groups.size().tolist()\n",
"np_proposal = np.array(nb_proposal)\n",
"print('At least one person got %d proposals accepted during the last 10 years.' % np.max(np_proposal))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Name of the PI with the largest number of funded proposals.\n",
"# On ties idxmax() returns the first name in group (sorted) order.\n",
"PI_name = groups.size().idxmax()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>date</th>\n",
" <th>amount</th>\n",
" <th>dname</th>\n",
" <th>divname</th>\n",
" <th>institution</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>43383</th>\n",
" <td>MIT VMS I-Corps Site</td>\n",
" <td>06/15/2014</td>\n",
" <td>299991.0</td>\n",
" <td>Direct For Computer &amp; Info Scie &amp; Enginr</td>\n",
" <td>Division Of Computer and Network Systems</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82468</th>\n",
" <td>I-Corps: An Accurate and Accessible Indoor Pos...</td>\n",
" <td>12/01/2016</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Roman</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91892</th>\n",
" <td>I-Corps: An Objective Clinical Machine Learnin...</td>\n",
" <td>04/15/2017</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93323</th>\n",
" <td>I-Corps: Mobile Augmented Reality</td>\n",
" <td>04/01/2017</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95592</th>\n",
" <td>I-Corps: Point-of-Care Physiological Assessment</td>\n",
" <td>04/01/2017</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95978</th>\n",
" <td>Type II: MIT Innovation Corps Site</td>\n",
" <td>08/01/2017</td>\n",
" <td>299613.0</td>\n",
" <td>Direct For Computer &amp; Info Scie &amp; Enginr</td>\n",
" <td>Division Of Computer and Network Systems</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99195</th>\n",
" <td>I-Corps: Factor graph computing for data-drive...</td>\n",
" <td>09/15/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99279</th>\n",
" <td>I-Corps Teams: Programmable Nanotechnology Vac...</td>\n",
" <td>01/01/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101811</th>\n",
" <td>I-Corps: Improving the Energy Efficiency of Tr...</td>\n",
" <td>01/01/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101937</th>\n",
" <td>I-Corps Teams: Mobile Platform for Collecting,...</td>\n",
" <td>09/15/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102672</th>\n",
" <td>I-Corps: Organ-on-a-Chip Technology for Pharma...</td>\n",
" <td>01/01/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104798</th>\n",
" <td>I-Corps: Machine Learning Algorithms and Tools...</td>\n",
" <td>01/01/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109138</th>\n",
" <td>I-Corps Teams: Photonic Crystal Enabled Thermo...</td>\n",
" <td>01/01/2018</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109250</th>\n",
" <td>I-Corps Node: New England Regional Innovation ...</td>\n",
" <td>08/01/2018</td>\n",
" <td>4200000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Marc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>113549</th>\n",
" <td>I-Corps: Heat-stable binding proteins for dive...</td>\n",
" <td>09/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>113957</th>\n",
" <td>I-Corps: Ultra-clear, transparent aerogel mate...</td>\n",
" <td>09/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>114746</th>\n",
" <td>I-Corps: Synthetic Matrix Solutions for Neurod...</td>\n",
" <td>04/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116925</th>\n",
" <td>Broadening Participation - I-Corps Northeast R...</td>\n",
" <td>10/01/2019</td>\n",
" <td>171116.0</td>\n",
" <td>Direct For Computer &amp; Info Scie &amp; Enginr</td>\n",
" <td>Division Of Computer and Network Systems</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117967</th>\n",
" <td>I-Corps: Electric Reservoir Stimulation</td>\n",
" <td>03/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118123</th>\n",
" <td>I-Corps: Decentralized fertilizer production f...</td>\n",
" <td>09/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119205</th>\n",
" <td>I-Corps: Robust filtration membranes for harsh...</td>\n",
" <td>03/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120929</th>\n",
" <td>I-Corps: Acoustic monitoring of remote pumping...</td>\n",
" <td>03/15/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>121225</th>\n",
" <td>I-Corps: Embedding fabric-based sensors into a...</td>\n",
" <td>09/01/2019</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124283</th>\n",
" <td>I-Corps: Label-free optical imaging of the lym...</td>\n",
" <td>06/01/2020</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124441</th>\n",
" <td>I-Corps Teams: IoT Sensor Networks Detecting U...</td>\n",
" <td>02/01/2020</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125869</th>\n",
" <td>I-Corps: Software Platform for Constructing Mi...</td>\n",
" <td>06/01/2020</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126075</th>\n",
" <td>I-Corps Teams: Machine Learning (ML)-powered D...</td>\n",
" <td>02/01/2020</td>\n",
" <td>50000.0</td>\n",
" <td>Directorate For Engineering</td>\n",
" <td>Div Of Industrial Innovation &amp; Partnersh</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title date \\\n",
"43383 MIT VMS I-Corps Site 06/15/2014 \n",
"82468 I-Corps: An Accurate and Accessible Indoor Pos... 12/01/2016 \n",
"91892 I-Corps: An Objective Clinical Machine Learnin... 04/15/2017 \n",
"93323 I-Corps: Mobile Augmented Reality 04/01/2017 \n",
"95592 I-Corps: Point-of-Care Physiological Assessment 04/01/2017 \n",
"95978 Type II: MIT Innovation Corps Site 08/01/2017 \n",
"99195 I-Corps: Factor graph computing for data-drive... 09/15/2018 \n",
"99279 I-Corps Teams: Programmable Nanotechnology Vac... 01/01/2018 \n",
"101811 I-Corps: Improving the Energy Efficiency of Tr... 01/01/2018 \n",
"101937 I-Corps Teams: Mobile Platform for Collecting,... 09/15/2018 \n",
"102672 I-Corps: Organ-on-a-Chip Technology for Pharma... 01/01/2018 \n",
"104798 I-Corps: Machine Learning Algorithms and Tools... 01/01/2018 \n",
"109138 I-Corps Teams: Photonic Crystal Enabled Thermo... 01/01/2018 \n",
"109250 I-Corps Node: New England Regional Innovation ... 08/01/2018 \n",
"113549 I-Corps: Heat-stable binding proteins for dive... 09/15/2019 \n",
"113957 I-Corps: Ultra-clear, transparent aerogel mate... 09/15/2019 \n",
"114746 I-Corps: Synthetic Matrix Solutions for Neurod... 04/15/2019 \n",
"116925 Broadening Participation - I-Corps Northeast R... 10/01/2019 \n",
"117967 I-Corps: Electric Reservoir Stimulation 03/15/2019 \n",
"118123 I-Corps: Decentralized fertilizer production f... 09/15/2019 \n",
"119205 I-Corps: Robust filtration membranes for harsh... 03/15/2019 \n",
"120929 I-Corps: Acoustic monitoring of remote pumping... 03/15/2019 \n",
"121225 I-Corps: Embedding fabric-based sensors into a... 09/01/2019 \n",
"124283 I-Corps: Label-free optical imaging of the lym... 06/01/2020 \n",
"124441 I-Corps Teams: IoT Sensor Networks Detecting U... 02/01/2020 \n",
"125869 I-Corps: Software Platform for Constructing Mi... 06/01/2020 \n",
"126075 I-Corps Teams: Machine Learning (ML)-powered D... 02/01/2020 \n",
"\n",
" amount dname \\\n",
"43383 299991.0 Direct For Computer & Info Scie & Enginr \n",
"82468 50000.0 Directorate For Engineering \n",
"91892 50000.0 Directorate For Engineering \n",
"93323 50000.0 Directorate For Engineering \n",
"95592 50000.0 Directorate For Engineering \n",
"95978 299613.0 Direct For Computer & Info Scie & Enginr \n",
"99195 50000.0 Directorate For Engineering \n",
"99279 50000.0 Directorate For Engineering \n",
"101811 50000.0 Directorate For Engineering \n",
"101937 50000.0 Directorate For Engineering \n",
"102672 50000.0 Directorate For Engineering \n",
"104798 50000.0 Directorate For Engineering \n",
"109138 50000.0 Directorate For Engineering \n",
"109250 4200000.0 Directorate For Engineering \n",
"113549 50000.0 Directorate For Engineering \n",
"113957 50000.0 Directorate For Engineering \n",
"114746 50000.0 Directorate For Engineering \n",
"116925 171116.0 Direct For Computer & Info Scie & Enginr \n",
"117967 50000.0 Directorate For Engineering \n",
"118123 50000.0 Directorate For Engineering \n",
"119205 50000.0 Directorate For Engineering \n",
"120929 50000.0 Directorate For Engineering \n",
"121225 50000.0 Directorate For Engineering \n",
"124283 50000.0 Directorate For Engineering \n",
"124441 50000.0 Directorate For Engineering \n",
"125869 50000.0 Directorate For Engineering \n",
"126075 50000.0 Directorate For Engineering \n",
"\n",
" divname \\\n",
"43383 Division Of Computer and Network Systems \n",
"82468 Div Of Industrial Innovation & Partnersh \n",
"91892 Div Of Industrial Innovation & Partnersh \n",
"93323 Div Of Industrial Innovation & Partnersh \n",
"95592 Div Of Industrial Innovation & Partnersh \n",
"95978 Division Of Computer and Network Systems \n",
"99195 Div Of Industrial Innovation & Partnersh \n",
"99279 Div Of Industrial Innovation & Partnersh \n",
"101811 Div Of Industrial Innovation & Partnersh \n",
"101937 Div Of Industrial Innovation & Partnersh \n",
"102672 Div Of Industrial Innovation & Partnersh \n",
"104798 Div Of Industrial Innovation & Partnersh \n",
"109138 Div Of Industrial Innovation & Partnersh \n",
"109250 Div Of Industrial Innovation & Partnersh \n",
"113549 Div Of Industrial Innovation & Partnersh \n",
"113957 Div Of Industrial Innovation & Partnersh \n",
"114746 Div Of Industrial Innovation & Partnersh \n",
"116925 Division Of Computer and Network Systems \n",
"117967 Div Of Industrial Innovation & Partnersh \n",
"118123 Div Of Industrial Innovation & Partnersh \n",
"119205 Div Of Industrial Innovation & Partnersh \n",
"120929 Div Of Industrial Innovation & Partnersh \n",
"121225 Div Of Industrial Innovation & Partnersh \n",
"124283 Div Of Industrial Innovation & Partnersh \n",
"124441 Div Of Industrial Innovation & Partnersh \n",
"125869 Div Of Industrial Innovation & Partnersh \n",
"126075 Div Of Industrial Innovation & Partnersh \n",
"\n",
" institution \n",
"43383 Massachusetts Institute of Technology \n",
"82468 Roman \n",
"91892 Massachusetts Institute of Technology \n",
"93323 Massachusetts Institute of Technology \n",
"95592 Massachusetts Institute of Technology \n",
"95978 Massachusetts Institute of Technology \n",
"99195 Massachusetts Institute of Technology \n",
"99279 Massachusetts Institute of Technology \n",
"101811 Massachusetts Institute of Technology \n",
"101937 Massachusetts Institute of Technology \n",
"102672 Massachusetts Institute of Technology \n",
"104798 Massachusetts Institute of Technology \n",
"109138 Massachusetts Institute of Technology \n",
"109250 Marc \n",
"113549 Massachusetts Institute of Technology \n",
"113957 Massachusetts Institute of Technology \n",
"114746 Massachusetts Institute of Technology \n",
"116925 Massachusetts Institute of Technology \n",
"117967 Massachusetts Institute of Technology \n",
"118123 Massachusetts Institute of Technology \n",
"119205 Massachusetts Institute of Technology \n",
"120929 Massachusetts Institute of Technology \n",
"121225 Massachusetts Institute of Technology \n",
"124283 Massachusetts Institute of Technology \n",
"124441 Massachusetts Institute of Technology \n",
"125869 Massachusetts Institute of Technology \n",
"126075 Massachusetts Institute of Technology "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all awards of the PI selected in PI_name, limited to the descriptive columns\n",
"shown_columns = ['title', 'date', 'amount', 'dname', 'divname', 'institution']\n",
"pi_df = groups.get_group(PI_name)\n",
"pi_df[shown_columns]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# histogram of the per-PI counts: after dropping bin 0, entry i-1 is the\n",
"# number of PIs with exactly i funded proposals\n",
"counts_per_pi = np.array(nb_proposal)\n",
"PIs_with_n_proposal = np.bincount(counts_per_pi, minlength=np.max(counts_per_pi))[1:]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[30074 11053 5185 2724 1415 815 460 257 151 76 47 45\n",
" 29 20 11 8 5 2 3 2 1 1 1 0\n",
" 0 1 1]\n"
]
}
],
"source": [
"# entry i-1 = number of PIs with exactly i funded proposals\n",
"print(PIs_with_n_proposal)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(4, 4))\n",
"\n",
"# number of PIs holding 1, 2, ..., 10 funded proposals\n",
"n_funded = np.arange(1, 11)\n",
"ax.plot(n_funded, PIs_with_n_proposal[:10])\n",
"ax.set_xticks(n_funded[::3])\n",
"\n",
"ax.set(title='How many proposal per PI in the past decade?',\n",
"       xlabel='Number of funded proposals',\n",
"       ylabel='Number of PIs');"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"\n",
"# PIs_with_n_proposal[i] counts the PIs with i+1 proposals, so the slice [9:]\n",
"# starts at exactly 10 proposals; the title therefore reads '10 or more'\n",
"ax.bar(np.arange(10, len(PIs_with_n_proposal) + 1), PIs_with_n_proposal[9:])\n",
"ax.set_title('PIs with 10 or more proposals in the last decade')\n",
"ax.set_xlabel('Number of funded proposals')\n",
"ax.set_ylabel('Number of PIs')\n",
"# proposal counts are integers: keep the x ticks on whole numbers\n",
"ax.xaxis.set_major_locator(MaxNLocator(integer=True))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Count, per institution, the awards held by PIs with at least MIN_AWARDS\n",
"# funded proposals in the dataset\n",
"MIN_AWARDS = 10\n",
"pi_sizes = groups.size()\n",
"\n",
"institution_count_top_pi = {}\n",
"# only the groups of the selected PIs are built, not one per PI in the dataset\n",
"for name in pi_sizes[pi_sizes >= MIN_AWARDS].index:\n",
"    for inst in groups.get_group(name)['institution']:\n",
"        # Skip missing values (None/NaN) and single-word entries. The original\n",
"        # note says NSF puts the PI's name there when no institution is given.\n",
"        # NOTE(review): unverified; may instead be an artefact of positional\n",
"        # XML parsing when an award lists several investigators -- confirm.\n",
"        if isinstance(inst, str) and len(inst.split(' ')) > 1:\n",
"            institution_count_top_pi[inst] = institution_count_top_pi.get(inst, 0) + 1"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# keep the n institutions with the highest counts, largest first\n",
"n = 20\n",
"ranked_top_pi = sorted(institution_count_top_pi.items(), key=lambda item: item[1], reverse=True)\n",
"n_institutions_top_pi = dict(ranked_top_pi[:n])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"\n",
"positions = range(len(n_institutions_top_pi))\n",
"ax.bar(positions, list(n_institutions_top_pi.values()), align='center')\n",
"ax.set_xticks(positions)\n",
"ax.set_xticklabels(list(n_institutions_top_pi.keys()), rotation=90)\n",
"# title and axis label so the figure can be read on its own\n",
"ax.set_title('Top %d institutions of PIs with 10 or more awards' % len(n_institutions_top_pi))\n",
"ax.set_ylabel('Number of awards');"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Number of awards per institution, regardless of how many each PI holds\n",
"groups_i = df.groupby('institution')\n",
"\n",
"# groupby keys are unique, so every institution appears exactly once;\n",
"# size() counts the rows of each group without building a DataFrame for it\n",
"institution_count = {\n",
"    inst: int(count)\n",
"    for inst, count in groups_i.size().items()\n",
"    # Single-word entries are skipped. The original note says NSF puts the\n",
"    # PI's name there when no institution is given.\n",
"    # NOTE(review): unverified -- confirm against the raw XML.\n",
"    if len(inst.split(' ')) > 1\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# keep the n institutions with the highest award counts, largest first\n",
"n = 25\n",
"ranked_institutions = sorted(institution_count.items(), key=lambda item: item[1], reverse=True)\n",
"n_institutions = dict(ranked_institutions[:n])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"\n",
"positions = range(len(n_institutions))\n",
"ax.bar(positions, list(n_institutions.values()), align='center')\n",
"ax.set_xticks(positions)\n",
"ax.set_xticklabels(list(n_institutions.keys()), rotation=90)\n",
"# title and axis label so the figure can be read on its own\n",
"ax.set_title('Top %d institutions by number of awards' % len(n_institutions))\n",
"ax.set_ylabel('Number of awards');"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The University of Miami received 162 awards over the past decade.\n"
]
}
],
"source": [
"# na=False: rows without an institution count as 'no match' instead of\n",
"# producing NaN, which cannot be used as a boolean mask;\n",
"# regex=False: the name is matched as plain text\n",
"miami_mask = df['institution'].str.contains('University of Miami', case=False, regex=False, na=False)\n",
"print('The University of Miami received %d awards over the past decade.' % miami_mask.sum())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# awards whose division name is exactly 'Division Of Ocean Sciences'\n",
"df_oce = df.loc[df['divname'].eq('Division Of Ocean Sciences')]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3630 proposal in Ocean Sciences\n"
]
}
],
"source": [
"# one row per award, so the row count is the number of proposals\n",
"print('%d proposal in Ocean Sciences' % df_oce.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# export date, title and amount of the Ocean Sciences awards, without the index\n",
"df_oce.to_csv('oce.csv', columns=['date', 'title', 'amount'], index=False)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"At least one person got 20 proposals accepted during the last 10 years.\n"
]
}
],
"source": [
"# Number of funded proposals per PI, restricted to the Ocean Sciences awards.\n",
"# The *_oce names leave `groups`, `nb_proposal` and `np_proposal` bound to the\n",
"# full-dataset values, so re-running an earlier cell still uses all awards.\n",
"groups_oce = df_oce.groupby('name')\n",
"\n",
"# size() counts the rows of each group without building a DataFrame per PI\n",
"np_proposal_oce = groups_oce.size().to_numpy()\n",
"print('At least one person got %d proposals accepted during the last 10 years.' % np.max(np_proposal_oce))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment