Skip to content

Instantly share code, notes, and snippets.

@leigh-johnson
Created September 9, 2018 05:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leigh-johnson/99f47d400a047ce0ed5e8cacfa1471c0 to your computer and use it in GitHub Desktop.
Save leigh-johnson/99f47d400a047ce0ed5e8cacfa1471c0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Table of Contents\n",
"\n",
"1. [Training Dataset Exploration](#Training-Dataset-Exploration)\n",
"\n",
" 1. [Experiment 1](#Label-Experiment-1)\n",
" 2. [Experiment 2](#Label-Experiment-2)\n",
" 3. [Experiment 3](#Label-Experiment-3)\n",
"\n",
"2. [Scrape Unlabeled Data](#Scrape-unlabeled-scholarship-text)\n",
"3. [Exploring via AutoML Python lib](#Exploring-via-AutoML-Python-lib)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training Dataset Exploration"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Label Experiment 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Experiment 1: use high-level categories as labels\n",
"# Train a model where the classifiers are broad categories, like \"military_service\"\n",
"# As opposed to specific labels like \"air force\" or \"national guard\"\n",
"\n",
"# Outputs a csv dataset of \"text\",label,label, ...\n",
"# (one line per scholarship: the text field is always double-quoted, the labels are written bare)\n",
"\n",
"import os\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"\n",
"OUT_FILE = 'data/scholarship_app_req_labels_v1.csv'\n",
"\n",
"# initialize a connection to Postgres SQL database\n",
"# The URL can be supplied through the GRANT_API_DATABASE_URL environment variable so that real\n",
"# credentials never have to be written into the notebook; the fallback is the local debug database.\n",
"engine = create_engine(\n",
"    os.environ.get('GRANT_API_DATABASE_URL', 'postgresql://debug:debug@localhost/grant_api')\n",
")\n",
"\n",
"# load data into pandas dataframe\n",
"#   text   -> program name and description of one scholarship, joined by a space\n",
"#   labels -> comma-separated distinct tag categories attached to that scholarship\n",
"#             (NULL when none of the selected tags has a category)\n",
"df = pd.read_sql(\"\"\"\n",
"    SELECT CONCAT(program_name, ' ',program_description) as text, string_agg(distinct(category), ',') as labels\n",
"    FROM scholarships_tag\n",
"    join scholarships_scholarship_tags on scholarships_tag.id = scholarships_scholarship_tags.tag_id\n",
"    join scholarships_scholarship on scholarships_scholarship.id = scholarships_scholarship_tags.scholarship_id\n",
"    where scholarships_tag.category in (\n",
"        'religion',\n",
"        'academic_level',\n",
"        'hobby',\n",
"        'disciplines',\n",
"        'gender',\n",
"        'disability',\n",
"        'employment_sectors',\n",
"        'gpa',\n",
"        'heritage',\n",
"        'military_service'\n",
"    ) or scholarships_tag.category is null\n",
"\n",
"    group by scholarships_scholarship.id\n",
"\n",
"\"\"\", con=engine)\n",
"\n",
"\n",
"# Write mode (not append): re-running this cell replaces the dataset instead of\n",
"# duplicating every row at the end of the file left by the previous run.\n",
"with open(OUT_FILE, 'w') as out_file:\n",
"    for text, labels in zip(df['text'], df['labels']):\n",
"        # double-quotes are stripped (not escaped) so the quoted text field stays well-formed\n",
"        out_file.write('\"' + text.replace('\"', '') + '\",')\n",
"        # rows without labels keep the trailing comma and an empty label list\n",
"        if labels:\n",
"            out_file.write(labels)\n",
"        out_file.write('\\n')\n",
"print('finished writing categories dataset')\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Label Experiment 2"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished writing v1 labels dataset\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>labels</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Wildlife Leadership Awards Program established...</td>\n",
" <td>NO ESSAY,JUNIOR,SENIOR,FULL-TIME ONLY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Juliette A. Southard Scholarship Leadership-ba...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,leadersh...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Vertical Flight Foundation Scholarship This aw...</td>\n",
" <td>GRADUATE,GPA 3.5+,Electrical Engineering/Elect...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Appraisal Institute Education Trust Education ...</td>\n",
" <td>GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,SOPHMORE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Father James B. Macelwane Annual Awards Availa...</td>\n",
" <td>GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSAY...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>American Society of Naval Engineers Scholarshi...</td>\n",
" <td>GRADUATE,GPA 2.5+,Electrical Engineering/Elect...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Armenian Students Association of America Inc. ...</td>\n",
" <td>GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,SOPHMORE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Astrid G. Cates and Myrtle Beinhauer Scholarsh...</td>\n",
" <td>GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Kathern F. Gruber Scholarship Award for underg...</td>\n",
" <td>GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>National Society Daughters of the American Rev...</td>\n",
" <td>Nursing,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Civil Air Patrol Academic Scholarships One-tim...</td>\n",
" <td>GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>USBC Earl Anthony Memorial Scholarship Annuall...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,GPA 3.0+,HIGH SCH...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NDS Student Recognition Award Awards available...</td>\n",
" <td>GPA 3.0+,SENIOR,FULL-TIME ONLY,NO ESSAY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>National Society Daughters of the American Rev...</td>\n",
" <td>History,HIGH SCHOOL,FULL-TIME ONLY,FRESHMAN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Daughters of the Cincinnati Scholarship Need a...</td>\n",
" <td>GPA 3.0+,HIGH SCHOOL,FEMALE,FULL-TIME ONLY,FRE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Harry A. Applegate Scholarship Scholarship ava...</td>\n",
" <td>Education,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Edward J. Nell Memorial Scholarship in Journal...</td>\n",
" <td>GPA 3.0+,HIGH SCHOOL,Journalism,FULL-TIME ONLY...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Eight &amp; Forty Lung and Respiratory Disease Nur...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,Nursing,NO ESSAY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>National Society Daughters of the American Rev...</td>\n",
" <td>History,JUNIOR,SENIOR,FULL-TIME ONLY</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Francis P. Matthews and John E. Swift Educatio...</td>\n",
" <td>ANY MILITARY SERVICE,JUNIOR,SENIOR,FULL-TIME O...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Breakthrough to Nursing Scholarships for Racia...</td>\n",
" <td>Nursing,BOTH PART-TIME and FULL-TIME,JUNIOR,SE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>American Council of the Blind Scholarships Mer...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.5+...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Northeastern Loggers' Association Scholarships...</td>\n",
" <td>JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Foundation of the National Student Nurses' Ass...</td>\n",
" <td>Nursing,BOTH PART-TIME and FULL-TIME,JUNIOR,SE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Elizabeth Gardner Norweb Summer Environmental ...</td>\n",
" <td>SOPHMORE,Environmental Science,JUNIOR,FULL-TIM...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>International Foodservice Editorial Council Co...</td>\n",
" <td>GRADUATE,Journalism,FULL-TIME ONLY,JUNIOR,SENI...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Jeannette Rankin Women's Scholarship Fund Appl...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,FEMALE,JUNIOR,SEN...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Jimmy A. Young Memorial Education Recognition ...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,GPA 3.0+,JUNIOR,H...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>John and Muriel Landis Scholarship Awards Maxi...</td>\n",
" <td>GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSAY...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Douvas Memorial Scholarship Available to Wyomi...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,NO ESSAY,FRESHMAN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4235</th>\n",
" <td>Zelus Recovery $1000 College Scholarship Zelus...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4236</th>\n",
" <td>Women in Defense HORIZONS-Michigan Scholarship...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4237</th>\n",
" <td>Environmental Litigation Group, P.C. Asbestos ...</td>\n",
" <td>GRADUATE,GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4238</th>\n",
" <td>2017 Tailor Made Lawns Scholarship Fund This $...</td>\n",
" <td>GPA 3.0+,Environmental Science,Physical Scienc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4239</th>\n",
" <td>NANOG Scholarship Program The North American N...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4240</th>\n",
" <td>CompHealth Medical Scholarship We're offering ...</td>\n",
" <td>Biology,Chemical Engineering,JUNIOR,SENIOR,FUL...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4241</th>\n",
" <td>Home Improvement Scholarship by Home Improveme...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4242</th>\n",
" <td>TurboSquid 2017 Spring Scholarship TurboSquid ...</td>\n",
" <td>GRADUATE,GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4243</th>\n",
" <td>CoverWallet Small Business Scholarship The Cov...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4244</th>\n",
" <td>2018 Open Essay Competition There are plenty o...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4245</th>\n",
" <td>Melissa Read Memorial Scholarship Here at High...</td>\n",
" <td>JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4246</th>\n",
" <td>Autism Scholarship The scholarship is in the a...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,JUNIOR,SENIOR,FRE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4247</th>\n",
" <td>Law Student Scholarship The $1,000 scholarship...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,Law/Legal Service...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4248</th>\n",
" <td>Disabled Veteran Scholarship This is a $1,000 ...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,ANY MILITARY SERV...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4249</th>\n",
" <td>Law Student Scholarship The scholarship, in th...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4250</th>\n",
" <td>2017 Law School Scholarship The scholarship wi...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4251</th>\n",
" <td>Disabled Veteran Scholarship Our firm and its ...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,ANY MILITARY SERV...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4252</th>\n",
" <td>Law Student Scholarship This is a single schol...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4253</th>\n",
" <td>Autism Scholarship This is a one-time scholars...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,JUNIOR,SENIOR,FRE...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4254</th>\n",
" <td>Healthline Stronger Scholarship Program The mi...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4255</th>\n",
" <td>Future U.S. Nurse Scholarship Travel Nurse Sou...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,GPA 2.5+,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4256</th>\n",
" <td>2017 Shelving.com Business Scholarship This sc...</td>\n",
" <td>GPA 2.5+,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4257</th>\n",
" <td>2017 Hardwick &amp; Pendergast, P.S. Scholarship A...</td>\n",
" <td>BOTH PART-TIME and FULL-TIME,GPA 3.0+,JUNIOR,S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4258</th>\n",
" <td>La-Philosophie.com Scholarship Since 2008, the...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,History,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4259</th>\n",
" <td>AcadGild Merit-Based Scholarships AcadGild bel...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,ANY MILI...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4260</th>\n",
" <td>Sterbenz-Ryan Scholarship The Sterbenz-Ryan Sc...</td>\n",
" <td>GPA 2.5+,JUNIOR,FULL-TIME ONLY,NO ESSAY,FRESHM...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4261</th>\n",
" <td>Family Travel Forum Teen Travel Writing Schola...</td>\n",
" <td>FULL-TIME ONLY,HIGH SCHOOL,FRESHMAN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4262</th>\n",
" <td>PromoCodesForYou.com Student Savings Scholarsh...</td>\n",
" <td>JUNIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4263</th>\n",
" <td>TonaLaw Veteran's Scholarship The TonaLaw Vete...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,ANY MILI...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4264</th>\n",
" <td>ZipRecruiter $3,000 Scholarship Starting July ...</td>\n",
" <td>GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 2.5+...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4265 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" text \\\n",
"0 Wildlife Leadership Awards Program established... \n",
"1 Juliette A. Southard Scholarship Leadership-ba... \n",
"2 Vertical Flight Foundation Scholarship This aw... \n",
"3 Appraisal Institute Education Trust Education ... \n",
"4 Father James B. Macelwane Annual Awards Availa... \n",
"5 American Society of Naval Engineers Scholarshi... \n",
"6 Armenian Students Association of America Inc. ... \n",
"7 Astrid G. Cates and Myrtle Beinhauer Scholarsh... \n",
"8 Kathern F. Gruber Scholarship Award for underg... \n",
"9 National Society Daughters of the American Rev... \n",
"10 Civil Air Patrol Academic Scholarships One-tim... \n",
"11 USBC Earl Anthony Memorial Scholarship Annuall... \n",
"12 NDS Student Recognition Award Awards available... \n",
"13 National Society Daughters of the American Rev... \n",
"14 Daughters of the Cincinnati Scholarship Need a... \n",
"15 Harry A. Applegate Scholarship Scholarship ava... \n",
"16 Edward J. Nell Memorial Scholarship in Journal... \n",
"17 Eight & Forty Lung and Respiratory Disease Nur... \n",
"18 National Society Daughters of the American Rev... \n",
"19 Francis P. Matthews and John E. Swift Educatio... \n",
"20 Breakthrough to Nursing Scholarships for Racia... \n",
"21 American Council of the Blind Scholarships Mer... \n",
"22 Northeastern Loggers' Association Scholarships... \n",
"23 Foundation of the National Student Nurses' Ass... \n",
"24 Elizabeth Gardner Norweb Summer Environmental ... \n",
"25 International Foodservice Editorial Council Co... \n",
"26 Jeannette Rankin Women's Scholarship Fund Appl... \n",
"27 Jimmy A. Young Memorial Education Recognition ... \n",
"28 John and Muriel Landis Scholarship Awards Maxi... \n",
"29 Douvas Memorial Scholarship Available to Wyomi... \n",
"... ... \n",
"4235 Zelus Recovery $1000 College Scholarship Zelus... \n",
"4236 Women in Defense HORIZONS-Michigan Scholarship... \n",
"4237 Environmental Litigation Group, P.C. Asbestos ... \n",
"4238 2017 Tailor Made Lawns Scholarship Fund This $... \n",
"4239 NANOG Scholarship Program The North American N... \n",
"4240 CompHealth Medical Scholarship We're offering ... \n",
"4241 Home Improvement Scholarship by Home Improveme... \n",
"4242 TurboSquid 2017 Spring Scholarship TurboSquid ... \n",
"4243 CoverWallet Small Business Scholarship The Cov... \n",
"4244 2018 Open Essay Competition There are plenty o... \n",
"4245 Melissa Read Memorial Scholarship Here at High... \n",
"4246 Autism Scholarship The scholarship is in the a... \n",
"4247 Law Student Scholarship The $1,000 scholarship... \n",
"4248 Disabled Veteran Scholarship This is a $1,000 ... \n",
"4249 Law Student Scholarship The scholarship, in th... \n",
"4250 2017 Law School Scholarship The scholarship wi... \n",
"4251 Disabled Veteran Scholarship Our firm and its ... \n",
"4252 Law Student Scholarship This is a single schol... \n",
"4253 Autism Scholarship This is a one-time scholars... \n",
"4254 Healthline Stronger Scholarship Program The mi... \n",
"4255 Future U.S. Nurse Scholarship Travel Nurse Sou... \n",
"4256 2017 Shelving.com Business Scholarship This sc... \n",
"4257 2017 Hardwick & Pendergast, P.S. Scholarship A... \n",
"4258 La-Philosophie.com Scholarship Since 2008, the... \n",
"4259 AcadGild Merit-Based Scholarships AcadGild bel... \n",
"4260 Sterbenz-Ryan Scholarship The Sterbenz-Ryan Sc... \n",
"4261 Family Travel Forum Teen Travel Writing Schola... \n",
"4262 PromoCodesForYou.com Student Savings Scholarsh... \n",
"4263 TonaLaw Veteran's Scholarship The TonaLaw Vete... \n",
"4264 ZipRecruiter $3,000 Scholarship Starting July ... \n",
"\n",
" labels \n",
"0 NO ESSAY,JUNIOR,SENIOR,FULL-TIME ONLY \n",
"1 GRADUATE,BOTH PART-TIME and FULL-TIME,leadersh... \n",
"2 GRADUATE,GPA 3.5+,Electrical Engineering/Elect... \n",
"3 GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,SOPHMORE \n",
"4 GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSAY... \n",
"5 GRADUATE,GPA 2.5+,Electrical Engineering/Elect... \n",
"6 GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,SOPHMORE \n",
"7 GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN... \n",
"8 GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN... \n",
"9 Nursing,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,... \n",
"10 GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN... \n",
"11 BOTH PART-TIME and FULL-TIME,GPA 3.0+,HIGH SCH... \n",
"12 GPA 3.0+,SENIOR,FULL-TIME ONLY,NO ESSAY \n",
"13 History,HIGH SCHOOL,FULL-TIME ONLY,FRESHMAN \n",
"14 GPA 3.0+,HIGH SCHOOL,FEMALE,FULL-TIME ONLY,FRE... \n",
"15 Education,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSA... \n",
"16 GPA 3.0+,HIGH SCHOOL,Journalism,FULL-TIME ONLY... \n",
"17 BOTH PART-TIME and FULL-TIME,Nursing,NO ESSAY \n",
"18 History,JUNIOR,SENIOR,FULL-TIME ONLY \n",
"19 ANY MILITARY SERVICE,JUNIOR,SENIOR,FULL-TIME O... \n",
"20 Nursing,BOTH PART-TIME and FULL-TIME,JUNIOR,SE... \n",
"21 GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.5+... \n",
"22 JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE \n",
"23 Nursing,BOTH PART-TIME and FULL-TIME,JUNIOR,SE... \n",
"24 SOPHMORE,Environmental Science,JUNIOR,FULL-TIM... \n",
"25 GRADUATE,Journalism,FULL-TIME ONLY,JUNIOR,SENI... \n",
"26 BOTH PART-TIME and FULL-TIME,FEMALE,JUNIOR,SEN... \n",
"27 BOTH PART-TIME and FULL-TIME,GPA 3.0+,JUNIOR,H... \n",
"28 GRADUATE,JUNIOR,SENIOR,FULL-TIME ONLY,NO ESSAY... \n",
"29 BOTH PART-TIME and FULL-TIME,NO ESSAY,FRESHMAN \n",
"... ... \n",
"4235 GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S... \n",
"4236 GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+... \n",
"4237 GRADUATE,GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY... \n",
"4238 GPA 3.0+,Environmental Science,Physical Scienc... \n",
"4239 GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+... \n",
"4240 Biology,Chemical Engineering,JUNIOR,SENIOR,FUL... \n",
"4241 GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S... \n",
"4242 GRADUATE,GPA 3.0+,JUNIOR,SENIOR,FULL-TIME ONLY... \n",
"4243 GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S... \n",
"4244 GRADUATE,BOTH PART-TIME and FULL-TIME,JUNIOR,S... \n",
"4245 JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE \n",
"4246 BOTH PART-TIME and FULL-TIME,JUNIOR,SENIOR,FRE... \n",
"4247 BOTH PART-TIME and FULL-TIME,Law/Legal Service... \n",
"4248 BOTH PART-TIME and FULL-TIME,ANY MILITARY SERV... \n",
"4249 GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega... \n",
"4250 GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega... \n",
"4251 BOTH PART-TIME and FULL-TIME,ANY MILITARY SERV... \n",
"4252 GRADUATE,BOTH PART-TIME and FULL-TIME,Law/Lega... \n",
"4253 BOTH PART-TIME and FULL-TIME,JUNIOR,SENIOR,FRE... \n",
"4254 GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 3.0+... \n",
"4255 BOTH PART-TIME and FULL-TIME,GPA 2.5+,JUNIOR,S... \n",
"4256 GPA 2.5+,JUNIOR,SENIOR,FULL-TIME ONLY,FRESHMAN... \n",
"4257 BOTH PART-TIME and FULL-TIME,GPA 3.0+,JUNIOR,S... \n",
"4258 GRADUATE,BOTH PART-TIME and FULL-TIME,History,... \n",
"4259 GRADUATE,BOTH PART-TIME and FULL-TIME,ANY MILI... \n",
"4260 GPA 2.5+,JUNIOR,FULL-TIME ONLY,NO ESSAY,FRESHM... \n",
"4261 FULL-TIME ONLY,HIGH SCHOOL,FRESHMAN \n",
"4262 JUNIOR,FULL-TIME ONLY,FRESHMAN,SOPHMORE \n",
"4263 GRADUATE,BOTH PART-TIME and FULL-TIME,ANY MILI... \n",
"4264 GRADUATE,BOTH PART-TIME and FULL-TIME,GPA 2.5+... \n",
"\n",
"[4265 rows x 2 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Experiment 2:\n",
"# Train a model on the most specific possible classifiers, where the label has more than 100 samples\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"\n",
"OUT_FILE = 'data/scholarship_app_req_labels_v2.csv'\n",
"\n",
"# initialize a connection to Postgres SQL database\n",
"engine = create_engine(\n",
" 'postgresql://debug:debug@localhost/grant_api'\n",
")\n",
"\n",
"OUT_FILE = 'data/scholarship_app_req_labels_v1.csv'\n",
"\n",
"df = pd.read_sql(\"\"\"\n",
" SELECT CONCAT(program_name, ' ',program_description) as text, array_to_string(array_agg(text),',') as labels\n",
" FROM scholarships_tag\n",
" join scholarships_scholarship_tags on scholarships_tag.id = scholarships_scholarship_tags.tag_id\n",
" join scholarships_scholarship on scholarships_scholarship.id = scholarships_scholarship_tags.scholarship_id\n",
" where scholarships_tag.text in (SELECT text from scholarships_tag\n",
" join scholarships_scholarship_tags on scholarships_tag.id = scholarships_scholarship_tags.tag_id\n",
" join scholarships_scholarship on scholarships_scholarship.id = scholarships_scholarship_tags.scholarship_id\n",
" group by scholarships_tag.id\n",
" having count(*) > 100\n",
" order by count(*) DESC)\n",
"\n",
" group by scholarships_scholarship.id\n",
"\n",
"\"\"\", con=engine)\n",
"\n",
"\n",
"with open(OUT_FILE, 'a') as out_file:\n",
" \n",
" for idx, row in df.iterrows():\n",
" # escape double-quotes\n",
" out_file.write(\"\\\"\" + row['text'].replace('\"', '') +\"\\\"\" + ',')\n",
" if row['labels']:\n",
" out_file.write(row['labels'])\n",
" out_file.write('\\n')\n",
"print('finished writing v1 labels dataset')\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Label Experiment 3"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished writing data/scholarship_app_req_labels_v3.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>transcript</th>\n",
" <th>autobiography</th>\n",
" <th>test_scores</th>\n",
" <th>essay</th>\n",
" <th>community_service</th>\n",
" <th>high_school</th>\n",
" <th>financial_need</th>\n",
" <th>application_form</th>\n",
" <th>application_fee</th>\n",
" <th>interview</th>\n",
" <th>references</th>\n",
" <th>contest</th>\n",
" <th>gpa</th>\n",
" <th>military_service</th>\n",
" <th>disability</th>\n",
" <th>academic_level</th>\n",
" <th>undergrad</th>\n",
" <th>institution_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Wildlife Leadership Awards Program established...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>National High School Journalist of the Year/Si...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Association for Iron and Steel Technology Balt...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>WOCN Accredited Nursing Education Program Scho...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Appraisal Institute Education Trust Education ...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Father James B. Macelwane Annual Awards Availa...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Astrid G. Cates and Myrtle Beinhauer Scholarsh...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Archaeology of Portugal Fellowship One-time aw...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[GRADUATE]</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Family Protection Specialist Social Worker For...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[GRADUATE]</td>\n",
" <td>False</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Caleb L. Butler Scholarship Scholarship for gr...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Eight &amp; Forty Lung and Respiratory Disease Nur...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Maine Rural Rehabilitation Fund Scholarship Pr...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>ASPIRE HIGHER Scholarship Program Valeant Derm...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>disABLEDperson Inc. National College Scholarsh...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Illinois AMVETS Ladies Auxiliary Worchid Schol...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>USBC Earl Anthony Memorial Scholarship Annuall...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Civil Air Patrol Academic Scholarships One-tim...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Jimmy A. Young Memorial Education Recognition ...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Tennessee Step Up Scholarship Scholarship prog...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Jeannette Rankin Women's Scholarship Fund Appl...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>American Council of the Blind Scholarships Mer...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.5</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Breakthrough to Nursing Scholarships for Racia...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Northeastern Loggers' Association Scholarships...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Foundation of the National Student Nurses' Ass...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>John and Muriel Landis Scholarship Awards Maxi...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Douvas Memorial Scholarship Available to Wyomi...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Francis P. Matthews and John E. Swift Educatio...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Marine Corps Scholarship Foundation Available ...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>American Nuclear Society Undergraduate Scholar...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>John W. McDevitt (Fourth Degree) Scholarships ...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4237</th>\n",
" <td>James and Mary Dawson Scholarship One-time awa...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[GRADUATE]</td>\n",
" <td>False</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4238</th>\n",
" <td>Alberta Agriculture Food and Rural Development...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4239</th>\n",
" <td>Roadway Worker Memorial Scholarship Program Sc...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4240</th>\n",
" <td>Leonard A. Lorenzen Memorial Scholarship Schol...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4241</th>\n",
" <td>WIFLE Scholarship Scholarship to encourage wom...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4242</th>\n",
" <td>Diagnosis Delayed Scholarship The Diagnosis De...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4243</th>\n",
" <td>Costa Rican Vacations Scholarship The Costa Ri...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4244</th>\n",
" <td>Scholarships for Military Children One-time aw...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4245</th>\n",
" <td>Disabled Veteran Scholarship This is a $1,000 ...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4246</th>\n",
" <td>BISI Academic and Outreach Grants The British ...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[GRADUATE]</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4247</th>\n",
" <td>Sergeant Major Douglas R. Drum Memorial Schola...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4248</th>\n",
" <td>The Rev. Dr. Karen Layman Gift of Hope: 21st C...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4249</th>\n",
" <td>Pretty Lightroom Presets Bi-annual Scholarship...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4250</th>\n",
" <td>Guardian Debt Relief Scholarship Guardian Debt...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4251</th>\n",
" <td>The Walter J. Travis Memorial Scholarship This...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE, TRADE OR TECHNICAL SCHOOL]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4252</th>\n",
" <td>Foreclosure.com Scholarship Program The Forecl...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4253</th>\n",
" <td>Ketamine Clinics of Los Angeles Scholarship Pr...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4254</th>\n",
" <td>Youth Incentive Award Each of the two awards (...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4255</th>\n",
" <td>College Now Greater Cleveland Adult Learner Pr...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2.5</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4256</th>\n",
" <td>PanHellenic Scholarship Awards The PanHellenic...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.5</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4257</th>\n",
" <td>Keller Law Offices Scholarship for Higher Educ...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4258</th>\n",
" <td>Polish American Club of North Jersey Scholarsh...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4259</th>\n",
" <td>Family Travel Forum Teen Travel Writing Schola...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4260</th>\n",
" <td>peermusic Latin Scholarship Award for the best...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4261</th>\n",
" <td>Justin G. Schiller Prize for Bibliographical W...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4262</th>\n",
" <td>Maryland SPJ Pro Chapter College Scholarship S...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4263</th>\n",
" <td>Khia DJ K-Swift Memorial Scholarship You must:...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2.5</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4264</th>\n",
" <td>Missouri Broadcasters Association Scholarship ...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4265</th>\n",
" <td>Scholarships for Orphans of Veterans Scholarsh...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4266</th>\n",
" <td>Multicultural Scholarship Program The Multicul...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>None</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>[FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE]</td>\n",
" <td>True</td>\n",
" <td>[TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4267 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" text transcript \\\n",
"0 Wildlife Leadership Awards Program established... False \n",
"1 National High School Journalist of the Year/Si... False \n",
"2 Association for Iron and Steel Technology Balt... True \n",
"3 WOCN Accredited Nursing Education Program Scho... False \n",
"4 Appraisal Institute Education Trust Education ... True \n",
"5 Father James B. Macelwane Annual Awards Availa... False \n",
"6 Astrid G. Cates and Myrtle Beinhauer Scholarsh... True \n",
"7 Archaeology of Portugal Fellowship One-time aw... False \n",
"8 Family Protection Specialist Social Worker For... False \n",
"9 Caleb L. Butler Scholarship Scholarship for gr... True \n",
"10 Eight & Forty Lung and Respiratory Disease Nur... True \n",
"11 Maine Rural Rehabilitation Fund Scholarship Pr... True \n",
"12 ASPIRE HIGHER Scholarship Program Valeant Derm... False \n",
"13 disABLEDperson Inc. National College Scholarsh... False \n",
"14 Illinois AMVETS Ladies Auxiliary Worchid Schol... True \n",
"15 USBC Earl Anthony Memorial Scholarship Annuall... True \n",
"16 Civil Air Patrol Academic Scholarships One-tim... True \n",
"17 Jimmy A. Young Memorial Education Recognition ... True \n",
"18 Tennessee Step Up Scholarship Scholarship prog... False \n",
"19 Jeannette Rankin Women's Scholarship Fund Appl... False \n",
"20 American Council of the Blind Scholarships Mer... False \n",
"21 Breakthrough to Nursing Scholarships for Racia... True \n",
"22 Northeastern Loggers' Association Scholarships... True \n",
"23 Foundation of the National Student Nurses' Ass... True \n",
"24 John and Muriel Landis Scholarship Awards Maxi... True \n",
"25 Douvas Memorial Scholarship Available to Wyomi... False \n",
"26 Francis P. Matthews and John E. Swift Educatio... False \n",
"27 Marine Corps Scholarship Foundation Available ... False \n",
"28 American Nuclear Society Undergraduate Scholar... True \n",
"29 John W. McDevitt (Fourth Degree) Scholarships ... True \n",
"... ... ... \n",
"4237 James and Mary Dawson Scholarship One-time awa... False \n",
"4238 Alberta Agriculture Food and Rural Development... True \n",
"4239 Roadway Worker Memorial Scholarship Program Sc... False \n",
"4240 Leonard A. Lorenzen Memorial Scholarship Schol... True \n",
"4241 WIFLE Scholarship Scholarship to encourage wom... False \n",
"4242 Diagnosis Delayed Scholarship The Diagnosis De... False \n",
"4243 Costa Rican Vacations Scholarship The Costa Ri... False \n",
"4244 Scholarships for Military Children One-time aw... False \n",
"4245 Disabled Veteran Scholarship This is a $1,000 ... False \n",
"4246 BISI Academic and Outreach Grants The British ... False \n",
"4247 Sergeant Major Douglas R. Drum Memorial Schola... False \n",
"4248 The Rev. Dr. Karen Layman Gift of Hope: 21st C... True \n",
"4249 Pretty Lightroom Presets Bi-annual Scholarship... False \n",
"4250 Guardian Debt Relief Scholarship Guardian Debt... False \n",
"4251 The Walter J. Travis Memorial Scholarship This... False \n",
"4252 Foreclosure.com Scholarship Program The Forecl... False \n",
"4253 Ketamine Clinics of Los Angeles Scholarship Pr... False \n",
"4254 Youth Incentive Award Each of the two awards (... False \n",
"4255 College Now Greater Cleveland Adult Learner Pr... True \n",
"4256 PanHellenic Scholarship Awards The PanHellenic... False \n",
"4257 Keller Law Offices Scholarship for Higher Educ... False \n",
"4258 Polish American Club of North Jersey Scholarsh... True \n",
"4259 Family Travel Forum Teen Travel Writing Schola... False \n",
"4260 peermusic Latin Scholarship Award for the best... False \n",
"4261 Justin G. Schiller Prize for Bibliographical W... False \n",
"4262 Maryland SPJ Pro Chapter College Scholarship S... True \n",
"4263 Khia DJ K-Swift Memorial Scholarship You must:... False \n",
"4264 Missouri Broadcasters Association Scholarship ... False \n",
"4265 Scholarships for Orphans of Veterans Scholarsh... False \n",
"4266 Multicultural Scholarship Program The Multicul... False \n",
"\n",
" autobiography test_scores essay community_service high_school \\\n",
"0 False False False False False \n",
"1 False False True False True \n",
"2 False True True False False \n",
"3 False False False False False \n",
"4 False False True False False \n",
"5 False False False False False \n",
"6 False True True True False \n",
"7 False False True False False \n",
"8 False False False False False \n",
"9 False False True False True \n",
"10 False False False False False \n",
"11 False False False False False \n",
"12 False False True False False \n",
"13 False False True False False \n",
"14 False True False False True \n",
"15 False False True True True \n",
"16 False True True False False \n",
"17 False False False False False \n",
"18 False False False False False \n",
"19 False False True False False \n",
"20 True False True False False \n",
"21 False False False False False \n",
"22 False False True False False \n",
"23 False False False False False \n",
"24 False False False False False \n",
"25 False False False False False \n",
"26 False False False False False \n",
"27 False False True False False \n",
"28 False False False False False \n",
"29 False True False False False \n",
"... ... ... ... ... ... \n",
"4237 False False True False False \n",
"4238 False False True False False \n",
"4239 False False True True False \n",
"4240 False False False False False \n",
"4241 False False True True False \n",
"4242 False False True False False \n",
"4243 False False True False False \n",
"4244 False False True False False \n",
"4245 False False True False False \n",
"4246 False False False False False \n",
"4247 False False True True False \n",
"4248 False False True False False \n",
"4249 False False True False False \n",
"4250 False False True False False \n",
"4251 False False True True False \n",
"4252 False False True False False \n",
"4253 False False True False False \n",
"4254 False False False False True \n",
"4255 False False True False False \n",
"4256 True False True False False \n",
"4257 False False True False False \n",
"4258 False False True False False \n",
"4259 False False True False True \n",
"4260 False False False False False \n",
"4261 False False False False False \n",
"4262 False False True False False \n",
"4263 False False False False True \n",
"4264 False False True False False \n",
"4265 False False False False False \n",
"4266 False False True False False \n",
"\n",
" financial_need application_form application_fee interview \\\n",
"0 False True False False \n",
"1 False True False False \n",
"2 False True False False \n",
"3 False True False False \n",
"4 False True False False \n",
"5 False False False False \n",
"6 False True False False \n",
"7 False True False False \n",
"8 False True False False \n",
"9 True True False False \n",
"10 False True False False \n",
"11 True True False False \n",
"12 False False False False \n",
"13 False True False False \n",
"14 True True False False \n",
"15 False True False False \n",
"16 False True False False \n",
"17 False True False False \n",
"18 False True False False \n",
"19 True True False False \n",
"20 False True False True \n",
"21 True True True False \n",
"22 False True False False \n",
"23 True True True False \n",
"24 True True False False \n",
"25 False True False False \n",
"26 False True False False \n",
"27 True True False False \n",
"28 False True False False \n",
"29 False True False False \n",
"... ... ... ... ... \n",
"4237 True True False True \n",
"4238 False True False False \n",
"4239 True True False True \n",
"4240 False True False False \n",
"4241 False True False False \n",
"4242 False True False False \n",
"4243 False False False False \n",
"4244 False True False False \n",
"4245 False True False False \n",
"4246 False True False False \n",
"4247 False True False False \n",
"4248 False True False False \n",
"4249 False False False False \n",
"4250 False True False False \n",
"4251 False True False False \n",
"4252 False True False False \n",
"4253 False False False False \n",
"4254 False True False False \n",
"4255 True True False False \n",
"4256 True True False False \n",
"4257 True True False False \n",
"4258 False True True False \n",
"4259 False True False False \n",
"4260 False True False False \n",
"4261 False True False False \n",
"4262 True True False False \n",
"4263 False True False False \n",
"4264 True True False False \n",
"4265 False True False False \n",
"4266 True True False False \n",
"\n",
" references contest gpa military_service disability \\\n",
"0 False False None False False \n",
"1 False False 3.0 False False \n",
"2 False False None False False \n",
"3 False False None False False \n",
"4 True False None False False \n",
"5 False False 3.0 False False \n",
"6 True False 3.0 False False \n",
"7 False False None False False \n",
"8 False False None False False \n",
"9 False False None False False \n",
"10 True False None False False \n",
"11 False False 3.0 False False \n",
"12 False False None False False \n",
"13 False False None False True \n",
"14 False False None True False \n",
"15 True True 3.0 False False \n",
"16 True False None False False \n",
"17 True False 3.0 False False \n",
"18 False False None False False \n",
"19 False False None False False \n",
"20 False False 3.5 False True \n",
"21 False False None False False \n",
"22 False True None False False \n",
"23 False False None False False \n",
"24 True False None False False \n",
"25 False False None False False \n",
"26 False False None True False \n",
"27 False False None False False \n",
"28 True False None False False \n",
"29 True False 3.0 False False \n",
"... ... ... ... ... ... \n",
"4237 True False None False False \n",
"4238 True False None False False \n",
"4239 False False None False False \n",
"4240 True False None False False \n",
"4241 False False 3.0 False False \n",
"4242 False False None False False \n",
"4243 False False 3.0 False False \n",
"4244 False False 3.0 True False \n",
"4245 False False None True True \n",
"4246 False False None False False \n",
"4247 False False None False False \n",
"4248 True False 3.0 False False \n",
"4249 False False None False False \n",
"4250 False False 3.0 False False \n",
"4251 False False 3.0 False False \n",
"4252 False False None False False \n",
"4253 False False None False False \n",
"4254 False False None False False \n",
"4255 False False 2.5 False False \n",
"4256 False False 3.5 False False \n",
"4257 False False None False False \n",
"4258 True False 3.0 False False \n",
"4259 False False None False False \n",
"4260 False False None False False \n",
"4261 False True None False False \n",
"4262 True False None False False \n",
"4263 False False 2.5 False False \n",
"4264 False False 3.0 False False \n",
"4265 False False None True False \n",
"4266 False False None False False \n",
"\n",
" academic_level undergrad \\\n",
"0 [JUNIOR, SENIOR] True \n",
"1 [FRESHMAN] True \n",
"2 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"3 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4 [SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"5 [SOPHMORE, JUNIOR, SENIOR] True \n",
"6 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"7 [GRADUATE] False \n",
"8 [GRADUATE] False \n",
"9 [FRESHMAN] True \n",
"10 None False \n",
"11 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"12 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"13 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"14 [FRESHMAN] True \n",
"15 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"16 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"17 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"18 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"19 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"20 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"21 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"22 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"23 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"24 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"25 [FRESHMAN] True \n",
"26 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"27 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"28 [JUNIOR, SENIOR] True \n",
"29 [FRESHMAN] True \n",
"... ... ... \n",
"4237 [GRADUATE] False \n",
"4238 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4239 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4240 [FRESHMAN, SOPHMORE] True \n",
"4241 [SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4242 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4243 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4244 [FRESHMAN, SOPHMORE, JUNIOR] True \n",
"4245 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4246 [GRADUATE] False \n",
"4247 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4248 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4249 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4250 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4251 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4252 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4253 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4254 [FRESHMAN] True \n",
"4255 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4256 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4257 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4258 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4259 [FRESHMAN] True \n",
"4260 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"4261 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4262 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4263 [FRESHMAN] True \n",
"4264 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4265 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR] True \n",
"4266 [FRESHMAN, SOPHMORE, JUNIOR, SENIOR, GRADUATE] True \n",
"\n",
" institution_type \n",
"0 [FOUR-YEAR COLLEGE] \n",
"1 [FOUR-YEAR COLLEGE] \n",
"2 [FOUR-YEAR COLLEGE] \n",
"3 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4 [FOUR-YEAR COLLEGE] \n",
"5 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"6 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"7 None \n",
"8 [FOUR-YEAR COLLEGE] \n",
"9 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"10 None \n",
"11 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"12 [FOUR-YEAR COLLEGE] \n",
"13 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"14 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"15 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"16 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"17 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"18 [FOUR-YEAR COLLEGE] \n",
"19 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"20 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"21 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"22 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"23 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"24 [FOUR-YEAR COLLEGE] \n",
"25 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"26 [FOUR-YEAR COLLEGE] \n",
"27 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"28 [FOUR-YEAR COLLEGE] \n",
"29 [FOUR-YEAR COLLEGE] \n",
"... ... \n",
"4237 [FOUR-YEAR COLLEGE] \n",
"4238 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4239 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4240 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4241 [FOUR-YEAR COLLEGE] \n",
"4242 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4243 [FOUR-YEAR COLLEGE] \n",
"4244 [FOUR-YEAR COLLEGE] \n",
"4245 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4246 None \n",
"4247 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4248 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4249 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4250 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4251 [FOUR-YEAR COLLEGE, TRADE OR TECHNICAL SCHOOL] \n",
"4252 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4253 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4254 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4255 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4256 [FOUR-YEAR COLLEGE] \n",
"4257 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4258 [FOUR-YEAR COLLEGE] \n",
"4259 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4260 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE, TRADE OR... \n",
"4261 [FOUR-YEAR COLLEGE] \n",
"4262 [FOUR-YEAR COLLEGE] \n",
"4263 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4264 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4265 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"4266 [TWO-YEAR COLLEGE, FOUR-YEAR COLLEGE] \n",
"\n",
"[4267 rows x 19 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Experiment 3:\n",
"# Train a model on 25 predetermined classifiers, which represent basic application requirements\n",
"\n",
"# Most of these classifiers are boolean e.g. \"essay\" and \"no essay\"\n",
"# transcript, bio, test scores, interview, essay, no essay, community service, \n",
"# high school, undergrad, graduate, freshman, sophomore, junior, senior, two year college, four year college, trade school, \n",
"# financial need\n",
"\n",
"# Outputs a csv dataset of \"text\",label,label, ...\n",
"\n",
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"\n",
"# initialize a connection to Postgres SQL database\n",
"engine = create_engine(\n",
"    'postgresql://debug:debug@localhost/grant_api'\n",
")\n",
"\n",
"OUT_FILE = 'data/scholarship_app_req_labels_v3.csv' \n",
"\n",
"# load one row per scholarship: the program name/description text plus the columns of its\n",
"# application requirements record (joined through application_requirements_id)\n",
"\n",
"df = pd.read_sql(\"\"\"\n",
"\n",
"SELECT \n",
"CONCAT(program_name, ' ',program_description) as text,\n",
"-- boolean fields\n",
"transcript,\n",
"bio as autobiography,\n",
"test_scores,\n",
"essay,\n",
"community_service,\n",
"high_school,\n",
"financial_need,\n",
"application_form,\n",
"application_fee,\n",
"interview,\n",
"\"references\",\n",
"contest,\n",
"\n",
"-- varchar\n",
"gpa,\n",
"\n",
"\n",
"CASE WHEN military_service is not null THEN true\n",
"    ELSE false\n",
"END as military_service,\n",
"\n",
"CASE WHEN disability is not null THEN true\n",
"    ELSE false\n",
"END as disability,\n",
"\n",
"-- array fields\n",
"academic_level,\n",
"-- label 'undergraduate' if scholarship includes f/s/j/sr\n",
"CASE WHEN (\n",
"'FRESHMAN'=ANY(academic_level) or\n",
"'SOPHMORE'=ANY(academic_level) or\n",
"'JUNIOR'=ANY(academic_level) or\n",
"'SENIOR'=ANY(academic_level)\n",
") THEN true\n",
"    ELSE false\n",
"END as undergrad,\n",
"institution_type\n",
"\n",
"\n",
"\n",
"FROM \n",
"scholarships_scholarship\n",
"join\n",
"scholarships_applicationrequirements on scholarships_applicationrequirements.id = scholarships_scholarship.application_requirements_id\n",
"\n",
"\n",
"\"\"\", con=engine)\n",
"\n",
"\n",
"# mode 'w' (not 'a'): re-running this cell rebuilds the file instead of appending a second copy of every row\n",
"# explicit utf-8 so the output does not depend on the platform's default encoding\n",
"with open(OUT_FILE, 'w', encoding='utf-8') as out_file:\n",
"\n",
"    for idx, row in df.iterrows():\n",
"        labels = []\n",
"        for column in df.columns:\n",
"            value = row[column]\n",
"            # exclude 'text' column, which contains program_name and program_description\n",
"            if column == 'text':\n",
"                continue\n",
"            # <class 'bool'>: a true flag contributes the column name as a label\n",
"            elif type(value) is bool and value:\n",
"                labels.append(column)\n",
"\n",
"            # <class 'list'>: every element of an array column is a label of its own\n",
"            elif type(value) is list:\n",
"                labels = labels + value\n",
"            # <class 'str'> (gpa): the label is column_value, e.g. gpa_3.0\n",
"            elif type(value) is str:\n",
"                labels.append(f'{column}_{value}')\n",
"\n",
"        # double-quotes are dropped from the text so the field can simply be wrapped in quotes\n",
"        out_file.write('\"' + row['text'].replace('\"', '') + '\"' + ',')\n",
"        if len(labels) > 0:\n",
"            out_file.write(','.join(labels))\n",
"        out_file.write('\\n')\n",
"print(f'finished writing {OUT_FILE}')\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrape unlabeled scholarship text"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"begin data/scholarships_com_by_major_urls_v1.csv\n",
"finished writing data/scholarships_com_by_major_urls_v1.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label_type</th>\n",
" <th>url</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>social work</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>psychology</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nursing</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>math</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>history</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>english</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>computer science</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>communications</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>business</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>biology</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>anthropology</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>accounting</th>\n",
" <td>major</td>\n",
" <td>https://www.scholarships.com/financial-aid/col...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" label_type url\n",
"label \n",
"social work major https://www.scholarships.com/financial-aid/col...\n",
"psychology major https://www.scholarships.com/financial-aid/col...\n",
"nursing major https://www.scholarships.com/financial-aid/col...\n",
"math major https://www.scholarships.com/financial-aid/col...\n",
"history major https://www.scholarships.com/financial-aid/col...\n",
"english major https://www.scholarships.com/financial-aid/col...\n",
"computer science major https://www.scholarships.com/financial-aid/col...\n",
"communications major https://www.scholarships.com/financial-aid/col...\n",
"business major https://www.scholarships.com/financial-aid/col...\n",
"biology major https://www.scholarships.com/financial-aid/col...\n",
"anthropology major https://www.scholarships.com/financial-aid/col...\n",
"accounting major https://www.scholarships.com/financial-aid/col..."
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Scraping scholarships.com by academic major\n",
"\n",
"# Step 1:\n",
"# Building a list of urls to crawl\n",
"\n",
"# Entrypoint:\n",
"# https://www.scholarships.com/financial-aid/college-scholarships/scholarships-by-major/\n",
"\n",
"# On the Entrypoint page, there's a <ul> element containing links to lists of scholarships organized by academic major. \n",
"# ul containing \"$MajorName Scholarships\" links\n",
"#   class=bot10\n",
"#   traverse through li\n",
"#     <a href=\"/financial-aid/college-scholarships/scholarships-by-major/accounting-scholarships/\">Accounting Scholarships</a>\n",
"\n",
"import requests\n",
"import pandas as pd\n",
"from bs4 import BeautifulSoup\n",
"\n",
"BASE_URL = 'https://www.scholarships.com'\n",
"OUT_FILE = 'data/scholarships_com_by_major_urls_v1.csv'\n",
"\n",
"print(f'begin {OUT_FILE}')\n",
"\n",
"# get html\n",
"scholarships_by_majors_url = f'{BASE_URL}/financial-aid/college-scholarships/scholarships-by-major/'\n",
"scholarships_by_majors_page = requests.get(scholarships_by_majors_url, timeout=30)\n",
"# stop on an HTTP error instead of parsing an error page\n",
"scholarships_by_majors_page.raise_for_status()\n",
"\n",
"# parse html\n",
"scholarships_by_majors_soup = BeautifulSoup(scholarships_by_majors_page.text, 'html.parser')\n",
"\n",
"# finds all links in our element of interest (<ul class=\"bot10\"></ul>)\n",
"majors_ul = scholarships_by_majors_soup.find('ul', attrs={'class': 'bot10'})\n",
"if majors_ul is None:\n",
"    raise RuntimeError(f'no <ul class=\"bot10\"> element found on {scholarships_by_majors_url}')\n",
"ahrefs = majors_ul.find_all('a')\n",
"\n",
"# one record per link; the DataFrame is built once from the full list instead of being grown row by row\n",
"records = [\n",
"    {\n",
"        # e.g. \"Accounting Scholarships\" => \"accounting\"\n",
"        'label': a.getText().replace(' Scholarships', '').lstrip().lower(),\n",
"        'label_type': 'major',\n",
"        'url': f\"{BASE_URL}{a['href']}\",\n",
"    }\n",
"    for a in ahrefs\n",
"]\n",
"\n",
"# rows are stored last link first, the same row order as the csv produced by earlier runs of this cell\n",
"scholarships_by_major_df = pd.DataFrame(\n",
"    records[::-1], columns=['label', 'label_type', 'url']\n",
").set_index('label')\n",
"\n",
"scholarships_by_major_df.to_csv(OUT_FILE)\n",
"print(f'finished writing {OUT_FILE}')\n",
"scholarships_by_major_df\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"skipping data/scholarships.com/major-social_work.csv\n",
"skipping data/scholarships.com/major-psychology.csv\n",
"skipping data/scholarships.com/major-nursing.csv\n",
"skipping data/scholarships.com/major-math.csv\n",
"skipping data/scholarships.com/major-history.csv\n",
"skipping data/scholarships.com/major-english.csv\n",
"skipping data/scholarships.com/major-computer_science.csv\n",
"skipping data/scholarships.com/major-communications.csv\n",
"skipping data/scholarships.com/major-business.csv\n",
"skipping data/scholarships.com/major-biology.csv\n",
"skipping data/scholarships.com/major-anthropology.csv\n",
"skipping data/scholarships.com/major-accounting.csv\n"
]
}
],
"source": [
"# Scraping scholarships.com by academic major\n",
"\n",
"# Step 2:\n",
"# data/scholarships_com_by_major_urls_v1.csv contains a list of urls to scrape\n",
"# The following code visits each url, then extracts scholarship text, amount, due date, and contact info.\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import os.path\n",
"import re\n",
"\n",
"\n",
"# root of the site; prepended to the relative links found on the scraped pages\n",
"BASE_URL = 'https://www.scholarships.com'\n",
"# crawl list written by step 1; the loop below reads its label, label_type and url columns\n",
"scholarships_by_major_urls = pd.read_csv(\n",
"    'data/scholarships_com_by_major_urls_v1.csv'\n",
")\n",
"\n",
"def find_scholarship_divs_in_list(tag):\n",
"    '''\n",
"    Filter function (https://www.crummy.com/software/BeautifulSoup/bs4/doc/#a-function)\n",
"    that recognises one scholarship entry of a scholarships.com list view.\n",
"\n",
"    Every list view uses the same format for its entries:\n",
"\n",
"    <div>\n",
"        <h3>program_name</h3>\n",
"        <ul>\n",
"            <li>application deadline</li>\n",
"            <li>amount</li>\n",
"            <li>description (truncated)\n",
"        </ul>\n",
"    </div>\n",
"\n",
"    Returns True when the children of tag, bare newline strings aside, are exactly\n",
"    an <h3> followed by a <ul>; False otherwise.\n",
"    '''\n",
"    significant = []\n",
"    for node in tag.children:\n",
"        if node != '\\n':\n",
"            significant.append(node)\n",
"\n",
"    if len(significant) != 2:\n",
"        return False\n",
"    heading, listing = significant\n",
"    return heading.name == 'h3' and listing.name == 'ul'\n",
"\n",
"def clean_text(text):\n",
"    '''\n",
"    Prepares text for csv storage\n",
"    Removes every double-quote, new-line, carriage-return and tab character;\n",
"    all other characters (spaces included) are kept unchanged.\n",
"    @todo strip html/javascript\n",
"\n",
"    text: str to clean\n",
"    Returns the cleaned str.\n",
"    '''\n",
"    # a single deletion table replaces the former escape-then-strip sequence, whose\n",
"    # escaped quotes were removed again by the strip step anyway\n",
"    return text.translate(str.maketrans('', '', '\"\\n\\r\\t'))\n",
"\n",
"def extract_contact_info(tag):\n",
"    '''\n",
"    Reads the contact fields of the scholarship details page that contains tag.\n",
"\n",
"    tag: an element of the parsed details page. The element two siblings after it is read\n",
"         as the contact title; the address fields are looked up by id in the same document.\n",
"    Returns a dict with the keys contact_title, contact_address1, contact_address2,\n",
"    contact_city_state_zip, contact_email and contact_phone.\n",
"\n",
"    Any of the contact fields can be empty, which is why there's a lot of logic guarded by if statements;\n",
"    a field that is not found is returned as None.\n",
"    '''\n",
"    # climb to the root of the tree tag lives in, so the lookups below search tag's own page\n",
"    # and the function does not depend on a module-level variable\n",
"    page = tag\n",
"    while page.parent is not None:\n",
"        page = page.parent\n",
"\n",
"    contact_title = None\n",
"    if tag.next_sibling and tag.next_sibling.next_sibling:\n",
"        contact_title_el = tag.next_sibling.next_sibling\n",
"        contact_title = clean_text(contact_title_el.get_text(strip=True))\n",
"\n",
"    contact_address1 = None\n",
"    contact_address1_el = page.find('li', attrs={'id': 'liAddress1Text'})\n",
"    if contact_address1_el is not None:\n",
"        contact_address1 = clean_text(contact_address1_el.get_text(strip=True))\n",
"\n",
"    contact_address2 = None\n",
"    contact_address2_el = page.find('li', attrs={'id': 'liAddress2Text'})\n",
"    if contact_address2_el is not None:\n",
"        contact_address2 = clean_text(contact_address2_el.get_text(strip=True))\n",
"\n",
"    contact_city_state_zip = None\n",
"    contact_city_state_zip_el = page.find('li', attrs={'id': 'liCityStateZIPText'})\n",
"    if contact_city_state_zip_el is not None:\n",
"        contact_city_state_zip = clean_text(contact_city_state_zip_el.get_text(strip=True))\n",
"\n",
"    # the email link is searched for in the element that follows the city/state/zip item\n",
"    contact_email = None\n",
"    contact_email_el = None\n",
"    if contact_city_state_zip_el and contact_city_state_zip_el.next_sibling:\n",
"        # occasionally, there's a div with \\n as the inner content inserted here\n",
"        if contact_city_state_zip_el.next_sibling == '\\n':\n",
"            if contact_city_state_zip_el.next_sibling.next_sibling:\n",
"                contact_email_el = contact_city_state_zip_el.next_sibling.next_sibling.find('a')\n",
"        else:\n",
"            contact_email_el = contact_city_state_zip_el.next_sibling.find('a')\n",
"\n",
"    if contact_email_el:\n",
"        contact_email = clean_text(contact_email_el.get_text(strip=True))\n",
"\n",
"    # the phone element is whatever follows the email link (skipping one bare newline)\n",
"    contact_phone = None\n",
"    # initialised up front so the check below is safe when no email link was found\n",
"    contact_phone_el = None\n",
"    if contact_email_el and contact_email_el.next_sibling:\n",
"        if contact_email_el.next_sibling == '\\n':\n",
"            contact_phone_el = contact_email_el.next_sibling.next_sibling\n",
"        else:\n",
"            contact_phone_el = contact_email_el.next_sibling\n",
"\n",
"    if contact_phone_el:\n",
"        contact_phone = clean_text(contact_phone_el.get_text(strip=True))\n",
"\n",
"    return {\n",
"        'contact_title': contact_title,\n",
"        'contact_address1': contact_address1,\n",
"        'contact_address2': contact_address2,\n",
"        'contact_city_state_zip': contact_city_state_zip,\n",
"        'contact_email': contact_email,\n",
"        'contact_phone': contact_phone\n",
"    }\n",
"\n",
"# for each major, scrape scholarships from major index page\n",
"for idx, row in scholarships_by_major_urls.iterrows():\n",
"    OUT_FILE = f\"data/scholarships.com/{row['label_type']}-{(row['label'].replace(' ', '_'))}.csv\"\n",
"    # skip file if it already exists; checked before any download so finished majors cost no requests\n",
"    if os.path.isfile(OUT_FILE):\n",
"        print(f'skipping {OUT_FILE}')\n",
"        continue\n",
"    print(f'starting {OUT_FILE}')\n",
"\n",
"    # get html\n",
"    page_response = requests.get(row['url'])\n",
"    # stop on an HTTP error instead of parsing an error page\n",
"    page_response.raise_for_status()\n",
"    # parse html\n",
"    page_soup = BeautifulSoup(page_response.text, 'html.parser')\n",
"\n",
"    scholarship_divs = [div for div in page_soup.find_all('div') if find_scholarship_divs_in_list(div)]\n",
"\n",
"    # one dict per scholarship; turned into a DataFrame once the whole major has been scraped\n",
"    records = []\n",
"    for scholarship_div in scholarship_divs:\n",
"        # the scholarship description text in the list view is truncated, so we need to follow each url to the details view\n",
"        details_url = scholarship_div.find('a')['href']\n",
"        details_url = f\"{BASE_URL}{details_url}\"\n",
"        details_response = requests.get(details_url)\n",
"        details_response.raise_for_status()\n",
"        details_soup = BeautifulSoup(details_response.text, 'html.parser')\n",
"\n",
"        innercontent = details_soup.find('div', attrs={'class': 'innercontent'})\n",
"        program_name = innercontent.find('h1').get_text(strip=True)\n",
"\n",
"        # NOTE(review): the first three <h3> headings are presumably amount, due date and\n",
"        # number available, in that order - confirm against a live details page\n",
"        h3_texts = [h3.get_text(strip=True) for h3 in innercontent.find_all('h3')]\n",
"        # pad to three so a page with fewer headings yields None values instead of aborting the scrape\n",
"        amount_raw, due_date_raw, num_available_raw = (h3_texts + [None] * 3)[:3]\n",
"\n",
"        text_el = details_soup.find('li', attrs={'class': 'scholdescrip'}).find('div')\n",
"        program_description = text_el.get_text(strip=True)\n",
"        contact_info = extract_contact_info(text_el)\n",
"\n",
"        records.append({\n",
"            'details_url': details_url,\n",
"            'program_name': program_name,\n",
"            'program_description': program_description,\n",
"            'amount_raw': amount_raw,\n",
"            'due_date_raw': due_date_raw,\n",
"            'num_available_raw': num_available_raw,\n",
"            **contact_info\n",
"        })\n",
"\n",
"    if not records:\n",
"        # writing an empty file would make the skip check above hide this major on every later run\n",
"        print(f'no scholarships found, not writing {OUT_FILE}')\n",
"        continue\n",
"\n",
"    # columns are written in alphabetical order\n",
"    scholarships_details_df = pd.DataFrame(records).sort_index(axis=1)\n",
"    scholarships_details_df.to_csv(OUT_FILE)\n",
"    print(f'finished writing {OUT_FILE}')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring via AutoML Python lib"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name: \"projects/702148157522/locations/us-central1/models/TCN8843315649683173684/modelEvaluations/8534929302381822278\"\n",
"annotation_spec_id: \"3238212699577573231\"\n",
"create_time {\n",
" seconds: 1535967640\n",
" nanos: 890650000\n",
"}\n",
"classification_evaluation_metrics {\n",
" au_prc: 0.9878106713294983\n",
" base_au_prc: 0.7505617737770081\n",
" confidence_metrics_entry {\n",
" recall: 1.0\n",
" precision: 0.7505617737770081\n",
" f1_score: 0.8575096130371094\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.05000000074505806\n",
" recall: 0.9910179376602173\n",
" precision: 0.885026752948761\n",
" f1_score: 0.9350282549858093\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.10000000149011612\n",
" recall: 0.9880239367485046\n",
" precision: 0.9016393423080444\n",
" f1_score: 0.9428571462631226\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.15000000596046448\n",
" recall: 0.9880239367485046\n",
" precision: 0.9192200303077698\n",
" f1_score: 0.9523809552192688\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.20000000298023224\n",
" recall: 0.985029935836792\n",
" precision: 0.9241573214530945\n",
" f1_score: 0.9536231756210327\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.25\n",
" recall: 0.9790419340133667\n",
" precision: 0.9342857003211975\n",
" f1_score: 0.9561403393745422\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.30000001192092896\n",
" recall: 0.9790419340133667\n",
" precision: 0.9396551847457886\n",
" f1_score: 0.9589442610740662\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.3499999940395355\n",
" recall: 0.9730538725852966\n",
" precision: 0.9420289993286133\n",
" f1_score: 0.9572901129722595\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.4000000059604645\n",
" recall: 0.9730538725852966\n",
" precision: 0.944767415523529\n",
" f1_score: 0.9587020874023438\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.44999998807907104\n",
" recall: 0.970059871673584\n",
" precision: 0.9473684430122375\n",
" f1_score: 0.9585798978805542\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.5\n",
" recall: 0.970059871673584\n",
" precision: 0.9473684430122375\n",
" f1_score: 0.9585798978805542\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.550000011920929\n",
" recall: 0.970059871673584\n",
" precision: 0.9501466155052185\n",
" f1_score: 0.9599999785423279\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.6000000238418579\n",
" recall: 0.961077868938446\n",
" precision: 0.9497041702270508\n",
" f1_score: 0.9553571343421936\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.6499999761581421\n",
" recall: 0.9491018056869507\n",
" precision: 0.9519519805908203\n",
" f1_score: 0.9505247473716736\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.699999988079071\n",
" recall: 0.9491018056869507\n",
" precision: 0.9519519805908203\n",
" f1_score: 0.9505247473716736\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.75\n",
" recall: 0.946107804775238\n",
" precision: 0.9546827673912048\n",
" f1_score: 0.9503759145736694\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.800000011920929\n",
" recall: 0.9431137442588806\n",
" precision: 0.957446813583374\n",
" f1_score: 0.9502262473106384\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.8500000238418579\n",
" recall: 0.940119743347168\n",
" precision: 0.9661538600921631\n",
" f1_score: 0.9529590010643005\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.875\n",
" recall: 0.9371257424354553\n",
" precision: 0.9660493731498718\n",
" f1_score: 0.9513677954673767\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.8999999761581421\n",
" recall: 0.9371257424354553\n",
" precision: 0.9720497131347656\n",
" f1_score: 0.9542682766914368\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9100000262260437\n",
" recall: 0.9341317415237427\n",
" precision: 0.9750000238418579\n",
" f1_score: 0.9541284441947937\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9200000166893005\n",
" recall: 0.92514967918396\n",
" precision: 0.9778481125831604\n",
" f1_score: 0.9507692456245422\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9300000071525574\n",
" recall: 0.9221556782722473\n",
" precision: 0.9777777791023254\n",
" f1_score: 0.9491525292396545\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9399999976158142\n",
" recall: 0.9191616773605347\n",
" precision: 0.977707028388977\n",
" f1_score: 0.9475308656692505\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.949999988079071\n",
" recall: 0.9071856141090393\n",
" precision: 0.977419376373291\n",
" f1_score: 0.9409937858581543\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9599999785423279\n",
" recall: 0.8892215490341187\n",
" precision: 0.9801980257034302\n",
" f1_score: 0.9324960708618164\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9700000286102295\n",
" recall: 0.8532934188842773\n",
" precision: 0.9861591458320618\n",
" f1_score: 0.9149277806282043\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9800000190734863\n",
" recall: 0.8083832263946533\n",
" precision: 0.985401451587677\n",
" f1_score: 0.8881579041481018\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9900000095367432\n",
" recall: 0.7095808386802673\n",
" precision: 0.991631805896759\n",
" f1_score: 0.8272251486778259\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9950000047683716\n",
" recall: 0.5808383226394653\n",
" precision: 0.9948717951774597\n",
" f1_score: 0.7334593534469604\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9959999918937683\n",
" recall: 0.5419161915779114\n",
" precision: 0.9945054650306702\n",
" f1_score: 0.7015503644943237\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.996999979019165\n",
" recall: 0.488023966550827\n",
" precision: 0.9939024448394775\n",
" f1_score: 0.654618501663208\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9980000257492065\n",
" recall: 0.398203581571579\n",
" precision: 0.9925373196601868\n",
" f1_score: 0.5683760643005371\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 0.9990000128746033\n",
" recall: 0.26646706461906433\n",
" precision: 0.9888888597488403\n",
" f1_score: 0.41981130838394165\n",
" }\n",
" confidence_metrics_entry {\n",
" confidence_threshold: 1.0\n",
" precision: 1.0\n",
" }\n",
"}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from google.cloud import automl_v1beta1 as automl\n",
"\n",
"# Check out the UI URL to get dataset & model id\n",
"# e.g.\n",
"# https://beta-dot-custom-vision.appspot.com/text/datasets/evaluate?dataset=TCN2907194836301227905&model=TCN8843315649683173684&project=get-grant\n",
"\n",
"PROJECT_ID = 'get-grant'\n",
"LOCATION = 'us-central1'\n",
"DATASET_ID = 'TCN2907194836301227905'\n",
"MODEL_ID = 'TCN8843315649683173684'\n",
"# I had to use client.list_model_evaluations() to get this model evaluation id\n",
"EVALUATION_ID = '8534929302381822278'\n",
"\n",
"# initialize client\n",
"client = automl.AutoMlClient()\n",
"\n",
"# resource names, all built from the ids above\n",
"automl_account = client.location_path(PROJECT_ID, LOCATION)\n",
"dataset_name = f'projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}'\n",
"model_eval_name = client.model_evaluation_path(\n",
"    PROJECT_ID, LOCATION, MODEL_ID, EVALUATION_ID\n",
")\n",
"\n",
"# fetch the stored evaluation and show it as the cell result\n",
"model_eval = client.get_model_evaluation(model_eval_name)\n",
"model_eval\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment