Skip to content

Instantly share code, notes, and snippets.

@zehsilva
Created January 16, 2020 04:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zehsilva/bda2be52b7903e0150ed5aadd0c965c3 to your computer and use it in GitHub Desktop.
Save zehsilva/bda2be52b7903e0150ed5aadd0c965c3 to your computer and use it in GitHub Desktop.
iq_us_colleges.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "iq_us_colleges.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyPvqvnh4LpA4ydRdFox8Ile",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/zehsilva/bda2be52b7903e0150ed5aadd0c965c3/iq_us_colleges.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "NjwKIEV7s1-f",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 202
},
"outputId": "6bfb16d0-f528-4ac4-8afb-be5081d0538f"
},
"source": [
"!pip install -q tabula-py\n",
"!wget https://www.psychologytoday.com/files/attachments/56143/1339-us-colleges-ranked-average-student-brainpower.pdf\n"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"--2020-01-16 00:20:38-- https://www.psychologytoday.com/files/attachments/56143/1339-us-colleges-ranked-average-student-brainpower.pdf\n",
"Resolving www.psychologytoday.com (www.psychologytoday.com)... 34.255.244.15, 52.212.96.84\n",
"Connecting to www.psychologytoday.com (www.psychologytoday.com)|34.255.244.15|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 679034 (663K) [application/pdf]\n",
"Saving to: ‘1339-us-colleges-ranked-average-student-brainpower.pdf.1’\n",
"\n",
"\r 1339-us-c 0%[ ] 0 --.-KB/s \r1339-us-colleges-ra 100%[===================>] 663.12K --.-KB/s in 0.1s \n",
"\n",
"2020-01-16 00:20:39 (6.64 MB/s) - ‘1339-us-colleges-ranked-average-student-brainpower.pdf.1’ saved [679034/679034]\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4XqVSivWs2t5",
"colab_type": "code",
"colab": {}
},
"source": [
"import tabula\n",
"import pandas as pd\n",
"import numpy as np\n",
"import functools \n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ldHYwR6XgWg2",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "ebafb7f2-6397-460c-cd84-6ae308a8207d"
},
"source": [
"### out or order for presentation\n",
"### run all the other cells then run this one to visualize the final table\n",
"\n",
"def show_year(df, year):\n",
" cols = [col for col in df.columns if str(year) in col]\n",
" return df[['rank','name','rank_official','range_iq_estimates']+cols]\n",
"\n",
"df_2016=show_year(percentile_college,2016)\n",
"df_2016[ (df_2016.iq_2016_1>122) ]"
],
"execution_count": 201,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rank</th>\n",
" <th>name</th>\n",
" <th>rank_official</th>\n",
" <th>range_iq_estimates</th>\n",
" <th>percentile2016</th>\n",
" <th>iq_2016_0</th>\n",
" <th>iq_2016_1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>California Institute of Technology</td>\n",
" <td>10.0</td>\n",
" <td>135.0-138.0</td>\n",
" <td>99.0</td>\n",
" <td>135.0</td>\n",
" <td>137.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>University of Chicago</td>\n",
" <td>4.0</td>\n",
" <td>135.0-137.0</td>\n",
" <td>99.0</td>\n",
" <td>135.0</td>\n",
" <td>137.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Princeton University</td>\n",
" <td>1.0</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>Harvard University</td>\n",
" <td>2.0</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>Yale University</td>\n",
" <td>3.0</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>Massachusetts Institute of Technology</td>\n",
" <td>7.0</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>Columbia University</td>\n",
" <td>4.0</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>Harvey Mudd College</td>\n",
" <td>NaN</td>\n",
" <td>131.0-137.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>Stanford University</td>\n",
" <td>4.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>Northwestern University</td>\n",
" <td>13.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>98.0</td>\n",
" <td>131.0</td>\n",
" <td>133.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>Dartmouth College</td>\n",
" <td>11.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>Washington University in St. Louis</td>\n",
" <td>14.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>11</td>\n",
" <td>University of Notre Dame</td>\n",
" <td>16.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>11</td>\n",
" <td>Vanderbilt University</td>\n",
" <td>16.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>11</td>\n",
" <td>Rice University</td>\n",
" <td>19.0</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>11</td>\n",
" <td>Pomona College</td>\n",
" <td>NaN</td>\n",
" <td>128.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>18</td>\n",
" <td>University of Pennsylvania</td>\n",
" <td>8.0</td>\n",
" <td>126.0-133.0</td>\n",
" <td>97.0</td>\n",
" <td>128.0</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>19</td>\n",
" <td>Tufts University</td>\n",
" <td>27.0</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>20</td>\n",
" <td>Amherst College</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>20</td>\n",
" <td>Swarthmore College</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>22</td>\n",
" <td>Brown University (3)</td>\n",
" <td>16.0</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>22</td>\n",
" <td>Carnegie Mellon University</td>\n",
" <td>25.0</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>22</td>\n",
" <td>Williams College</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>22</td>\n",
" <td>Bowdoin College (2)</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>26</td>\n",
" <td>Johns Hopkins University</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>26</td>\n",
" <td>Carleton College</td>\n",
" <td>NaN</td>\n",
" <td>126.0-130.0</td>\n",
" <td>96.0</td>\n",
" <td>126.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>28</td>\n",
" <td>Cornell University</td>\n",
" <td>NaN</td>\n",
" <td>125.0-128.0</td>\n",
" <td>95.0</td>\n",
" <td>125.0</td>\n",
" <td>126.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>29</td>\n",
" <td>Georgetown University</td>\n",
" <td>21.0</td>\n",
" <td>125.0-128.0</td>\n",
" <td>95.0</td>\n",
" <td>125.0</td>\n",
" <td>126.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>29</td>\n",
" <td>Wellesley College</td>\n",
" <td>NaN</td>\n",
" <td>125.0-128.0</td>\n",
" <td>95.0</td>\n",
" <td>125.0</td>\n",
" <td>126.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>29</td>\n",
" <td>Claremont McKenna College</td>\n",
" <td>NaN</td>\n",
" <td>125.0-128.0</td>\n",
" <td>95.0</td>\n",
" <td>125.0</td>\n",
" <td>126.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>32</td>\n",
" <td>Haverford College</td>\n",
" <td>NaN</td>\n",
" <td>123.0-126.0</td>\n",
" <td>94.0</td>\n",
" <td>123.0</td>\n",
" <td>125.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>32</td>\n",
" <td>Washington and Lee University</td>\n",
" <td>NaN</td>\n",
" <td>123.0-126.0</td>\n",
" <td>94.0</td>\n",
" <td>123.0</td>\n",
" <td>125.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>32</td>\n",
" <td>Wesleyan University</td>\n",
" <td>NaN</td>\n",
" <td>123.0-126.0</td>\n",
" <td>94.0</td>\n",
" <td>123.0</td>\n",
" <td>125.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>35</td>\n",
" <td>Vassar College</td>\n",
" <td>NaN</td>\n",
" <td>122.0-126.0</td>\n",
" <td>94.0</td>\n",
" <td>123.0</td>\n",
" <td>125.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>36</td>\n",
" <td>Northeastern University</td>\n",
" <td>NaN</td>\n",
" <td>122.0-126.0</td>\n",
" <td>94.0</td>\n",
" <td>123.0</td>\n",
" <td>125.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>37</td>\n",
" <td>Rensselaer Polytechnic Institute</td>\n",
" <td>42.0</td>\n",
" <td>122.0-125.0</td>\n",
" <td>93.0</td>\n",
" <td>122.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>38</td>\n",
" <td>Georgia Institute of Technology</td>\n",
" <td>35.0</td>\n",
" <td>122.0-125.0</td>\n",
" <td>93.0</td>\n",
" <td>122.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>38</td>\n",
" <td>Middlebury College</td>\n",
" <td>NaN</td>\n",
" <td>122.0-125.0</td>\n",
" <td>93.0</td>\n",
" <td>122.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>38</td>\n",
" <td>Hamilton College</td>\n",
" <td>NaN</td>\n",
" <td>122.0-125.0</td>\n",
" <td>93.0</td>\n",
" <td>122.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>41</td>\n",
" <td>University of Southern California</td>\n",
" <td>NaN</td>\n",
" <td>122.0-125.0</td>\n",
" <td>93.0</td>\n",
" <td>122.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rank name ... iq_2016_0 iq_2016_1\n",
"0 1 California Institute of Technology ... 135.0 137.0\n",
"1 2 University of Chicago ... 135.0 137.0\n",
"2 3 Princeton University ... 131.0 133.0\n",
"3 3 Harvard University ... 131.0 133.0\n",
"4 3 Yale University ... 131.0 133.0\n",
"5 6 Massachusetts Institute of Technology ... 131.0 133.0\n",
"6 7 Columbia University ... 131.0 133.0\n",
"7 8 Harvey Mudd College ... 131.0 133.0\n",
"8 9 Stanford University ... 131.0 133.0\n",
"9 10 Northwestern University ... 131.0 133.0\n",
"10 11 Dartmouth College ... 128.0 130.0\n",
"11 11 Washington University in St. Louis ... 128.0 130.0\n",
"12 11 University of Notre Dame ... 128.0 130.0\n",
"13 11 Vanderbilt University ... 128.0 130.0\n",
"14 11 Rice University ... 128.0 130.0\n",
"15 11 Pomona College ... 128.0 130.0\n",
"16 18 University of Pennsylvania ... 128.0 130.0\n",
"17 19 Tufts University ... 126.0 128.0\n",
"18 20 Amherst College ... 126.0 128.0\n",
"19 20 Swarthmore College ... 126.0 128.0\n",
"20 22 Brown University (3) ... 126.0 128.0\n",
"21 22 Carnegie Mellon University ... 126.0 128.0\n",
"22 22 Williams College ... 126.0 128.0\n",
"23 22 Bowdoin College (2) ... 126.0 128.0\n",
"24 26 Johns Hopkins University ... 126.0 128.0\n",
"25 26 Carleton College ... 126.0 128.0\n",
"26 28 Cornell University ... 125.0 126.0\n",
"27 29 Georgetown University ... 125.0 126.0\n",
"28 29 Wellesley College ... 125.0 126.0\n",
"29 29 Claremont McKenna College ... 125.0 126.0\n",
"30 32 Haverford College ... 123.0 125.0\n",
"31 32 Washington and Lee University ... 123.0 125.0\n",
"32 32 Wesleyan University ... 123.0 125.0\n",
"33 35 Vassar College ... 123.0 125.0\n",
"34 36 Northeastern University ... 123.0 125.0\n",
"35 37 Rensselaer Polytechnic Institute ... 122.0 124.0\n",
"36 38 Georgia Institute of Technology ... 122.0 124.0\n",
"37 38 Middlebury College ... 122.0 124.0\n",
"38 38 Hamilton College ... 122.0 124.0\n",
"39 41 University of Southern California ... 122.0 124.0\n",
"\n",
"[40 rows x 7 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 201
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "iTOUG0MOtFKC",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 134
},
"outputId": "c76519ec-00ea-4c93-c96b-4b5aca287125"
},
"source": [
"pdf_path = \"1339-us-colleges-ranked-average-student-brainpower.pdf\"\n",
"\n",
"data = tabula.read_pdf(pdf_path, pages=\"2-10\", stream=True)\n",
"\n",
"## from here: https://blog.prepscholar.com/historical-percentiles-new-sat\n",
"data_sat=\"\"\"1600\t99+\t99+\t99+\t99+\n",
"1590\t99+\t99+\t99+\t99+\n",
"1580\t99+\t99+\t99+\t99+\n",
"1570\t99+\t99+\t99+\t99+\n",
"1560\t99+\t99+\t99+\t99+\n",
"1550\t99+\t99+\t99+\t99+\n",
"1540\t99\t99\t99+\t99\n",
"1530\t99\t99\t99+\t99\n",
"1520\t99\t99\t99\t99\n",
"1510\t99\t99\t99\t99\n",
"1500\t98\t99\t99\t98\n",
"1490\t98\t98\t99\t98\n",
"1480\t98\t98\t99\t98\n",
"1470\t97\t98\t98\t98\n",
"1460\t97\t97\t98\t97\n",
"1450\t96\t97\t98\t97\n",
"1440\t96\t96\t97\t96\n",
"1430\t96\t96\t97\t96\n",
"1420\t95\t95\t96\t95\n",
"1410\t95\t95\t96\t95\n",
"1400\t94\t94\t95\t94\n",
"1390\t93\t94\t95\t94\n",
"1380\t93\t93\t94\t93\n",
"1370\t92\t92\t94\t92\n",
"1360\t91\t92\t93\t91\n",
"1350\t91\t91\t92\t91\n",
"1340\t90\t90\t91\t90\n",
"1330\t89\t89\t90\t89\n",
"1320\t88\t88\t90\t88\n",
"1310\t87\t88\t89\t87\n",
"1300\t86\t87\t88\t86\n",
"1290\t85\t86\t87\t85\n",
"1280\t84\t84\t86\t83\n",
"1270\t83\t83\t85\t82\n",
"1260\t82\t82\t83\t81\n",
"1250\t81\t81\t82\t80\n",
"1240\t80\t80\t81\t78\n",
"1230\t78\t79\t80\t77\n",
"1220\t77\t77\t78\t76\n",
"1210\t76\t76\t77\t74\n",
"1200\t74\t74\t76\t72\n",
"1190\t73\t73\t74\t71\n",
"1180\t72\t72\t73\t69\n",
"1170\t70\t70\t71\t67\n",
"1160\t68\t68\t69\t65\n",
"1150\t67\t67\t68\t64\n",
"1140\t65\t65\t66\t62\n",
"1130\t64\t63\t64\t60\n",
"1120\t62\t62\t63\t58\n",
"1110\t60\t60\t61\t57\n",
"1100\t58\t58\t59\t55\n",
"1090\t57\t56\t57\t52\n",
"1080\t55\t54\t55\t50\n",
"1070\t53\t52\t53\t48\n",
"1060\t51\t51\t51\t46\n",
"1050\t49\t49\t49\t44\n",
"1040\t47\t47\t47\t42\n",
"1030\t46\t45\t45\t40\n",
"1020\t44\t43\t43\t38\n",
"1010\t42\t41\t41\t36\n",
"1000\t40\t39\t40\t34\n",
"990\t38\t37\t38\t32\n",
"980\t36\t36\t36\t30\n",
"970\t35\t34\t34\t29\n",
"960\t33\t32\t32\t27\n",
"950\t31\t31\t31\t25\n",
"940\t30\t29\t29\t24\n",
"930\t28\t27\t27\t22\n",
"920\t27\t26\t26\t20\n",
"910\t25\t24\t24\t19\n",
"900\t23\t23\t22\t18\n",
"890\t22\t21\t21\t16\n",
"880\t20\t20\t19\t15\n",
"870\t19\t18\t18\t14\n",
"860\t18\t17\t17\t13\n",
"850\t16\t15\t15\t12\n",
"840\t15\t14\t14\t11\n",
"830\t14\t13\t13\t10\n",
"820\t13\t12\t12\t9\n",
"810\t11\t11\t11\t8\n",
"800\t10\t10\t9\t7\n",
"790\t9\t9\t8\t7\n",
"780\t8\t8\t8\t6\n",
"770\t7\t7\t7\t5\n",
"760\t6\t6\t6\t4\n",
"750\t5\t5\t5\t4\n",
"740\t5\t4\t4\t3\n",
"730\t4\t4\t4\t3\n",
"720\t3\t3\t3\t2\n",
"710\t3\t3\t3\t2\n",
"700\t2\t2\t2\t2\n",
"690\t2\t2\t2\t2\n",
"680\t1\t1\t1\t1\n",
"670\t1\t1\t1\t1\n",
"660\t1\t1\t1\t1\n",
"650\t1\t1\t1\t1\n",
"640\t1\t1\t1\t1-\n",
"630\t1-\t1-\t1-\t1-\n",
"620\t1-\t1-\t1-\t1-\n",
"610\t1-\t1-\t1-\t1-\n",
"600\t1-\t1-\t1-\t1-\n",
"590\t1-\t1-\t1-\t1-\n",
"580\t1-\t1-\t1-\t1-\n",
"570\t1-\t1-\t1-\t1-\n",
"560\t1-\t1-\t1-\t1-\n",
"550\t1-\t1-\t1-\t1-\n",
"540\t1-\t1-\t1-\t1-\n",
"530\t1-\t1-\t1-\t1-\n",
"520\t1-\t1-\t1-\t1-\n",
"510\t1-\t1-\t1-\t1-\n",
"500\t1-\t1-\t1-\t1-\n",
"490\t1-\t1-\t1-\t1-\n",
"480\t1-\t1-\t1-\t1-\n",
"470\t1-\t1-\t1-\t1-\n",
"460\t1-\t1-\t1-\t1-\n",
"450\t1-\t1-\t1-\t1-\n",
"440\t1-\t1-\t1-\t1-\n",
"430\t1-\t1-\t1-\t1-\n",
"420\t1-\t1-\t1-\t1-\n",
"410\t1-\t1-\t1-\t1-\n",
"400\t1-\t1-\t1-\t1-\"\"\".replace(\"1-\",\"0.99\").replace(\"99+\",\"99.09\")\n",
"sat_percentile = pd.DataFrame([line.split() for line in data_sat.split(\"\\n\")], \n",
" columns=['sat','percentile2019','percentile2018','percentile2017','percentile2016']).apply(lambda x:pd.to_numeric(x))\n"
],
"execution_count": 178,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tabula.io:Got stderr: Jan 16, 2020 4:00:29 AM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
"WARNING: Using fallback font 'LiberationSerif' for 'TimesNewRomanPSMT'\n",
"Jan 16, 2020 4:00:30 AM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
"WARNING: Using fallback font 'LiberationSerif' for 'TimesNewRomanPSMT'\n",
"Jan 16, 2020 4:00:31 AM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
"WARNING: Using fallback font 'LiberationSerif' for 'TimesNewRomanPSMT'\n",
"\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "L-aLFg5gtfI_",
"colab_type": "code",
"colab": {}
},
"source": [
"data_clean = pd.DataFrame(functools.reduce(lambda a,b:a+b,[d.values.tolist() for d in data])[1:])\n",
"data_clean.columns=[\"rank\",\"name\",\"sat_original\",\"rank_official\",\"liberal_arts\" ]\n",
"data_clean[['rank','sat_original','rank_official']]=data_clean[['rank','sat_original','rank_official']].apply(lambda x:pd.to_numeric(x,'coerce'))\n",
"data_clean['sat']=(data_clean.sat_original/10).astype(int)*10\n",
"percentile_college = pd.merge(data_clean[['rank','sat','name','rank_official']],sat_percentile,on='sat')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nJ2Yu4zJ1K9z",
"colab_type": "code",
"colab": {}
},
"source": [
"### found here https://www.iqcomparisonsite.com/iqtable.aspx\n",
"iqrawpercentile=\"\"\"202 99.9999999995% 190,057,377,928 99.9999999908% 10,881,440,294\n",
"201 99.9999999992% 119,937,672,336 99.9999999862% 7,252,401,045\n",
"200 99.9999999987% 76,017,176,740 99.9999999794% 4,852,159,346\n",
"199 99.9999999979% 48,390,420,202 99.9999999693% 3,258,706,819\n",
"198 99.9999999968% 30,938,221,975 99.9999999545% 2,196,908,409\n",
"197 99.9999999950% 19,866,426,228 99.9999999327% 1,486,736,899\n",
"196 99.9999999922% 12,812,462,045 99.9999999010% 1,009,976,678\n",
"195 99.9999999880% 8,299,126,114 99.9999998548% 688,720,101\n",
"194 99.9999999815% 5,399,067,340 99.9999997879% 471,441,334\n",
"193 99.9999999717% 3,527,693,270 99.9999996913% 323,940,499\n",
"192 99.9999999568% 2,314,980,850 99.9999995524% 223,436,817\n",
"191 99.9999999345% 1,525,765,721 99.9999993536% 154,701,783\n",
"190 99.9999999010% 1,009,976,678 99.9999990699% 107,519,234\n",
"189 99.9999998511% 671,455,130 99.9999986669% 75,011,253\n",
"188 99.9999997770% 448,336,263 99.9999980964% 52,530,944\n",
"187 99.9999996674% 300,656,786 99.9999972920% 36,927,646\n",
"186 99.9999995062% 202,496,482 99.9999961624% 26,05\n",
"185 99.9999992699% 136,975,305 99.9999945820% 18,457,107\n",
"184 99.9999989254% 93,056,001 99.9999923799% 13,123,126\n",
"183 99.9999984250% 63,492,548 99.9999893231% 9,366,012\n",
"182 99.9999977016% 43,508,721 99.9999850966% 6,709,882\n",
"181 99.9999966604% 29,943,596 99.9999792755% 4,825,216\n",
"180 99.9999951684% 20,696,863 99.9999712895% 3,483,046\n",
"179 99.9999930398% 14,367,357 99.9999603760% 2,523,720\n",
"178 99.9999900166% 10,016,587 99.9999455198% 1,835,530\n",
"177 99.9999857417% 7,013,455 99.9999253755% 1,340,043\n",
"176 99.9999797237% 4,931,877 99.9998981672% 982,001\n",
"175 99.9999712895% 3,483,046 99.9998615605% 722,337\n",
"174 99.9999595211% 2,470,424 99.9998125011% 533,337\n",
"173 99.9999431733% 1,759,737 99.9997470088% 395,271\n",
"172 99.9999205647% 1,258,887 99.9996599197% 294,048\n",
"171 99.9998894360% 904,454 99.9995445629% 219,569\n",
"170 99.9998467663% 652,598 99.9993923584% 164,571\n",
"169 99.9997885357% 472,893 99.9991923180% 123,811\n",
"168 99.9997094213% 344,141 99.9989304314% 93,496\n",
"167 99.9996024097% 251,515 99.9985889129% 70,867\n",
"166 99.9994583047% 184,606 99.9981452833% 53,917\n",
"165 99.9992651083% 136,074 99.9975712563% 41,174\n",
"164 99.9990072440% 100,730 99.9968313965% 31,560\n",
"163 99.9986645903% 74,883 99.9958815099% 24,281\n",
"162 99.9982112841% 55,906 99.9946667250% 18,750\n",
"161 99.9976142490% 41,916 99.9931192192% 14,533\n",
"160 99.9968313965% 31,560 99.9911555410% 11,307\n",
"159 99.9958094411% 23,863 99.9886734737% 8,829\n",
"158 99.9944812644% 18,120 99.9855483883% 6,920\n",
"157 99.9927627566% 13,817 99.9816290270% 5,443\n",
"156 99.9905490555% 10,581 99.9767326626% 4,298\n",
"155 99.9877101029% 8,137 99.9706395788% 3,406\n",
"154 99.9840854286% 6,284 99.9630868216% 2,709\n",
"153 99.9794780761% 4,873 99.9537611786% 2,163\n",
"152 99.9736475807% 3,795 99.9422913506% 1,733\n",
"151 99.9663019177% 2,968 99.9282392963% 1,394\n",
"150 99.9570883466% 2,330 99.9110907427% 1,125\n",
"149 99.9455830880% 1,838 99.8902448799% 911\n",
"148 99.9312797919% 1,455 99.8650032777% 741\n",
"147 99.9135767802% 1,157 99.8345580959% 604\n",
"146 99.8917630764% 924 99.7979796890% 495\n",
"145 99.8650032777% 741 99.7542037453% 407\n",
"144 99.8323213712% 596 99.7020181412% 336\n",
"143 99.7925836483% 482 99.6400497338% 278\n",
"142 99.7444809358% 391 99.5667513617% 231\n",
"141 99.6865104294% 319 99.4803893690% 192\n",
"140 99.6169574875% 261 99.3790320141% 161\n",
"139 99.5338778217% 215 99.2605391688% 135\n",
"138 99.4350805958% 177 99.1225537500% 114\n",
"137 99.3181130218% 147 98.9624953632% 96\n",
"136 99.1802471131% 122 98.7775566587% 82\n",
"135 99.0184693146% 102 98.5647029151% 70\n",
"134 98.8294737819% 85 98.3206753694% 60\n",
"133 98.6096601092% 72 98.0419987942% 51\n",
"132 98.3551363216% 61 97.7249937964% 44\n",
"131 98.0617279292% 52 97.3657942589% 38\n",
"130 97.7249937964% 44 96.9603702812% 33\n",
"129 97.3402495072% 38 96.5045568849% 29\n",
"128 96.9025987934% 32 95.9940886433% 25\n",
"127 96.4069734486% 28 95.4246402670% 22\n",
"126 95.8481819706% 24 94.7918730337% 19\n",
"125 95.2209669590% 21 94.0914867949% 17\n",
"124 94.5200710546% 18 93.3192771207% 15\n",
"123 93.7403109348% 16 92.4711969715% 13\n",
"122 92.8766585983% 14 91.5434221090% 12\n",
"121 91.9243288744% 12 90.5324192858% 11\n",
"120 90.8788718026% 11 89.4350160914% 9\n",
"119 89.7362682436% 10 88.2484711894% 9\n",
"118 88.4930268282% 9 86.9705435536% 8\n",
"117 87.1462801289% 8 85.5995592220% 7\n",
"116 85.6938777630% 7 84.1344740241% 6\n",
"115 84.1344740241% 6.30297414356 82.5749307167% 5.7388581000\n",
"114 82.4676075848% 5.70372814115 80.9213089868% 5.2414497373\n",
"113 80.6937708458% 5.17967538878 79.1747668425% 4.8018670064\n",
"112 78.8144666062% 4.72020213705 77.3372720270% 4.4125314534\n",
"111 76.8322499196% 75.4116222443% 4.31634490415 4.0669620824\n",
"110 74.7507532660% 3.96051419092 73.4014531849% 3.7596038872\n",
"109 72.5746935061% 3.64626736341 71.3112335745% 3.4856849025\n",
"108 70.3098594977% 3.36812148102 69.1462467364% 3.2410967685\n",
"107 67.9630797074% 3.12139865776 66.9125584538% 3.0222947235\n",
"106 65.5421696587% 2.90209798497 64.6169712244% 2.8262136810\n",
"105 63.0558595794% 2.70678919205 62.2669653200% 2.6501976543\n",
"104 60.5137031432% 2.53252414027 59.8706273779% 2.4919402788\n",
"103 57.9259687167% 2.37676298063 57.4365675495% 2.3494345790\n",
"102 55.3035150084% 2.23731239758 54.9738265155% 2.2209304558\n",
"101 52.6576534466% 2.11227383685 52.4917739192% 2.1048986302\n",
"100 49.9999999782% 1.99999999913 49.9999999782% 1.9999999991\n",
"99 47.3423465534% 1.89905917668 47.5082260808% 1.9050604034\n",
"98 44.6964849916% 1.80820333002 45.0261734845% 1.8190474693\n",
"97 42.0740312833% 1.72634143572 42.5634324505% 1.7410511155\n",
"96 39.4862968568% 1.65251826951 40.1293726221% 1.6702681161\n",
"95 36.9441404206% 1.58589543727 37.7330346800% 1.6059880144\n",
"94 34.4578303413% 1.52573527121 35.3830287756% 1.5475810473\n",
"93 32.0369202926% 1.47138711828 33.0874415462% 1.4944877660\n",
"92 29.6901405023% 1.42227563409 30.8537532636% 1.4462100941\n",
"91 27.4253064939% 1.37789076562 28.6887664255% 1.4023036061\n",
"90 25.2492467340% 1.33777916116 26.5985468151% 1.3623708477\n",
"89 23.1677500804% 1.30153679093 24.5883777557% 1.3260555472\n",
"88 21.1855333938% 1.26880259813 22.6627279730% 1.2930375921\n",
"87 19.3062291542% 1.23925302972 20.8252331575% 1.2630286642\n",
"86 17.5323924152% 1.21259732068 19.0786910132% 1.2357684429\n",
"86 17.5323924152% 1.21259732068 19.0786910132% 1.2357684429\n",
"85 15.8655259759% 1.18857342558 17.4250692833% 1.2110213007\n",
"84 14.3061222370% 1.16694450771 15.8655259759% 1.1885734256\n",
"83 12.8537198711% 1.14749590978 14.4004407780% 1.1682303146\n",
"82 11.5069731718% 1.13003254137 13.0294564464% 1.1498145914\n",
"81 10.2637317564% 1.11437662784 11.7515288106% 1.1331641064\n",
"80 9.1211281974% 1.10036577278 10.5649839086% 1.1181302847\n",
"79 8.0756711256% 1.08785129274 9.4675807142% 1.1045766896\n",
"78 7.1233414017% 1.07669678808 8.4565778910% 1.0923777776\n",
"77 6.2596890652% 1.06677691809 7.5288030285% 1.0814178174\n",
"76 5.4799289454% 1.05797635237 6.6807228793% 1.0715899553\n",
"75 4.7790330410% 1.05018887325 5.9085132051% 1.0627954070\n",
"74 4.1518180294% 1.04331660699 5.2081269663% 1.0549427583\n",
"73 3.5930265514% 1.03726936365 4.5753597330% 1.0479473616\n",
"72 3.0974012066% 1.03196406748 4.0059113567% 1.0417308129\n",
"71 2.6597504928% 1.02732426212 3.4954431151% 1.0362204981\n",
"70 2.2750062036% 1.02327967611 3.0396297188% 1.0313491967\n",
"69 1.9382720708% 1.01976583639 2.6342057411% 1.0270547348\n",
"68 1.6448636784% 1.01672371917 2.2750062036% 1.0232796761\n",
"67 1.3903398908% 1.01409942889 1.9580012058% 1.0199710454\n",
"66 1.1705262181% 1.01184389811 1.6793246306% 1.0170800762\n",
"65 0.9815306854% 1.00991260208 1.4352970849% 1.0145619785\n",
"64 0.8197528869% 1.00826528377 1.2224433413% 1.0123757196\n",
"63 0.6818869782% 1.006865686 1.0375046368% 1.0104838164\n",
"62 0.5649194042% 1.00568128874 0.8774462500% 1.0088521352\n",
"61 0.4661221783% 0.7394608312% 1.00468305052 1.0074496959\n",
"60 0.3830425125% 1.0038451537 0.6209679859% 1.0062484809\n",
"59 0.3134895706% 1.00314475418 0.5196106310% 1.0052232469\n",
"58 0.2555190642% 1.00256173637 0.4332486383% 1.0043513385\n",
"57 0.2074163517% 1.00207847461 0.3599502662% 1.0036125059\n",
"56 0.1676786288% 1.00167960262 0.2979818588% 1.0029887244\n",
"55 0.1349967223% 1.0013517921 0.2457962547% 1.0024640190\n",
"54 0.1082369236% 1.00108354203 0.2020203110% 1.0020242926\n",
"53 0.0864232198% 1.00086497974 0.1654419041% 1.0016571607\n",
"52 0.0687202081% 1.00068767465 0.1349967223% 1.0013517921\n",
"51 0.0544169120% 1.0005444654 0.1097551201% 1.0010987571\n",
"50 0.0429116534% 1.00042930075 0.0889092573% 1.0008898838\n",
"49 0.0336980823% 1.00033709442 0.0717607037% 1.0007181224\n",
"48 0.0263524193% 1.00026359366 0.0577086494% 1.0005774197\n",
"47 0.0205219239% 1.00020526136 0.0462388214% 1.0004626021\n",
"46 0.0159145714% 1.00015917105 0.0369131784% 1.0003692681\n",
"45 0.0122898971% 1.00012291408 0.0293604212% 1.0002936904\n",
"44 0.0094509445% 1.00009451838 0.0232673374% 1.0002327275\n",
"43 0.0072372434% 1.00007237767 0.0183709730% 1.0001837435\n",
"42 0.0055187356% 1.0000551904 0.0144516117% 1.0001445370\n",
"41 0.0041905589% 1.00004190735 0.0113265263% 1.0001132781\n",
"40 0.0031686035% 1.00003168704 0.0088444590% 1.0000884524\n",
"39 0.0023857510% 1.00002385808 0.0068807808% 1.0000688125\n",
"38 0.0017887159% 1.00001788748 0.0053332750% 1.0000533356\n",
"37 0.0013354097% 1.00001335428 0.0041184901% 1.0000411866\n",
"36 0.0009927560% 1.00000992766 0.0031686035% 1.0000316870\n",
"35 0.0007348917% 1.00000734897 0.0024287437% 1.0000242880\n",
"34 0.0005416953% 1.00000541698 0.0018547167% 1.0000185475\n",
"33 0.0003975903% 1.00000397592 0.0014110871% 1.0000141111\n",
"32 0.0002905787% 1.0000029058 0.0010695686% 1.0000106958\n",
"31 0.0002114643% 1.00000211465 0.0008076820% 1.0000080769\n",
"30 0.0001532337% 1.00000153234 0.0006076416% 1.0000060765\n",
"29 0.0001105640% 1.00000110564 0.0004554371% 1.0000045544\n",
"28 0.0000794353% 1.00000079435 0.0003400803% 1.0000034008\n",
"27 0.0000568267% 1.00000056827 0.0002529912% 1.0000025299\n",
"26 0.0000404789% 1.00000040479 0.0001874989% 1.0000018750\n",
"25 0.0000287105% 1.00000028711 0.0001384395% 1.0000013844\n",
"24 0.0000202763% 1.00000020276 0.0001018328% 1.0000010183\n",
"23 0.0000142583% 1.00000014258 0.0000746245% 1.0000007462\n",
"22 0.0000099834% 1.00000009983 0.0000544802% 1.0000005448\n",
"21 0.0000069602% 1.0000000696 0.0000396240% 1.0000003962\n",
"20 0.0000048317% 1.00000004832 0.0000287105% 1.0000002871\n",
"19 0.0000033396% 1.0000000334 0.0000207245% 1.0000002072\n",
"18 0.0000022984% 1.00000002298 0.0000149034% 1.0000001490\n",
"17 0.0000015750% 1.00000001575 0.0000106769% 1.0000001068\n",
"16 0.0000010746% 1.00000001075 0.0000076201% 1.0000000762\n",
"15 0.0000007301% 1.0000000073 0.0000054180% 1.0000000542\n",
"14 0.0000004938% 1.00000000494 0.0000038376% 1.0000000384\n",
"13 0.0000003326% 1.00000000333 0.0000027080% 1.0000000271\n",
"12 0.0000002230% 1.00000000223 0.0000019036% 1.0000000190\n",
"11 0.0000001489% 0.0000013331% 1.00000000149 1.0000000133\n",
"10 0.0000000990% 1.00000000099 0.0000009301% 1.0000000093\n",
"9 0.0000000655% 1.00000000066 0.0000006464% 1.0000000065\n",
"8 0.0000000432% 1.00000000043 0.0000004476% 1.0000000045\n",
"7 0.0000000283% 1.00000000028 0.0000003087% 1.0000000031\n",
"6 0.0000000185% 1.00000000019 0.0000002121% 1.0000000021\n",
"5 0.0000000120% 1.00000000012 0.0000001452% 1.0000000015\n",
"4 0.0000000078% 1.00000000008 0.0000000990% 1.0000000010\n",
"3 0.0000000050% 1.00000000005 0.0000000673% 1.0000000007\n",
"2 0.0000000032% 1.00000000003 0.0000000455% 1.0000000005\n",
"1 0.0000000021% 1.00000000002 0.0000000307% 1.0000000003\"\"\".replace(\"%\",\"\")\n",
"iq_percentile = np.array([(int(line.split()[0]),float(line.split()[1]),float(line.split()[3])) for line in iqrawpercentile.split(\"\\n\")])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ByyiSFtY53-W",
"colab_type": "code",
"colab": {}
},
"source": [
"select_iq1 = lambda x:iq_percentile[:,0][np.argmin(np.abs(x-iq_percentile[:,1]))]\n",
"select_iq2 = lambda x:iq_percentile[:,0][np.argmin(np.abs(x-iq_percentile[:,2]))]\n",
"percentile_college['iq_2016_0']=percentile_college.percentile2016.apply(select_iq1)\n",
"percentile_college['iq_2016_1']=percentile_college.percentile2016.apply(select_iq2)\n",
"percentile_college['iq_2017_0']=percentile_college.percentile2017.apply(select_iq1)\n",
"percentile_college['iq_2017_1']=percentile_college.percentile2017.apply(select_iq2)\n",
"percentile_college['iq_2018_0']=percentile_college.percentile2018.apply(select_iq1)\n",
"percentile_college['iq_2018_1']=percentile_college.percentile2018.apply(select_iq2)\n",
"percentile_college['iq_2019_0']=percentile_college.percentile2019.apply(select_iq1)\n",
"percentile_college['iq_2019_1']=percentile_college.percentile2019.apply(select_iq2)\n",
"percentile_college['range_iq_estimates']=percentile_college[[x for x in percentile_college.columns if \"iq_20\" in x]].apply(lambda x:\"-\".join([str(x.min()),str(x.max())]),axis=1)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XZNfxEmC7jSg",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "h6P8eDwt7l8q",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "4c9d1e74-cf90-4403-9be0-535119a075e7"
},
"source": [
""
],
"execution_count": 154,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {
"tags": []
},
"execution_count": 154
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nyLPQhSw9t3s",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment