Created
April 12, 2020 23:20
-
-
Save codebrain001/b3dab538aa5574577af81c2f79a2d2ce to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Correct each county\n", | |
"(data_2016_df.loc[data_2016_df['fips'] == \"08111\", \"Total_cvap_est\"])= 574\n", | |
"(data_2016_df.loc[data_2016_df['fips'] == \"35021\", \"Total_cvap_est\"]) = 562\n", | |
"(data_2016_df.loc[data_2016_df['fips'] == \"48301\", \"Total_cvap_est\"]) = 86\n", | |
"(data_2016_df.loc[data_2016_df['fips'] == \"48311\", \"Total_cvap_est\"]) = 566" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>county</th>\n", | |
" <th>fips</th>\n", | |
" <th>candidate(d)</th>\n", | |
" <th>votes (d)</th>\n", | |
" <th>year</th>\n", | |
" <th>candidate(r)</th>\n", | |
" <th>votes (r)</th>\n", | |
" <th>candidate(O)</th>\n", | |
" <th>votes (O)</th>\n", | |
" <th>votes_total</th>\n", | |
" <th>voter_share_major_party</th>\n", | |
" <th>voter_share_dem</th>\n", | |
" <th>voter_share_rep</th>\n", | |
" <th>voter_share_other</th>\n", | |
" <th>rawdiff_dem_vs_rep</th>\n", | |
" <th>rawdiff_rep_vs_dem</th>\n", | |
" <th>rawdiff_dem_vs_other</th>\n", | |
" <th>rawdiff_rep_vs_other</th>\n", | |
" <th>rawdiff_other_vs_dem</th>\n", | |
" <th>rawdiff_other_vs_rep</th>\n", | |
" <th>pctdiff_dem_vs_rep</th>\n", | |
" <th>pctdiff_rep_vs_dem</th>\n", | |
" <th>pctdiff_dem_vs_other</th>\n", | |
" <th>pctdiff_rep_vs_other</th>\n", | |
" <th>pctdiff_other_vs_dem</th>\n", | |
" <th>pctdiff_other_vs_rep</th>\n", | |
" <th>OBJECTID</th>\n", | |
" <th>GEONAME</th>\n", | |
" <th>Total_cvap_est</th>\n", | |
" <th>SHAPE</th>\n", | |
" <th>Shape__Area</th>\n", | |
" <th>Shape__Length</th>\n", | |
" <th>voter_turnout</th>\n", | |
" <th>voter_turnout_majparty</th>\n", | |
" <th>voter_turnout_dem</th>\n", | |
" <th>voter_turnout_rep</th>\n", | |
" <th>voter_turnout_other</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Autauga</td>\n", | |
" <td>01001</td>\n", | |
" <td>Hillary Clinton</td>\n", | |
" <td>5936.0</td>\n", | |
" <td>2016</td>\n", | |
" <td>Donald Trump</td>\n", | |
" <td>18172.0</td>\n", | |
" <td>Other</td>\n", | |
" <td>865.0</td>\n", | |
" <td>24973.0</td>\n", | |
" <td>0.965363</td>\n", | |
" <td>0.237697</td>\n", | |
" <td>0.727666</td>\n", | |
" <td>0.034637</td>\n", | |
" <td>-12236.0</td>\n", | |
" <td>12236.0</td>\n", | |
" <td>5071.0</td>\n", | |
" <td>17307.0</td>\n", | |
" <td>-5071.0</td>\n", | |
" <td>-17307.0</td>\n", | |
" <td>-0.489969</td>\n", | |
" <td>0.489969</td>\n", | |
" <td>0.203059</td>\n", | |
" <td>0.693028</td>\n", | |
" <td>-0.203059</td>\n", | |
" <td>-0.693028</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Autauga County, Alabama</td>\n", | |
" <td>40690.0</td>\n", | |
" <td>{'rings': [[[-9619465, 3856529.0001000017], [-...</td>\n", | |
" <td>2.208654e+09</td>\n", | |
" <td>2.498864e+05</td>\n", | |
" <td>0.613738</td>\n", | |
" <td>0.592480</td>\n", | |
" <td>0.145884</td>\n", | |
" <td>0.446596</td>\n", | |
" <td>0.021258</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Baldwin</td>\n", | |
" <td>01003</td>\n", | |
" <td>Hillary Clinton</td>\n", | |
" <td>18458.0</td>\n", | |
" <td>2016</td>\n", | |
" <td>Donald Trump</td>\n", | |
" <td>72883.0</td>\n", | |
" <td>Other</td>\n", | |
" <td>3874.0</td>\n", | |
" <td>95215.0</td>\n", | |
" <td>0.959313</td>\n", | |
" <td>0.193856</td>\n", | |
" <td>0.765457</td>\n", | |
" <td>0.040687</td>\n", | |
" <td>-54425.0</td>\n", | |
" <td>54425.0</td>\n", | |
" <td>14584.0</td>\n", | |
" <td>69009.0</td>\n", | |
" <td>-14584.0</td>\n", | |
" <td>-69009.0</td>\n", | |
" <td>-0.571601</td>\n", | |
" <td>0.571601</td>\n", | |
" <td>0.153169</td>\n", | |
" <td>0.724770</td>\n", | |
" <td>-0.153169</td>\n", | |
" <td>-0.724770</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Baldwin County, Alabama</td>\n", | |
" <td>151770.0</td>\n", | |
" <td>{'rings': [[[-9746859, 3539643.0001000017], [-...</td>\n", | |
" <td>5.671048e+09</td>\n", | |
" <td>1.655940e+06</td>\n", | |
" <td>0.627364</td>\n", | |
" <td>0.601838</td>\n", | |
" <td>0.121618</td>\n", | |
" <td>0.480220</td>\n", | |
" <td>0.025525</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Baldwin</td>\n", | |
" <td>13009</td>\n", | |
" <td>Hillary Clinton</td>\n", | |
" <td>7970.0</td>\n", | |
" <td>2016</td>\n", | |
" <td>Donald Trump</td>\n", | |
" <td>7697.0</td>\n", | |
" <td>Other</td>\n", | |
" <td>449.0</td>\n", | |
" <td>16116.0</td>\n", | |
" <td>0.972139</td>\n", | |
" <td>0.494540</td>\n", | |
" <td>0.477600</td>\n", | |
" <td>0.027861</td>\n", | |
" <td>273.0</td>\n", | |
" <td>-273.0</td>\n", | |
" <td>7521.0</td>\n", | |
" <td>7248.0</td>\n", | |
" <td>-7521.0</td>\n", | |
" <td>-7248.0</td>\n", | |
" <td>0.016940</td>\n", | |
" <td>-0.016940</td>\n", | |
" <td>0.466679</td>\n", | |
" <td>0.449739</td>\n", | |
" <td>-0.466679</td>\n", | |
" <td>-0.449739</td>\n", | |
" <td>392.0</td>\n", | |
" <td>Baldwin County, Georgia</td>\n", | |
" <td>36225.0</td>\n", | |
" <td>{'rings': [[[-9270032, 3920184.0001000017], [-...</td>\n", | |
" <td>9.921188e+08</td>\n", | |
" <td>1.894294e+05</td>\n", | |
" <td>0.444886</td>\n", | |
" <td>0.432491</td>\n", | |
" <td>0.220014</td>\n", | |
" <td>0.212478</td>\n", | |
" <td>0.012395</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Barbour</td>\n", | |
" <td>01005</td>\n", | |
" <td>Hillary Clinton</td>\n", | |
" <td>4871.0</td>\n", | |
" <td>2016</td>\n", | |
" <td>Donald Trump</td>\n", | |
" <td>5454.0</td>\n", | |
" <td>Other</td>\n", | |
" <td>144.0</td>\n", | |
" <td>10469.0</td>\n", | |
" <td>0.986245</td>\n", | |
" <td>0.465278</td>\n", | |
" <td>0.520967</td>\n", | |
" <td>0.013755</td>\n", | |
" <td>-583.0</td>\n", | |
" <td>583.0</td>\n", | |
" <td>4727.0</td>\n", | |
" <td>5310.0</td>\n", | |
" <td>-4727.0</td>\n", | |
" <td>-5310.0</td>\n", | |
" <td>-0.055688</td>\n", | |
" <td>0.055688</td>\n", | |
" <td>0.451524</td>\n", | |
" <td>0.507212</td>\n", | |
" <td>-0.451524</td>\n", | |
" <td>-0.507212</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Barbour County, Alabama</td>\n", | |
" <td>20375.0</td>\n", | |
" <td>{'rings': [[[-9468394, 3771591.0001000017], [-...</td>\n", | |
" <td>3.257902e+09</td>\n", | |
" <td>3.208964e+05</td>\n", | |
" <td>0.513816</td>\n", | |
" <td>0.506748</td>\n", | |
" <td>0.239067</td>\n", | |
" <td>0.267681</td>\n", | |
" <td>0.007067</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Barbour</td>\n", | |
" <td>54001</td>\n", | |
" <td>Hillary Clinton</td>\n", | |
" <td>1222.0</td>\n", | |
" <td>2016</td>\n", | |
" <td>Donald Trump</td>\n", | |
" <td>4527.0</td>\n", | |
" <td>Other</td>\n", | |
" <td>305.0</td>\n", | |
" <td>6054.0</td>\n", | |
" <td>0.949620</td>\n", | |
" <td>0.201850</td>\n", | |
" <td>0.747770</td>\n", | |
" <td>0.050380</td>\n", | |
" <td>-3305.0</td>\n", | |
" <td>3305.0</td>\n", | |
" <td>917.0</td>\n", | |
" <td>4222.0</td>\n", | |
" <td>-917.0</td>\n", | |
" <td>-4222.0</td>\n", | |
" <td>-0.545920</td>\n", | |
" <td>0.545920</td>\n", | |
" <td>0.151470</td>\n", | |
" <td>0.697390</td>\n", | |
" <td>-0.151470</td>\n", | |
" <td>-0.697390</td>\n", | |
" <td>2993.0</td>\n", | |
" <td>Barbour County, West Virginia</td>\n", | |
" <td>13410.0</td>\n", | |
" <td>{'rings': [[[-8893931, 4764677.000100002], [-8...</td>\n", | |
" <td>1.477859e+09</td>\n", | |
" <td>1.901228e+05</td>\n", | |
" <td>0.451454</td>\n", | |
" <td>0.428710</td>\n", | |
" <td>0.091126</td>\n", | |
" <td>0.337584</td>\n", | |
" <td>0.022744</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" county fips ... voter_turnout_rep voter_turnout_other\n", | |
"0 Autauga 01001 ... 0.446596 0.021258\n", | |
"1 Baldwin 01003 ... 0.480220 0.025525\n", | |
"2 Baldwin 13009 ... 0.212478 0.012395\n", | |
"3 Barbour 01005 ... 0.267681 0.007067\n", | |
"4 Barbour 54001 ... 0.337584 0.022744\n", | |
"\n", | |
"[5 rows x 37 columns]" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the first rows of the dataframe (head() shows five rows by default)\n", | |
"data_2016_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['county', 'fips', 'candidate(d)', 'votes (d)', 'year', 'candidate(r)',\n", | |
" 'votes (r)', 'candidate(O)', 'votes (O)', 'votes_total',\n", | |
" 'voter_share_major_party', 'voter_share_dem', 'voter_share_rep',\n", | |
" 'voter_share_other', 'rawdiff_dem_vs_rep', 'rawdiff_rep_vs_dem',\n", | |
" 'rawdiff_dem_vs_other', 'rawdiff_rep_vs_other', 'rawdiff_other_vs_dem',\n", | |
" 'rawdiff_other_vs_rep', 'pctdiff_dem_vs_rep', 'pctdiff_rep_vs_dem',\n", | |
" 'pctdiff_dem_vs_other', 'pctdiff_rep_vs_other', 'pctdiff_other_vs_dem',\n", | |
" 'pctdiff_other_vs_rep', 'OBJECTID', 'GEONAME', 'Total_cvap_est',\n", | |
" 'SHAPE', 'Shape__Area', 'Shape__Length', 'voter_turnout',\n", | |
" 'voter_turnout_majparty', 'voter_turnout_dem', 'voter_turnout_rep',\n", | |
" 'voter_turnout_other'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# List the column labels of the dataframe\n", | |
"data_2016_df.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Recalculate voter turnout fields\n", | |
"data_2016_df['voter_turnout'] = (data_2016_df['votes_total'] / data_2016_df['Total_cvap_est'])\n", | |
"data_2016_df['voter_turnout_majparty'] = ((data_2016_df['votes (d)']+data_2016_df['votes (r)']) / data_2016_df['Total_cvap_est'])\n", | |
"data_2016_df['voter_turnout_dem'] = (data_2016_df['votes (d)'] / data_2016_df['Total_cvap_est'])\n", | |
"data_2016_df['voter_turnout_gop'] = (data_2016_df['votes (r)'] / data_2016_df['Total_cvap_est'])\n", | |
"data_2016_df['voter_turnout_other'] = (data_2016_df['votes (O)'] / data_2016_df['Total_cvap_est'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"To confirm that this correction addressed the issue, you will again query for counties with a voter turnout value above 100%." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>county</th>\n", | |
" <th>fips</th>\n", | |
" <th>candidate(d)</th>\n", | |
" <th>votes (d)</th>\n", | |
" <th>year</th>\n", | |
" <th>candidate(r)</th>\n", | |
" <th>votes (r)</th>\n", | |
" <th>candidate(O)</th>\n", | |
" <th>votes (O)</th>\n", | |
" <th>votes_total</th>\n", | |
" <th>voter_share_major_party</th>\n", | |
" <th>voter_share_dem</th>\n", | |
" <th>voter_share_rep</th>\n", | |
" <th>voter_share_other</th>\n", | |
" <th>rawdiff_dem_vs_rep</th>\n", | |
" <th>rawdiff_rep_vs_dem</th>\n", | |
" <th>rawdiff_dem_vs_other</th>\n", | |
" <th>rawdiff_rep_vs_other</th>\n", | |
" <th>rawdiff_other_vs_dem</th>\n", | |
" <th>rawdiff_other_vs_rep</th>\n", | |
" <th>pctdiff_dem_vs_rep</th>\n", | |
" <th>pctdiff_rep_vs_dem</th>\n", | |
" <th>pctdiff_dem_vs_other</th>\n", | |
" <th>pctdiff_rep_vs_other</th>\n", | |
" <th>pctdiff_other_vs_dem</th>\n", | |
" <th>pctdiff_other_vs_rep</th>\n", | |
" <th>OBJECTID</th>\n", | |
" <th>GEONAME</th>\n", | |
" <th>Total_cvap_est</th>\n", | |
" <th>SHAPE</th>\n", | |
" <th>Shape__Area</th>\n", | |
" <th>Shape__Length</th>\n", | |
" <th>voter_turnout</th>\n", | |
" <th>voter_turnout_majparty</th>\n", | |
" <th>voter_turnout_dem</th>\n", | |
" <th>voter_turnout_rep</th>\n", | |
" <th>voter_turnout_other</th>\n", | |
" <th>voter_turnout_gop</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [county, fips, candidate(d), votes (d), year, candidate(r), votes (r), candidate(O), votes (O), votes_total, voter_share_major_party, voter_share_dem, voter_share_rep, voter_share_other, rawdiff_dem_vs_rep, rawdiff_rep_vs_dem, rawdiff_dem_vs_other, rawdiff_rep_vs_other, rawdiff_other_vs_dem, rawdiff_other_vs_rep, pctdiff_dem_vs_rep, pctdiff_rep_vs_dem, pctdiff_dem_vs_other, pctdiff_rep_vs_other, pctdiff_other_vs_dem, pctdiff_other_vs_rep, OBJECTID, GEONAME, Total_cvap_est, SHAPE, Shape__Area, Shape__Length, voter_turnout, voter_turnout_majparty, voter_turnout_dem, voter_turnout_rep, voter_turnout_other, voter_turnout_gop]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_2016_df.loc[data_2016_df['voter_turnout'] > 1]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"No records are returned, indicating that there are no counties with a turnout value above 100%. Well done! You have cleaned the data. Next, you will convert the dataframe to a permanent dataset called a feature class. Feature classes are stored in an ArcGIS Pro file geodatabase." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment