Skip to content

Instantly share code, notes, and snippets.

@codebrain001
Created April 12, 2020 23:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codebrain001/0baaa2a1051a9833717be9ee4a100091 to your computer and use it in GitHub Desktop.
Save codebrain001/0baaa2a1051a9833717be9ee4a100091 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(<class 'dask.dataframe.core.DataFrame'>, <class 'pandas.core.frame.DataFrame'>)"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check the type of each dataset: df is a Dask DataFrame, counties_df is a pandas DataFrame\n",
"type(df), type(counties_df)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OBJECTID</th>\n",
" <th>fips</th>\n",
" <th>GEONAME</th>\n",
" <th>Total_cvap_est</th>\n",
" <th>SHAPE</th>\n",
" <th>Shape__Area</th>\n",
" <th>Shape__Length</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>01001</td>\n",
" <td>Autauga County, Alabama</td>\n",
" <td>40690</td>\n",
" <td>{'rings': [[[-9619465, 3856529.0001000017], [-...</td>\n",
" <td>2.208654e+09</td>\n",
" <td>2.498864e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>01003</td>\n",
" <td>Baldwin County, Alabama</td>\n",
" <td>151770</td>\n",
" <td>{'rings': [[[-9746859, 3539643.0001000017], [-...</td>\n",
" <td>5.671048e+09</td>\n",
" <td>1.655940e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>01005</td>\n",
" <td>Barbour County, Alabama</td>\n",
" <td>20375</td>\n",
" <td>{'rings': [[[-9468394, 3771591.0001000017], [-...</td>\n",
" <td>3.257902e+09</td>\n",
" <td>3.208964e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>01007</td>\n",
" <td>Bibb County, Alabama</td>\n",
" <td>17590</td>\n",
" <td>{'rings': [[[-9692114, 3928124.0001000017], [-...</td>\n",
" <td>2.311999e+09</td>\n",
" <td>2.279184e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>01009</td>\n",
" <td>Blount County, Alabama</td>\n",
" <td>42430</td>\n",
" <td>{'rings': [[[-9623907, 4063676.0001000017], [-...</td>\n",
" <td>2.456909e+09</td>\n",
" <td>2.926429e+05</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" OBJECTID fips ... Shape__Area Shape__Length\n",
"0 1 01001 ... 2.208654e+09 2.498864e+05\n",
"1 2 01003 ... 5.671048e+09 1.655940e+06\n",
"2 3 01005 ... 3.257902e+09 3.208964e+05\n",
"3 4 01007 ... 2.311999e+09 2.279184e+05\n",
"4 5 01009 ... 2.456909e+09 2.926429e+05\n",
"\n",
"[5 rows x 7 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rename the GEOID column to 'fips' so counties_df shares the join key used to merge with df\n",
"counties_df = counties_df.rename(columns={'GEOID': 'fips'})\n",
"counties_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>county</th>\n",
" <th>fips</th>\n",
" <th>candidate(d)</th>\n",
" <th>votes (d)</th>\n",
" <th>year</th>\n",
" <th>candidate(r)</th>\n",
" <th>votes (r)</th>\n",
" <th>candidate(O)</th>\n",
" <th>votes (O)</th>\n",
" <th>votes_total</th>\n",
" <th>voter_share_major_party</th>\n",
" <th>voter_share_dem</th>\n",
" <th>voter_share_rep</th>\n",
" <th>voter_share_other</th>\n",
" <th>rawdiff_dem_vs_rep</th>\n",
" <th>rawdiff_rep_vs_dem</th>\n",
" <th>rawdiff_dem_vs_other</th>\n",
" <th>rawdiff_rep_vs_other</th>\n",
" <th>rawdiff_other_vs_dem</th>\n",
" <th>rawdiff_other_vs_rep</th>\n",
" <th>pctdiff_dem_vs_rep</th>\n",
" <th>pctdiff_rep_vs_dem</th>\n",
" <th>pctdiff_dem_vs_other</th>\n",
" <th>pctdiff_rep_vs_other</th>\n",
" <th>pctdiff_other_vs_dem</th>\n",
" <th>pctdiff_other_vs_rep</th>\n",
" <th>OBJECTID</th>\n",
" <th>GEONAME</th>\n",
" <th>Total_cvap_est</th>\n",
" <th>SHAPE</th>\n",
" <th>Shape__Area</th>\n",
" <th>Shape__Length</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Autauga</td>\n",
" <td>01001</td>\n",
" <td>Hillary Clinton</td>\n",
" <td>5936.0</td>\n",
" <td>2016</td>\n",
" <td>Donald Trump</td>\n",
" <td>18172.0</td>\n",
" <td>Other</td>\n",
" <td>865.0</td>\n",
" <td>24973.0</td>\n",
" <td>0.965363</td>\n",
" <td>0.237697</td>\n",
" <td>0.727666</td>\n",
" <td>0.034637</td>\n",
" <td>-12236.0</td>\n",
" <td>12236.0</td>\n",
" <td>5071.0</td>\n",
" <td>17307.0</td>\n",
" <td>-5071.0</td>\n",
" <td>-17307.0</td>\n",
" <td>-0.489969</td>\n",
" <td>0.489969</td>\n",
" <td>0.203059</td>\n",
" <td>0.693028</td>\n",
" <td>-0.203059</td>\n",
" <td>-0.693028</td>\n",
" <td>1.0</td>\n",
" <td>Autauga County, Alabama</td>\n",
" <td>40690.0</td>\n",
" <td>{'rings': [[[-9619465, 3856529.0001000017], [-...</td>\n",
" <td>2.208654e+09</td>\n",
" <td>2.498864e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Baldwin</td>\n",
" <td>01003</td>\n",
" <td>Hillary Clinton</td>\n",
" <td>18458.0</td>\n",
" <td>2016</td>\n",
" <td>Donald Trump</td>\n",
" <td>72883.0</td>\n",
" <td>Other</td>\n",
" <td>3874.0</td>\n",
" <td>95215.0</td>\n",
" <td>0.959313</td>\n",
" <td>0.193856</td>\n",
" <td>0.765457</td>\n",
" <td>0.040687</td>\n",
" <td>-54425.0</td>\n",
" <td>54425.0</td>\n",
" <td>14584.0</td>\n",
" <td>69009.0</td>\n",
" <td>-14584.0</td>\n",
" <td>-69009.0</td>\n",
" <td>-0.571601</td>\n",
" <td>0.571601</td>\n",
" <td>0.153169</td>\n",
" <td>0.724770</td>\n",
" <td>-0.153169</td>\n",
" <td>-0.724770</td>\n",
" <td>2.0</td>\n",
" <td>Baldwin County, Alabama</td>\n",
" <td>151770.0</td>\n",
" <td>{'rings': [[[-9746859, 3539643.0001000017], [-...</td>\n",
" <td>5.671048e+09</td>\n",
" <td>1.655940e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Baldwin</td>\n",
" <td>13009</td>\n",
" <td>Hillary Clinton</td>\n",
" <td>7970.0</td>\n",
" <td>2016</td>\n",
" <td>Donald Trump</td>\n",
" <td>7697.0</td>\n",
" <td>Other</td>\n",
" <td>449.0</td>\n",
" <td>16116.0</td>\n",
" <td>0.972139</td>\n",
" <td>0.494540</td>\n",
" <td>0.477600</td>\n",
" <td>0.027861</td>\n",
" <td>273.0</td>\n",
" <td>-273.0</td>\n",
" <td>7521.0</td>\n",
" <td>7248.0</td>\n",
" <td>-7521.0</td>\n",
" <td>-7248.0</td>\n",
" <td>0.016940</td>\n",
" <td>-0.016940</td>\n",
" <td>0.466679</td>\n",
" <td>0.449739</td>\n",
" <td>-0.466679</td>\n",
" <td>-0.449739</td>\n",
" <td>392.0</td>\n",
" <td>Baldwin County, Georgia</td>\n",
" <td>36225.0</td>\n",
" <td>{'rings': [[[-9270032, 3920184.0001000017], [-...</td>\n",
" <td>9.921188e+08</td>\n",
" <td>1.894294e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Barbour</td>\n",
" <td>01005</td>\n",
" <td>Hillary Clinton</td>\n",
" <td>4871.0</td>\n",
" <td>2016</td>\n",
" <td>Donald Trump</td>\n",
" <td>5454.0</td>\n",
" <td>Other</td>\n",
" <td>144.0</td>\n",
" <td>10469.0</td>\n",
" <td>0.986245</td>\n",
" <td>0.465278</td>\n",
" <td>0.520967</td>\n",
" <td>0.013755</td>\n",
" <td>-583.0</td>\n",
" <td>583.0</td>\n",
" <td>4727.0</td>\n",
" <td>5310.0</td>\n",
" <td>-4727.0</td>\n",
" <td>-5310.0</td>\n",
" <td>-0.055688</td>\n",
" <td>0.055688</td>\n",
" <td>0.451524</td>\n",
" <td>0.507212</td>\n",
" <td>-0.451524</td>\n",
" <td>-0.507212</td>\n",
" <td>3.0</td>\n",
" <td>Barbour County, Alabama</td>\n",
" <td>20375.0</td>\n",
" <td>{'rings': [[[-9468394, 3771591.0001000017], [-...</td>\n",
" <td>3.257902e+09</td>\n",
" <td>3.208964e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Barbour</td>\n",
" <td>54001</td>\n",
" <td>Hillary Clinton</td>\n",
" <td>1222.0</td>\n",
" <td>2016</td>\n",
" <td>Donald Trump</td>\n",
" <td>4527.0</td>\n",
" <td>Other</td>\n",
" <td>305.0</td>\n",
" <td>6054.0</td>\n",
" <td>0.949620</td>\n",
" <td>0.201850</td>\n",
" <td>0.747770</td>\n",
" <td>0.050380</td>\n",
" <td>-3305.0</td>\n",
" <td>3305.0</td>\n",
" <td>917.0</td>\n",
" <td>4222.0</td>\n",
" <td>-917.0</td>\n",
" <td>-4222.0</td>\n",
" <td>-0.545920</td>\n",
" <td>0.545920</td>\n",
" <td>0.151470</td>\n",
" <td>0.697390</td>\n",
" <td>-0.151470</td>\n",
" <td>-0.697390</td>\n",
" <td>2993.0</td>\n",
" <td>Barbour County, West Virginia</td>\n",
" <td>13410.0</td>\n",
" <td>{'rings': [[[-8893931, 4764677.000100002], [-8...</td>\n",
" <td>1.477859e+09</td>\n",
" <td>1.901228e+05</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" county fips ... Shape__Area Shape__Length\n",
"0 Autauga 01001 ... 2.208654e+09 2.498864e+05\n",
"1 Baldwin 01003 ... 5.671048e+09 1.655940e+06\n",
"2 Baldwin 13009 ... 9.921188e+08 1.894294e+05\n",
"3 Barbour 01005 ... 3.257902e+09 3.208964e+05\n",
"4 Barbour 54001 ... 1.477859e+09 1.901228e+05\n",
"\n",
"[5 rows x 32 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
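],
"source": [
"# Materialize the Dask DataFrame with compute(), then left-join the county data onto it by FIPS code\n",
"geo_df = dd.merge(df.compute(),counties_df, how='left', on='fips')\n",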
"geo_df.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment