Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@AlJohri
Created August 28, 2020 05:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlJohri/22b13dbe72d2fefc08b2a6513a2ec802 to your computer and use it in GitHub Desktop.
Save AlJohri/22b13dbe72d2fefc08b2a6513a2ec802 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import time\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"\n",
"import sys; sys.path.insert(0, '../scripts/')\n",
"from filter import get_committees, Database\n",
"\n",
"DATABASE_URI = \"postgres://postgres:pecota123@dhrumil-campaign-finance.c3b2p46qcdck.us-east-1.rds.amazonaws.com/postgres\"\n",
"database = Database(DATABASE_URI)\n",
"await database.connect()\n",
"\n",
"cwd = '.'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load Data\n",
"\n",
"NOTE: Takes 1.5 minutes."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 46.1 s, sys: 23.5 s, total: 1min 9s\n",
"Wall time: 1min 16s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"contributions = pd.read_parquet(f'{cwd}/../data/contributions.parquet',\n",
" columns=[\"cmte_id\", \"name\", \"occupation\", \"employer\", \"transaction_dt\", \"transaction_amt\", \"file_year\"])\n",
"names = pd.read_parquet(f'{cwd}/../data/names.parquet')\n",
"candidate_master = pd.read_csv(f'{cwd}/../data/candidate_master.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load Committees"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cand_id</th>\n",
" <th>cand_name</th>\n",
" <th>cmte_id</th>\n",
" <th>cmte_nm</th>\n",
" <th>connected_org_nm</th>\n",
" <th>cmte_pty_affiliation</th>\n",
" <th>cmte_dsgn</th>\n",
" <th>cmte_tp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>H0TX22302</td>\n",
" <td>NEHLS, TROY</td>\n",
" <td>C00717777</td>\n",
" <td>TAKE BACK TX-22 REPUBLICAN NOMINEE FUND 2020</td>\n",
" <td>TAKE BACK THE HOUSE TEXAS 2020</td>\n",
" <td>None</td>\n",
" <td>U</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>H0TX22302</td>\n",
" <td>NEHLS, TROY</td>\n",
" <td>C00730150</td>\n",
" <td>NEHLS FOR CONGRESS</td>\n",
" <td>CRUZ 20 FOR 20 VICTORY FUND</td>\n",
" <td>REP</td>\n",
" <td>P</td>\n",
" <td>H</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>H8TX22313</td>\n",
" <td>KULKARNI, SRI PRESTON</td>\n",
" <td>C00662874</td>\n",
" <td>SRI FOR CONGRESS</td>\n",
" <td>SPK VICTORY FUND</td>\n",
" <td>DEM</td>\n",
" <td>P</td>\n",
" <td>H</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cand_id cand_name cmte_id \\\n",
"0 H0TX22302 NEHLS, TROY C00717777 \n",
"1 H0TX22302 NEHLS, TROY C00730150 \n",
"2 H8TX22313 KULKARNI, SRI PRESTON C00662874 \n",
"\n",
" cmte_nm \\\n",
"0 TAKE BACK TX-22 REPUBLICAN NOMINEE FUND 2020 \n",
"1 NEHLS FOR CONGRESS \n",
"2 SRI FOR CONGRESS \n",
"\n",
" connected_org_nm cmte_pty_affiliation cmte_dsgn cmte_tp \n",
"0 TAKE BACK THE HOUSE TEXAS 2020 None U N \n",
"1 CRUZ 20 FOR 20 VICTORY FUND REP P H \n",
"2 SPK VICTORY FUND DEM P H "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"committees = await get_committees(database, 2020, [\"KULKARNI\", \"NEHLS\"], president=False)\n",
"committees"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Filter and Join Contributions"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"a = contributions.query('cmte_id in @committees.cmte_id.tolist()')\n",
"a = a.merge(committees[['cmte_id', 'cand_name', 'cmte_pty_affiliation']], how='left', on='cmte_id')\n",
"a = a.merge(names[['fec_name', 'nationality', 'south_asian']], how='left', left_on='name', right_on='fec_name')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"C00662874 2518\n",
"C00730150 465\n",
"Name: cmte_id, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.cmte_id.value_counts(dropna=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"celtic_english 1325\n",
"south_asian 579\n",
"NaN 472\n",
"european__german 168\n",
"hispanic__spanish 106\n",
"european__french 71\n",
"east_asian__chinese 65\n",
"muslim__pakistanis__pakistan 39\n",
"hispanic__philippines 31\n",
"east_asian__malay__indonesia 24\n",
"muslim__nubian 22\n",
"hispanic__portuguese 16\n",
"muslim__persian 11\n",
"east_asian__indochina__vietnam 9\n",
"muslim__pakistanis__bangladesh 9\n",
"african__west_african 8\n",
"nordic__scandinavian__norway 7\n",
"greek 7\n",
"european__italian__italy 4\n",
"european__russian 2\n",
"african__east_african 2\n",
"african__south_african 2\n",
"nordic__scandinavian__denmark 2\n",
"muslim__arabian_peninsula 1\n",
"nordic__scandinavian__sweden 1\n",
"Name: nationality, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.nationality.value_counts(dropna=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Donations"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cmte_pty_affiliation</th>\n",
" <th>DEM</th>\n",
" <th>REP</th>\n",
" </tr>\n",
" <tr>\n",
" <th>nationality</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>african__east_african</th>\n",
" <td>1000.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>african__south_african</th>\n",
" <td>1000.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>african__west_african</th>\n",
" <td>9000.0</td>\n",
" <td>2000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>celtic_english</th>\n",
" <td>797753.0</td>\n",
" <td>196062.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__chinese</th>\n",
" <td>67310.0</td>\n",
" <td>500.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__indochina__vietnam</th>\n",
" <td>4750.0</td>\n",
" <td>1000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__malay__indonesia</th>\n",
" <td>25050.0</td>\n",
" <td>3700.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__french</th>\n",
" <td>38672.0</td>\n",
" <td>6050.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__german</th>\n",
" <td>117232.0</td>\n",
" <td>32859.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__italian__italy</th>\n",
" <td>4300.0</td>\n",
" <td>2500.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__russian</th>\n",
" <td>600.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>greek</th>\n",
" <td>6350.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__philippines</th>\n",
" <td>10025.0</td>\n",
" <td>6318.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__portuguese</th>\n",
" <td>11987.0</td>\n",
" <td>2750.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__spanish</th>\n",
" <td>45545.0</td>\n",
" <td>25909.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__arabian_peninsula</th>\n",
" <td>100.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__nubian</th>\n",
" <td>17900.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__pakistanis__bangladesh</th>\n",
" <td>7850.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__pakistanis__pakistan</th>\n",
" <td>51822.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__persian</th>\n",
" <td>10400.0</td>\n",
" <td>1250.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__denmark</th>\n",
" <td>3050.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__norway</th>\n",
" <td>9100.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__sweden</th>\n",
" <td>2800.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>south_asian</th>\n",
" <td>540194.0</td>\n",
" <td>17300.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cmte_pty_affiliation DEM REP\n",
"nationality \n",
"african__east_african 1000.0 NaN\n",
"african__south_african 1000.0 NaN\n",
"african__west_african 9000.0 2000.0\n",
"celtic_english 797753.0 196062.0\n",
"east_asian__chinese 67310.0 500.0\n",
"east_asian__indochina__vietnam 4750.0 1000.0\n",
"east_asian__malay__indonesia 25050.0 3700.0\n",
"european__french 38672.0 6050.0\n",
"european__german 117232.0 32859.0\n",
"european__italian__italy 4300.0 2500.0\n",
"european__russian 600.0 NaN\n",
"greek 6350.0 NaN\n",
"hispanic__philippines 10025.0 6318.0\n",
"hispanic__portuguese 11987.0 2750.0\n",
"hispanic__spanish 45545.0 25909.0\n",
"muslim__arabian_peninsula 100.0 NaN\n",
"muslim__nubian 17900.0 NaN\n",
"muslim__pakistanis__bangladesh 7850.0 NaN\n",
"muslim__pakistanis__pakistan 51822.0 NaN\n",
"muslim__persian 10400.0 1250.0\n",
"nordic__scandinavian__denmark 3050.0 NaN\n",
"nordic__scandinavian__norway 9100.0 NaN\n",
"nordic__scandinavian__sweden 2800.0 NaN\n",
"south_asian 540194.0 17300.0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.pivot_table(\n",
" index=[\"nationality\"],\n",
" columns=[\"cmte_pty_affiliation\"],\n",
" values=\"transaction_amt\",\n",
" aggfunc=\"sum\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Donors"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cmte_pty_affiliation</th>\n",
" <th>DEM</th>\n",
" <th>REP</th>\n",
" </tr>\n",
" <tr>\n",
" <th>nationality</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>african__east_african</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>african__south_african</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>african__west_african</th>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>celtic_english</th>\n",
" <td>558.0</td>\n",
" <td>166.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__chinese</th>\n",
" <td>28.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__indochina__vietnam</th>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>east_asian__malay__indonesia</th>\n",
" <td>13.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__french</th>\n",
" <td>23.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__german</th>\n",
" <td>67.0</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__italian__italy</th>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>european__russian</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>greek</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__philippines</th>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__portuguese</th>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hispanic__spanish</th>\n",
" <td>33.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__arabian_peninsula</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__nubian</th>\n",
" <td>11.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__pakistanis__bangladesh</th>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__pakistanis__pakistan</th>\n",
" <td>22.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>muslim__persian</th>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__denmark</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__norway</th>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nordic__scandinavian__sweden</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>south_asian</th>\n",
" <td>339.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cmte_pty_affiliation DEM REP\n",
"nationality \n",
"african__east_african 2.0 NaN\n",
"african__south_african 1.0 NaN\n",
"african__west_african 5.0 1.0\n",
"celtic_english 558.0 166.0\n",
"east_asian__chinese 28.0 1.0\n",
"east_asian__indochina__vietnam 6.0 1.0\n",
"east_asian__malay__indonesia 13.0 2.0\n",
"european__french 23.0 5.0\n",
"european__german 67.0 23.0\n",
"european__italian__italy 3.0 1.0\n",
"european__russian 1.0 NaN\n",
"greek 2.0 NaN\n",
"hispanic__philippines 10.0 5.0\n",
"hispanic__portuguese 7.0 2.0\n",
"hispanic__spanish 33.0 13.0\n",
"muslim__arabian_peninsula 1.0 NaN\n",
"muslim__nubian 11.0 NaN\n",
"muslim__pakistanis__bangladesh 4.0 NaN\n",
"muslim__pakistanis__pakistan 22.0 NaN\n",
"muslim__persian 6.0 1.0\n",
"nordic__scandinavian__denmark 2.0 NaN\n",
"nordic__scandinavian__norway 4.0 NaN\n",
"nordic__scandinavian__sweden 1.0 NaN\n",
"south_asian 339.0 10.0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.pivot_table(\n",
" index=[\"nationality\"],\n",
" columns=[\"cmte_pty_affiliation\"],\n",
" values=\"name\",\n",
" aggfunc=lambda x: len(x.unique()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment