Skip to content

Instantly share code, notes, and snippets.

@mattalhonte
Created August 1, 2018 18:04
Show Gist options
  • Save mattalhonte/80bc2849b824f9f6174d1faa6b9d32f0 to your computer and use it in GitHub Desktop.
Save mattalhonte/80bc2849b824f9f6174d1faa6b9d32f0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dask.dataframe as dd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"columns = ['FILER_ID',\n",
" 'FREPORT_ID',\n",
" 'TRANSACTION_CODE',\n",
" 'E_YEAR',\n",
" 'T3_TRID',\n",
" 'DATE1_10',\n",
" 'DATE2_12',\n",
" 'CONTRIB_CODE_20',\n",
" 'CONTRIB_TYPE_CODE_25',\n",
" 'CORP_30',\n",
" 'FIRST_NAME_40',\n",
" 'MID_INIT_42',\n",
" 'LAST_NAME_44',\n",
" 'ADDR_1_50',\n",
" 'CITY_52',\n",
" 'STATE_54',\n",
" 'ZIP_56',\n",
" 'CHECK_NO_60',\n",
" 'CHECK_DATE_62',\n",
" 'AMOUNT_70',\n",
" 'AMOUNT2_72',\n",
" 'DESCRIPTION_80',\n",
" 'OTHER_RECPT_CODE_90',\n",
" 'PURPOSE_CODE1_100',\n",
" 'PURPOSE_CODE2_102',\n",
" 'EXPLANATION_110',\n",
" 'XFER_TYPE_120',\n",
" 'CHKBOX_130',\n",
" 'CREREC_UID',\n",
" 'CREREC_DATE']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# header=None: the first line of the file is data, so names come from `columns`.\n",
"# dtype=\"object\": one dtype for every column instead of per-column inference.\n",
"df = dd.read_csv(\n",
"    \"ALL_REPORTS/ALL_REPORTS.out\",\n",
"    names=columns,\n",
"    header=None,\n",
"    dtype=\"object\",\n",
"    encoding=\"ISO-8859-1\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>FILER_ID</th>\n",
" <th>FREPORT_ID</th>\n",
" <th>TRANSACTION_CODE</th>\n",
" <th>E_YEAR</th>\n",
" <th>T3_TRID</th>\n",
" <th>DATE1_10</th>\n",
" <th>DATE2_12</th>\n",
" <th>CONTRIB_CODE_20</th>\n",
" <th>CONTRIB_TYPE_CODE_25</th>\n",
" <th>CORP_30</th>\n",
" <th>...</th>\n",
" <th>AMOUNT2_72</th>\n",
" <th>DESCRIPTION_80</th>\n",
" <th>OTHER_RECPT_CODE_90</th>\n",
" <th>PURPOSE_CODE1_100</th>\n",
" <th>PURPOSE_CODE2_102</th>\n",
" <th>EXPLANATION_110</th>\n",
" <th>XFER_TYPE_120</th>\n",
" <th>CHKBOX_130</th>\n",
" <th>CREREC_UID</th>\n",
" <th>CREREC_DATE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A00017</td>\n",
" <td>K</td>\n",
" <td>G</td>\n",
" <td>1999</td>\n",
" <td>4530</td>\n",
" <td>01/12/1999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>PEOPLE TO ELECT MANGINELLI</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NAA</td>\n",
" <td>07/22/1999 08:43:59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A00037</td>\n",
" <td>K</td>\n",
" <td>G</td>\n",
" <td>1999</td>\n",
" <td>999999</td>\n",
" <td>01/12/1999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CITIZENS TO ELECT TEDISCO TO ASSEMBLY</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A00037</td>\n",
" <td>K</td>\n",
" <td>N</td>\n",
" <td>1999</td>\n",
" <td>1693</td>\n",
" <td>01/15/1999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>OUTSTANDING LOAN</td>\n",
" <td>...</td>\n",
" <td>10474.8</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>OTHER</td>\n",
" <td>NaN</td>\n",
" <td>PREVIOUS LOAN FROM JAMES TEDISCO</td>\n",
" <td>NaN</td>\n",
" <td>P</td>\n",
" <td>JM</td>\n",
" <td>07/15/1999 15:08:17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A00037</td>\n",
" <td>J</td>\n",
" <td>N</td>\n",
" <td>2000</td>\n",
" <td>1694</td>\n",
" <td>01/13/2000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>OUTSTANDING LOAN</td>\n",
" <td>...</td>\n",
" <td>10474.8</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>OTHER</td>\n",
" <td>NaN</td>\n",
" <td>LOANS FROM PREVIOUS CAMPAIGNS FROM J</td>\n",
" <td>NaN</td>\n",
" <td>P</td>\n",
" <td>JM</td>\n",
" <td>01/14/1900 16:35:09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A00037</td>\n",
" <td>K</td>\n",
" <td>X</td>\n",
" <td>2000</td>\n",
" <td>999999</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>07/20/2000 00:00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" FILER_ID FREPORT_ID TRANSACTION_CODE E_YEAR T3_TRID DATE1_10 DATE2_12 \\\n",
"0 A00017 K G 1999 4530 01/12/1999 NaN \n",
"1 A00037 K G 1999 999999 01/12/1999 NaN \n",
"2 A00037 K N 1999 1693 01/15/1999 NaN \n",
"3 A00037 J N 2000 1694 01/13/2000 NaN \n",
"4 A00037 K X 2000 999999 NaN NaN \n",
"\n",
" CONTRIB_CODE_20 CONTRIB_TYPE_CODE_25 CORP_30 \\\n",
"0 NaN NaN PEOPLE TO ELECT MANGINELLI \n",
"1 NaN NaN CITIZENS TO ELECT TEDISCO TO ASSEMBLY \n",
"2 NaN NaN OUTSTANDING LOAN \n",
"3 NaN NaN OUTSTANDING LOAN \n",
"4 NaN NaN NaN \n",
"\n",
" ... AMOUNT2_72 DESCRIPTION_80 OTHER_RECPT_CODE_90 \\\n",
"0 ... NaN NaN NaN \n",
"1 ... NaN NaN NaN \n",
"2 ... 10474.8 NaN NaN \n",
"3 ... 10474.8 NaN NaN \n",
"4 ... NaN NaN NaN \n",
"\n",
" PURPOSE_CODE1_100 PURPOSE_CODE2_102 EXPLANATION_110 \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 OTHER NaN PREVIOUS LOAN FROM JAMES TEDISCO \n",
"3 OTHER NaN LOANS FROM PREVIOUS CAMPAIGNS FROM J \n",
"4 NaN NaN NaN \n",
"\n",
" XFER_TYPE_120 CHKBOX_130 CREREC_UID CREREC_DATE \n",
"0 2 NaN NAA 07/22/1999 08:43:59 \n",
"1 2 NaN NaN NaN \n",
"2 NaN P JM 07/15/1999 15:08:17 \n",
"3 NaN P JM 01/14/1900 16:35:09 \n",
"4 NaN NaN NaN 07/20/2000 00:00:00 \n",
"\n",
"[5 rows x 30 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:py36]",
"language": "python",
"name": "conda-env-py36-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment