Created
November 30, 2018 20:13
-
-
Save Whamp/f44a341485ce98afb984c25c50f00a2f to your computer and use it in GitHub Desktop.
Tabular .to_df() Throws Error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Tabular models: `to_df()` raises `AttributeError`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fastai import *\n", | |
"from fastai.tabular import *" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Tabular data should be in a pandas `DataFrame`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"path = untar_data(URLs.ADULT_SAMPLE)\n", | |
"df = pd.read_csv(path/'adult.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dep_var = '>=50k'\n", | |
"cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", | |
"cont_names = ['age', 'fnlwgt', 'education-num']\n", | |
"procs = [FillMissing, Categorify, Normalize]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", | |
" .split_by_idx(list(range(800,1000)))\n", | |
" .label_from_df(cols=dep_var)\n", | |
" .add_test(test, label=0)\n", | |
" .databunch())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <tr>\n", | |
" <th>workclass</th>\n", | |
" <th>education</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>education-num_na</th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> Bachelors</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Sales</th>\n", | |
" <th> Wife</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-0.4095</th>\n", | |
" <th>-1.4633</th>\n", | |
" <th>1.1422</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> HS-grad</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Protective-serv</th>\n", | |
" <th> Husband</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-0.2629</th>\n", | |
" <th>0.6819</th>\n", | |
" <th>-0.4224</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Local-gov</th>\n", | |
" <th> HS-grad</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Exec-managerial</th>\n", | |
" <th> Husband</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>0.8365</th>\n", | |
" <th>-0.3475</th>\n", | |
" <th>-0.4224</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Local-gov</th>\n", | |
" <th> Some-college</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Protective-serv</th>\n", | |
" <th> Husband</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-0.2629</th>\n", | |
" <th>0.4134</th>\n", | |
" <th>-0.0312</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> ?</th>\n", | |
" <th> Masters</th>\n", | |
" <th> Widowed</th>\n", | |
" <th> ?</th>\n", | |
" <th> Not-in-family</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>2.0826</th>\n", | |
" <th>-0.8612</th>\n", | |
" <th>1.5334</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> Some-college</th>\n", | |
" <th> Never-married</th>\n", | |
" <th> Handlers-cleaners</th>\n", | |
" <th> Own-child</th>\n", | |
" <th> Black</th>\n", | |
" <th>False</th>\n", | |
" <th>1.3496</th>\n", | |
" <th>-0.6150</th>\n", | |
" <th>-0.0312</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Local-gov</th>\n", | |
" <th> Assoc-acdm</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Adm-clerical</th>\n", | |
" <th> Wife</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-0.5561</th>\n", | |
" <th>-0.1973</th>\n", | |
" <th>0.7511</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> Bachelors</th>\n", | |
" <th> Never-married</th>\n", | |
" <th> Exec-managerial</th>\n", | |
" <th> Own-child</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-1.2158</th>\n", | |
" <th>-0.0405</th>\n", | |
" <th>1.1422</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> 1st-4th</th>\n", | |
" <th> Married-civ-spouse</th>\n", | |
" <th> Machine-op-inspct</th>\n", | |
" <th> Husband</th>\n", | |
" <th> White</th>\n", | |
" <th>False</th>\n", | |
" <th>-0.1163</th>\n", | |
" <th>2.0479</th>\n", | |
" <th>-3.1604</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <th> Some-college</th>\n", | |
" <th> Never-married</th>\n", | |
" <th> Handlers-cleaners</th>\n", | |
" <th> Own-child</th>\n", | |
" <th> Black</th>\n", | |
" <th>False</th>\n", | |
" <th>-1.0692</th>\n", | |
" <th>-0.3017</th>\n", | |
" <th>-0.0312</th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
"</table>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"data.show_batch(rows=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "'int' object has no attribute 'relative_to'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-7-f50728457558>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36mto_df\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0;34m\"Create `pd.DataFrame` containing `items` from `self.x` and `self.y`\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 458\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mo\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdest\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m_relative_item_paths\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m_relative_item_path\u001b[0;34m(self, i)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 104\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 105\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'relative_to'" | |
] | |
} | |
], | |
"source": [ | |
"data.train_ds.to_df()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" <th>>=50k</th>\n", | |
" <th>education-num_na</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.763248</td>\n", | |
" <td>Private</td>\n", | |
" <td>-0.838107</td>\n", | |
" <td>Assoc-acdm</td>\n", | |
" <td>0.751083</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Wife</td>\n", | |
" <td>White</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>1902</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" <td>1</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.396752</td>\n", | |
" <td>Private</td>\n", | |
" <td>0.445849</td>\n", | |
" <td>Masters</td>\n", | |
" <td>1.533375</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>10520</td>\n", | |
" <td>0</td>\n", | |
" <td>45</td>\n", | |
" <td>United-States</td>\n", | |
" <td>1</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.043043</td>\n", | |
" <td>Private</td>\n", | |
" <td>-0.886792</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>-0.031209</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Unmarried</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>32</td>\n", | |
" <td>United-States</td>\n", | |
" <td>0</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-0.043043</td>\n", | |
" <td>Self-emp-inc</td>\n", | |
" <td>-0.728821</td>\n", | |
" <td>Prof-school</td>\n", | |
" <td>1.924521</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Asian-Pac-Islander</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" <td>1</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.250154</td>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>-1.018462</td>\n", | |
" <td>7th-8th</td>\n", | |
" <td>-0.031209</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Other-service</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>50</td>\n", | |
" <td>United-States</td>\n", | |
" <td>0</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age workclass fnlwgt education education-num \\\n", | |
"0 0.763248 Private -0.838107 Assoc-acdm 0.751083 \n", | |
"1 0.396752 Private 0.445849 Masters 1.533375 \n", | |
"2 -0.043043 Private -0.886792 HS-grad -0.031209 \n", | |
"3 -0.043043 Self-emp-inc -0.728821 Prof-school 1.924521 \n", | |
"4 0.250154 Self-emp-not-inc -1.018462 7th-8th -0.031209 \n", | |
"\n", | |
" marital-status occupation relationship race \\\n", | |
"0 Married-civ-spouse NaN Wife White \n", | |
"1 Divorced Exec-managerial Not-in-family White \n", | |
"2 Divorced NaN Unmarried Black \n", | |
"3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", | |
"4 Married-civ-spouse Other-service Wife Black \n", | |
"\n", | |
" sex capital-gain capital-loss hours-per-week native-country >=50k \\\n", | |
"0 Female 0 1902 40 United-States 1 \n", | |
"1 Male 10520 0 45 United-States 1 \n", | |
"2 Female 0 0 32 United-States 0 \n", | |
"3 Male 0 0 40 United-States 1 \n", | |
"4 Female 0 0 50 United-States 0 \n", | |
"\n", | |
" education-num_na \n", | |
"0 False \n", | |
"1 False \n", | |
"2 True \n", | |
"3 False \n", | |
"4 True " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.train_ds.x.xtra.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((32361, 3), (32561, 15))" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"conts = data.train_ds.x.conts\n", | |
"conts.shape, df.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['workclass',\n", | |
" 'education',\n", | |
" 'marital-status',\n", | |
" 'occupation',\n", | |
" 'relationship',\n", | |
" 'race',\n", | |
" 'education-num_na']" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.train_ds.x.cat_names" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "'TabularList' object has no attribute 'cats'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-14-6df08dc069ed>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m: 'TabularList' object has no attribute 'cats'" | |
] | |
} | |
], | |
"source": [ | |
"data.train_ds.x.cats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = tabular_learner(data, layers=[200,100], metrics=accuracy)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total time: 00:04\n", | |
"epoch train_loss valid_loss accuracy\n", | |
"1 0.364682 0.385048 0.820000 (00:04)\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"learn.fit(1, 1e-2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Inference" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"row = df.iloc[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1, tensor(0), tensor([0.6365, 0.3635]))" | |
] | |
}, | |
"execution_count": null, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"learn.predict(row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:fastai]", | |
"language": "python", | |
"name": "conda-env-fastai-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment