Skip to content

Instantly share code, notes, and snippets.

@Whamp
Created November 30, 2018 20:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Whamp/f44a341485ce98afb984c25c50f00a2f to your computer and use it in GitHub Desktop.
Save Whamp/f44a341485ce98afb984c25c50f00a2f to your computer and use it in GitHub Desktop.
Tabular .to_df() Throws Error
Display the source blob
Display the rendered blob
Raw
{
"cells": [
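{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tabular models\n",
"\n",
"Minimal reproduction: on a tabular `DataBunch`, calling `data.train_ds.to_df()` raises `AttributeError: 'int' object has no attribute 'relative_to'`. The cells after the failing call inspect the underlying `TabularList`, then train a model and run a prediction."
]
},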
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from fastai import *\n",
"from fastai.tabular import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tabular data should be in a pandas `DataFrame`. Here we load the Adult sample dataset (`adult.csv`) into one."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.ADULT_SAMPLE)\n",
"df = pd.read_csv(path/'adult.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"dep_var = '>=50k'\n",
"cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n",
"cont_names = ['age', 'fnlwgt', 'education-num']\n",
"procs = [FillMissing, Categorify, Normalize]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n",
" .split_by_idx(list(range(800,1000)))\n",
" .label_from_df(cols=dep_var)\n",
" .add_test(test, label=0)\n",
" .databunch())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <col width='10%'> <tr>\n",
" <th>workclass</th>\n",
" <th>education</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>education-num_na</th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>target</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> Bachelors</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Sales</th>\n",
" <th> Wife</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-0.4095</th>\n",
" <th>-1.4633</th>\n",
" <th>1.1422</th>\n",
" <th>1</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> HS-grad</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Protective-serv</th>\n",
" <th> Husband</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-0.2629</th>\n",
" <th>0.6819</th>\n",
" <th>-0.4224</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Local-gov</th>\n",
" <th> HS-grad</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Exec-managerial</th>\n",
" <th> Husband</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>0.8365</th>\n",
" <th>-0.3475</th>\n",
" <th>-0.4224</th>\n",
" <th>1</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Local-gov</th>\n",
" <th> Some-college</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Protective-serv</th>\n",
" <th> Husband</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-0.2629</th>\n",
" <th>0.4134</th>\n",
" <th>-0.0312</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> ?</th>\n",
" <th> Masters</th>\n",
" <th> Widowed</th>\n",
" <th> ?</th>\n",
" <th> Not-in-family</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>2.0826</th>\n",
" <th>-0.8612</th>\n",
" <th>1.5334</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> Some-college</th>\n",
" <th> Never-married</th>\n",
" <th> Handlers-cleaners</th>\n",
" <th> Own-child</th>\n",
" <th> Black</th>\n",
" <th>False</th>\n",
" <th>1.3496</th>\n",
" <th>-0.6150</th>\n",
" <th>-0.0312</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Local-gov</th>\n",
" <th> Assoc-acdm</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Adm-clerical</th>\n",
" <th> Wife</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-0.5561</th>\n",
" <th>-0.1973</th>\n",
" <th>0.7511</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> Bachelors</th>\n",
" <th> Never-married</th>\n",
" <th> Exec-managerial</th>\n",
" <th> Own-child</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-1.2158</th>\n",
" <th>-0.0405</th>\n",
" <th>1.1422</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> 1st-4th</th>\n",
" <th> Married-civ-spouse</th>\n",
" <th> Machine-op-inspct</th>\n",
" <th> Husband</th>\n",
" <th> White</th>\n",
" <th>False</th>\n",
" <th>-0.1163</th>\n",
" <th>2.0479</th>\n",
" <th>-3.1604</th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <th> Some-college</th>\n",
" <th> Never-married</th>\n",
" <th> Handlers-cleaners</th>\n",
" <th> Own-child</th>\n",
" <th> Black</th>\n",
" <th>False</th>\n",
" <th>-1.0692</th>\n",
" <th>-0.3017</th>\n",
" <th>-0.0312</th>\n",
" <th>0</th>\n",
" </tr>\n",
"</table>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data.show_batch(rows=10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'int' object has no attribute 'relative_to'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-f50728457558>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36mto_df\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0;34m\"Create `pd.DataFrame` containing `items` from `self.x` and `self.y`\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 458\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mo\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdest\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m_relative_item_paths\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/data_block.py\u001b[0m in \u001b[0;36m_relative_item_path\u001b[0;34m(self, i)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcols\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 104\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelative_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 105\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_relative_item_paths\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_relative_item_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_of\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'relative_to'"
]
}
],
"source": [
"data.train_ds.to_df()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>workclass</th>\n",
" <th>fnlwgt</th>\n",
" <th>education</th>\n",
" <th>education-num</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" <th>native-country</th>\n",
" <th>&gt;=50k</th>\n",
" <th>education-num_na</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.763248</td>\n",
" <td>Private</td>\n",
" <td>-0.838107</td>\n",
" <td>Assoc-acdm</td>\n",
" <td>0.751083</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>NaN</td>\n",
" <td>Wife</td>\n",
" <td>White</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>1902</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.396752</td>\n",
" <td>Private</td>\n",
" <td>0.445849</td>\n",
" <td>Masters</td>\n",
" <td>1.533375</td>\n",
" <td>Divorced</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>10520</td>\n",
" <td>0</td>\n",
" <td>45</td>\n",
" <td>United-States</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.043043</td>\n",
" <td>Private</td>\n",
" <td>-0.886792</td>\n",
" <td>HS-grad</td>\n",
" <td>-0.031209</td>\n",
" <td>Divorced</td>\n",
" <td>NaN</td>\n",
" <td>Unmarried</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>32</td>\n",
" <td>United-States</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.043043</td>\n",
" <td>Self-emp-inc</td>\n",
" <td>-0.728821</td>\n",
" <td>Prof-school</td>\n",
" <td>1.924521</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Husband</td>\n",
" <td>Asian-Pac-Islander</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.250154</td>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>-1.018462</td>\n",
" <td>7th-8th</td>\n",
" <td>-0.031209</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Other-service</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>United-States</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age workclass fnlwgt education education-num \\\n",
"0 0.763248 Private -0.838107 Assoc-acdm 0.751083 \n",
"1 0.396752 Private 0.445849 Masters 1.533375 \n",
"2 -0.043043 Private -0.886792 HS-grad -0.031209 \n",
"3 -0.043043 Self-emp-inc -0.728821 Prof-school 1.924521 \n",
"4 0.250154 Self-emp-not-inc -1.018462 7th-8th -0.031209 \n",
"\n",
" marital-status occupation relationship race \\\n",
"0 Married-civ-spouse NaN Wife White \n",
"1 Divorced Exec-managerial Not-in-family White \n",
"2 Divorced NaN Unmarried Black \n",
"3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n",
"4 Married-civ-spouse Other-service Wife Black \n",
"\n",
" sex capital-gain capital-loss hours-per-week native-country >=50k \\\n",
"0 Female 0 1902 40 United-States 1 \n",
"1 Male 10520 0 45 United-States 1 \n",
"2 Female 0 0 32 United-States 0 \n",
"3 Male 0 0 40 United-States 1 \n",
"4 Female 0 0 50 United-States 0 \n",
"\n",
" education-num_na \n",
"0 False \n",
"1 False \n",
"2 True \n",
"3 False \n",
"4 True "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.train_ds.x.xtra.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((32361, 3), (32561, 15))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conts = data.train_ds.x.conts\n",
"conts.shape, df.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['workclass',\n",
" 'education',\n",
" 'marital-status',\n",
" 'occupation',\n",
" 'relationship',\n",
" 'race',\n",
" 'education-num_na']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.train_ds.x.cat_names"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'TabularList' object has no attribute 'cats'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-14-6df08dc069ed>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'TabularList' object has no attribute 'cats'"
]
}
],
"source": [
"data.train_ds.x.cats"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learn = tabular_learner(data, layers=[200,100], metrics=accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total time: 00:04\n",
"epoch train_loss valid_loss accuracy\n",
"1 0.364682 0.385048 0.820000 (00:04)\n",
"\n"
]
}
],
"source": [
"learn.fit(1, 1e-2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inference"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"row = df.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, tensor(0), tensor([0.6365, 0.3635]))"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learn.predict(row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:fastai]",
"language": "python",
"name": "conda-env-fastai-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment