Skip to content

Instantly share code, notes, and snippets.

@pplonski
Created May 6, 2019 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pplonski/da681470884a806662272c7d44fe6e45 to your computer and use it in GitHub Desktop.
Save pplonski/da681470884a806662272c7d44fe6e45 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import openml\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from supervised.automl import AutoML\n",
"\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sklearn.model_selection\n",
"from sklearn.metrics import log_loss, f1_score"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Read the 'adult' CSV; skipinitialspace drops the blanks that follow each delimiter.\n",
"df = pd.read_csv(\n",
"    'https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv',\n",
"    skipinitialspace=True,\n",
")\n",
"\n",
"# 'income' is the prediction target; every remaining column is a feature.\n",
"x_cols = [name for name in df.columns if name != 'income']\n",
"X = df[x_cols]\n",
"y = df['income']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>workclass</th>\n",
" <th>fnlwgt</th>\n",
" <th>education</th>\n",
" <th>education-num</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" <th>native-country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>39</td>\n",
" <td>State-gov</td>\n",
" <td>77516</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>2174</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>50</td>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>83311</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>38</td>\n",
" <td>Private</td>\n",
" <td>215646</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>53</td>\n",
" <td>Private</td>\n",
" <td>234721</td>\n",
" <td>11th</td>\n",
" <td>7</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>Private</td>\n",
" <td>338409</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>Cuba</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age workclass fnlwgt education education-num \\\n",
"0 39 State-gov 77516 Bachelors 13 \n",
"1 50 Self-emp-not-inc 83311 Bachelors 13 \n",
"2 38 Private 215646 HS-grad 9 \n",
"3 53 Private 234721 11th 7 \n",
"4 28 Private 338409 Bachelors 13 \n",
"\n",
" marital-status occupation relationship race sex \\\n",
"0 Never-married Adm-clerical Not-in-family White Male \n",
"1 Married-civ-spouse Exec-managerial Husband White Male \n",
"2 Divorced Handlers-cleaners Not-in-family White Male \n",
"3 Married-civ-spouse Handlers-cleaners Husband Black Male \n",
"4 Married-civ-spouse Prof-specialty Wife Black Female \n",
"\n",
" capital-gain capital-loss hours-per-week native-country \n",
"0 2174 0 40 United-States \n",
"1 0 0 13 United-States \n",
"2 0 0 40 United-States \n",
"3 0 0 40 United-States \n",
"4 0 0 40 Cuba "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five feature rows (head() defaults to n=5).\n",
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Fixed seed so the 70/30 train/test partition is repeatable.\n",
"seed = 1234\n",
"X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.3,\n",
"    random_state=seed,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "ValueError",
"evalue": "could not convert string to float: 'Private'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'"
]
}
],
"source": [
"# Seed the forest too (reusing `seed` from the split cell): without random_state\n",
"# every fit draws different bootstrap samples and results are not reproducible.\n",
"rf = RandomForestClassifier(n_estimators=1000, random_state=seed)\n",
"# Assigning the result of fit() keeps the cell from echoing the estimator repr.\n",
"rf = rf.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# The Random Forest does not work with categorical (string) columns.\n",
"# Let's convert them with integer encoding."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'<' not supported between instances of 'str' and 'float'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-22-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcolumn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'workclass'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'education'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'marital-status'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'occupation'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'relationship'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'race'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'native-country'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 234\u001b[0m \"\"\"\n\u001b[1;32m 235\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumn_or_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 237\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 106\u001b[0m \"\"\"\n\u001b[1;32m 107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;31m# only used in _encode below, see docstring there for details\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'str' and 'float'"
]
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Integer-encode each text column: learn the mapping on the training split,\n",
"# then apply that same mapping to the test split.\n",
"for column in [\n",
"    'workclass',\n",
"    'education',\n",
"    'marital-status',\n",
"    'occupation',\n",
"    'relationship',\n",
"    'race',\n",
"    'sex',\n",
"    'native-country',\n",
"]:\n",
"    categorical_convert = LabelEncoder()\n",
"    X_train[column] = categorical_convert.fit_transform(X_train[column])\n",
"    X_test[column] = categorical_convert.transform(X_test[column])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Still failing?\n",
"# It looks like we have missing values (NaN) in the data."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"29700 Private\n",
"1529 Private\n",
"27477 Private\n",
"31950 Private\n",
"4732 Private\n",
"10858 Private\n",
"24518 Private\n",
"10035 NaN\n",
"1324 Private\n",
"26727 Private\n",
"10255 Self-emp-not-inc\n",
"25713 Private\n",
"32541 NaN\n",
"22242 Private\n",
"8743 State-gov\n",
"21018 Private\n",
"15462 Private\n",
"24948 Private\n",
"4630 Private\n",
"31931 Private\n",
"18589 Private\n",
"13984 Private\n",
"28047 Private\n",
"21419 Self-emp-not-inc\n",
"29795 Private\n",
"12366 Private\n",
"25362 Private\n",
"15778 NaN\n",
"18061 Private\n",
"16192 Private\n",
" ... \n",
"2558 State-gov\n",
"16630 State-gov\n",
"18815 Private\n",
"9161 Private\n",
"27984 Private\n",
"27973 Private\n",
"7644 Private\n",
"14192 State-gov\n",
"26744 Private\n",
"27066 Private\n",
"13686 Private\n",
"7962 Private\n",
"8060 Private\n",
"7916 Private\n",
"1182 Private\n",
"32427 Private\n",
"8222 NaN\n",
"23706 Private\n",
"9449 Private\n",
"8471 Private\n",
"23217 Private\n",
"32399 Private\n",
"22521 Private\n",
"17048 Federal-gov\n",
"23924 Private\n",
"27852 Private\n",
"23605 Private\n",
"1318 State-gov\n",
"25299 Private\n",
"27439 Private\n",
"Name: workclass, Length: 22792, dtype: object"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Name the column explicitly instead of relying on the loop variable left over\n",
"# from the encoding cell; the NaN entries are the missing values.\n",
"X_train['workclass']"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Let's fill the missing values with the mode (most frequent value) of each column."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Most frequent value per column, computed on the training split only.\n",
"# mode() may return several rows when values tie; iloc[0] keeps the first row.\n",
"train_mode = X_train.mode().iloc[0]\n",
"\n",
"# Fill both splits with the training modes so the test split is imputed\n",
"# with values learned from the training data alone.\n",
"X_train = X_train.fillna(train_mode)\n",
"X_test = X_test.fillna(train_mode)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"29700 Private\n",
"1529 Private\n",
"27477 Private\n",
"31950 Private\n",
"4732 Private\n",
"10858 Private\n",
"24518 Private\n",
"10035 Private\n",
"1324 Private\n",
"26727 Private\n",
"10255 Self-emp-not-inc\n",
"25713 Private\n",
"32541 Private\n",
"22242 Private\n",
"8743 State-gov\n",
"21018 Private\n",
"15462 Private\n",
"24948 Private\n",
"4630 Private\n",
"31931 Private\n",
"18589 Private\n",
"13984 Private\n",
"28047 Private\n",
"21419 Self-emp-not-inc\n",
"29795 Private\n",
"12366 Private\n",
"25362 Private\n",
"15778 Private\n",
"18061 Private\n",
"16192 Private\n",
" ... \n",
"2558 State-gov\n",
"16630 State-gov\n",
"18815 Private\n",
"9161 Private\n",
"27984 Private\n",
"27973 Private\n",
"7644 Private\n",
"14192 State-gov\n",
"26744 Private\n",
"27066 Private\n",
"13686 Private\n",
"7962 Private\n",
"8060 Private\n",
"7916 Private\n",
"1182 Private\n",
"32427 Private\n",
"8222 Private\n",
"23706 Private\n",
"9449 Private\n",
"8471 Private\n",
"23217 Private\n",
"32399 Private\n",
"22521 Private\n",
"17048 Federal-gov\n",
"23924 Private\n",
"27852 Private\n",
"23605 Private\n",
"1318 State-gov\n",
"25299 Private\n",
"27439 Private\n",
"Name: workclass, Length: 22792, dtype: object"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-check the same column by name (not via the leftover loop variable) to\n",
"# confirm that the NaN entries were replaced.\n",
"X_train['workclass']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "could not convert string to float: 'Private'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-31-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'"
]
}
],
"source": [
"# Seed the forest too (reusing `seed` from the split cell): without random_state\n",
"# every fit draws different bootstrap samples and results are not reproducible.\n",
"rf = RandomForestClassifier(n_estimators=1000, random_state=seed)\n",
"# Assigning the result of fit() keeps the cell from echoing the estimator repr.\n",
"rf = rf.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Oh, yes! We still need to convert the categorical columns to integers!"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "y contains previously unseen labels: 'Holand-Netherlands'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'Holand-Netherlands'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-33-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 106\u001b[0m \"\"\"\n\u001b[1;32m 107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m raise ValueError(\"y contains previously unseen labels: %s\"\n\u001b[0;32m---> 71\u001b[0;31m % str(e))\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: y contains previously unseen labels: 'Holand-Netherlands'"
]
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"for column in ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex','native-country']:\n",
" categorical_convert = LabelEncoder()\n",
" X_train[column] = categorical_convert.fit_transform(X_train[column])\n",
" X_test[column] = categorical_convert.transform(X_test[column])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# what!?@^%$^%$^ ????????\n",
"# looks like X_test contains labels that never occur in X_train ...\n",
"# which can happen in real life as well :) and will break your production model!\n",
"# what to do in this situation ?"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>workclass</th>\n",
" <th>fnlwgt</th>\n",
" <th>education</th>\n",
" <th>education-num</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" <th>native-country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1055</th>\n",
" <td>32</td>\n",
" <td>3</td>\n",
" <td>87643</td>\n",
" <td>9</td>\n",
" <td>13</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26305</th>\n",
" <td>27</td>\n",
" <td>3</td>\n",
" <td>207352</td>\n",
" <td>9</td>\n",
" <td>13</td>\n",
" <td>2</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>India</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9568</th>\n",
" <td>73</td>\n",
" <td>6</td>\n",
" <td>74040</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15071</th>\n",
" <td>39</td>\n",
" <td>3</td>\n",
" <td>174924</td>\n",
" <td>11</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>14344</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>31</td>\n",
" <td>3</td>\n",
" <td>264936</td>\n",
" <td>15</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age workclass fnlwgt education education-num marital-status \\\n",
"1055 32 3 87643 9 13 2 \n",
"26305 27 3 207352 9 13 2 \n",
"9568 73 6 74040 5 4 0 \n",
"15071 39 3 174924 11 9 5 \n",
"2012 31 3 264936 15 10 4 \n",
"\n",
" occupation relationship race sex capital-gain capital-loss \\\n",
"1055 11 0 4 1 0 0 \n",
"26305 12 0 1 1 0 0 \n",
"9568 7 1 1 0 0 0 \n",
"15071 3 1 4 1 14344 0 \n",
"2012 7 1 4 0 0 0 \n",
"\n",
" hours-per-week native-country \n",
"1055 40 United-States \n",
"26305 40 India \n",
"9568 40 United-States \n",
"15071 40 United-States \n",
"2012 40 United-States "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Only native-country still holds raw strings here: the encoding loop raised on that\n",
"# column before X_test was reassigned; the other categoricals are already integer codes.\n",
"X_test.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# we need to assign a new integer for this new label or remove this row from predictions\n",
"# let's hack it ..."
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Work around the ValueError above: teach the fitted encoder the labels that occur\n",
"# only in the test split, then encode the test column.\n",
"# `categorical_convert` is the encoder left over from the failed loop, i.e. the one\n",
"# fitted on X_train['native-country'] (the last column the loop reached).\n",
"column = 'native-country'\n",
"# Guard: once the column holds integer codes the work is done. Without it, re-running\n",
"# this cell would append those integer codes to classes_ as if they were new labels.\n",
"if not pd.api.types.is_integer_dtype(X_test[column]):\n",
"    all_values = np.unique(X_test[column].tolist())\n",
"    diff = np.setdiff1d(all_values, categorical_convert.classes_)\n",
"    # Appending (instead of re-fitting) keeps every training code unchanged; the unseen\n",
"    # labels receive the next free integers after the fitted classes.\n",
"    # Caveat: the model never saw these codes during training.\n",
"    categorical_convert.classes_ = np.concatenate((categorical_convert.classes_, diff))\n",
"    X_test[column] = categorical_convert.transform(X_test[column])"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
" 34, 35, 36, 37, 38, 39])"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# integer codes present in the training split for `column`\n",
"np.unique(X_train[column].values)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
" 34, 35, 36, 37, 38, 39, 40])"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# integer codes present in the test split; in the stored output there is one extra\n",
"# code at the end, which is the unseen label appended to classes_ above\n",
"np.unique(X_test[column].values)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# can we train now?"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# A fixed random_state makes the fitted forest, and the metric computed from it below,\n",
"# reproducible on Restart & Run All (1234 is the seed this notebook uses for its split).\n",
"rf = RandomForestClassifier(n_estimators=1000, random_state=1234)\n",
"rf = rf.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# :), let's do predictions! and compute some metrics!"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.338857591429531"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hold-out log loss of the forest. [:, 1] selects the second predict_proba column,\n",
"# presumably P(income == '>50K') -- verify against rf.classes_.\n",
"positive_proba = rf.predict_proba(X_test)[:, 1]\n",
"log_loss(y_test, positive_proba)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# Please remember that for production you need to save:\n",
"# 1. the values used for missing-value imputation\n",
"# 2. encoders to properly convert categoricals to integers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# and the AutoML way"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# let's reread the data, to have raw data"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"# Reload the raw CSV so that AutoML receives un-encoded data (the splits used above\n",
"# were label-encoded in place).\n",
"data_url = 'https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv'\n",
"df = pd.read_csv(data_url, skipinitialspace=True)\n",
"# every column except the target is a feature\n",
"x_cols = [name for name in df.columns if name != 'income']\n",
"X, y = df[x_cols], df['income']"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# 70/30 hold-out split with a fixed seed so the partition is reproducible\n",
"seed = 1234\n",
"X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n",
"    X, y, test_size=0.3, random_state=seed\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8da2d19d9f4d4096b315f2ca24f55960",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Learner CatBoost final loss 0.4962725171175057 time 9.11 seconds\n",
"Learner Xgboost final loss 0.4756562801743394 time 5.83 seconds\n",
"Learner RF final loss 0.3065861813563275 time 3.08 seconds\n",
"Learner RF final loss 0.305368676349097 time 2.88 seconds\n",
"Learner RF final loss 0.3094612291478825 time 2.89 seconds\n",
"Learner LightGBM final loss 0.530521322787529 time 31.48 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Learner NN final loss 0.4039734161034428 time 21.71 seconds\n",
"Learner Ensemble final loss 0.29935576492662624 time 2.54 seconds\n",
"\n"
]
}
],
"source": [
"# AutoML is fitted on the raw split: its stored model description below lists its own\n",
"# missing-value filling and categorical-to-int conversion steps.\n",
"time_budget = 60  # passed as total_time_limit; presumably seconds -- verify in the library docs\n",
"automl = AutoML(total_time_limit=time_budget)\n",
"automl.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.3114670474446203"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# hold-out log loss of the AutoML model, using the 'p_>50K' column of its predictions\n",
"automl_predictions = automl.predict(X_test)\n",
"log_loss(y_test, automl_predictions['p_>50K'])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>model_type</th>\n",
" <th>metric_type</th>\n",
" <th>metric_value</th>\n",
" <th>train_time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8495f070-fe32-4c22-82d2-3f03997b90c6</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.496273</td>\n",
" <td>9.114185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7abe66a3-844f-4add-9a88-674fe6fd9b73</td>\n",
" <td>Xgboost</td>\n",
" <td>logloss</td>\n",
" <td>0.475656</td>\n",
" <td>5.827809</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3756b039-2a98-459b-b4ed-c3967b0fee5c</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.306586</td>\n",
" <td>3.079033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>066e83f8-42f3-4f90-8956-a1c60caa1e55</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.305369</td>\n",
" <td>2.877668</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>16a97fa8-4fa7-4545-b694-b74705acd759</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.309461</td>\n",
" <td>2.892735</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2eeabf78-4303-44da-bc0e-37a398d48ff3</td>\n",
" <td>LightGBM</td>\n",
" <td>logloss</td>\n",
" <td>0.530521</td>\n",
" <td>31.484354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0e5848b1-394b-449f-acf2-93eca2b7e123</td>\n",
" <td>NN</td>\n",
" <td>logloss</td>\n",
" <td>0.403973</td>\n",
" <td>21.710288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3c86db84-c765-46d4-96aa-35ff4f18153c</td>\n",
" <td>Ensemble</td>\n",
" <td>logloss</td>\n",
" <td>0.299356</td>\n",
" <td>2.535452</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid model_type metric_type metric_value \\\n",
"0 8495f070-fe32-4c22-82d2-3f03997b90c6 CatBoost logloss 0.496273 \n",
"1 7abe66a3-844f-4add-9a88-674fe6fd9b73 Xgboost logloss 0.475656 \n",
"2 3756b039-2a98-459b-b4ed-c3967b0fee5c RF logloss 0.306586 \n",
"3 066e83f8-42f3-4f90-8956-a1c60caa1e55 RF logloss 0.305369 \n",
"4 16a97fa8-4fa7-4545-b694-b74705acd759 RF logloss 0.309461 \n",
"5 2eeabf78-4303-44da-bc0e-37a398d48ff3 LightGBM logloss 0.530521 \n",
"6 0e5848b1-394b-449f-acf2-93eca2b7e123 NN logloss 0.403973 \n",
"7 3c86db84-c765-46d4-96aa-35ff4f18153c Ensemble logloss 0.299356 \n",
"\n",
" train_time \n",
"0 9.114185 \n",
"1 5.827809 \n",
"2 3.079033 \n",
"3 2.877668 \n",
"4 2.892735 \n",
"5 31.484354 \n",
"6 21.710288 \n",
"7 2.535452 "
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one row per trained model: metric type, metric value and training time\n",
"leaderboard = automl.get_leaderboard()\n",
"leaderboard"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'best_model': {'library_version': '0.1',\n",
" 'algorithm_name': 'Greedy Ensemble',\n",
" 'algorithm_short_name': 'Ensemble',\n",
" 'uid': '3c86db84-c765-46d4-96aa-35ff4f18153c',\n",
" 'models': [{'model': {'uid': '3756b039-2a98-459b-b4ed-c3967b0fee5c',\n",
" 'algorithm_short_name': 'RF',\n",
" 'framework_file': '3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n",
" 'framework_file_path': '/tmp/3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n",
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
" 'learners': [{'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '6314141c-61a4-41a2-8ede-9f5ccfa443fd',\n",
" 'model_file': '6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n",
" 'model_file_path': '/tmp/6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '80e35a7c-51ac-445a-937e-2afa4e178d82',\n",
" 'model_file': '80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n",
" 'model_file_path': '/tmp/80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe',\n",
" 'model_file': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n",
" 'model_file_path': '/tmp/d4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046',\n",
" 'model_file': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n",
" 'model_file_path': '/tmp/dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '3867f64a-6a73-482b-bf45-dc33de6ff63c',\n",
" 'model_file': '3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n",
" 'model_file_path': '/tmp/3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}}],\n",
" 'params': {'additional': {'trees_in_step': 10,\n",
" 'train_cant_improve_limit': 5,\n",
" 'max_steps': 500,\n",
" 'max_rows_limit': None,\n",
" 'max_cols_limit': None},\n",
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
" 'categorical_to_int'],\n",
" 'education': ['categorical_to_int'],\n",
" 'marital-status': ['categorical_to_int'],\n",
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n",
" 'relationship': ['categorical_to_int'],\n",
" 'race': ['categorical_to_int'],\n",
" 'sex': ['categorical_to_int'],\n",
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n",
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
" 'validation': {'validation_type': 'kfold',\n",
" 'k_folds': 5,\n",
" 'shuffle': True},\n",
" 'learner': {'model_type': 'RF',\n",
" 'seed': 3,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.6,\n",
" 'min_samples_split': 8,\n",
" 'min_samples_leaf': 20}}},\n",
" 'repeat': 2},\n",
" {'model': {'uid': '066e83f8-42f3-4f90-8956-a1c60caa1e55',\n",
" 'algorithm_short_name': 'RF',\n",
" 'framework_file': '066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n",
" 'framework_file_path': '/tmp/066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n",
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
" 'learners': [{'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '7c909c7e-a998-479c-a7d7-d02e584fc70e',\n",
" 'model_file': '7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n",
" 'model_file_path': '/tmp/7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0',\n",
" 'model_file': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n",
" 'model_file_path': '/tmp/d2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7',\n",
" 'model_file': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n",
" 'model_file_path': '/tmp/6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'b153e6ba-ad42-402c-a249-68eb50622fa5',\n",
" 'model_file': 'b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n",
" 'model_file_path': '/tmp/b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea',\n",
" 'model_file': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n",
" 'model_file_path': '/tmp/d9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}}],\n",
" 'params': {'additional': {'trees_in_step': 10,\n",
" 'train_cant_improve_limit': 5,\n",
" 'max_steps': 500,\n",
" 'max_rows_limit': None,\n",
" 'max_cols_limit': None},\n",
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
" 'categorical_to_int'],\n",
" 'education': ['categorical_to_int'],\n",
" 'marital-status': ['categorical_to_int'],\n",
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n",
" 'relationship': ['categorical_to_int'],\n",
" 'race': ['categorical_to_int'],\n",
" 'sex': ['categorical_to_int'],\n",
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n",
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
" 'validation': {'validation_type': 'kfold',\n",
" 'k_folds': 5,\n",
" 'shuffle': True},\n",
" 'learner': {'model_type': 'RF',\n",
" 'seed': 4,\n",
" 'criterion': 'entropy',\n",
" 'max_features': 0.3,\n",
" 'min_samples_split': 40,\n",
" 'min_samples_leaf': 5}}},\n",
" 'repeat': 3},\n",
" {'model': {'uid': '16a97fa8-4fa7-4545-b694-b74705acd759',\n",
" 'algorithm_short_name': 'RF',\n",
" 'framework_file': '16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n",
" 'framework_file_path': '/tmp/16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n",
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
" {'missing_values': [{'fill_method': 'na_fill_median',\n",
" 'fill_params': {'workclass': 'Private',\n",
" 'occupation': 'Prof-specialty',\n",
" 'native-country': 'United-States'}}],\n",
" 'categorical': [{'convert_method': 'categorical_to_int',\n",
" 'convert_params': {'workclass': {'Federal-gov': 0,\n",
" 'Local-gov': 1,\n",
" 'Never-worked': 2,\n",
" 'Private': 3,\n",
" 'Self-emp-inc': 4,\n",
" 'Self-emp-not-inc': 5,\n",
" 'State-gov': 6,\n",
" 'Without-pay': 7},\n",
" 'education': {'10th': 0,\n",
" '11th': 1,\n",
" '12th': 2,\n",
" '1st-4th': 3,\n",
" '5th-6th': 4,\n",
" '7th-8th': 5,\n",
" '9th': 6,\n",
" 'Assoc-acdm': 7,\n",
" 'Assoc-voc': 8,\n",
" 'Bachelors': 9,\n",
" 'Doctorate': 10,\n",
" 'HS-grad': 11,\n",
" 'Masters': 12,\n",
" 'Preschool': 13,\n",
" 'Prof-school': 14,\n",
" 'Some-college': 15},\n",
" 'marital-status': {'Divorced': 0,\n",
" 'Married-AF-spouse': 1,\n",
" 'Married-civ-spouse': 2,\n",
" 'Married-spouse-absent': 3,\n",
" 'Never-married': 4,\n",
" 'Separated': 5,\n",
" 'Widowed': 6},\n",
" 'occupation': {'Adm-clerical': 0,\n",
" 'Armed-Forces': 1,\n",
" 'Craft-repair': 2,\n",
" 'Exec-managerial': 3,\n",
" 'Farming-fishing': 4,\n",
" 'Handlers-cleaners': 5,\n",
" 'Machine-op-inspct': 6,\n",
" 'Other-service': 7,\n",
" 'Priv-house-serv': 8,\n",
" 'Prof-specialty': 9,\n",
" 'Protective-serv': 10,\n",
" 'Sales': 11,\n",
" 'Tech-support': 12,\n",
" 'Transport-moving': 13},\n",
" 'relationship': {'Husband': 0,\n",
" 'Not-in-family': 1,\n",
" 'Other-relative': 2,\n",
" 'Own-child': 3,\n",
" 'Unmarried': 4,\n",
" 'Wife': 5},\n",
" 'race': {'Amer-Indian-Eskimo': 0,\n",
" 'Asian-Pac-Islander': 1,\n",
" 'Black': 2,\n",
" 'Other': 3,\n",
" 'White': 4},\n",
" 'sex': {'Female': 0, 'Male': 1},\n",
" 'native-country': {'Cambodia': 0,\n",
" 'Canada': 1,\n",
" 'China': 2,\n",
" 'Columbia': 3,\n",
" 'Cuba': 4,\n",
" 'Dominican-Republic': 5,\n",
" 'Ecuador': 6,\n",
" 'El-Salvador': 7,\n",
" 'England': 8,\n",
" 'France': 9,\n",
" 'Germany': 10,\n",
" 'Greece': 11,\n",
" 'Guatemala': 12,\n",
" 'Haiti': 13,\n",
" 'Honduras': 14,\n",
" 'Hong': 15,\n",
" 'Hungary': 16,\n",
" 'India': 17,\n",
" 'Iran': 18,\n",
" 'Ireland': 19,\n",
" 'Italy': 20,\n",
" 'Jamaica': 21,\n",
" 'Japan': 22,\n",
" 'Laos': 23,\n",
" 'Mexico': 24,\n",
" 'Nicaragua': 25,\n",
" 'Outlying-US(Guam-USVI-etc)': 26,\n",
" 'Peru': 27,\n",
" 'Philippines': 28,\n",
" 'Poland': 29,\n",
" 'Portugal': 30,\n",
" 'Puerto-Rico': 31,\n",
" 'Scotland': 32,\n",
" 'South': 33,\n",
" 'Taiwan': 34,\n",
" 'Thailand': 35,\n",
" 'Trinadad&Tobago': 36,\n",
" 'United-States': 37,\n",
" 'Vietnam': 38,\n",
" 'Yugoslavia': 39}}}],\n",
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
" 'learners': [{'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '46ecdf0b-a48a-43fa-87c0-47679a360e06',\n",
" 'model_file': '46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n",
" 'model_file_path': '/tmp/46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'cd981d70-2064-4d7f-b184-fee61cd23ff0',\n",
" 'model_file': 'cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n",
" 'model_file_path': '/tmp/cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '3ea42453-4bcf-4a93-8009-9ac09598bc29',\n",
" 'model_file': '3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n",
" 'model_file_path': '/tmp/3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b',\n",
" 'model_file': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n",
" 'model_file_path': '/tmp/fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}},\n",
" {'library_version': '0.20.3',\n",
" 'algorithm_name': 'Random Forest',\n",
" 'algorithm_short_name': 'RF',\n",
" 'uid': '493f0185-e331-4310-91c0-9a8cec5179ed',\n",
" 'model_file': '493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n",
" 'model_file_path': '/tmp/493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n",
" 'params': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}}],\n",
" 'params': {'additional': {'trees_in_step': 10,\n",
" 'train_cant_improve_limit': 5,\n",
" 'max_steps': 500,\n",
" 'max_rows_limit': None,\n",
" 'max_cols_limit': None},\n",
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
" 'categorical_to_int'],\n",
" 'education': ['categorical_to_int'],\n",
" 'marital-status': ['categorical_to_int'],\n",
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n",
" 'relationship': ['categorical_to_int'],\n",
" 'race': ['categorical_to_int'],\n",
" 'sex': ['categorical_to_int'],\n",
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n",
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
" 'validation': {'validation_type': 'kfold',\n",
" 'k_folds': 5,\n",
" 'shuffle': True},\n",
" 'learner': {'model_type': 'RF',\n",
" 'seed': 5,\n",
" 'criterion': 'gini',\n",
" 'max_features': 0.5,\n",
" 'min_samples_split': 20,\n",
" 'min_samples_leaf': 16}}},\n",
" 'repeat': 1}]},\n",
" 'threshold': 0.3792094447282226}"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"automl.to_json()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a8482f483fec443fa3bf6fff4a68f5ae",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Learner CatBoost final loss 0.4962725171175057 time 8.7 seconds\n",
"Learner CatBoost final loss 0.298971902901766 time 7.78 seconds\n",
"Learner CatBoost final loss 0.3687963242745068 time 6.72 seconds\n",
"Learner CatBoost final loss 0.5241412727811743 time 6.98 seconds\n",
"Learner CatBoost final loss 0.320242187891504 time 8.47 seconds\n",
"Learner CatBoost final loss 0.3276007892517636 time 6.67 seconds\n",
"Learner CatBoost final loss 0.31286838781837084 time 8.78 seconds\n",
"Learner CatBoost final loss 0.3194302661081719 time 8.27 seconds\n",
"Learner CatBoost final loss 0.308917492043316 time 8.73 seconds\n",
"Learner CatBoost final loss 0.5221297190975681 time 7.52 seconds\n",
"Learner Xgboost final loss 0.311372604774252 time 19.98 seconds\n",
"Learner Xgboost final loss 0.2842308795115343 time 44.59 seconds\n",
"Learner Xgboost final loss 0.5124198237210352 time 6.05 seconds\n",
"Learner Xgboost final loss 0.31983838345814286 time 28.92 seconds\n",
"Learner RF final loss 0.32201007412896604 time 6.64 seconds\n",
"Learner RF final loss 0.30181666534456947 time 6.41 seconds\n",
"Learner RF final loss 0.31178228545519937 time 6.59 seconds\n",
"Learner RF final loss 0.3077256287675534 time 6.34 seconds\n",
"Learner RF final loss 0.3037423508551022 time 6.51 seconds\n",
"Learner RF final loss 0.3022910292393144 time 6.39 seconds\n",
"Learner RF final loss 0.3024787249684428 time 6.36 seconds\n",
"Learner RF final loss 0.3051025531513369 time 6.43 seconds\n",
"Learner RF final loss 0.3009434792047708 time 6.39 seconds\n",
"Learner RF final loss 0.3059040438520827 time 6.35 seconds\n",
"Learner LightGBM final loss 0.52685725579385 time 74.56 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Learner NN final loss 0.4404172010190013 time 37.23 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" return self.partial_fit(X, y)\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Learner NN final loss 0.44313672512751584 time 49.54 seconds\n",
"Learner CatBoost final loss 0.2924718431766818 time 7.46 seconds\n",
"Learner CatBoost final loss 0.3008517882859747 time 7.93 seconds\n",
"Learner RF final loss 0.30141657521244086 time 6.42 seconds\n",
"Learner RF final loss 0.30108077775213377 time 6.44 seconds\n",
"Learner RF final loss 0.3029979984348817 time 6.36 seconds\n",
"Learner RF final loss 0.30137187809404953 time 6.37 seconds\n",
"Learner RF final loss 0.3039241119152016 time 6.36 seconds\n",
"Learner CatBoost final loss 0.29292902314568803 time 7.38 seconds\n",
"Learner Ensemble final loss 0.2841549706146483 time 63.89 seconds\n",
"\n"
]
}
],
"source": [
"# Start a fresh AutoML search with a total time budget of ten minutes (10 * 60), then fit it on the training data.\n",
"automl = AutoML(\n",
"    total_time_limit=10 * 60,\n",
")\n",
"automl.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.2975106252933293"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log_loss(y_test, automl.predict(X_test)['p_>50K'])"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>model_type</th>\n",
" <th>metric_type</th>\n",
" <th>metric_value</th>\n",
" <th>train_time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>82cab950-7ce3-42f4-bd7d-886ad5553643</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.496273</td>\n",
" <td>8.698081</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.298972</td>\n",
" <td>7.781434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3adba1ba-fb1b-43ae-afcf-969a2c6e7375</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.368796</td>\n",
" <td>6.723236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>f8dea4a9-abf1-4218-8361-63d3b795a752</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.524141</td>\n",
" <td>6.980789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>74bc7a7d-09b4-4ece-9035-686a33612bed</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.320242</td>\n",
" <td>8.472449</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>c1a40c3a-0fe6-466e-9bda-e0b356254f71</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.327601</td>\n",
" <td>6.673680</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.312868</td>\n",
" <td>8.777036</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>6b67440e-6001-4acc-bbef-bd0cf1052598</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.319430</td>\n",
" <td>8.268195</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.308917</td>\n",
" <td>8.730249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>fe348a66-22c2-4ecd-9bd5-9293ace74ed3</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.522130</td>\n",
" <td>7.523533</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>bee786ab-c29d-4404-b904-c1937330cca5</td>\n",
" <td>Xgboost</td>\n",
" <td>logloss</td>\n",
" <td>0.311373</td>\n",
" <td>19.979386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>a0ff281d-efc9-4593-ae0a-3973acc7badb</td>\n",
" <td>Xgboost</td>\n",
" <td>logloss</td>\n",
" <td>0.284231</td>\n",
" <td>44.590009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>ed0bdf42-6b57-42e3-aec7-105522f83f68</td>\n",
" <td>Xgboost</td>\n",
" <td>logloss</td>\n",
" <td>0.512420</td>\n",
" <td>6.050704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>277ca639-5908-4347-87f8-9ecabd1ad4a3</td>\n",
" <td>Xgboost</td>\n",
" <td>logloss</td>\n",
" <td>0.319838</td>\n",
" <td>28.923516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30c6b1e4-16b2-4590-87ca-91814a83c091</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.322010</td>\n",
" <td>6.638074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>cc9ed647-bbca-4ede-a85a-ce9ef001741d</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.301817</td>\n",
" <td>6.411106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>eadcee3f-7982-48c6-8b82-11ad9cfdefe7</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.311782</td>\n",
" <td>6.589697</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>54aac109-abc7-4ae7-b613-601df85ea44e</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.307726</td>\n",
" <td>6.337984</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0f8846d9-244a-43b1-881e-bf8503bdb181</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.303742</td>\n",
" <td>6.505825</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>402ea7a2-6737-46cc-aab5-f743e4bdea4f</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.302291</td>\n",
" <td>6.394600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>b7ab3204-44a4-4d25-b8ae-a6b500689a55</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.302479</td>\n",
" <td>6.355931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>c3a0e608-1ff3-4d35-9eeb-84003d8ae3be</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.305103</td>\n",
" <td>6.428447</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>d7d81758-0ec0-4dac-90fb-57c74f8ab348</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.300943</td>\n",
" <td>6.389448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>7b661734-ac7d-45ba-bf71-dbe1618f654a</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.305904</td>\n",
" <td>6.351137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>7d7fbf29-a00b-4501-95a1-b63a8bb15457</td>\n",
" <td>LightGBM</td>\n",
" <td>logloss</td>\n",
" <td>0.526857</td>\n",
" <td>74.563801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>baf6f37a-41fb-4d9b-a13c-30ef9310f217</td>\n",
" <td>NN</td>\n",
" <td>logloss</td>\n",
" <td>0.440417</td>\n",
" <td>37.227938</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f</td>\n",
" <td>NN</td>\n",
" <td>logloss</td>\n",
" <td>0.443137</td>\n",
" <td>49.542672</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>1d9ec79a-38eb-4dfc-945a-80f9e31beb1b</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.292472</td>\n",
" <td>7.463734</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>06799b1e-972d-4eb5-9271-fd8413d489ec</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.300852</td>\n",
" <td>7.930739</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>21ad6589-1c25-470a-8436-e344c00320c4</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.301417</td>\n",
" <td>6.422292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>e07d516d-33c1-41d0-9ee5-b581616c4edb</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.301081</td>\n",
" <td>6.438730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>83879b2c-b0fb-43f3-a282-be9419c48ee3</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.302998</td>\n",
" <td>6.355017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>d8aa6b74-2894-4deb-bae8-b0d1439f71a1</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.301372</td>\n",
" <td>6.374434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea</td>\n",
" <td>RF</td>\n",
" <td>logloss</td>\n",
" <td>0.303924</td>\n",
" <td>6.363737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>2491de8b-1bc0-4964-be8d-1287b9090e29</td>\n",
" <td>CatBoost</td>\n",
" <td>logloss</td>\n",
" <td>0.292929</td>\n",
" <td>7.383213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>12f08b01-6f52-4014-bb5e-305780191a96</td>\n",
" <td>Ensemble</td>\n",
" <td>logloss</td>\n",
" <td>0.284155</td>\n",
" <td>63.892297</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid model_type metric_type metric_value \\\n",
"0 82cab950-7ce3-42f4-bd7d-886ad5553643 CatBoost logloss 0.496273 \n",
"1 fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c CatBoost logloss 0.298972 \n",
"2 3adba1ba-fb1b-43ae-afcf-969a2c6e7375 CatBoost logloss 0.368796 \n",
"3 f8dea4a9-abf1-4218-8361-63d3b795a752 CatBoost logloss 0.524141 \n",
"4 74bc7a7d-09b4-4ece-9035-686a33612bed CatBoost logloss 0.320242 \n",
"5 c1a40c3a-0fe6-466e-9bda-e0b356254f71 CatBoost logloss 0.327601 \n",
"6 fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79 CatBoost logloss 0.312868 \n",
"7 6b67440e-6001-4acc-bbef-bd0cf1052598 CatBoost logloss 0.319430 \n",
"8 0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2 CatBoost logloss 0.308917 \n",
"9 fe348a66-22c2-4ecd-9bd5-9293ace74ed3 CatBoost logloss 0.522130 \n",
"10 bee786ab-c29d-4404-b904-c1937330cca5 Xgboost logloss 0.311373 \n",
"11 a0ff281d-efc9-4593-ae0a-3973acc7badb Xgboost logloss 0.284231 \n",
"12 ed0bdf42-6b57-42e3-aec7-105522f83f68 Xgboost logloss 0.512420 \n",
"13 277ca639-5908-4347-87f8-9ecabd1ad4a3 Xgboost logloss 0.319838 \n",
"14 30c6b1e4-16b2-4590-87ca-91814a83c091 RF logloss 0.322010 \n",
"15 cc9ed647-bbca-4ede-a85a-ce9ef001741d RF logloss 0.301817 \n",
"16 eadcee3f-7982-48c6-8b82-11ad9cfdefe7 RF logloss 0.311782 \n",
"17 54aac109-abc7-4ae7-b613-601df85ea44e RF logloss 0.307726 \n",
"18 0f8846d9-244a-43b1-881e-bf8503bdb181 RF logloss 0.303742 \n",
"19 402ea7a2-6737-46cc-aab5-f743e4bdea4f RF logloss 0.302291 \n",
"20 b7ab3204-44a4-4d25-b8ae-a6b500689a55 RF logloss 0.302479 \n",
"21 c3a0e608-1ff3-4d35-9eeb-84003d8ae3be RF logloss 0.305103 \n",
"22 d7d81758-0ec0-4dac-90fb-57c74f8ab348 RF logloss 0.300943 \n",
"23 7b661734-ac7d-45ba-bf71-dbe1618f654a RF logloss 0.305904 \n",
"24 7d7fbf29-a00b-4501-95a1-b63a8bb15457 LightGBM logloss 0.526857 \n",
"25 baf6f37a-41fb-4d9b-a13c-30ef9310f217 NN logloss 0.440417 \n",
"26 b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f NN logloss 0.443137 \n",
"27 1d9ec79a-38eb-4dfc-945a-80f9e31beb1b CatBoost logloss 0.292472 \n",
"28 06799b1e-972d-4eb5-9271-fd8413d489ec CatBoost logloss 0.300852 \n",
"29 21ad6589-1c25-470a-8436-e344c00320c4 RF logloss 0.301417 \n",
"30 e07d516d-33c1-41d0-9ee5-b581616c4edb RF logloss 0.301081 \n",
"31 83879b2c-b0fb-43f3-a282-be9419c48ee3 RF logloss 0.302998 \n",
"32 d8aa6b74-2894-4deb-bae8-b0d1439f71a1 RF logloss 0.301372 \n",
"33 08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea RF logloss 0.303924 \n",
"34 2491de8b-1bc0-4964-be8d-1287b9090e29 CatBoost logloss 0.292929 \n",
"35 12f08b01-6f52-4014-bb5e-305780191a96 Ensemble logloss 0.284155 \n",
"\n",
" train_time \n",
"0 8.698081 \n",
"1 7.781434 \n",
"2 6.723236 \n",
"3 6.980789 \n",
"4 8.472449 \n",
"5 6.673680 \n",
"6 8.777036 \n",
"7 8.268195 \n",
"8 8.730249 \n",
"9 7.523533 \n",
"10 19.979386 \n",
"11 44.590009 \n",
"12 6.050704 \n",
"13 28.923516 \n",
"14 6.638074 \n",
"15 6.411106 \n",
"16 6.589697 \n",
"17 6.337984 \n",
"18 6.505825 \n",
"19 6.394600 \n",
"20 6.355931 \n",
"21 6.428447 \n",
"22 6.389448 \n",
"23 6.351137 \n",
"24 74.563801 \n",
"25 37.227938 \n",
"26 49.542672 \n",
"27 7.463734 \n",
"28 7.930739 \n",
"29 6.422292 \n",
"30 6.438730 \n",
"31 6.355017 \n",
"32 6.374434 \n",
"33 6.363737 \n",
"34 7.383213 \n",
"35 63.892297 "
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"automl.get_leaderboard()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO: open cases still to cover\n",
"# 1. new missing value\n",
"# 2. new categorical value\n",
"# 3. (not yet specified)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment