pplonski/example.ipynb

## example.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import openml\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from supervised.automl import AutoML\n",
    "\n",
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn.model_selection\n",
    "from sklearn.metrics import log_loss, f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv', skipinitialspace=True)\n",
    "x_cols = [c for c in df.columns if c != 'income']\n",
    "X = df[x_cols]\n",
    "y = df['income']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>77516</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>2174</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>83311</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>Private</td>\n",
       "      <td>215646</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>9</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53</td>\n",
       "      <td>Private</td>\n",
       "      <td>234721</td>\n",
       "      <td>11th</td>\n",
       "      <td>7</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>Husband</td>\n",
       "      <td>Black</td>\n",
       "      <td>Male</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>Private</td>\n",
       "      <td>338409</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>13</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>Wife</td>\n",
       "      <td>Black</td>\n",
       "      <td>Female</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>Cuba</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age         workclass  fnlwgt  education  education-num  \\\n",
       "0   39         State-gov   77516  Bachelors             13   \n",
       "1   50  Self-emp-not-inc   83311  Bachelors             13   \n",
       "2   38           Private  215646    HS-grad              9   \n",
       "3   53           Private  234721       11th              7   \n",
       "4   28           Private  338409  Bachelors             13   \n",
       "\n",
       "       marital-status         occupation   relationship   race     sex  \\\n",
       "0       Never-married       Adm-clerical  Not-in-family  White    Male   \n",
       "1  Married-civ-spouse    Exec-managerial        Husband  White    Male   \n",
       "2            Divorced  Handlers-cleaners  Not-in-family  White    Male   \n",
       "3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   \n",
       "4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   \n",
       "\n",
       "   capital-gain  capital-loss  hours-per-week native-country  \n",
       "0          2174             0              40  United-States  \n",
       "1             0             0              13  United-States  \n",
       "2             0             0              40  United-States  \n",
       "3             0             0              40  United-States  \n",
       "4             0             0              40           Cuba  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed = 1234\n",
    "X_train, X_test, y_train, y_test = \\\n",
    "    sklearn.model_selection.train_test_split(X, y, test_size = 0.3, random_state=seed)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Private'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    249\u001b[0m         \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m         \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    251\u001b[0m         \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    252\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    525\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    526\u001b[0m                 \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m                 \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    528\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    529\u001b[0m                 raise ValueError(\"Complex data not supported\\n\"\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m    536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    537\u001b[0m     \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'"
     ]
    }
   ],
   "source": [
    "rf = RandomForestClassifier(n_estimators = 1000)\n",
    "rf = rf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The Random Forest in scikit-learn does not work with categorical (string) columns directly.\n",
    "# Let's convert them to numbers with integer (label) encoding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'<' not supported between instances of 'str' and 'float'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-22-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcolumn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'workclass'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'education'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'marital-status'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'occupation'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'relationship'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'race'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'native-country'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m    234\u001b[0m         \"\"\"\n\u001b[1;32m    235\u001b[0m         \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumn_or_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    237\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    238\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m    106\u001b[0m     \"\"\"\n\u001b[1;32m    107\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    109\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    110\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m     61\u001b[0m     \u001b[0;31m# only used in _encode below, see docstring there for details\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     62\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m         \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     64\u001b[0m         \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     65\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'str' and 'float'"
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "for column in ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex','native-country']:\n",
    "    categorical_convert = LabelEncoder()\n",
    "    X_train[column] = categorical_convert.fit_transform(X_train[column])\n",
    "    X_test[column] = categorical_convert.transform(X_test[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Still something wrong?\n",
    "# The encoder failed while comparing 'str' with 'float' values, so it looks like we have missing values (NaN) in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "29700             Private\n",
       "1529              Private\n",
       "27477             Private\n",
       "31950             Private\n",
       "4732              Private\n",
       "10858             Private\n",
       "24518             Private\n",
       "10035                 NaN\n",
       "1324              Private\n",
       "26727             Private\n",
       "10255    Self-emp-not-inc\n",
       "25713             Private\n",
       "32541                 NaN\n",
       "22242             Private\n",
       "8743            State-gov\n",
       "21018             Private\n",
       "15462             Private\n",
       "24948             Private\n",
       "4630              Private\n",
       "31931             Private\n",
       "18589             Private\n",
       "13984             Private\n",
       "28047             Private\n",
       "21419    Self-emp-not-inc\n",
       "29795             Private\n",
       "12366             Private\n",
       "25362             Private\n",
       "15778                 NaN\n",
       "18061             Private\n",
       "16192             Private\n",
       "               ...       \n",
       "2558            State-gov\n",
       "16630           State-gov\n",
       "18815             Private\n",
       "9161              Private\n",
       "27984             Private\n",
       "27973             Private\n",
       "7644              Private\n",
       "14192           State-gov\n",
       "26744             Private\n",
       "27066             Private\n",
       "13686             Private\n",
       "7962              Private\n",
       "8060              Private\n",
       "7916              Private\n",
       "1182              Private\n",
       "32427             Private\n",
       "8222                  NaN\n",
       "23706             Private\n",
       "9449              Private\n",
       "8471              Private\n",
       "23217             Private\n",
       "32399             Private\n",
       "22521             Private\n",
       "17048         Federal-gov\n",
       "23924             Private\n",
       "27852             Private\n",
       "23605             Private\n",
       "1318            State-gov\n",
       "25299             Private\n",
       "27439             Private\n",
       "Name: workclass, Length: 22792, dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train[column]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's fill the missing values with the most frequent value (mode) of each column, computed on the training set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_mode = X_train.mode().iloc[0]\n",
    "X_train = X_train.fillna(train_mode)\n",
    "X_test = X_test.fillna(train_mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "29700             Private\n",
       "1529              Private\n",
       "27477             Private\n",
       "31950             Private\n",
       "4732              Private\n",
       "10858             Private\n",
       "24518             Private\n",
       "10035             Private\n",
       "1324              Private\n",
       "26727             Private\n",
       "10255    Self-emp-not-inc\n",
       "25713             Private\n",
       "32541             Private\n",
       "22242             Private\n",
       "8743            State-gov\n",
       "21018             Private\n",
       "15462             Private\n",
       "24948             Private\n",
       "4630              Private\n",
       "31931             Private\n",
       "18589             Private\n",
       "13984             Private\n",
       "28047             Private\n",
       "21419    Self-emp-not-inc\n",
       "29795             Private\n",
       "12366             Private\n",
       "25362             Private\n",
       "15778             Private\n",
       "18061             Private\n",
       "16192             Private\n",
       "               ...       \n",
       "2558            State-gov\n",
       "16630           State-gov\n",
       "18815             Private\n",
       "9161              Private\n",
       "27984             Private\n",
       "27973             Private\n",
       "7644              Private\n",
       "14192           State-gov\n",
       "26744             Private\n",
       "27066             Private\n",
       "13686             Private\n",
       "7962              Private\n",
       "8060              Private\n",
       "7916              Private\n",
       "1182              Private\n",
       "32427             Private\n",
       "8222              Private\n",
       "23706             Private\n",
       "9449              Private\n",
       "8471              Private\n",
       "23217             Private\n",
       "32399             Private\n",
       "22521             Private\n",
       "17048         Federal-gov\n",
       "23924             Private\n",
       "27852             Private\n",
       "23605             Private\n",
       "1318            State-gov\n",
       "25299             Private\n",
       "27439             Private\n",
       "Name: workclass, Length: 22792, dtype: object"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train[column]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Private'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-31-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    249\u001b[0m         \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m         \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    251\u001b[0m         \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    252\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    525\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    526\u001b[0m                 \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m                 \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    528\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    529\u001b[0m                 raise ValueError(\"Complex data not supported\\n\"\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m    536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    537\u001b[0m     \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'"
     ]
    }
   ],
   "source": [
    "rf = RandomForestClassifier(n_estimators = 1000)\n",
    "rf = rf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Oh, yes! We still need to convert the categorical columns to integers!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "y contains previously unseen labels: 'Holand-Netherlands'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m     67\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m             \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     69\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m     67\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m             \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     69\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'Holand-Netherlands'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-33-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m     \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m    255\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m         \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m    106\u001b[0m     \"\"\"\n\u001b[1;32m    107\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    109\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    110\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m     69\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     70\u001b[0m             raise ValueError(\"y contains previously unseen labels: %s\"\n\u001b[0;32m---> 71\u001b[0;31m                              % str(e))\n\u001b[0m\u001b[1;32m     72\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     73\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: y contains previously unseen labels: 'Holand-Netherlands'"
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "for column in ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex','native-country']:\n",
    "    categorical_convert = LabelEncoder()\n",
    "    X_train[column] = categorical_convert.fit_transform(X_train[column])\n",
    "    X_test[column] = categorical_convert.transform(X_test[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# What!? The encoder fitted on X_train fails on X_test ...\n",
    "# It looks like X_test contains a label that never occurs in X_train.\n",
    "# This can happen in real life as well :) and it will break your production model!\n",
    "# What should we do in this situation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education</th>\n",
       "      <th>education-num</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>native-country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1055</th>\n",
       "      <td>32</td>\n",
       "      <td>3</td>\n",
       "      <td>87643</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26305</th>\n",
       "      <td>27</td>\n",
       "      <td>3</td>\n",
       "      <td>207352</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9568</th>\n",
       "      <td>73</td>\n",
       "      <td>6</td>\n",
       "      <td>74040</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15071</th>\n",
       "      <td>39</td>\n",
       "      <td>3</td>\n",
       "      <td>174924</td>\n",
       "      <td>11</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>14344</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2012</th>\n",
       "      <td>31</td>\n",
       "      <td>3</td>\n",
       "      <td>264936</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>United-States</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       age  workclass  fnlwgt  education  education-num  marital-status  \\\n",
       "1055    32          3   87643          9             13               2   \n",
       "26305   27          3  207352          9             13               2   \n",
       "9568    73          6   74040          5              4               0   \n",
       "15071   39          3  174924         11              9               5   \n",
       "2012    31          3  264936         15             10               4   \n",
       "\n",
       "       occupation  relationship  race  sex  capital-gain  capital-loss  \\\n",
       "1055           11             0     4    1             0             0   \n",
       "26305          12             0     1    1             0             0   \n",
       "9568            7             1     1    0             0             0   \n",
       "15071           3             1     4    1         14344             0   \n",
       "2012            7             1     4    0             0             0   \n",
       "\n",
       "       hours-per-week native-country  \n",
       "1055               40  United-States  \n",
       "26305              40          India  \n",
       "9568               40  United-States  \n",
       "15071              40  United-States  \n",
       "2012               40  United-States  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# we need to assign a new integer for this new label or remove this row from predictions\n",
    "# let's hack it ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: relies on `categorical_convert` still being the encoder that the loop above\n",
    "# fitted last, i.e. the one for 'native-country'.\n",
    "column = 'native-country'\n",
    "# labels that occur in the test split but are missing from the fitted encoder\n",
    "test_labels = np.unique(X_test[column].values.tolist())\n",
    "unseen_labels = np.setdiff1d(test_labels, categorical_convert.classes_)\n",
    "# append them after the known classes so that transform() no longer rejects them\n",
    "categorical_convert.classes_ = np.append(categorical_convert.classes_, unseen_labels)\n",
    "X_test[column] = categorical_convert.transform(X_test[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
       "       34, 35, 36, 37, 38, 39])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(X_train[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
       "       34, 35, 36, 37, 38, 39, 40])"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(X_test[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# can we train now?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# a fixed random_state makes the forest, and the metrics computed from it, reproducible\n",
    "rf = RandomForestClassifier(n_estimators=1000, random_state=1234)\n",
    "rf = rf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# :), let's do predictions! and compute some metrics!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.338857591429531"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# second column of predict_proba = probability of the second entry in rf.classes_\n",
    "rf_test_proba = rf.predict_proba(X_test)[:, 1]\n",
    "log_loss(y_test, rf_test_proba)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Please remember that for production you need to save:\n",
    "# 1. the values used for missing-value imputation\n",
    "# 2. the encoders, to properly convert categoricals to integers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# and the AutoML way"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's re-read the data, so that we start again from the raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# skipinitialspace=True skips the spaces that follow each delimiter in the CSV\n",
    "data_url = 'https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv'\n",
    "df = pd.read_csv(data_url, skipinitialspace=True)\n",
    "\n",
    "# every column except the 'income' target is used as a feature\n",
    "x_cols = df.columns[df.columns != 'income'].tolist()\n",
    "X = df.loc[:, x_cols]\n",
    "y = df.loc[:, 'income']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed = 1234\n",
    "# hold out 30% of the rows for testing; the fixed seed keeps the split repeatable\n",
    "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n",
    "    X, y, test_size=0.3, random_state=seed\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8da2d19d9f4d4096b315f2ca24f55960",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learner CatBoost final loss 0.4962725171175057 time 9.11 seconds\n",
      "Learner Xgboost final loss 0.4756562801743394 time 5.83 seconds\n",
      "Learner RF final loss 0.3065861813563275 time 3.08 seconds\n",
      "Learner RF final loss 0.305368676349097 time 2.88 seconds\n",
      "Learner RF final loss 0.3094612291478825 time 2.89 seconds\n",
      "Learner LightGBM final loss 0.530521322787529 time 31.48 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learner NN final loss 0.4039734161034428 time 21.71 seconds\n",
      "Learner Ensemble final loss 0.29935576492662624 time 2.54 seconds\n",
      "\n"
     ]
    }
   ],
   "source": [
    "automl = AutoML(total_time_limit=60)\n",
    "automl.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3114670474446203"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 'p_>50K' is presumably the predicted probability of the '>50K' class\n",
    "automl_test_proba = automl.predict(X_test)['p_>50K']\n",
    "log_loss(y_test, automl_test_proba)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>model_type</th>\n",
       "      <th>metric_type</th>\n",
       "      <th>metric_value</th>\n",
       "      <th>train_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8495f070-fe32-4c22-82d2-3f03997b90c6</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.496273</td>\n",
       "      <td>9.114185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7abe66a3-844f-4add-9a88-674fe6fd9b73</td>\n",
       "      <td>Xgboost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.475656</td>\n",
       "      <td>5.827809</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3756b039-2a98-459b-b4ed-c3967b0fee5c</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.306586</td>\n",
       "      <td>3.079033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>066e83f8-42f3-4f90-8956-a1c60caa1e55</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.305369</td>\n",
       "      <td>2.877668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16a97fa8-4fa7-4545-b694-b74705acd759</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.309461</td>\n",
       "      <td>2.892735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2eeabf78-4303-44da-bc0e-37a398d48ff3</td>\n",
       "      <td>LightGBM</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.530521</td>\n",
       "      <td>31.484354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0e5848b1-394b-449f-acf2-93eca2b7e123</td>\n",
       "      <td>NN</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.403973</td>\n",
       "      <td>21.710288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3c86db84-c765-46d4-96aa-35ff4f18153c</td>\n",
       "      <td>Ensemble</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.299356</td>\n",
       "      <td>2.535452</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    uid model_type metric_type  metric_value  \\\n",
       "0  8495f070-fe32-4c22-82d2-3f03997b90c6   CatBoost     logloss      0.496273   \n",
       "1  7abe66a3-844f-4add-9a88-674fe6fd9b73    Xgboost     logloss      0.475656   \n",
       "2  3756b039-2a98-459b-b4ed-c3967b0fee5c         RF     logloss      0.306586   \n",
       "3  066e83f8-42f3-4f90-8956-a1c60caa1e55         RF     logloss      0.305369   \n",
       "4  16a97fa8-4fa7-4545-b694-b74705acd759         RF     logloss      0.309461   \n",
       "5  2eeabf78-4303-44da-bc0e-37a398d48ff3   LightGBM     logloss      0.530521   \n",
       "6  0e5848b1-394b-449f-acf2-93eca2b7e123         NN     logloss      0.403973   \n",
       "7  3c86db84-c765-46d4-96aa-35ff4f18153c   Ensemble     logloss      0.299356   \n",
       "\n",
       "   train_time  \n",
       "0    9.114185  \n",
       "1    5.827809  \n",
       "2    3.079033  \n",
       "3    2.877668  \n",
       "4    2.892735  \n",
       "5   31.484354  \n",
       "6   21.710288  \n",
       "7    2.535452  "
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "automl.get_leaderboard()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'best_model': {'library_version': '0.1',\n",
       "  'algorithm_name': 'Greedy Ensemble',\n",
       "  'algorithm_short_name': 'Ensemble',\n",
       "  'uid': '3c86db84-c765-46d4-96aa-35ff4f18153c',\n",
       "  'models': [{'model': {'uid': '3756b039-2a98-459b-b4ed-c3967b0fee5c',\n",
       "     'algorithm_short_name': 'RF',\n",
       "     'framework_file': '3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n",
       "     'framework_file_path': '/tmp/3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n",
       "     'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
       "     'learners': [{'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '6314141c-61a4-41a2-8ede-9f5ccfa443fd',\n",
       "       'model_file': '6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n",
       "       'model_file_path': '/tmp/6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 3,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.6,\n",
       "        'min_samples_split': 8,\n",
       "        'min_samples_leaf': 20}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '80e35a7c-51ac-445a-937e-2afa4e178d82',\n",
       "       'model_file': '80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n",
       "       'model_file_path': '/tmp/80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 3,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.6,\n",
       "        'min_samples_split': 8,\n",
       "        'min_samples_leaf': 20}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe',\n",
       "       'model_file': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n",
       "       'model_file_path': '/tmp/d4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 3,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.6,\n",
       "        'min_samples_split': 8,\n",
       "        'min_samples_leaf': 20}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046',\n",
       "       'model_file': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n",
       "       'model_file_path': '/tmp/dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 3,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.6,\n",
       "        'min_samples_split': 8,\n",
       "        'min_samples_leaf': 20}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '3867f64a-6a73-482b-bf45-dc33de6ff63c',\n",
       "       'model_file': '3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n",
       "       'model_file_path': '/tmp/3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 3,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.6,\n",
       "        'min_samples_split': 8,\n",
       "        'min_samples_leaf': 20}}],\n",
       "     'params': {'additional': {'trees_in_step': 10,\n",
       "       'train_cant_improve_limit': 5,\n",
       "       'max_steps': 500,\n",
       "       'max_rows_limit': None,\n",
       "       'max_cols_limit': None},\n",
       "      'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
       "         'categorical_to_int'],\n",
       "        'education': ['categorical_to_int'],\n",
       "        'marital-status': ['categorical_to_int'],\n",
       "        'occupation': ['na_fill_median', 'categorical_to_int'],\n",
       "        'relationship': ['categorical_to_int'],\n",
       "        'race': ['categorical_to_int'],\n",
       "        'sex': ['categorical_to_int'],\n",
       "        'native-country': ['na_fill_median', 'categorical_to_int']},\n",
       "       'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
       "      'validation': {'validation_type': 'kfold',\n",
       "       'k_folds': 5,\n",
       "       'shuffle': True},\n",
       "      'learner': {'model_type': 'RF',\n",
       "       'seed': 3,\n",
       "       'criterion': 'entropy',\n",
       "       'max_features': 0.6,\n",
       "       'min_samples_split': 8,\n",
       "       'min_samples_leaf': 20}}},\n",
       "    'repeat': 2},\n",
       "   {'model': {'uid': '066e83f8-42f3-4f90-8956-a1c60caa1e55',\n",
       "     'algorithm_short_name': 'RF',\n",
       "     'framework_file': '066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n",
       "     'framework_file_path': '/tmp/066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n",
       "     'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
       "     'learners': [{'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '7c909c7e-a998-479c-a7d7-d02e584fc70e',\n",
       "       'model_file': '7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n",
       "       'model_file_path': '/tmp/7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 4,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.3,\n",
       "        'min_samples_split': 40,\n",
       "        'min_samples_leaf': 5}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0',\n",
       "       'model_file': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n",
       "       'model_file_path': '/tmp/d2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 4,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.3,\n",
       "        'min_samples_split': 40,\n",
       "        'min_samples_leaf': 5}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7',\n",
       "       'model_file': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n",
       "       'model_file_path': '/tmp/6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 4,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.3,\n",
       "        'min_samples_split': 40,\n",
       "        'min_samples_leaf': 5}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'b153e6ba-ad42-402c-a249-68eb50622fa5',\n",
       "       'model_file': 'b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n",
       "       'model_file_path': '/tmp/b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 4,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.3,\n",
       "        'min_samples_split': 40,\n",
       "        'min_samples_leaf': 5}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea',\n",
       "       'model_file': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n",
       "       'model_file_path': '/tmp/d9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 4,\n",
       "        'criterion': 'entropy',\n",
       "        'max_features': 0.3,\n",
       "        'min_samples_split': 40,\n",
       "        'min_samples_leaf': 5}}],\n",
       "     'params': {'additional': {'trees_in_step': 10,\n",
       "       'train_cant_improve_limit': 5,\n",
       "       'max_steps': 500,\n",
       "       'max_rows_limit': None,\n",
       "       'max_cols_limit': None},\n",
       "      'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
       "         'categorical_to_int'],\n",
       "        'education': ['categorical_to_int'],\n",
       "        'marital-status': ['categorical_to_int'],\n",
       "        'occupation': ['na_fill_median', 'categorical_to_int'],\n",
       "        'relationship': ['categorical_to_int'],\n",
       "        'race': ['categorical_to_int'],\n",
       "        'sex': ['categorical_to_int'],\n",
       "        'native-country': ['na_fill_median', 'categorical_to_int']},\n",
       "       'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
       "      'validation': {'validation_type': 'kfold',\n",
       "       'k_folds': 5,\n",
       "       'shuffle': True},\n",
       "      'learner': {'model_type': 'RF',\n",
       "       'seed': 4,\n",
       "       'criterion': 'entropy',\n",
       "       'max_features': 0.3,\n",
       "       'min_samples_split': 40,\n",
       "       'min_samples_leaf': 5}}},\n",
       "    'repeat': 3},\n",
       "   {'model': {'uid': '16a97fa8-4fa7-4545-b694-b74705acd759',\n",
       "     'algorithm_short_name': 'RF',\n",
       "     'framework_file': '16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n",
       "     'framework_file_path': '/tmp/16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n",
       "     'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}},\n",
       "      {'missing_values': [{'fill_method': 'na_fill_median',\n",
       "         'fill_params': {'workclass': 'Private',\n",
       "          'occupation': 'Prof-specialty',\n",
       "          'native-country': 'United-States'}}],\n",
       "       'categorical': [{'convert_method': 'categorical_to_int',\n",
       "         'convert_params': {'workclass': {'Federal-gov': 0,\n",
       "           'Local-gov': 1,\n",
       "           'Never-worked': 2,\n",
       "           'Private': 3,\n",
       "           'Self-emp-inc': 4,\n",
       "           'Self-emp-not-inc': 5,\n",
       "           'State-gov': 6,\n",
       "           'Without-pay': 7},\n",
       "          'education': {'10th': 0,\n",
       "           '11th': 1,\n",
       "           '12th': 2,\n",
       "           '1st-4th': 3,\n",
       "           '5th-6th': 4,\n",
       "           '7th-8th': 5,\n",
       "           '9th': 6,\n",
       "           'Assoc-acdm': 7,\n",
       "           'Assoc-voc': 8,\n",
       "           'Bachelors': 9,\n",
       "           'Doctorate': 10,\n",
       "           'HS-grad': 11,\n",
       "           'Masters': 12,\n",
       "           'Preschool': 13,\n",
       "           'Prof-school': 14,\n",
       "           'Some-college': 15},\n",
       "          'marital-status': {'Divorced': 0,\n",
       "           'Married-AF-spouse': 1,\n",
       "           'Married-civ-spouse': 2,\n",
       "           'Married-spouse-absent': 3,\n",
       "           'Never-married': 4,\n",
       "           'Separated': 5,\n",
       "           'Widowed': 6},\n",
       "          'occupation': {'Adm-clerical': 0,\n",
       "           'Armed-Forces': 1,\n",
       "           'Craft-repair': 2,\n",
       "           'Exec-managerial': 3,\n",
       "           'Farming-fishing': 4,\n",
       "           'Handlers-cleaners': 5,\n",
       "           'Machine-op-inspct': 6,\n",
       "           'Other-service': 7,\n",
       "           'Priv-house-serv': 8,\n",
       "           'Prof-specialty': 9,\n",
       "           'Protective-serv': 10,\n",
       "           'Sales': 11,\n",
       "           'Tech-support': 12,\n",
       "           'Transport-moving': 13},\n",
       "          'relationship': {'Husband': 0,\n",
       "           'Not-in-family': 1,\n",
       "           'Other-relative': 2,\n",
       "           'Own-child': 3,\n",
       "           'Unmarried': 4,\n",
       "           'Wife': 5},\n",
       "          'race': {'Amer-Indian-Eskimo': 0,\n",
       "           'Asian-Pac-Islander': 1,\n",
       "           'Black': 2,\n",
       "           'Other': 3,\n",
       "           'White': 4},\n",
       "          'sex': {'Female': 0, 'Male': 1},\n",
       "          'native-country': {'Cambodia': 0,\n",
       "           'Canada': 1,\n",
       "           'China': 2,\n",
       "           'Columbia': 3,\n",
       "           'Cuba': 4,\n",
       "           'Dominican-Republic': 5,\n",
       "           'Ecuador': 6,\n",
       "           'El-Salvador': 7,\n",
       "           'England': 8,\n",
       "           'France': 9,\n",
       "           'Germany': 10,\n",
       "           'Greece': 11,\n",
       "           'Guatemala': 12,\n",
       "           'Haiti': 13,\n",
       "           'Honduras': 14,\n",
       "           'Hong': 15,\n",
       "           'Hungary': 16,\n",
       "           'India': 17,\n",
       "           'Iran': 18,\n",
       "           'Ireland': 19,\n",
       "           'Italy': 20,\n",
       "           'Jamaica': 21,\n",
       "           'Japan': 22,\n",
       "           'Laos': 23,\n",
       "           'Mexico': 24,\n",
       "           'Nicaragua': 25,\n",
       "           'Outlying-US(Guam-USVI-etc)': 26,\n",
       "           'Peru': 27,\n",
       "           'Philippines': 28,\n",
       "           'Poland': 29,\n",
       "           'Portugal': 30,\n",
       "           'Puerto-Rico': 31,\n",
       "           'Scotland': 32,\n",
       "           'South': 33,\n",
       "           'Taiwan': 34,\n",
       "           'Thailand': 35,\n",
       "           'Trinadad&Tobago': 36,\n",
       "           'United-States': 37,\n",
       "           'Vietnam': 38,\n",
       "           'Yugoslavia': 39}}}],\n",
       "       'categorical_y': {'<=50K': 0, '>50K': 1}}],\n",
       "     'learners': [{'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '46ecdf0b-a48a-43fa-87c0-47679a360e06',\n",
       "       'model_file': '46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n",
       "       'model_file_path': '/tmp/46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 5,\n",
       "        'criterion': 'gini',\n",
       "        'max_features': 0.5,\n",
       "        'min_samples_split': 20,\n",
       "        'min_samples_leaf': 16}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'cd981d70-2064-4d7f-b184-fee61cd23ff0',\n",
       "       'model_file': 'cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n",
       "       'model_file_path': '/tmp/cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 5,\n",
       "        'criterion': 'gini',\n",
       "        'max_features': 0.5,\n",
       "        'min_samples_split': 20,\n",
       "        'min_samples_leaf': 16}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '3ea42453-4bcf-4a93-8009-9ac09598bc29',\n",
       "       'model_file': '3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n",
       "       'model_file_path': '/tmp/3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 5,\n",
       "        'criterion': 'gini',\n",
       "        'max_features': 0.5,\n",
       "        'min_samples_split': 20,\n",
       "        'min_samples_leaf': 16}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b',\n",
       "       'model_file': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n",
       "       'model_file_path': '/tmp/fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 5,\n",
       "        'criterion': 'gini',\n",
       "        'max_features': 0.5,\n",
       "        'min_samples_split': 20,\n",
       "        'min_samples_leaf': 16}},\n",
       "      {'library_version': '0.20.3',\n",
       "       'algorithm_name': 'Random Forest',\n",
       "       'algorithm_short_name': 'RF',\n",
       "       'uid': '493f0185-e331-4310-91c0-9a8cec5179ed',\n",
       "       'model_file': '493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n",
       "       'model_file_path': '/tmp/493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n",
       "       'params': {'model_type': 'RF',\n",
       "        'seed': 5,\n",
       "        'criterion': 'gini',\n",
       "        'max_features': 0.5,\n",
       "        'min_samples_split': 20,\n",
       "        'min_samples_leaf': 16}}],\n",
       "     'params': {'additional': {'trees_in_step': 10,\n",
       "       'train_cant_improve_limit': 5,\n",
       "       'max_steps': 500,\n",
       "       'max_rows_limit': None,\n",
       "       'max_cols_limit': None},\n",
       "      'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n",
       "         'categorical_to_int'],\n",
       "        'education': ['categorical_to_int'],\n",
       "        'marital-status': ['categorical_to_int'],\n",
       "        'occupation': ['na_fill_median', 'categorical_to_int'],\n",
       "        'relationship': ['categorical_to_int'],\n",
       "        'race': ['categorical_to_int'],\n",
       "        'sex': ['categorical_to_int'],\n",
       "        'native-country': ['na_fill_median', 'categorical_to_int']},\n",
       "       'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n",
       "      'validation': {'validation_type': 'kfold',\n",
       "       'k_folds': 5,\n",
       "       'shuffle': True},\n",
       "      'learner': {'model_type': 'RF',\n",
       "       'seed': 5,\n",
       "       'criterion': 'gini',\n",
       "       'max_features': 0.5,\n",
       "       'min_samples_split': 20,\n",
       "       'min_samples_leaf': 16}}},\n",
       "    'repeat': 1}]},\n",
       " 'threshold': 0.3792094447282226}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "automl.to_json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a8482f483fec443fa3bf6fff4a68f5ae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learner CatBoost final loss 0.4962725171175057 time 8.7 seconds\n",
      "Learner CatBoost final loss 0.298971902901766 time 7.78 seconds\n",
      "Learner CatBoost final loss 0.3687963242745068 time 6.72 seconds\n",
      "Learner CatBoost final loss 0.5241412727811743 time 6.98 seconds\n",
      "Learner CatBoost final loss 0.320242187891504 time 8.47 seconds\n",
      "Learner CatBoost final loss 0.3276007892517636 time 6.67 seconds\n",
      "Learner CatBoost final loss 0.31286838781837084 time 8.78 seconds\n",
      "Learner CatBoost final loss 0.3194302661081719 time 8.27 seconds\n",
      "Learner CatBoost final loss 0.308917492043316 time 8.73 seconds\n",
      "Learner CatBoost final loss 0.5221297190975681 time 7.52 seconds\n",
      "Learner Xgboost final loss 0.311372604774252 time 19.98 seconds\n",
      "Learner Xgboost final loss 0.2842308795115343 time 44.59 seconds\n",
      "Learner Xgboost final loss 0.5124198237210352 time 6.05 seconds\n",
      "Learner Xgboost final loss 0.31983838345814286 time 28.92 seconds\n",
      "Learner RF final loss 0.32201007412896604 time 6.64 seconds\n",
      "Learner RF final loss 0.30181666534456947 time 6.41 seconds\n",
      "Learner RF final loss 0.31178228545519937 time 6.59 seconds\n",
      "Learner RF final loss 0.3077256287675534 time 6.34 seconds\n",
      "Learner RF final loss 0.3037423508551022 time 6.51 seconds\n",
      "Learner RF final loss 0.3022910292393144 time 6.39 seconds\n",
      "Learner RF final loss 0.3024787249684428 time 6.36 seconds\n",
      "Learner RF final loss 0.3051025531513369 time 6.43 seconds\n",
      "Learner RF final loss 0.3009434792047708 time 6.39 seconds\n",
      "Learner RF final loss 0.3059040438520827 time 6.35 seconds\n",
      "Learner LightGBM final loss 0.52685725579385 time 74.56 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      "Learner NN final loss 0.4404172010190013 time 37.23 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n",
      "/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learner NN final loss 0.44313672512751584 time 49.54 seconds\n",
      "Learner CatBoost final loss 0.2924718431766818 time 7.46 seconds\n",
      "Learner CatBoost final loss 0.3008517882859747 time 7.93 seconds\n",
      "Learner RF final loss 0.30141657521244086 time 6.42 seconds\n",
      "Learner RF final loss 0.30108077775213377 time 6.44 seconds\n",
      "Learner RF final loss 0.3029979984348817 time 6.36 seconds\n",
      "Learner RF final loss 0.30137187809404953 time 6.37 seconds\n",
      "Learner RF final loss 0.3039241119152016 time 6.36 seconds\n",
      "Learner CatBoost final loss 0.29292902314568803 time 7.38 seconds\n",
      "Learner Ensemble final loss 0.2841549706146483 time 63.89 seconds\n",
      "\n"
     ]
    }
   ],
   "source": [
    "automl = AutoML(total_time_limit=10*60) # cap total training time at 10 minutes (600 seconds)\n",
    "automl.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2975106252933293"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "log_loss(y_test, automl.predict(X_test)['p_>50K'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>model_type</th>\n",
       "      <th>metric_type</th>\n",
       "      <th>metric_value</th>\n",
       "      <th>train_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>82cab950-7ce3-42f4-bd7d-886ad5553643</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.496273</td>\n",
       "      <td>8.698081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.298972</td>\n",
       "      <td>7.781434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3adba1ba-fb1b-43ae-afcf-969a2c6e7375</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.368796</td>\n",
       "      <td>6.723236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>f8dea4a9-abf1-4218-8361-63d3b795a752</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.524141</td>\n",
       "      <td>6.980789</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>74bc7a7d-09b4-4ece-9035-686a33612bed</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.320242</td>\n",
       "      <td>8.472449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>c1a40c3a-0fe6-466e-9bda-e0b356254f71</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.327601</td>\n",
       "      <td>6.673680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.312868</td>\n",
       "      <td>8.777036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>6b67440e-6001-4acc-bbef-bd0cf1052598</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.319430</td>\n",
       "      <td>8.268195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.308917</td>\n",
       "      <td>8.730249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>fe348a66-22c2-4ecd-9bd5-9293ace74ed3</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.522130</td>\n",
       "      <td>7.523533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>bee786ab-c29d-4404-b904-c1937330cca5</td>\n",
       "      <td>Xgboost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.311373</td>\n",
       "      <td>19.979386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>a0ff281d-efc9-4593-ae0a-3973acc7badb</td>\n",
       "      <td>Xgboost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.284231</td>\n",
       "      <td>44.590009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>ed0bdf42-6b57-42e3-aec7-105522f83f68</td>\n",
       "      <td>Xgboost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.512420</td>\n",
       "      <td>6.050704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>277ca639-5908-4347-87f8-9ecabd1ad4a3</td>\n",
       "      <td>Xgboost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.319838</td>\n",
       "      <td>28.923516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>30c6b1e4-16b2-4590-87ca-91814a83c091</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.322010</td>\n",
       "      <td>6.638074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>cc9ed647-bbca-4ede-a85a-ce9ef001741d</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.301817</td>\n",
       "      <td>6.411106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>eadcee3f-7982-48c6-8b82-11ad9cfdefe7</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.311782</td>\n",
       "      <td>6.589697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>54aac109-abc7-4ae7-b613-601df85ea44e</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.307726</td>\n",
       "      <td>6.337984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0f8846d9-244a-43b1-881e-bf8503bdb181</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.303742</td>\n",
       "      <td>6.505825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>402ea7a2-6737-46cc-aab5-f743e4bdea4f</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.302291</td>\n",
       "      <td>6.394600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>b7ab3204-44a4-4d25-b8ae-a6b500689a55</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.302479</td>\n",
       "      <td>6.355931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>c3a0e608-1ff3-4d35-9eeb-84003d8ae3be</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.305103</td>\n",
       "      <td>6.428447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>d7d81758-0ec0-4dac-90fb-57c74f8ab348</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.300943</td>\n",
       "      <td>6.389448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>7b661734-ac7d-45ba-bf71-dbe1618f654a</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.305904</td>\n",
       "      <td>6.351137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>7d7fbf29-a00b-4501-95a1-b63a8bb15457</td>\n",
       "      <td>LightGBM</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.526857</td>\n",
       "      <td>74.563801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>baf6f37a-41fb-4d9b-a13c-30ef9310f217</td>\n",
       "      <td>NN</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.440417</td>\n",
       "      <td>37.227938</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f</td>\n",
       "      <td>NN</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.443137</td>\n",
       "      <td>49.542672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1d9ec79a-38eb-4dfc-945a-80f9e31beb1b</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.292472</td>\n",
       "      <td>7.463734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>06799b1e-972d-4eb5-9271-fd8413d489ec</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.300852</td>\n",
       "      <td>7.930739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>21ad6589-1c25-470a-8436-e344c00320c4</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.301417</td>\n",
       "      <td>6.422292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>e07d516d-33c1-41d0-9ee5-b581616c4edb</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.301081</td>\n",
       "      <td>6.438730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>83879b2c-b0fb-43f3-a282-be9419c48ee3</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.302998</td>\n",
       "      <td>6.355017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>d8aa6b74-2894-4deb-bae8-b0d1439f71a1</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.301372</td>\n",
       "      <td>6.374434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea</td>\n",
       "      <td>RF</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.303924</td>\n",
       "      <td>6.363737</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2491de8b-1bc0-4964-be8d-1287b9090e29</td>\n",
       "      <td>CatBoost</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.292929</td>\n",
       "      <td>7.383213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>12f08b01-6f52-4014-bb5e-305780191a96</td>\n",
       "      <td>Ensemble</td>\n",
       "      <td>logloss</td>\n",
       "      <td>0.284155</td>\n",
       "      <td>63.892297</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     uid model_type metric_type  metric_value  \\\n",
       "0   82cab950-7ce3-42f4-bd7d-886ad5553643   CatBoost     logloss      0.496273   \n",
       "1   fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c   CatBoost     logloss      0.298972   \n",
       "2   3adba1ba-fb1b-43ae-afcf-969a2c6e7375   CatBoost     logloss      0.368796   \n",
       "3   f8dea4a9-abf1-4218-8361-63d3b795a752   CatBoost     logloss      0.524141   \n",
       "4   74bc7a7d-09b4-4ece-9035-686a33612bed   CatBoost     logloss      0.320242   \n",
       "5   c1a40c3a-0fe6-466e-9bda-e0b356254f71   CatBoost     logloss      0.327601   \n",
       "6   fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79   CatBoost     logloss      0.312868   \n",
       "7   6b67440e-6001-4acc-bbef-bd0cf1052598   CatBoost     logloss      0.319430   \n",
       "8   0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2   CatBoost     logloss      0.308917   \n",
       "9   fe348a66-22c2-4ecd-9bd5-9293ace74ed3   CatBoost     logloss      0.522130   \n",
       "10  bee786ab-c29d-4404-b904-c1937330cca5    Xgboost     logloss      0.311373   \n",
       "11  a0ff281d-efc9-4593-ae0a-3973acc7badb    Xgboost     logloss      0.284231   \n",
       "12  ed0bdf42-6b57-42e3-aec7-105522f83f68    Xgboost     logloss      0.512420   \n",
       "13  277ca639-5908-4347-87f8-9ecabd1ad4a3    Xgboost     logloss      0.319838   \n",
       "14  30c6b1e4-16b2-4590-87ca-91814a83c091         RF     logloss      0.322010   \n",
       "15  cc9ed647-bbca-4ede-a85a-ce9ef001741d         RF     logloss      0.301817   \n",
       "16  eadcee3f-7982-48c6-8b82-11ad9cfdefe7         RF     logloss      0.311782   \n",
       "17  54aac109-abc7-4ae7-b613-601df85ea44e         RF     logloss      0.307726   \n",
       "18  0f8846d9-244a-43b1-881e-bf8503bdb181         RF     logloss      0.303742   \n",
       "19  402ea7a2-6737-46cc-aab5-f743e4bdea4f         RF     logloss      0.302291   \n",
       "20  b7ab3204-44a4-4d25-b8ae-a6b500689a55         RF     logloss      0.302479   \n",
       "21  c3a0e608-1ff3-4d35-9eeb-84003d8ae3be         RF     logloss      0.305103   \n",
       "22  d7d81758-0ec0-4dac-90fb-57c74f8ab348         RF     logloss      0.300943   \n",
       "23  7b661734-ac7d-45ba-bf71-dbe1618f654a         RF     logloss      0.305904   \n",
       "24  7d7fbf29-a00b-4501-95a1-b63a8bb15457   LightGBM     logloss      0.526857   \n",
       "25  baf6f37a-41fb-4d9b-a13c-30ef9310f217         NN     logloss      0.440417   \n",
       "26  b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f         NN     logloss      0.443137   \n",
       "27  1d9ec79a-38eb-4dfc-945a-80f9e31beb1b   CatBoost     logloss      0.292472   \n",
       "28  06799b1e-972d-4eb5-9271-fd8413d489ec   CatBoost     logloss      0.300852   \n",
       "29  21ad6589-1c25-470a-8436-e344c00320c4         RF     logloss      0.301417   \n",
       "30  e07d516d-33c1-41d0-9ee5-b581616c4edb         RF     logloss      0.301081   \n",
       "31  83879b2c-b0fb-43f3-a282-be9419c48ee3         RF     logloss      0.302998   \n",
       "32  d8aa6b74-2894-4deb-bae8-b0d1439f71a1         RF     logloss      0.301372   \n",
       "33  08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea         RF     logloss      0.303924   \n",
       "34  2491de8b-1bc0-4964-be8d-1287b9090e29   CatBoost     logloss      0.292929   \n",
       "35  12f08b01-6f52-4014-bb5e-305780191a96   Ensemble     logloss      0.284155   \n",
       "\n",
       "    train_time  \n",
       "0     8.698081  \n",
       "1     7.781434  \n",
       "2     6.723236  \n",
       "3     6.980789  \n",
       "4     8.472449  \n",
       "5     6.673680  \n",
       "6     8.777036  \n",
       "7     8.268195  \n",
       "8     8.730249  \n",
       "9     7.523533  \n",
       "10   19.979386  \n",
       "11   44.590009  \n",
       "12    6.050704  \n",
       "13   28.923516  \n",
       "14    6.638074  \n",
       "15    6.411106  \n",
       "16    6.589697  \n",
       "17    6.337984  \n",
       "18    6.505825  \n",
       "19    6.394600  \n",
       "20    6.355931  \n",
       "21    6.428447  \n",
       "22    6.389448  \n",
       "23    6.351137  \n",
       "24   74.563801  \n",
       "25   37.227938  \n",
       "26   49.542672  \n",
       "27    7.463734  \n",
       "28    7.930739  \n",
       "29    6.422292  \n",
       "30    6.438730  \n",
       "31    6.355017  \n",
       "32    6.374434  \n",
       "33    6.363737  \n",
       "34    7.383213  \n",
       "35   63.892297  "
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "automl.get_leaderboard()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
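  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO (unfinished notes): cases still to check,\n",
    "# presumably for input data not seen during training - confirm:\n",
    "# 1. new missing value\n",
    "# 2. new categorical value\n",
    "# 3. (not yet specified)"
   ]
  },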
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "venv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}