mmerce/bigmler.ipynb

## bigmler.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " # The Diabetes dataset: BigMLer example  ![Python inside](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTSqpDdjXT-bX4POiYNUIj1gSfj9IjKnMklEWetncCQqsT2uWl6) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note**: Check the **quick start** section of [BigMLer's documentation](http://bigmler.readthedocs.org/en/latest/#quick-start) to know how to **install** and set your **credentials** before using BigMLer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating a prediction\n",
    "The **prediction workflow** is just one line in BigMLer:\n",
    "![prediction workflow](https://cloud.githubusercontent.com/assets/722203/11405481/5f8f4d00-93a8-11e5-93df-723b1eeb5abb.png)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2015-11-24 19:44:08] Retrieving project info.\n",
      "[2015-11-24 19:44:09] Creating source.\n",
      "[2015-11-24 19:44:11] Source created: https://bigml.com/dashboard/source/5654aff93faa625ace0006b4\n",
      "[2015-11-24 19:44:11] Creating dataset.\n",
      "[2015-11-24 19:44:15] Dataset created: https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:44:15] Creating model.\n",
      "[2015-11-24 19:44:18] Model created: https://bigml.com/dashboard/model/5654afff8ea1a454e70008a7\n",
      "[2015-11-24 19:44:18] Retrieving model. https://bigml.com/dashboard/model/5654afff8ea1a454e70008a7\n",
      "[2015-11-24 19:44:18] Creating local predictions.\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes\n",
      "  ├─bigmler_sessions\n",
      "  ├─predictions.csv\n",
      "  ├─models\n",
      "  ├─dataset\n",
      "  └─source\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler --train diabetes.csv --name \"Diabetes dataset\" \\\n",
    "         --test diabetes_test.csv \\\n",
    "         --tag \"PyConES\" --tag \"diabetes\" \\\n",
    "         --project \"BigMLer in PyConES\" \\\n",
    "         --prediction-info full \\\n",
    "         --prediction-header \\\n",
    "         --output-dir diabetes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnancies</th>\n",
       "      <th>plasma glucose</th>\n",
       "      <th>blood pressure</th>\n",
       "      <th>triceps skin thickness</th>\n",
       "      <th>insulin</th>\n",
       "      <th>bmi</th>\n",
       "      <th>diabetes pedigree</th>\n",
       "      <th>age</th>\n",
       "      <th>diabetes</th>\n",
       "      <th>confidence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>197</td>\n",
       "      <td>70</td>\n",
       "      <td>45</td>\n",
       "      <td>543</td>\n",
       "      <td>30.5</td>\n",
       "      <td>158</td>\n",
       "      <td>53</td>\n",
       "      <td>True</td>\n",
       "      <td>0.83182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>125</td>\n",
       "      <td>96</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>232</td>\n",
       "      <td>54</td>\n",
       "      <td>False</td>\n",
       "      <td>0.43849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>110</td>\n",
       "      <td>92</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>37.6</td>\n",
       "      <td>191</td>\n",
       "      <td>30</td>\n",
       "      <td>True</td>\n",
       "      <td>0.34237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>126</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>235</td>\n",
       "      <td>39.3</td>\n",
       "      <td>704</td>\n",
       "      <td>27</td>\n",
       "      <td>False</td>\n",
       "      <td>0.85688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>99</td>\n",
       "      <td>84</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>35.4</td>\n",
       "      <td>388</td>\n",
       "      <td>50</td>\n",
       "      <td>True</td>\n",
       "      <td>0.56551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>7</td>\n",
       "      <td>196</td>\n",
       "      <td>90</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>39.8</td>\n",
       "      <td>451</td>\n",
       "      <td>41</td>\n",
       "      <td>True</td>\n",
       "      <td>0.83182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>9</td>\n",
       "      <td>119</td>\n",
       "      <td>80</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>263</td>\n",
       "      <td>29</td>\n",
       "      <td>True</td>\n",
       "      <td>0.34237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>11</td>\n",
       "      <td>143</td>\n",
       "      <td>94</td>\n",
       "      <td>33</td>\n",
       "      <td>146</td>\n",
       "      <td>36.6</td>\n",
       "      <td>254</td>\n",
       "      <td>51</td>\n",
       "      <td>True</td>\n",
       "      <td>0.64566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10</td>\n",
       "      <td>125</td>\n",
       "      <td>70</td>\n",
       "      <td>26</td>\n",
       "      <td>115</td>\n",
       "      <td>31.1</td>\n",
       "      <td>205</td>\n",
       "      <td>41</td>\n",
       "      <td>True</td>\n",
       "      <td>0.75750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>7</td>\n",
       "      <td>147</td>\n",
       "      <td>76</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>39.4</td>\n",
       "      <td>257</td>\n",
       "      <td>43</td>\n",
       "      <td>True</td>\n",
       "      <td>0.64566</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnancies  plasma glucose  blood pressure  triceps skin thickness  \\\n",
       "0            2             197              70                      45   \n",
       "1            8             125              96                     NaN   \n",
       "2            4             110              92                     NaN   \n",
       "3            3             126             NaN                     NaN   \n",
       "4            8              99              84                     NaN   \n",
       "5            7             196              90                     NaN   \n",
       "6            9             119              80                      35   \n",
       "7           11             143              94                      33   \n",
       "8           10             125              70                      26   \n",
       "9            7             147              76                       0   \n",
       "\n",
       "   insulin   bmi  diabetes pedigree  age diabetes  confidence  \n",
       "0      543  30.5                158   53     True     0.83182  \n",
       "1        0   0.0                232   54    False     0.43849  \n",
       "2        0  37.6                191   30     True     0.34237  \n",
       "3      235  39.3                704   27    False     0.85688  \n",
       "4        0  35.4                388   50     True     0.56551  \n",
       "5        0  39.8                451   41     True     0.83182  \n",
       "6        0  29.0                263   29     True     0.34237  \n",
       "7      146  36.6                254   51     True     0.64566  \n",
       "8      115  31.1                205   41     True     0.75750  \n",
       "9        0  39.4                257   43     True     0.64566  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import display, IFrame, JSON\n",
    "PREDICTIONS_FILE = 'diabetes/predictions.csv'\n",
    "display(pd.read_csv(PREDICTIONS_FILE, nrows=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The files contained in the output folder store the IDs of the resources created in the BigMLer command."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dataset/5654affc3faa625ace0006b9\r\n"
     ]
    }
   ],
   "source": [
    "!cat ./diabetes/dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating an evaluation\n",
    "BigMLer can use existing resources too. For instance, this is the **evaluation workflow**, where the original dataset is split in training/test datasets to evaluate your models.\n",
    "![evaluation workflow](https://cloud.githubusercontent.com/assets/722203/11405488/6b1f334c-93a8-11e5-9771-058cfc9d4305.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2015-11-24 19:44:28] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:44:29] Creating dataset.\n",
      "[2015-11-24 19:44:32] Dataset created: https://bigml.com/dashboard/dataset/5654b00d8ea1a45af90003a7\n",
      "[2015-11-24 19:44:32] Creating dataset.\n",
      "[2015-11-24 19:44:35] Dataset created: https://bigml.com/dashboard/dataset/5654b0108ea1a454e70008af\n",
      "[2015-11-24 19:44:35] Creating model.\n",
      "[2015-11-24 19:44:40] Model created: https://bigml.com/dashboard/model/5654b0133faa62548800033a\n",
      "[2015-11-24 19:44:40] Creating evaluations.\n",
      "[2015-11-24 19:44:44] Evaluation created: https://bigml.com/dashboard/evaluation/5654b01a8ea1a45af90003b2\n",
      "[2015-11-24 19:44:44] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b01a8ea1a45af90003b2\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes_eval\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─dataset_train\n",
      "  ├─dataset_test\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  └─evaluations\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler --datasets diabetes/dataset \\\n",
    "         --name \"Diabetes split\" \\\n",
    "         --test-split 0.2 \\\n",
    "         --seed \"PyConES 2015\" \\\n",
    "         --tag \"PyConES\" --tag \"diabetes\" \\\n",
    "         --output-dir diabetes_eval \\\n",
    "         --evaluate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The evaluation results are stored in human-readable format in the **evaluation.txt** file and in JSON in **evaluation.json**. The evaluation file shows the metrics for the *model* predictions, but also for the *mode* prediction or a *random* prediction to help comparing them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{   'class_names': ['false', 'true'],\r\n",
      "    'mode': {   'accuracy': 0.6,\r\n",
      "                'average_f_measure': 0.375,\r\n",
      "                'average_phi': 0,\r\n",
      "                'average_precision': 0.3,\r\n",
      "                'average_recall': 0.5,\r\n",
      "                'confusion_matrix': [[24, 0], [16, 0]],\r\n",
      "                'per_class_statistics': [   {   'accuracy': 0.6,\r\n",
      "                                                'class_name': 'false',\r\n",
      "                                                'f_measure': 0.7499999999999999,\r\n",
      "                                                'phi_coefficient': 0,\r\n",
      "                                                'precision': 0.6,\r\n",
      "                                                'present_in_test_data': True,\r\n",
      "                                                'recall': 1.0},\r\n",
      "                                            {   'accuracy': 0.6,\r\n",
      "                                                'class_name': 'true',\r\n",
      "                                                'f_measure': 0,\r\n",
      "                                                'phi_coefficient': 0,\r\n",
      "                                                'precision': 0,\r\n",
      "                                                'present_in_test_data': True,\r\n",
      "                                                'recall': 0.0}]},\r\n",
      "    'model': {   'accuracy': 0.625,\r\n",
      "                 'average_f_measure': 0.58071,\r\n",
      "                 'average_phi': 0.18286,\r\n",
      "                 'average_precision': 0.60031,\r\n",
      "                 'average_recall': 0.58333,\r\n",
      "                 'confusion_matrix': [[19, 5], [10, 6]],\r\n",
      "                 'per_class_statistics': [   {   'accuracy': 0.625,\r\n",
      "                                                 'class_name': 'false',\r\n",
      "                                                 'f_measure': 0.7169811320754716,\r\n",
      "                                                 'phi_coefficient': 0.18286012835299778,\r\n",
      "                                                 'precision': 0.6551724137931034,\r\n",
      "                                                 'present_in_test_data': True,\r\n",
      "                                                 'recall': 0.7916666666666666},\r\n",
      "                                             {   'accuracy': 0.625,\r\n",
      "                                                 'class_name': 'true',\r\n",
      "                                                 'f_measure': 0.4444444444444444,\r\n",
      "                                                 'phi_coefficient': 0.18286012835299778,\r\n",
      "                                                 'precision': 0.5454545454545454,\r\n",
      "                                                 'present_in_test_data': True,\r\n",
      "                                                 'recall': 0.375}]},\r\n",
      "    'random': {   'accuracy': 0.6,\r\n",
      "                  'average_f_measure': 0.59596,\r\n",
      "                  'average_phi': 0.20412,\r\n",
      "                  'average_precision': 0.6,\r\n",
      "                  'average_recall': 0.60417,\r\n",
      "                  'confusion_matrix': [[14, 10], [6, 10]],\r\n",
      "                  'per_class_statistics': [   {   'accuracy': 0.6,\r\n",
      "                                                  'class_name': 'false',\r\n",
      "                                                  'f_measure': 0.6363636363636365,\r\n",
      "                                                  'phi_coefficient': 0.2041241452319315,\r\n",
      "                                                  'precision': 0.7,\r\n",
      "                                                  'present_in_test_data': True,\r\n",
      "                                                  'recall': 0.5833333333333334},\r\n",
      "                                              {   'accuracy': 0.6,\r\n",
      "                                                  'class_name': 'true',\r\n",
      "                                                  'f_measure': 0.5555555555555556,\r\n",
      "                                                  'phi_coefficient': 0.2041241452319315,\r\n",
      "                                                  'precision': 0.5,\r\n",
      "                                                  'present_in_test_data': True,\r\n",
      "                                                  'recall': 0.625}]}}\r\n"
     ]
    }
   ],
   "source": [
    "!cat ./diabetes_eval/evaluation.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Creating a k-fold cross-validation\n",
    "The higher level of abstraction allows the user to build more complex calls. For instance, asking for **k-fold cross-validation.**\n",
    "![k-fold cross-validation workflow](https://cloud.githubusercontent.com/assets/722203/11405551/bd315f2a-93a8-11e5-93e7-b9186b2f697e.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating the kfold datasets............\n",
      "[2015-11-24 19:44:52] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:44:53] Creating dataset.\n",
      "[2015-11-24 19:44:56] Dataset created: https://bigml.com/dashboard/dataset/5654b0253faa6254d30005e3\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654b0253faa6254d30005e3\n",
      "  └─dataset_gen\n",
      "\n",
      "[2015-11-24 19:44:56] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:44:56] Creating dataset.\n",
      "[2015-11-24 19:44:59] Dataset created: https://bigml.com/dashboard/dataset/5654b0293faa6254d30005e7\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654b0253faa6254d30005e3\n",
      "  ├─dataset_5654b0293faa6254d30005e7\n",
      "  └─dataset_gen\n",
      "\n",
      "[2015-11-24 19:44:59] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:44:59] Creating dataset.\n",
      "[2015-11-24 19:45:02] Dataset created: https://bigml.com/dashboard/dataset/5654b02c3faa625ace0006c7\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654b02c3faa625ace0006c7\n",
      "  ├─dataset_5654b0253faa6254d30005e3\n",
      "  ├─dataset_5654b0293faa6254d30005e7\n",
      "  └─dataset_gen\n",
      "\n",
      "[2015-11-24 19:45:02] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:45:03] Creating dataset.\n",
      "[2015-11-24 19:45:06] Dataset created: https://bigml.com/dashboard/dataset/5654b02f3faa6252120002f2\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654b02f3faa6252120002f2\n",
      "  ├─dataset_5654b02c3faa625ace0006c7\n",
      "  ├─dataset_5654b0253faa6254d30005e3\n",
      "  ├─dataset_5654b0293faa6254d30005e7\n",
      "  └─dataset_gen\n",
      "\n",
      "[2015-11-24 19:45:06] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:45:07] Creating dataset.\n",
      "[2015-11-24 19:45:10] Dataset created: https://bigml.com/dashboard/dataset/5654b0343faa62548800033f\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654b02f3faa6252120002f2\n",
      "  ├─dataset_5654b0343faa62548800033f\n",
      "  ├─dataset_5654b02c3faa625ace0006c7\n",
      "  ├─dataset_5654b0253faa6254d30005e3\n",
      "  ├─dataset_5654b0293faa6254d30005e7\n",
      "  └─dataset_gen\n",
      "\n",
      "Creating the kfold evaluations.........\n",
      "[2015-11-24 19:45:10] Creating models.\n",
      "[2015-11-24 19:45:47] Creating evaluations.\n",
      "[2015-11-24 19:46:01] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b05d3faa625488000346\n",
      "[2015-11-24 19:46:03] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b0623faa625ace0006e6\n",
      "[2015-11-24 19:46:05] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b0648ea1a454e70008c4\n",
      "[2015-11-24 19:46:07] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b0678ea1a451410002f8\n",
      "[2015-11-24 19:46:09] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b0698ea1a4549d000338\n",
      "\n",
      "Generated files:\n",
      "\n",
      " k_fold0\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation__5654b05d3faa625488000346.txt\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654b05d3faa625488000346.json\n",
      "  ├─evaluation__5654b0623faa625ace0006e6.txt\n",
      "  ├─evaluation__5654b0623faa625ace0006e6.json\n",
      "  ├─evaluation__5654b0698ea1a4549d000338.txt\n",
      "  ├─evaluation__5654b0648ea1a454e70008c4.txt\n",
      "  ├─evaluation.json\n",
      "  ├─evaluation__5654b0648ea1a454e70008c4.json\n",
      "  ├─models\n",
      "  ├─evaluation__5654b0678ea1a451410002f8.txt\n",
      "  ├─evaluations\n",
      "  ├─evaluation__5654b0678ea1a451410002f8.json\n",
      "  └─evaluation__5654b0698ea1a4549d000338.json\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler analyze --cross-validation \\\n",
    "                 --dataset $(cat diabetes/dataset) \\\n",
    "                 --k-folds 5 \\\n",
    "                 --output-dir diabetes_cv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{   'class_names': ['false', 'true'],\r\n",
      "    'mode': {   'accuracy_standard_deviation': 0.07314369419163898,\r\n",
      "                'average_accuracy': 0.595,\r\n",
      "                'average_f_measure': 0.371684,\r\n",
      "                'average_f_measure_standard_deviation': 0.029637730412432065,\r\n",
      "                'average_phi': 0.0,\r\n",
      "                'average_phi_standard_deviation': 0.0,\r\n",
      "                'average_precision': 0.2975,\r\n",
      "                'average_precision_standard_deviation': 0.03657184709581949,\r\n",
      "                'average_recall': 0.5,\r\n",
      "                'average_recall_standard_deviation': 0.0,\r\n",
      "                'confusion_matrix': [[119, 0], [81, 0]],\r\n",
      "                'per_class_statistics': [   {   'accuracy_standard_deviation': 0.07314369419163898,\r\n",
      "                                                'average_accuracy': 0.595,\r\n",
      "                                                'average_f_measure': 0.7433649844656594,\r\n",
      "                                                'average_phi_coefficient': 0.0,\r\n",
      "                                                'average_precision': 0.595,\r\n",
      "                                                'average_recall': 1.0,\r\n",
      "                                                'class_name': 'false',\r\n",
      "                                                'f_measure_standard_deviation': 0.059270072942528985,\r\n",
      "                                                'occurrences': 5,\r\n",
      "                                                'phi_coefficient_standard_deviation': 0.0,\r\n",
      "                                                'precision_standard_deviation': 0.07314369419163898,\r\n",
      "                                                'present_in_test_data': True,\r\n",
      "                                                'recall_standard_deviation': 0.0},\r\n",
      "                                            {   'accuracy_standard_deviation': 0.07314369419163898,\r\n",
      "                                                'average_accuracy': 0.595,\r\n",
      "                                                'average_f_measure': 0.0,\r\n",
      "                                                'average_phi_coefficient': 0.0,\r\n",
      "                                                'average_precision': 0.0,\r\n",
      "                                                'average_recall': 0.0,\r\n",
      "                                                'class_name': 'true',\r\n",
      "                                                'f_measure_standard_deviation': 0.0,\r\n",
      "                                                'occurrences': 5,\r\n",
      "                                                'phi_coefficient_standard_deviation': 0.0,\r\n",
      "                                                'precision_standard_deviation': 0.0,\r\n",
      "                                                'present_in_test_data': True,\r\n",
      "                                                'recall_standard_deviation': 0.0}]},\r\n",
      "    'model': {   'accuracy_standard_deviation': 0.04636809247747853,\r\n",
      "                 'average_accuracy': 0.635,\r\n",
      "                 'average_f_measure': 0.6105100000000001,\r\n",
      "                 'average_f_measure_standard_deviation': 0.0631546026826232,\r\n",
      "                 'average_phi': 0.23418199999999997,\r\n",
      "                 'average_phi_standard_deviation': 0.12006536759615571,\r\n",
      "                 'average_precision': 0.618002,\r\n",
      "                 'average_precision_standard_deviation': 0.05829306799268675,\r\n",
      "                 'average_recall': 0.616562,\r\n",
      "                 'average_recall_standard_deviation': 0.06214006771801911,\r\n",
      "                 'confusion_matrix': [[84, 35], [38, 43]],\r\n",
      "                 'per_class_statistics': [   {   'accuracy_standard_deviation': 0.04636809247747853,\r\n",
      "                                                 'average_accuracy': 0.635,\r\n",
      "                                                 'average_f_measure': 0.6964194386027269,\r\n",
      "                                                 'average_phi_coefficient': 0.23418084455531116,\r\n",
      "                                                 'average_precision': 0.692809364548495,\r\n",
      "                                                 'average_recall': 0.7085810330020856,\r\n",
      "                                                 'class_name': 'false',\r\n",
      "                                                 'f_measure_standard_deviation': 0.028546184570343625,\r\n",
      "                                                 'occurrences': 5,\r\n",
      "                                                 'phi_coefficient_standard_deviation': 0.12006661387806686,\r\n",
      "                                                 'precision_standard_deviation': 0.06427446293868262,\r\n",
      "                                                 'present_in_test_data': True,\r\n",
      "                                                 'recall_standard_deviation': 0.05914282309287293},\r\n",
      "                                             {   'accuracy_standard_deviation': 0.04636809247747853,\r\n",
      "                                                 'average_accuracy': 0.635,\r\n",
      "                                                 'average_f_measure': 0.5245995320936312,\r\n",
      "                                                 'average_phi_coefficient': 0.23418084455531116,\r\n",
      "                                                 'average_precision': 0.5431932773109244,\r\n",
      "                                                 'average_recall': 0.5245421245421245,\r\n",
      "                                                 'class_name': 'true',\r\n",
      "                                                 'f_measure_standard_deviation': 0.1168140122980512,\r\n",
      "                                                 'occurrences': 5,\r\n",
      "                                                 'phi_coefficient_standard_deviation': 0.12006661387806686,\r\n",
      "                                                 'precision_standard_deviation': 0.11809351063640704,\r\n",
      "                                                 'present_in_test_data': True,\r\n",
      "                                                 'recall_standard_deviation': 0.14052573019337755}]},\r\n",
      "    'random': {   'accuracy_standard_deviation': 0.09082951062292476,\r\n",
      "                  'average_accuracy': 0.525,\r\n",
      "                  'average_f_measure': 0.517402,\r\n",
      "                  'average_f_measure_standard_deviation': 0.09204940877593945,\r\n",
      "                  'average_phi': 0.068816,\r\n",
      "                  'average_phi_standard_deviation': 0.18460109995338597,\r\n",
      "                  'average_precision': 0.535818,\r\n",
      "                  'average_precision_standard_deviation': 0.09407348157690348,\r\n",
      "                  'average_recall': 0.533046,\r\n",
      "                  'average_recall_standard_deviation': 0.09061168436796657,\r\n",
      "                  'confusion_matrix': [[60, 59], [36, 45]],\r\n",
      "                  'per_class_statistics': [   {   'accuracy_standard_deviation': 0.09082951062292476,\r\n",
      "                                                  'average_accuracy': 0.525,\r\n",
      "                                                  'average_f_measure': 0.557698781954101,\r\n",
      "                                                  'average_phi_coefficient': 0.06881400385377819,\r\n",
      "                                                  'average_precision': 0.6344761904761905,\r\n",
      "                                                  'average_recall': 0.5136367180577708,\r\n",
      "                                                  'class_name': 'false',\r\n",
      "                                                  'f_measure_standard_deviation': 0.08451214683326931,\r\n",
      "                                                  'occurrences': 5,\r\n",
      "                                                  'phi_coefficient_standard_deviation': 0.1846005188541411,\r\n",
      "                                                  'precision_standard_deviation': 0.10513250466632557,\r\n",
      "                                                  'present_in_test_data': True,\r\n",
      "                                                  'recall_standard_deviation': 0.11645837783449035},\r\n",
      "                                              {   'accuracy_standard_deviation': 0.09082951062292476,\r\n",
      "                                                  'average_accuracy': 0.525,\r\n",
      "                                                  'average_f_measure': 0.47711064129668784,\r\n",
      "                                                  'average_phi_coefficient': 0.06881400385377819,\r\n",
      "                                                  'average_precision': 0.4371578947368421,\r\n",
      "                                                  'average_recall': 0.5524542124542124,\r\n",
      "                                                  'class_name': 'true',\r\n",
      "                                                  'f_measure_standard_deviation': 0.11898723769246959,\r\n",
      "                                                  'occurrences': 5,\r\n",
      "                                                  'phi_coefficient_standard_deviation': 0.1846005188541411,\r\n",
      "                                                  'precision_standard_deviation': 0.13348248352200454,\r\n",
      "                                                  'present_in_test_data': True,\r\n",
      "                                                  'recall_standard_deviation': 0.148423968263044}]}}\r\n"
     ]
    }
   ],
   "source": [
    "!cat ./diabetes_cv/test/k_fold0/evaluation.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Removing anomalies\n",
    "We can also get rid of outliers by detecting the anomalies in the dataset to generate an **anomaly-free dataset**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2015-11-24 19:46:25] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 19:46:26] Creating anomaly detector.\n",
      "[2015-11-24 19:46:48] Anomaly created: https://bigml.com/dashboard/anomaly/5654b0823faa625ace0006fb\n",
      "[2015-11-24 19:46:48] Creating dataset.\n",
      "[2015-11-24 19:46:52] Dataset created: https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes_anomaly\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_gen\n",
      "  └─anomalies\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler anomaly --datasets diabetes/dataset \\\n",
    "                 --name \"Diabetes anomaly\" \\\n",
    "                 --seed \"PyConES 2015\" \\\n",
    "                 --tag \"PyConES\" --tag \"diabetes\" \\\n",
    "                 --top-n 1 \\\n",
    "                 --forest-size 50 \\\n",
    "                 --anomalies-dataset out \\\n",
    "                 --output-dir diabetes_anomaly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dataset/5654b0998ea1a407ae005f09\r\n"
     ]
    }
   ],
   "source": [
    "!cat diabetes_anomaly/dataset_gen"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The model built from the anomaly-free dataset performs better than the original one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating the kfold datasets............\n",
      "[2015-11-24 19:47:02] Retrieving dataset. https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "[2015-11-24 19:47:02] Creating dataset.\n",
      "[2015-11-24 19:47:05] Dataset created: https://bigml.com/dashboard/dataset/5654b0a73faa6254d3000605\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654b0998ea1a407ae005f09\n",
      "  ├─dataset_gen\n",
      "  └─dataset_5654b0a73faa6254d3000605\n",
      "\n",
      "[2015-11-24 19:47:05] Retrieving dataset. https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "[2015-11-24 19:47:06] Creating dataset.\n",
      "[2015-11-24 19:48:52] Dataset created: https://bigml.com/dashboard/dataset/5654b0aa8ea1a407ae005f12\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654b0aa8ea1a407ae005f12\n",
      "  ├─dataset_5654b0998ea1a407ae005f09\n",
      "  ├─dataset_gen\n",
      "  └─dataset_5654b0a73faa6254d3000605\n",
      "\n",
      "[2015-11-24 19:48:52] Retrieving dataset. https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "[2015-11-24 19:48:53] Creating dataset.\n",
      "[2015-11-24 19:48:56] Dataset created: https://bigml.com/dashboard/dataset/5654b1158ea1a454e70008e5\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654b0aa8ea1a407ae005f12\n",
      "  ├─dataset_5654b0998ea1a407ae005f09\n",
      "  ├─dataset_5654b1158ea1a454e70008e5\n",
      "  ├─dataset_gen\n",
      "  └─dataset_5654b0a73faa6254d3000605\n",
      "\n",
      "[2015-11-24 19:48:56] Retrieving dataset. https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "[2015-11-24 19:48:56] Creating dataset.\n",
      "[2015-11-24 19:49:00] Dataset created: https://bigml.com/dashboard/dataset/5654b1193faa625488000358\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654b0aa8ea1a407ae005f12\n",
      "  ├─dataset_5654b0998ea1a407ae005f09\n",
      "  ├─dataset_5654b1158ea1a454e70008e5\n",
      "  ├─dataset_5654b1193faa625488000358\n",
      "  ├─dataset_gen\n",
      "  └─dataset_5654b0a73faa6254d3000605\n",
      "\n",
      "[2015-11-24 19:49:00] Retrieving dataset. https://bigml.com/dashboard/dataset/5654b0998ea1a407ae005f09\n",
      "[2015-11-24 19:49:00] Creating dataset.\n",
      "[2015-11-24 19:49:03] Dataset created: https://bigml.com/dashboard/dataset/5654b11d8ea1a45af90003d4\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654b0aa8ea1a407ae005f12\n",
      "  ├─dataset_5654b0998ea1a407ae005f09\n",
      "  ├─dataset_5654b11d8ea1a45af90003d4\n",
      "  ├─dataset_5654b1158ea1a454e70008e5\n",
      "  ├─dataset_5654b1193faa625488000358\n",
      "  ├─dataset_gen\n",
      "  └─dataset_5654b0a73faa6254d3000605\n",
      "\n",
      "Creating the kfold evaluations.........\n",
      "[2015-11-24 19:49:03] Creating models.\n",
      "[2015-11-24 19:49:45] Creating evaluations.\n",
      "[2015-11-24 19:50:04] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b14a8ea1a45141000318\n",
      "[2015-11-24 19:50:06] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b14f3faa625212000317\n",
      "[2015-11-24 19:50:08] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b1548ea1a454e70008fa\n",
      "[2015-11-24 19:50:10] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b1568ea1a45af90003ed\n",
      "[2015-11-24 19:50:12] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654b15b8ea1a454e70008ff\n",
      "\n",
      "Generated files:\n",
      "\n",
      " k_fold0\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654b1568ea1a45af90003ed.txt\n",
      "  ├─evaluation__5654b14f3faa625212000317.txt\n",
      "  ├─evaluation__5654b14a8ea1a45141000318.json\n",
      "  ├─evaluation__5654b1548ea1a454e70008fa.txt\n",
      "  ├─evaluation__5654b15b8ea1a454e70008ff.txt\n",
      "  ├─evaluation.json\n",
      "  ├─evaluation__5654b14f3faa625212000317.json\n",
      "  ├─models\n",
      "  ├─evaluation__5654b1548ea1a454e70008fa.json\n",
      "  ├─evaluations\n",
      "  ├─evaluation__5654b1568ea1a45af90003ed.json\n",
      "  ├─evaluation__5654b15b8ea1a454e70008ff.json\n",
      "  └─evaluation__5654b14a8ea1a45141000318.txt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler analyze --cross-validation \\\n",
    "                 --dataset $(cat diabetes_anomaly/dataset_gen) \\\n",
    "                 --k-folds 5 \\\n",
    "                 --output-dir diabetes_cv_anomaly_free"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[0m0.6688459999999998\u001b[0m\r\n"
     ]
    }
   ],
   "source": [
    "!cat ./diabetes_cv_anomaly_free/test/k_fold0/evaluation.json | jq .model.average_accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[0m0.635\u001b[0m\r\n"
     ]
    }
   ],
   "source": [
    "!cat ./diabetes_cv/test/k_fold0/evaluation.json | jq .model.average_accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Clustering your data\n",
    "We can cluster similar data points together and generate the dataset associated with each cluster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2015-11-24 20:16:57] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 20:16:57] Creating cluster.\n",
      "[2015-11-24 20:17:15] Cluster created: https://bigml.com/dashboard/cluster/5654b7aa8ea1a4549d000398\n",
      "[2015-11-24 20:17:15] Creating dataset.\n",
      "[2015-11-24 20:17:19] Dataset created: https://bigml.com/dashboard/dataset/5654b7bc3faa6254d30006b3\n",
      "[2015-11-24 20:17:19] Creating dataset.\n",
      "[2015-11-24 20:17:22] Dataset created: https://bigml.com/dashboard/dataset/5654b7c08ea1a454e70009cf\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes_cluster\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_cluster\n",
      "  └─clusters\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler cluster --datasets diabetes/dataset \\\n",
    "                 --name \"Diabetes cluster\" \\\n",
    "                 --seed \"PyConES 2015\" \\\n",
    "                 --tag \"PyConES\" --tag \"diabetes\" \\\n",
    "                 --cluster-datasets \\\n",
    "                 --output-dir diabetes_cluster"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reproducible resources\n",
    "We can also generate a script that reproduces how a dataset was created."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting reification for dataset/5654b3a73faa625ace0007cd\n",
      "\n",
      "[2015-11-24 20:17:25] Analyzing dataset/5654b3a73faa625ace0007cd.\n",
      "[2015-11-24 20:17:27] New origin found for dataset/5654b3a73faa625ace0007cd: cluster/5654b3978ea1a454e700098d\n",
      "[2015-11-24 20:17:27] Analyzing cluster/5654b3978ea1a454e700098d.\n",
      "[2015-11-24 20:17:27] New origin found for cluster/5654b3978ea1a454e700098d: dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 20:17:27] Analyzing dataset/5654affc3faa625ace0006b9.\n",
      "[2015-11-24 20:17:28] New origin found for dataset/5654affc3faa625ace0006b9: source/5654aff93faa625ace0006b4\n",
      "[2015-11-24 20:17:28] Analyzing source/5654aff93faa625ace0006b4.\n",
      "[2015-11-24 20:17:28] New origin found for source/5654aff93faa625ace0006b4: diabetes.csv\n",
      "\n",
      "Reification complete. See the results in diabetes_dataset_reified/reify.py\n",
      "\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes_dataset_reified\n",
      "  ├─bigmler_sessions\n",
      "  └─reify.py\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler reify --id $(head -n 1 diabetes_cluster/dataset_cluster) \\\n",
    "               --output-dir diabetes_dataset_reified"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Python code needed to reproduce the dataset can be found in the `reify.py` file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"\"\"Python code to reify dataset/5654b3a73faa625ace0007cd\r\n",
      "\r\n",
      "\"\"\"\r\n",
      "\r\n",
      "from bigml.api import BigML\r\n",
      "api = BigML()\r\n",
      "\r\n",
      "source1 = api.create_source(\"diabetes.csv\", \\\r\n",
      "    {'category': 12,\r\n",
      "     'description': 'Created using BigMLer',\r\n",
      "     'name': 'Diabetes dataset',\r\n",
      "     'project': 'project/56468dd2c6c19b6cf3004e1e',\r\n",
      "     'tags': ['PyConES', 'diabetes', 'BigMLer', 'BigMLer_TueNov2415_194408']})\r\n",
      "api.ok(source1)\r\n",
      "\r\n",
      "dataset1 = api.create_dataset(source1, \\\r\n",
      "    {'name': 'Diabetes dataset',\r\n",
      "     'tags': ['PyConES', 'diabetes', 'BigMLer', 'BigMLer_TueNov2415_194408']})\r\n",
      "api.ok(dataset1)\r\n",
      "\r\n",
      "cluster1 = api.create_cluster(dataset1, \\\r\n",
      "    {'cluster_seed': 'BigML, Machine Learning made easy',\r\n",
      "     'name': 'Diabetes cluster',\r\n",
      "     'seed': 'PyConES 2015',\r\n",
      "     'tags': ['PyConES', 'diabetes', 'BigMLer', 'BigMLer_TueNov2415_195934']})\r\n",
      "api.ok(cluster1)\r\n",
      "\r\n",
      "dataset2 = api.create_dataset(cluster1, \\\r\n",
      "    {'centroid': '000001',\r\n",
      "     'tags': ['PyConES', 'diabetes', 'BigMLer', 'BigMLer_TueNov2415_195934']})\r\n",
      "api.ok(dataset2)\r\n",
      "\r\n"
     ]
    }
   ],
   "source": [
    "!cat diabetes_dataset_reified/reify.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Model optimization: number of nodes\n",
    "We can also improve our models by finding the **number of nodes** that best optimizes a given evaluation metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating the kfold datasets............\n",
      "[2015-11-24 20:53:24] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 20:53:25] Creating dataset.\n",
      "[2015-11-24 20:53:28] Dataset created: https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654c0353faa625ace00089d\n",
      "  └─dataset_gen\n",
      "\n",
      "[2015-11-24 20:53:28] Retrieving dataset. https://bigml.com/dashboard/dataset/5654affc3faa625ace0006b9\n",
      "[2015-11-24 20:53:28] Creating dataset.\n",
      "[2015-11-24 20:53:31] Dataset created: https://bigml.com/dashboard/dataset/5654c0393faa625ace0008a3\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─dataset_5654c0393faa625ace0008a3\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_5654affc3faa625ace0006b9\n",
      "  ├─dataset_5654c0353faa625ace00089d\n",
      "  └─dataset_gen\n",
      "\n",
      "Creating the node threshold set..........\n",
      "[2015-11-24 20:53:31] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:53:31] Creating models.\n",
      "[2015-11-24 20:53:39] Creating evaluations.\n",
      "[2015-11-24 20:53:43] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0443faa62521200038d\n",
      "[2015-11-24 20:53:45] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0468ea1a454e7000aa3\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th3\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0468ea1a454e7000aa3.json\n",
      "  ├─evaluation__5654c0468ea1a454e7000aa3.txt\n",
      "  ├─evaluation__5654c0443faa62521200038d.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluations\n",
      "  └─evaluation__5654c0443faa62521200038d.txt\n",
      "\n",
      "New best node threshold: 3\n",
      "Accuracy = 68.00% (score = 0.6799999999999999)\n",
      "[2015-11-24 20:53:47] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:53:47] Creating models.\n",
      "[2015-11-24 20:53:57] Creating evaluations.\n",
      "[2015-11-24 20:54:01] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0568ea1a45af90004a9\n",
      "[2015-11-24 20:54:03] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0598ea1a454e7000ab0\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th13\n",
      "  ├─evaluation__5654c0598ea1a454e7000ab0.txt\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0568ea1a45af90004a9.json\n",
      "  ├─evaluation__5654c0568ea1a45af90004a9.txt\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluations\n",
      "  └─evaluation__5654c0598ea1a454e7000ab0.json\n",
      "\n",
      "New best node threshold: 13\n",
      "Accuracy = 69.00% (score = 0.69)\n",
      "[2015-11-24 20:54:05] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:54:05] Creating models.\n",
      "[2015-11-24 20:54:13] Creating evaluations.\n",
      "[2015-11-24 20:54:17] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0668ea1a407ae006007\n",
      "[2015-11-24 20:54:19] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0693faa625ace0008be\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th23\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation__5654c0693faa625ace0008be.json\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0668ea1a407ae006007.txt\n",
      "  ├─evaluation__5654c0693faa625ace0008be.txt\n",
      "  ├─evaluation__5654c0668ea1a407ae006007.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  └─evaluations\n",
      "\n",
      "[2015-11-24 20:54:21] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:54:22] Creating models.\n",
      "[2015-11-24 20:54:58] Creating evaluations.\n",
      "[2015-11-24 20:55:02] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0943faa625ace0008cb\n",
      "[2015-11-24 20:55:05] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0963faa625ace0008d1\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th33\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation__5654c0963faa625ace0008d1.json\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0963faa625ace0008d1.txt\n",
      "  ├─evaluation.json\n",
      "  ├─evaluation__5654c0943faa625ace0008cb.json\n",
      "  ├─models\n",
      "  ├─evaluation__5654c0943faa625ace0008cb.txt\n",
      "  └─evaluations\n",
      "\n",
      "[2015-11-24 20:55:07] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:55:07] Creating models.\n",
      "[2015-11-24 20:55:15] Creating evaluations.\n",
      "[2015-11-24 20:55:19] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0a58ea1a45af90004b3\n",
      "[2015-11-24 20:55:21] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0a78ea1a454e7000adf\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th43\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0a78ea1a454e7000adf.txt\n",
      "  ├─evaluation__5654c0a58ea1a45af90004b3.txt\n",
      "  ├─evaluation.json\n",
      "  ├─evaluation__5654c0a78ea1a454e7000adf.json\n",
      "  ├─models\n",
      "  ├─evaluations\n",
      "  └─evaluation__5654c0a58ea1a45af90004b3.json\n",
      "\n",
      "[2015-11-24 20:55:23] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:55:23] Creating models.\n",
      "[2015-11-24 20:55:38] Creating evaluations.\n",
      "[2015-11-24 20:55:45] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0bc8ea1a454e7000ae6\n",
      "[2015-11-24 20:55:47] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0c03faa6252120003a7\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th53\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation__5654c0c03faa6252120003a7.json\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0c03faa6252120003a7.txt\n",
      "  ├─evaluation__5654c0bc8ea1a454e7000ae6.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluation__5654c0bc8ea1a454e7000ae6.txt\n",
      "  └─evaluations\n",
      "\n",
      "[2015-11-24 20:55:49] Retrieving dataset. https://bigml.com/dashboard/dataset/5654c0353faa625ace00089d\n",
      "[2015-11-24 20:55:49] Creating models.\n",
      "[2015-11-24 20:55:57] Creating evaluations.\n",
      "[2015-11-24 20:56:01] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0ce3faa6251d9000566\n",
      "[2015-11-24 20:56:02] Retrieving evaluation. https://bigml.com/dashboard/evaluation/5654c0d08ea1a451410003bf\n",
      "\n",
      "Generated files:\n",
      "\n",
      " node_th63\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__5654c0d08ea1a451410003bf.txt\n",
      "  ├─evaluation__5654c0ce3faa6251d9000566.txt\n",
      "  ├─evaluation.json\n",
      "  ├─evaluation__5654c0ce3faa6251d9000566.json\n",
      "  ├─models\n",
      "  ├─evaluations\n",
      "  └─evaluation__5654c0d08ea1a451410003bf.json\n",
      "\n",
      "The best node threshold is: 13 \n",
      "Accuracy = 69.00%\n",
      "To create the final model with the entire dataset using the selected maximum number of nodes use:\n",
      "bigmler --dataset dataset/5654affc3faa625ace0006b9 --node-threshold 13\n",
      "\n",
      "To delete all the resources generated by this analyze subcommand use:\n",
      "bigmler delete --from-dir diabetes_nodes_selection\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler analyze --nodes \\\n",
    "                 --dataset $(cat diabetes/dataset) \\\n",
    "                 --k-folds 2  \\\n",
    "                 --optimize accuracy \\\n",
    "                 --max-nodes 100 \\\n",
    "                 --min-nodes 3 \\\n",
    "                 --nodes-step 10 \\\n",
    "                 --output-dir diabetes_nodes_selection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can then show the ROC-curve distribution of the generated models in a report."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!bigmler report --from-dir diabetes_nodes_selection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Model optimization: smart feature selection\n",
    "Or we can look for the **set of features** that generates the model that best optimizes a given evaluation metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2015-11-24 20:58:34] Retrieving project info.\n",
      "[2015-11-24 20:58:34] Creating project.\n",
      "[2015-11-24 20:58:35] Project \"PyConES 4-featured diabetes\" has been created.\n",
      "[2015-11-24 20:58:35] Creating source.\n",
      "[2015-11-24 20:58:37] Source created: https://bigml.com/dashboard/source/5654c16b8ea1a407ae00602d\n",
      "[2015-11-24 20:58:37] Creating dataset.\n",
      "[2015-11-24 20:58:40] Dataset created: https://bigml.com/dashboard/dataset/5654c16e8ea1a407ae006034\n",
      "\n",
      "Generated files:\n",
      "\n",
      " diabetes_4\n",
      "  ├─project\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset\n",
      "  └─source\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler --train data/diabetes.csv \\\n",
    "        --no-model \\\n",
    "        --name \"4-featured diabetes\" \\\n",
    "        --dataset-fields \\\n",
    "        \"plasma glucose,insulin,diabetes pedigree,diabetes\" \\\n",
    "        --output-dir diabetes_4 \\\n",
    "        --project \"PyConES 4-featured diabetes\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating the kfold datasets............\n",
      "[2015-11-19 20:18:56] Retrieving dataset. https://bigml.com/dashboard/dataset/564e207ac6c19b02530002e8\n",
      "[2015-11-19 20:18:56] Creating dataset.\n",
      "[2015-11-19 20:18:59] Dataset created: https://bigml.com/dashboard/dataset/564e20a1c6c19b02530002ec\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_gen\n",
      "  ├─dataset_564e207ac6c19b02530002e8\n",
      "  └─dataset_564e20a1c6c19b02530002ec\n",
      "\n",
      "[2015-11-19 20:18:59] Retrieving dataset. https://bigml.com/dashboard/dataset/564e207ac6c19b02530002e8\n",
      "[2015-11-19 20:19:00] Creating dataset.\n",
      "[2015-11-19 20:19:03] Dataset created: https://bigml.com/dashboard/dataset/564e20a4636e1c1c0a0001a8\n",
      "\n",
      "Generated files:\n",
      "\n",
      " test\n",
      "  ├─bigmler_sessions\n",
      "  ├─dataset_564e20a4636e1c1c0a0001a8\n",
      "  ├─dataset_gen\n",
      "  ├─dataset_564e207ac6c19b02530002e8\n",
      "  └─dataset_564e20a1c6c19b02530002ec\n",
      "\n",
      "Creating the best features set..........\n",
      "[2015-11-19 20:19:03] Creating models.\n",
      "[2015-11-19 20:19:11] Creating evaluations.\n",
      "[2015-11-19 20:19:15] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20b1636e1c1c07000254\n",
      "[2015-11-19 20:19:17] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20b3636e1c1c1200043f\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold1\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e20b1636e1c1c07000254.txt\n",
      "  ├─evaluation__564e20b1636e1c1c07000254.json\n",
      "  ├─evaluation__564e20b3636e1c1c1200043f.json\n",
      "  ├─evaluation__564e20b3636e1c1c1200043f.txt\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  └─evaluations\n",
      "\n",
      "[2015-11-19 20:19:19] Creating models.\n",
      "[2015-11-19 20:19:28] Creating evaluations.\n",
      "[2015-11-19 20:19:32] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20c1c6c19b0242000242\n",
      "[2015-11-19 20:19:34] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20c3636e1c1c12000446\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold2\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e20c3636e1c1c12000446.json\n",
      "  ├─evaluation__564e20c3636e1c1c12000446.txt\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluation__564e20c1c6c19b0242000242.json\n",
      "  ├─evaluations\n",
      "  └─evaluation__564e20c1c6c19b0242000242.txt\n",
      "\n",
      "[2015-11-19 20:19:36] Creating models.\n",
      "[2015-11-19 20:19:44] Creating evaluations.\n",
      "[2015-11-19 20:19:47] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20d1c6c19b02530002f5\n",
      "[2015-11-19 20:19:49] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20d3c6c19b02530002fa\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold3\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e20d1c6c19b02530002f5.txt\n",
      "  ├─evaluation__564e20d3c6c19b02530002fa.json\n",
      "  ├─evaluation__564e20d3c6c19b02530002fa.txt\n",
      "  ├─evaluation__564e20d1c6c19b02530002f5.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  └─evaluations\n",
      "\n",
      "New best state: ['plasma glucose']\n",
      "Precision = 61.20% (score = -0.3879999999999999)\n",
      "[2015-11-19 20:19:52] Creating models.\n",
      "[2015-11-19 20:19:59] Creating evaluations.\n",
      "[2015-11-19 20:20:04] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20e1636e1c1c0d0001a3\n",
      "[2015-11-19 20:20:06] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20e3c6c19b0253000313\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold4\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e20e3c6c19b0253000313.json\n",
      "  ├─evaluation__564e20e1636e1c1c0d0001a3.txt\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluation__564e20e3c6c19b0253000313.txt\n",
      "  ├─evaluation__564e20e1636e1c1c0d0001a3.json\n",
      "  └─evaluations\n",
      "\n",
      "[2015-11-19 20:20:08] Creating models.\n",
      "[2015-11-19 20:20:16] Creating evaluations.\n",
      "[2015-11-19 20:20:19] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20f1c6c19b0253000325\n",
      "[2015-11-19 20:20:21] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e20f3636e1c1c0700026d\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold5\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation__564e20f3636e1c1c0700026d.txt\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e20f1c6c19b0253000325.txt\n",
      "  ├─evaluation__564e20f1c6c19b0253000325.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluations\n",
      "  └─evaluation__564e20f3636e1c1c0700026d.json\n",
      "\n",
      "[2015-11-19 20:20:23] Creating models.\n",
      "[2015-11-19 20:20:34] Creating evaluations.\n",
      "[2015-11-19 20:20:39] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e2104c6c19b02440001f6\n",
      "[2015-11-19 20:20:41] Retrieving evaluation. https://bigml.com/dashboard/evaluation/564e2106636e1c1c0d0001ae\n",
      "\n",
      "Generated files:\n",
      "\n",
      " kfold6\n",
      "  ├─bigmler_sessions\n",
      "  ├─evaluation.txt\n",
      "  ├─evaluation__564e2106636e1c1c0d0001ae.txt\n",
      "  ├─evaluation__564e2104c6c19b02440001f6.json\n",
      "  ├─evaluation__564e2106636e1c1c0d0001ae.json\n",
      "  ├─evaluation.json\n",
      "  ├─models\n",
      "  ├─evaluation__564e2104c6c19b02440001f6.txt\n",
      "  └─evaluations\n",
      "\n",
      "The best feature subset is: plasma glucose \n",
      "Precision = 61.20%\n",
      "Evaluated 6/7 feature subsets\n",
      "\n",
      "To create the final model with the entire dataset using the selected feature subset use:\n",
      "bigmler --dataset dataset/564e207ac6c19b02530002e8 --model-fields=\"plasma glucose\"\n",
      "\n",
      "To delete all the resources generated by this analyze subcommand use:\n",
      "bigmler delete --from-dir diabetes_features_selection\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!bigmler analyze --features \\\n",
    "                --dataset $(cat diabetes_4/dataset) \\\n",
    "                --k-folds 2  \\\n",
    "                --staleness 2 \\\n",
    "                --optimize precision \\\n",
    "                --penalty 1 \\\n",
    "                --output-dir diabetes_features_selection"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

## diabetes.csv

          
            pregnancies
            plasma glucose
            blood pressure
            triceps skin thickness
            insulin
            bmi
            diabetes pedigree
            age
            diabetes

            
              6
              148
              72
              35
              0
              33.6
              0.627
              50
              true

            
              1
              85
              66
              29
              0
              26.6
              0.351
              31
              false

            
              8
              183
              64
              0
              0
              23.3
              0.672
              32
              true

            
              1
              89
              66
              23
              94
              28.1
              0.167
              21
              false

            
              0
              137
              40
              35
              168
              43.1
              2.288
              33
              true

            
              5
              116
              74
              0
              0
              25.6
              0.201
              30
              false

            
              3
              78
              50
              32
              88
              31.0
              0.248
              26
              true

            
              10
              115
              0
              0
              0
              35.3
              0.134
              29
              false

            
              2
              197
              70
              45
              543
              30.5
              0.158
              53
              true

            
              8
              125
              96
              0
              0
              0.0
              0.232
              54
              true

            
              4
              110
              92
              0
              0
              37.6
              0.191
              30
              false

            
              10
              168
              74
              0
              0
              38.0
              0.537
              34
              true

            
              10
              139
              80
              0
              0
              27.1
              1.441
              57
              false

            
              1
              189
              60
              23
              846
              30.1
              0.398
              59
              true

            
              5
              166
              72
              19
              175
              25.8
              0.587
              51
              true

            
              7
              100
              0
              0
              0
              30.0
              0.484
              32
              true

            
              0
              118
              84
              47
              230
              45.8
              0.551
              31
              true

            
              7
              107
              74
              0
              0
              29.6
              0.254
              31
              true

            
              1
              103
              30
              38
              83
              43.3
              0.183
              33
              false

            
              1
              115
              70
              30
              96
              34.6
              0.529
              32
              true

            
              3
              126
              88
              41
              235
              39.3
              0.704
              27
              false

            
              8
              99
              84
              0
              0
              35.4
              0.388
              50
              false

            
              7
              196
              90
              0
              0
              39.8
              0.451
              41
              true

            
              9
              119
              80
              35
              0
              29.0
              0.263
              29
              true

            
              11
              143
              94
              33
              146
              36.6
              0.254
              51
              true

            
              10
              125
              70
              26
              115
              31.1
              0.205
              41
              true

            
              7
              147
              76
              0
              0
              39.4
              0.257
              43
              true

            
              1
              97
              66
              15
              140
              23.2
              0.487
              22
              false

            
              13
              145
              82
              19
              110
              22.2
              0.245
              57
              false

            
              5
              117
              92
              0
              0
              34.1
              0.337
              38
              false

            
              5
              109
              75
              26
              0
              36.0
              0.546
              60
              false

            
              3
              158
              76
              36
              245
              31.6
              0.851
              28
              true

            
              3
              88
              58
              11
              54
              24.8
              0.267
              22
              false

            
              6
              92
              92
              0
              0
              19.9
              0.188
              28
              false

            
              10
              122
              78
              31
              0
              27.6
              0.512
              45
              false

            
              4
              103
              60
              33
              192
              24.0
              0.966
              33
              false

            
              11
              138
              76
              0
              0
              33.2
              0.420
              35
              false

            
              9
              102
              76
              37
              0
              32.9
              0.665
              46
              true

            
              2
              90
              68
              42
              0
              38.2
              0.503
              27
              true

            
              4
              111
              72
              47
              207
              37.1
              1.390
              56
              true

            
              3
              180
              64
              25
              70
              34.0
              0.271
              26
              false

            
              7
              133
              84
              0
              0
              40.2
              0.696
              37
              false

            
              7
              106
              92
              18
              0
              22.7
              0.235
              48
              false

            
              9
              171
              110
              24
              240
              45.4
              0.721
              54
              true

            
              7
              159
              64
              0
              0
              27.4
              0.294
              40
              false

            
              0
              180
              66
              39
              0
              42.0
              1.893
              25
              true

            
              1
              146
              56
              0
              0
              29.7
              0.564
              29
              false

            
              2
              71
              70
              27
              0
              28.0
              0.586
              22
              false

            
              7
              103
              66
              32
              0
              39.1
              0.344
              31
              true

            
              7
              105
              0
              0
              0
              0.0
              0.305
              24
              false

            
              1
              103
              80
              11
              82
              19.4
              0.491
              22
              false

            
              1
              101
              50
              15
              36
              24.2
              0.526
              26
              false

            
              5
              88
              66
              21
              23
              24.4
              0.342
              30
              false

            
              8
              176
              90
              34
              300
              33.7
              0.467
              58
              true

            
              7
              150
              66
              42
              342
              34.7
              0.718
              42
              false

            
              1
              73
              50
              10
              0
              23.0
              0.248
              21
              false

            
              7
              187
              68
              39
              304
              37.7
              0.254
              41
              true

            
              0
              100
              88
              60
              110
              46.8
              0.962
              31
              false

            
              0
              146
              82
              0
              0
              40.5
              1.781
              44
              false

            
              0
              105
              64
              41
              142
              41.5
              0.173
              22
              false

            
              2
              84
              0
              0
              0
              0.0
              0.304
              21
              false

            
              8
              133
              72
              0
              0
              32.9
              0.270
              39
              true

            
              5
              44
              62
              0
              0
              25.0
              0.587
              36
              false

            
              2
              141
              58
              34
              128
              25.4
              0.699
              24
              false

            
              7
              114
              66
              0
              0
              32.8
              0.258
              42
              true

            
              5
              99
              74
              27
              0
              29.0
              0.203
              32
              false

            
              0
              109
              88
              30
              0
              32.5
              0.855
              38
              true

            
              2
              109
              92
              0
              0
              42.7
              0.845
              54
              false

            
              1
              95
              66
              13
              38
              19.6
              0.334
              25
              false

            
              4
              146
              85
              27
              100
              28.9
              0.189
              27
              false

            
              2
              100
              66
              20
              90
              32.9
              0.867
              28
              true

            
              5
              139
              64
              35
              140
              28.6
              0.411
              26
              false

            
              13
              126
              90
              0
              0
              43.4
              0.583
              42
              true

            
              4
              129
              86
              20
              270
              35.1
              0.231
              23
              false

            
              1
              79
              75
              30
              0
              32.0
              0.396
              22
              false

            
              1
              0
              48
              20
              0
              24.7
              0.140
              22
              false

            
              7
              62
              78
              0
              0
              32.6
              0.391
              41
              false

            
              5
              95
              72
              33
              0
              37.7
              0.370
              27
              false

            
              0
              131
              0
              0
              0
              43.2
              0.270
              26
              true

            
              2
              112
              66
              22
              0
              25.0
              0.307
              24
              false

            
              11
              127
              106
              0
              0
              39.0
              0.190
              51
              false

            
              3
              80
              82
              31
              70
              34.2
              1.292
              27
              true

            
              10
              162
              84
              0
              0
              27.7
              0.182
              54
              false

            
              1
              199
              76
              43
              0
              42.9
              1.394
              22
              true

            
              8
              167
              106
              46
              231
              37.6
              0.165
              43
              true

            
              9
              145
              80
              46
              130
              37.9
              0.637
              40
              true

            
              6
              115
              60
              39
              0
              33.7
              0.245
              40
              true

            
              1
              112
              80
              45
              132
              34.8
              0.217
              24
              false

            
              4
              145
              82
              18
              0
              32.5
              0.235
              70
              true

            
              10
              111
              70
              27
              0
              27.5
              0.141
              40
              true

            
              6
              98
              58
              33
              190
              34.0
              0.430
              43
              false

            
              9
              154
              78
              30
              100
              30.9
              0.164
              45
              false

            
              6
              165
              68
              26
              168
              33.6
              0.631
              49
              false

            
              1
              99
              58
              10
              0
              25.4
              0.551
              21
              false

            
              10
              68
              106
              23
              49
              35.5
              0.285
              47
              false

            
              3
              123
              100
              35
              240
              57.3
              0.880
              22
              false

            
              8
              91
              82
              0
              0
              35.6
              0.587
              68
              false

            
              6
              195
              70
              0
              0
              30.9
              0.328
              31
              true

            
              9
              156
              86
              0
              0
              24.8
              0.230
              53
              true

            
              0
              93
              60
              0
              0
              35.3
              0.263
              25
              false

            
              3
              121
              52
              0
              0
              36.0
              0.127
              25
              true

            
              2
              101
              58
              17
              265
              24.2
              0.614
              23
              false

            
              2
              56
              56
              28
              45
              24.2
              0.332
              22
              false

            
              0
              162
              76
              36
              0
              49.6
              0.364
              26
              true

            
              0
              95
              64
              39
              105
              44.6
              0.366
              22
              false

            
              4
              125
              80
              0
              0
              32.3
              0.536
              27
              true

            
              5
              136
              82
              0
              0
              0.0
              0.640
              69
              false

            
              2
              129
              74
              26
              205
              33.2
              0.591
              25
              false

            
              3
              130
              64
              0
              0
              23.1
              0.314
              22
              false

            
              1
              107
              50
              19
              0
              28.3
              0.181
              29
              false

            
              1
              140
              74
              26
              180
              24.1
              0.828
              23
              false

            
              1
              144
              82
              46
              180
              46.1
              0.335
              46
              true

            
              8
              107
              80
              0
              0
              24.6
              0.856
              34
              false

            
              13
              158
              114
              0
              0
              42.3
              0.257
              44
              true

            
              2
              121
              70
              32
              95
              39.1
              0.886
              23
              false

            
              7
              129
              68
              49
              125
              38.5
              0.439
              43
              true

            
              2
              90
              60
              0
              0
              23.5
              0.191
              25
              false

            
              7
              142
              90
              24
              480
              30.4
              0.128
              43
              true

            
              3
              169
              74
              19
              125
              29.9
              0.268
              31
              true

            
              0
              99
              0
              0
              0
              25.0
              0.253
              22
              false

            
              4
              127
              88
              11
              155
              34.5
              0.598
              28
              false

            
              4
              118
              70
              0
              0
              44.5
              0.904
              26
              false

            
              2
              122
              76
              27
              200
              35.9
              0.483
              26
              false

            
              6
              125
              78
              31
              0
              27.6
              0.565
              49
              true

            
              1
              168
              88
              29
              0
              35.0
              0.905
              52
              true

            
              2
              129
              0
              0
              0
              38.5
              0.304
              41
              false

            
              4
              110
              76
              20
              100
              28.4
              0.118
              27
              false

            
              6
              80
              80
              36
              0
              39.8
              0.177
              28
              false

            
              10
              115
              0
              0
              0
              0.0
              0.261
              30
              true

            
              2
              127
              46
              21
              335
              34.4
              0.176
              22
              false

            
              9
              164
              78
              0
              0
              32.8
              0.148
              45
              true

            
              2
              93
              64
              32
              160
              38.0
              0.674
              23
              true

            
              3
              158
              64
              13
              387
              31.2
              0.295
              24
              false

            
              5
              126
              78
              27
              22
              29.6
              0.439
              40
              false

            
              10
              129
              62
              36
              0
              41.2
              0.441
              38
              true

            
              0
              134
              58
              20
              291
              26.4
              0.352
              21
              false

            
              3
              102
              74
              0
              0
              29.5
              0.121
              32
              false

            
              7
              187
              50
              33
              392
              33.9
              0.826
              34
              true

            
              3
              173
              78
              39
              185
              33.8
              0.970
              31
              true

            
              10
              94
              72
              18
              0
              23.1
              0.595
              56
              false

            
              1
              108
              60
              46
              178
              35.5
              0.415
              24
              false

            
              5
              97
              76
              27
              0
              35.6
              0.378
              52
              true

            
              4
              83
              86
              19
              0
              29.3
              0.317
              34
              false

            
              1
              114
              66
              36
              200
              38.1
              0.289
              21
              false

            
              1
              149
              68
              29
              127
              29.3
              0.349
              42
              true

            
              5
              117
              86
              30
              105
              39.1
              0.251
              42
              false

            
              1
              111
              94
              0
              0
              32.8
              0.265
              45
              false

            
              4
              112
              78
              40
              0
              39.4
              0.236
              38
              false

            
              1
              116
              78
              29
              180
              36.1
              0.496
              25
              false

            
              0
              141
              84
              26
              0
              32.4
              0.433
              22
              false

            
              2
              175
              88
              0
              0
              22.9
              0.326
              22
              false

            
              2
              92
              52
              0
              0
              30.1
              0.141
              22
              false

            
              3
              130
              78
              23
              79
              28.4
              0.323
              34
              true

            
              8
              120
              86
              0
              0
              28.4
              0.259
              22
              true

            
              2
              174
              88
              37
              120
              44.5
              0.646
              24
              true

            
              2
              106
              56
              27
              165
              29.0
              0.426
              22
              false

            
              2
              105
              75
              0
              0
              23.3
              0.560
              53
              false

            
              4
              95
              60
              32
              0
              35.4
              0.284
              28
              false

            
              0
              126
              86
              27
              120
              27.4
              0.515
              21
              false

            
              8
              65
              72
              23
              0
              32.0
              0.600
              42
              false

            
              2
              99
              60
              17
              160
              36.6
              0.453
              21
              false

            
              1
              102
              74
              0
              0
              39.5
              0.293
              42
              true

            
              11
              120
              80
              37
              150
              42.3
              0.785
              48
              true

            
              3
              102
              44
              20
              94
              30.8
              0.400
              26
              false

            
              1
              109
              58
              18
              116
              28.5
              0.219
              22
              false

            
              9
              140
              94
              0
              0
              32.7
              0.734
              45
              true

            
              13
              153
              88
              37
              140
              40.6
              1.174
              39
              false

            
              12
              100
              84
              33
              105
              30.0
              0.488
              46
              false

            
              1
              147
              94
              41
              0
              49.3
              0.358
              27
              true

            
              1
              81
              74
              41
              57
              46.3
              1.096
              32
              false

            
              3
              187
              70
              22
              200
              36.4
              0.408
              36
              true

            
              6
              162
              62
              0
              0
              24.3
              0.178
              50
              true

            
              4
              136
              70
              0
              0
              31.2
              1.182
              22
              true

            
              1
              121
              78
              39
              74
              39.0
              0.261
              28
              false

            
              3
              108
              62
              24
              0
              26.0
              0.223
              25
              false

            
              0
              181
              88
              44
              510
              43.3
              0.222
              26
              true

            
              8
              154
              78
              32
              0
              32.4
              0.443
              45
              true

            
              1
              128
              88
              39
              110
              36.5
              1.057
              37
              true

            
              7
              137
              90
              41
              0
              32.0
              0.391
              39
              false

            
              0
              123
              72
              0
              0
              36.3
              0.258
              52
              true

            
              1
              106
              76
              0
              0
              37.5
              0.197
              26
              false

            
              6
              190
              92
              0
              0
              35.5
              0.278
              66
              true

            
              2
              88
              58
              26
              16
              28.4
              0.766
              22
              false

            
              9
              170
              74
              31
              0
              44.0
              0.403
              43
              true

            
              9
              89
              62
              0
              0
              22.5
              0.142
              33
              false

            
              10
              101
              76
              48
              180
              32.9
              0.171
              63
              false

            
              2
              122
              70
              27
              0
              36.8
              0.340
              27
              false

            
              5
              121
              72
              23
              112
              26.2
              0.245
              30
              false

            
              1
              126
              60
              0
              0
              30.1
              0.349
              47
              true

            
              2
              157
              74
              35
              440
              39.4
              0.134
              30
              false

            
              1
              167
              74
              17
              144
              23.4
              0.447
              33
              true

            
              0
              179
              50
              36
              159
              37.8
              0.455
              22
              true

            
              11
              136
              84
              35
              130
              28.3
              0.260
              42
              true

            
              0
              107
              60
              25
              0
              26.4
              0.133
              23
              false

            
              1
              91
              54
              25
              100
              25.2
              0.234
              23
              false

            
              1
              117
              60
              23
              106
              33.8
              0.466
              27
              false

            
              5
              123
              74
              40
              77
              34.1
              0.269
              28
              false

            
              2
              120
              54
              0
              0
              26.8
              0.455
              27
              false

            
              1
              106
              70
              28
              135
              34.2
              0.142
              22
              false

            
              2
              155
              52
              27
              540
              38.7
              0.240
              25
              true

## diabetes_test.csv

          
            pregnancies
            plasma glucose
            blood pressure
            triceps skin thickness
            insulin
            bmi
            diabetes pedigree
            age

            
              2
              197
              70
              45
              543
              30.5
              158
              53

            
              8
              125
              96
              
              0
              0.0
              232
              54

            
              4
              110
              92
              
              0
              37.6
              191
              30

            
              3
              126
              
              
              235
              39.3
              704
              27

            
              8
              99
              84
              
              0
              35.4
              388
              50

            
              7
              196
              90
              
              0
              39.8
              451
              41

            
              9
              119
              80
              35
              0
              29.0
              263
              29

            
              11
              143
              94
              33
              146
              36.6
              254
              51

            
              10
              125
              70
              26
              115
              31.1
              205
              41

            
              7
              147
              76
              0
              0
              39.4
              257
              43

            
              
              103
              
              
              192
              24.0
              966
              33

            
              
              138
              
              
              
              33.2
              420
              35

            
              
              102
              
              37
              
              32.9
              665
              46

            
              
              90
              
              42
              
              38.2
              503
              27

            
              
              103
              
              
              
              19.4
              491
              22

            
              1
              101
              
              15
              
              24.2
              526
              26

            
              5
              88
              66
              
              23
              24.4
              342
              30

            
              8
              176
              90
              
              300
              33.7
              467
              58

            
              7
              114
              66
              
              0
              32.8
              258
              42

            
              5
              99
              74
              
              0
              29.0
              203
              32

            
              0
              109
              88
              
              0
              32.5
              855
              38

            
              2
              109
              92
              
              
              42.7
              845
              54

            
              2
              112
              66
              
              
              25.0
              307
              24

            
              11
              127
              106
              0
              
              39.0
              190
              51

            
              3
              80
              82
              31
              
              34.2
              1292
              27

            
              10
              162
              84
              0
              0
              27.7
              182
              54

            
              1
              199
              76
              43
              0
              42.9
              1394
              22

            
              8
              167
              106
              46
              231
              37.6
              165
              43
pregnancies	plasma glucose	blood pressure	triceps skin thickness	insulin	bmi	diabetes pedigree	age	diabetes
6	148	72	35	0	33.6	0.627	50	true
1	85	66	29	0	26.6	0.351	31	false
8	183	64	0	0	23.3	0.672	32	true
1	89	66	23	94	28.1	0.167	21	false
0	137	40	35	168	43.1	2.288	33	true
5	116	74	0	0	25.6	0.201	30	false
3	78	50	32	88	31.0	0.248	26	true
10	115	0	0	0	35.3	0.134	29	false
2	197	70	45	543	30.5	0.158	53	true
8	125	96	0	0	0.0	0.232	54	true
4	110	92	0	0	37.6	0.191	30	false
10	168	74	0	0	38.0	0.537	34	true
10	139	80	0	0	27.1	1.441	57	false
1	189	60	23	846	30.1	0.398	59	true
5	166	72	19	175	25.8	0.587	51	true
7	100	0	0	0	30.0	0.484	32	true
0	118	84	47	230	45.8	0.551	31	true
7	107	74	0	0	29.6	0.254	31	true
1	103	30	38	83	43.3	0.183	33	false
1	115	70	30	96	34.6	0.529	32	true
3	126	88	41	235	39.3	0.704	27	false
8	99	84	0	0	35.4	0.388	50	false
7	196	90	0	0	39.8	0.451	41	true
9	119	80	35	0	29.0	0.263	29	true
11	143	94	33	146	36.6	0.254	51	true
10	125	70	26	115	31.1	0.205	41	true
7	147	76	0	0	39.4	0.257	43	true
1	97	66	15	140	23.2	0.487	22	false
13	145	82	19	110	22.2	0.245	57	false
5	117	92	0	0	34.1	0.337	38	false
5	109	75	26	0	36.0	0.546	60	false
3	158	76	36	245	31.6	0.851	28	true
3	88	58	11	54	24.8	0.267	22	false
6	92	92	0	0	19.9	0.188	28	false
10	122	78	31	0	27.6	0.512	45	false
4	103	60	33	192	24.0	0.966	33	false
11	138	76	0	0	33.2	0.420	35	false
9	102	76	37	0	32.9	0.665	46	true
2	90	68	42	0	38.2	0.503	27	true
4	111	72	47	207	37.1	1.390	56	true
3	180	64	25	70	34.0	0.271	26	false
7	133	84	0	0	40.2	0.696	37	false
7	106	92	18	0	22.7	0.235	48	false
9	171	110	24	240	45.4	0.721	54	true
7	159	64	0	0	27.4	0.294	40	false
0	180	66	39	0	42.0	1.893	25	true
1	146	56	0	0	29.7	0.564	29	false
2	71	70	27	0	28.0	0.586	22	false
7	103	66	32	0	39.1	0.344	31	true
7	105	0	0	0	0.0	0.305	24	false
1	103	80	11	82	19.4	0.491	22	false
1	101	50	15	36	24.2	0.526	26	false
5	88	66	21	23	24.4	0.342	30	false
8	176	90	34	300	33.7	0.467	58	true
7	150	66	42	342	34.7	0.718	42	false
1	73	50	10	0	23.0	0.248	21	false
7	187	68	39	304	37.7	0.254	41	true
0	100	88	60	110	46.8	0.962	31	false
0	146	82	0	0	40.5	1.781	44	false
0	105	64	41	142	41.5	0.173	22	false
2	84	0	0	0	0.0	0.304	21	false
8	133	72	0	0	32.9	0.270	39	true
5	44	62	0	0	25.0	0.587	36	false
2	141	58	34	128	25.4	0.699	24	false
7	114	66	0	0	32.8	0.258	42	true
5	99	74	27	0	29.0	0.203	32	false
0	109	88	30	0	32.5	0.855	38	true
2	109	92	0	0	42.7	0.845	54	false
1	95	66	13	38	19.6	0.334	25	false
4	146	85	27	100	28.9	0.189	27	false
2	100	66	20	90	32.9	0.867	28	true
5	139	64	35	140	28.6	0.411	26	false
13	126	90	0	0	43.4	0.583	42	true
4	129	86	20	270	35.1	0.231	23	false
1	79	75	30	0	32.0	0.396	22	false
1	0	48	20	0	24.7	0.140	22	false
7	62	78	0	0	32.6	0.391	41	false
5	95	72	33	0	37.7	0.370	27	false
0	131	0	0	0	43.2	0.270	26	true
2	112	66	22	0	25.0	0.307	24	false
11	127	106	0	0	39.0	0.190	51	false
3	80	82	31	70	34.2	1.292	27	true
10	162	84	0	0	27.7	0.182	54	false
1	199	76	43	0	42.9	1.394	22	true
8	167	106	46	231	37.6	0.165	43	true
9	145	80	46	130	37.9	0.637	40	true
6	115	60	39	0	33.7	0.245	40	true
1	112	80	45	132	34.8	0.217	24	false
4	145	82	18	0	32.5	0.235	70	true
10	111	70	27	0	27.5	0.141	40	true
6	98	58	33	190	34.0	0.430	43	false
9	154	78	30	100	30.9	0.164	45	false
6	165	68	26	168	33.6	0.631	49	false
1	99	58	10	0	25.4	0.551	21	false
10	68	106	23	49	35.5	0.285	47	false
3	123	100	35	240	57.3	0.880	22	false
8	91	82	0	0	35.6	0.587	68	false
6	195	70	0	0	30.9	0.328	31	true
9	156	86	0	0	24.8	0.230	53	true
0	93	60	0	0	35.3	0.263	25	false
3	121	52	0	0	36.0	0.127	25	true
2	101	58	17	265	24.2	0.614	23	false
2	56	56	28	45	24.2	0.332	22	false
0	162	76	36	0	49.6	0.364	26	true
0	95	64	39	105	44.6	0.366	22	false
4	125	80	0	0	32.3	0.536	27	true
5	136	82	0	0	0.0	0.640	69	false
2	129	74	26	205	33.2	0.591	25	false
3	130	64	0	0	23.1	0.314	22	false
1	107	50	19	0	28.3	0.181	29	false
1	140	74	26	180	24.1	0.828	23	false
1	144	82	46	180	46.1	0.335	46	true
8	107	80	0	0	24.6	0.856	34	false
13	158	114	0	0	42.3	0.257	44	true
2	121	70	32	95	39.1	0.886	23	false
7	129	68	49	125	38.5	0.439	43	true
2	90	60	0	0	23.5	0.191	25	false
7	142	90	24	480	30.4	0.128	43	true
3	169	74	19	125	29.9	0.268	31	true
0	99	0	0	0	25.0	0.253	22	false
4	127	88	11	155	34.5	0.598	28	false
4	118	70	0	0	44.5	0.904	26	false
2	122	76	27	200	35.9	0.483	26	false
6	125	78	31	0	27.6	0.565	49	true
1	168	88	29	0	35.0	0.905	52	true
2	129	0	0	0	38.5	0.304	41	false
4	110	76	20	100	28.4	0.118	27	false
6	80	80	36	0	39.8	0.177	28	false
10	115	0	0	0	0.0	0.261	30	true
2	127	46	21	335	34.4	0.176	22	false
9	164	78	0	0	32.8	0.148	45	true
2	93	64	32	160	38.0	0.674	23	true
3	158	64	13	387	31.2	0.295	24	false
5	126	78	27	22	29.6	0.439	40	false
10	129	62	36	0	41.2	0.441	38	true
0	134	58	20	291	26.4	0.352	21	false
3	102	74	0	0	29.5	0.121	32	false
7	187	50	33	392	33.9	0.826	34	true
3	173	78	39	185	33.8	0.970	31	true
10	94	72	18	0	23.1	0.595	56	false
1	108	60	46	178	35.5	0.415	24	false
5	97	76	27	0	35.6	0.378	52	true
4	83	86	19	0	29.3	0.317	34	false
1	114	66	36	200	38.1	0.289	21	false
1	149	68	29	127	29.3	0.349	42	true
5	117	86	30	105	39.1	0.251	42	false
1	111	94	0	0	32.8	0.265	45	false
4	112	78	40	0	39.4	0.236	38	false
1	116	78	29	180	36.1	0.496	25	false
0	141	84	26	0	32.4	0.433	22	false
2	175	88	0	0	22.9	0.326	22	false
2	92	52	0	0	30.1	0.141	22	false
3	130	78	23	79	28.4	0.323	34	true
8	120	86	0	0	28.4	0.259	22	true
2	174	88	37	120	44.5	0.646	24	true
2	106	56	27	165	29.0	0.426	22	false
2	105	75	0	0	23.3	0.560	53	false
4	95	60	32	0	35.4	0.284	28	false
0	126	86	27	120	27.4	0.515	21	false
8	65	72	23	0	32.0	0.600	42	false
2	99	60	17	160	36.6	0.453	21	false
1	102	74	0	0	39.5	0.293	42	true
11	120	80	37	150	42.3	0.785	48	true
3	102	44	20	94	30.8	0.400	26	false
1	109	58	18	116	28.5	0.219	22	false
9	140	94	0	0	32.7	0.734	45	true
13	153	88	37	140	40.6	1.174	39	false
12	100	84	33	105	30.0	0.488	46	false
1	147	94	41	0	49.3	0.358	27	true
1	81	74	41	57	46.3	1.096	32	false
3	187	70	22	200	36.4	0.408	36	true
6	162	62	0	0	24.3	0.178	50	true
4	136	70	0	0	31.2	1.182	22	true
1	121	78	39	74	39.0	0.261	28	false
3	108	62	24	0	26.0	0.223	25	false
0	181	88	44	510	43.3	0.222	26	true
8	154	78	32	0	32.4	0.443	45	true
1	128	88	39	110	36.5	1.057	37	true
7	137	90	41	0	32.0	0.391	39	false
0	123	72	0	0	36.3	0.258	52	true
1	106	76	0	0	37.5	0.197	26	false
6	190	92	0	0	35.5	0.278	66	true
2	88	58	26	16	28.4	0.766	22	false
9	170	74	31	0	44.0	0.403	43	true
9	89	62	0	0	22.5	0.142	33	false
10	101	76	48	180	32.9	0.171	63	false
2	122	70	27	0	36.8	0.340	27	false
5	121	72	23	112	26.2	0.245	30	false
1	126	60	0	0	30.1	0.349	47	true
2	157	74	35	440	39.4	0.134	30	false
1	167	74	17	144	23.4	0.447	33	true
0	179	50	36	159	37.8	0.455	22	true
11	136	84	35	130	28.3	0.260	42	true
0	107	60	25	0	26.4	0.133	23	false
1	91	54	25	100	25.2	0.234	23	false
1	117	60	23	106	33.8	0.466	27	false
5	123	74	40	77	34.1	0.269	28	false
2	120	54	0	0	26.8	0.455	27	false
1	106	70	28	135	34.2	0.142	22	false
2	155	52	27	540	38.7	0.240	25	true