callmemaze/final.ipynb

## final.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import pandas as pd\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
    "from time import time\n",
    "\n",
    "# Feature files use read_csv's default header handling (first row -> column names).\n",
    "val_features = pd.read_csv('../../../val_features.csv')\n",
    "te_features = pd.read_csv('../../../test_features.csv')\n",
    "\n",
    "# Label files are read with header=None, so their first row is kept as data.\n",
    "val_labels = pd.read_csv('../../../val_labels.csv', header=None)\n",
    "te_labels = pd.read_csv('../../../test_labels.csv', header=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read in Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load each saved model file into a dict keyed by the model's short name.\n",
    "models = dict()\n",
    "\n",
    "for mdl in ('LR', 'SVM', 'MLP', 'RF', 'GB'):\n",
    "    model_path = '../../../{}_model.pkl'.format(mdl)\n",
    "    models[mdl] = joblib.load(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'LR': LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
       "           intercept_scaling=1, max_iter=100, multi_class='warn',\n",
       "           n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
       "           tol=0.0001, verbose=0, warm_start=False),\n",
       " 'SVM': SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,\n",
       "   decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
       "   kernel='linear', max_iter=-1, probability=False, random_state=None,\n",
       "   shrinking=True, tol=0.001, verbose=False),\n",
       " 'MLP': MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n",
       "        beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
       "        hidden_layer_sizes=(50,), learning_rate='invscaling',\n",
       "        learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
       "        n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n",
       "        random_state=None, shuffle=True, solver='adam', tol=0.0001,\n",
       "        validation_fraction=0.1, verbose=False, warm_start=False),\n",
       " 'RF': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "             max_depth=4, max_features='auto', max_leaf_nodes=None,\n",
       "             min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "             min_samples_leaf=1, min_samples_split=2,\n",
       "             min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,\n",
       "             oob_score=False, random_state=None, verbose=0,\n",
       "             warm_start=False),\n",
       " 'GB': GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
       "               learning_rate=0.01, loss='deviance', max_depth=3,\n",
       "               max_features=None, max_leaf_nodes=None,\n",
       "               min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "               min_samples_leaf=1, min_samples_split=2,\n",
       "               min_weight_fraction_leaf=0.0, n_estimators=500,\n",
       "               n_iter_no_change=None, presort='auto', random_state=None,\n",
       "               subsample=1.0, tol=0.0001, validation_fraction=0.1,\n",
       "               verbose=0, warm_start=False)}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "models"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluate models on the validation set\n",
    "\n",
    "![Evaluation Metrics](../../img/eval_metrics.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_model(name, model, features, labels):\n",
    "    \"\"\"Print accuracy, precision, recall and prediction latency for one model.\n",
    "\n",
    "    Args:\n",
    "        name: Label printed at the start of the report line.\n",
    "        model: Object exposing ``predict(features)``; only that method is called.\n",
    "        features: Feature matrix handed to ``model.predict``.\n",
    "        labels: Ground-truth labels the predictions are scored against.\n",
    "\n",
    "    Returns:\n",
    "        None. The report line is written to stdout.\n",
    "    \"\"\"\n",
    "    # perf_counter() is the highest-resolution clock meant for timing short\n",
    "    # durations; time() follows the wall clock, which the system can adjust.\n",
    "    from time import perf_counter\n",
    "\n",
    "    # Only the predict call is timed; metric computation is excluded.\n",
    "    start = perf_counter()\n",
    "    pred = model.predict(features)\n",
    "    elapsed_ms = (perf_counter() - start) * 1000\n",
    "\n",
    "    accuracy = round(accuracy_score(labels, pred), 3)\n",
    "    precision = round(precision_score(labels, pred), 3)\n",
    "    recall = round(recall_score(labels, pred), 3)\n",
    "    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(\n",
    "        name, accuracy, precision, recall, round(elapsed_ms, 1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LR -- Accuracy: 0.77 / Precision: 0.707 / Recall: 0.631 / Latency: 1.5ms\n",
      "SVM -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 1.4ms\n",
      "MLP -- Accuracy: 0.747 / Precision: 0.667 / Recall: 0.615 / Latency: 1.2ms\n",
      "RF -- Accuracy: 0.82 / Precision: 0.824 / Recall: 0.646 / Latency: 7.0ms\n",
      "GB -- Accuracy: 0.815 / Precision: 0.808 / Recall: 0.646 / Latency: 2.4ms\n"
     ]
    }
   ],
   "source": [
    "# Score every loaded model on the validation split, one report line each.\n",
    "for name, mdl in models.items():\n",
    "    evaluate_model(name=name, model=mdl, features=val_features, labels=val_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluate best model on test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest -- Accuracy: 0.81 / Precision: 0.875 / Recall: 0.645 / Latency: 7.5ms\n"
     ]
    }
   ],
   "source": [
    "# RF posted the highest validation accuracy and precision above, so it is the one scored on the test split.\n",
    "evaluate_model(name='Random Forest', model=models['RF'], features=te_features, labels=te_labels)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import joblib\n",
	"import pandas as pd\n",
	"from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
	"from time import time\n",
	"\n",
	"# Feature files use read_csv's default header handling (first row -> column names).\n",
	"val_features = pd.read_csv('../../../val_features.csv')\n",
	"te_features = pd.read_csv('../../../test_features.csv')\n",
	"\n",
	"# Label files are read with header=None, so their first row is kept as data.\n",
	"val_labels = pd.read_csv('../../../val_labels.csv', header=None)\n",
	"te_labels = pd.read_csv('../../../test_labels.csv', header=None)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Read in Models"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load each saved model file into a dict keyed by the model's short name.\n",
	"models = dict()\n",
	"\n",
	"for mdl in ('LR', 'SVM', 'MLP', 'RF', 'GB'):\n",
	"    model_path = '../../../{}_model.pkl'.format(mdl)\n",
	"    models[mdl] = joblib.load(model_path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'LR': LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
	" intercept_scaling=1, max_iter=100, multi_class='warn',\n",
	" n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
	" tol=0.0001, verbose=0, warm_start=False),\n",
	" 'SVM': SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,\n",
	" decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
	" kernel='linear', max_iter=-1, probability=False, random_state=None,\n",
	" shrinking=True, tol=0.001, verbose=False),\n",
	" 'MLP': MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n",
	" beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
	" hidden_layer_sizes=(50,), learning_rate='invscaling',\n",
	" learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
	" n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n",
	" random_state=None, shuffle=True, solver='adam', tol=0.0001,\n",
	" validation_fraction=0.1, verbose=False, warm_start=False),\n",
	" 'RF': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
	" max_depth=4, max_features='auto', max_leaf_nodes=None,\n",
	" min_impurity_decrease=0.0, min_impurity_split=None,\n",
	" min_samples_leaf=1, min_samples_split=2,\n",
	" min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,\n",
	" oob_score=False, random_state=None, verbose=0,\n",
	" warm_start=False),\n",
	" 'GB': GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
	" learning_rate=0.01, loss='deviance', max_depth=3,\n",
	" max_features=None, max_leaf_nodes=None,\n",
	" min_impurity_decrease=0.0, min_impurity_split=None,\n",
	" min_samples_leaf=1, min_samples_split=2,\n",
	" min_weight_fraction_leaf=0.0, n_estimators=500,\n",
	" n_iter_no_change=None, presort='auto', random_state=None,\n",
	" subsample=1.0, tol=0.0001, validation_fraction=0.1,\n",
	" verbose=0, warm_start=False)}"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"models"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Evaluate models on the validation set\n",
	"\n",
	"![Evaluation Metrics](../../img/eval_metrics.png)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"def evaluate_model(name, model, features, labels):\n",
	"    \"\"\"Print accuracy, precision, recall and prediction latency for one model.\n",
	"\n",
	"    Args:\n",
	"        name: Label printed at the start of the report line.\n",
	"        model: Object exposing ``predict(features)``; only that method is called.\n",
	"        features: Feature matrix handed to ``model.predict``.\n",
	"        labels: Ground-truth labels the predictions are scored against.\n",
	"\n",
	"    Returns:\n",
	"        None. The report line is written to stdout.\n",
	"    \"\"\"\n",
	"    # perf_counter() is the highest-resolution clock meant for timing short\n",
	"    # durations; time() follows the wall clock, which the system can adjust.\n",
	"    from time import perf_counter\n",
	"\n",
	"    # Only the predict call is timed; metric computation is excluded.\n",
	"    start = perf_counter()\n",
	"    pred = model.predict(features)\n",
	"    elapsed_ms = (perf_counter() - start) * 1000\n",
	"\n",
	"    accuracy = round(accuracy_score(labels, pred), 3)\n",
	"    precision = round(precision_score(labels, pred), 3)\n",
	"    recall = round(recall_score(labels, pred), 3)\n",
	"    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(\n",
	"        name, accuracy, precision, recall, round(elapsed_ms, 1)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"LR -- Accuracy: 0.77 / Precision: 0.707 / Recall: 0.631 / Latency: 1.5ms\n",
	"SVM -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 1.4ms\n",
	"MLP -- Accuracy: 0.747 / Precision: 0.667 / Recall: 0.615 / Latency: 1.2ms\n",
	"RF -- Accuracy: 0.82 / Precision: 0.824 / Recall: 0.646 / Latency: 7.0ms\n",
	"GB -- Accuracy: 0.815 / Precision: 0.808 / Recall: 0.646 / Latency: 2.4ms\n"
	]
	}
	],
	"source": [
	"# Score every loaded model on the validation split, one report line each.\n",
	"for name, mdl in models.items():\n",
	"    evaluate_model(name=name, model=mdl, features=val_features, labels=val_labels)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Evaluate best model on test set"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Random Forest -- Accuracy: 0.81 / Precision: 0.875 / Recall: 0.645 / Latency: 7.5ms\n"
	]
	}
	],
	"source": [
	"# RF posted the highest validation accuracy and precision above, so it is the one scored on the test split.\n",
	"evaluate_model(name='Random Forest', model=models['RF'], features=te_features, labels=te_labels)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}