Skip to content

Instantly share code, notes, and snippets.

@callmemaze
Created September 9, 2022 14:59
Show Gist options
  • Save callmemaze/fca6aef0b0feed9a33b860d31e10563f to your computer and use it in GitHub Desktop.
Save callmemaze/fca6aef0b0feed9a33b860d31e10563f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
"from time import time\n",
"\n",
"# Feature files: read with pandas' default header handling.\n",
"val_features, te_features = (\n",
"    pd.read_csv(csv_path)\n",
"    for csv_path in ('../../../val_features.csv', '../../../test_features.csv')\n",
")\n",
"\n",
"# Label files: header=None makes pandas treat the first row as data,\n",
"# not as column names.\n",
"val_labels, te_labels = (\n",
"    pd.read_csv(csv_path, header=None)\n",
"    for csv_path in ('../../../val_labels.csv', '../../../test_labels.csv')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read in Models"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Map each short model name to the estimator stored in '<name>_model.pkl'.\n",
"# joblib.load unpickles the file, so only point this at trusted artifacts.\n",
"models = {\n",
"    mdl: joblib.load('../../../{}_model.pkl'.format(mdl))\n",
"    for mdl in ['LR', 'SVM', 'MLP', 'RF', 'GB']\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'LR': LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, max_iter=100, multi_class='warn',\n",
" n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
" tol=0.0001, verbose=0, warm_start=False),\n",
" 'SVM': SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,\n",
" decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
" kernel='linear', max_iter=-1, probability=False, random_state=None,\n",
" shrinking=True, tol=0.001, verbose=False),\n",
" 'MLP': MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n",
" beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
" hidden_layer_sizes=(50,), learning_rate='invscaling',\n",
" learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
" n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n",
" random_state=None, shuffle=True, solver='adam', tol=0.0001,\n",
" validation_fraction=0.1, verbose=False, warm_start=False),\n",
" 'RF': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=4, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,\n",
" oob_score=False, random_state=None, verbose=0,\n",
" warm_start=False),\n",
" 'GB': GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
" learning_rate=0.01, loss='deviance', max_depth=3,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=500,\n",
" n_iter_no_change=None, presort='auto', random_state=None,\n",
" subsample=1.0, tol=0.0001, validation_fraction=0.1,\n",
" verbose=0, warm_start=False)}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"models"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Evaluate models on the validation set\n",
"\n",
"![Evaluation Metrics](../../img/eval_metrics.png)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def evaluate_model(name, model, features, labels):\n",
"    \"\"\"Print accuracy, precision, recall and prediction latency for one model.\n",
"\n",
"    Args:\n",
"        name: Label printed at the start of the report line.\n",
"        model: Object exposing a ``predict(features)`` method.\n",
"        features: Inputs passed unchanged to ``model.predict``.\n",
"        labels: True labels that the predictions are scored against.\n",
"\n",
"    Returns:\n",
"        None. The one-line report is written to stdout.\n",
"    \"\"\"\n",
"    # perf_counter is a monotonic, high-resolution clock intended for timing\n",
"    # short intervals; wall-clock time() can be too coarse for millisecond-scale\n",
"    # predict calls and can jump if the system clock is adjusted.\n",
"    # Imported locally so this cell does not depend on edits to the import cell.\n",
"    from time import perf_counter\n",
"\n",
"    # Only the predict call is timed; metric computation is excluded.\n",
"    start = perf_counter()\n",
"    pred = model.predict(features)\n",
"    latency_ms = round((perf_counter() - start) * 1000, 1)\n",
"\n",
"    accuracy = round(accuracy_score(labels, pred), 3)\n",
"    precision = round(precision_score(labels, pred), 3)\n",
"    recall = round(recall_score(labels, pred), 3)\n",
"\n",
"    print('{} -- Accuracy: {} / Precision: {} / Recall: {} / Latency: {}ms'.format(\n",
"        name, accuracy, precision, recall, latency_ms))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LR -- Accuracy: 0.77 / Precision: 0.707 / Recall: 0.631 / Latency: 1.5ms\n",
"SVM -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 1.4ms\n",
"MLP -- Accuracy: 0.747 / Precision: 0.667 / Recall: 0.615 / Latency: 1.2ms\n",
"RF -- Accuracy: 0.82 / Precision: 0.824 / Recall: 0.646 / Latency: 7.0ms\n",
"GB -- Accuracy: 0.815 / Precision: 0.808 / Recall: 0.646 / Latency: 2.4ms\n"
]
}
],
"source": [
"# Score every loaded model on the validation split so the report lines\n",
"# can be compared side by side.\n",
"for name, mdl in models.items():\n",
"    evaluate_model(name, mdl, features=val_features, labels=val_labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
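"### Evaluate the best model on the test set\n",
"\n",
"Random Forest had the highest accuracy (0.82) and precision (0.824) on the validation set, tied for the highest recall (0.646), and the highest latency (7.0ms); it is the model scored on the test set below."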
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Random Forest -- Accuracy: 0.81 / Precision: 0.875 / Recall: 0.645 / Latency: 7.5ms\n"
]
}
],
"source": [
"# Report test-set metrics for the random forest only.\n",
"evaluate_model(\n",
"    name='Random Forest',\n",
"    model=models['RF'],\n",
"    features=te_features,\n",
"    labels=te_labels,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment