Skip to content

Instantly share code, notes, and snippets.

@tam17aki
Last active April 11, 2022 01:53
Show Gist options
  • Save tam17aki/0f1d388a9373246f5973ee8c0c99f643 to your computer and use it in GitHub Desktop.
Save tam17aki/0f1d388a9373246f5973ee8c0c99f643 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Benchmark of various outlier detection models\n",
"\n",
"### The models are evaluated by ROC, Precision @ n and execution time on 15 benchmark datasets. All datasets are split (60% for training and 40% for testing). \n",
"\n",
"**[PyOD](https://github.com/yzhao062/pyod)** is a comprehensive **Python toolkit** to **identify outlying objects** in \n",
"multivariate data with both unsupervised and supervised approaches.\n",
"The model covered in this example includes:\n",
"\n",
" 1. Linear Models for Outlier Detection:\n",
" 1. **OCSVM: One-Class Support Vector Machines**\n",
" 2. **PCA: Principal Component Analysis**\n",
" 3. **KPCA: Kernel Principal Component Analysis**\n",
" \n",
" 2. Proximity-Based Outlier Detection Models:\n",
" 1. **LOF: Local Outlier Factor**\n",
" 2. **kNN: k Nearest Neighbors** (use the distance to the kth nearest \n",
" neighbor as the outlier score)\n",
"\n",
" 3. Probabilistic Models for Outlier Detection:\n",
" 1. **ABOD: Angle-Based Outlier Detection**\n",
" \n",
" 4. Outlier Ensembles and Combination Frameworks\n",
" 1. **Isolation Forest**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import os\n",
"import sys\n",
"import warnings\n",
"from time import time\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from pyod.models.abod import ABOD\n",
"from pyod.models.iforest import IForest\n",
"from pyod.models.knn import KNN\n",
"from pyod.models.lof import LOF\n",
"from pyod.models.ocsvm import OCSVM\n",
"from pyod.models.pca import PCA\n",
"from pyod.utils.utility import precision_n_scores, standardizer\n",
"from scipy.io import loadmat\n",
"from sklearn.metrics import roc_auc_score\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"from kpca import KPCA\n",
"\n",
"# temporary solution for relative imports in case pyod is not installed\n",
"# if pyod is installed, no need to use the following line\n",
"sys.path.append(os.path.abspath(os.path.join(os.path.dirname(\"__file__\"), \"..\")))\n",
"# supress warnings for clean output\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"... Processing arrhythmia.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.8076s\n",
"K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0292s\n",
"Isolation Forest (IForest) ROC:0.8464, precision @ rank n:0.5714, execution time: 0.247s\n",
"Local Outlier Factor (LOF) ROC:0.7619, precision @ rank n:0.4286, execution time: 0.0036s\n",
"One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0226s\n",
"Principal Component Analysis(PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0311s\n",
"Kernel PCA(KPCA) ROC:0.7883, precision @ rank n:0.4643, execution time: 0.0816s\n",
"Kernel PCA(KPCA) with samling ROC:0.7775, precision @ rank n:0.5, execution time: 0.0355s\n",
"\n",
"... Processing cardio.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.5703, precision @ rank n:0.1923, execution time: 0.2436s\n",
"K Nearest Neighbors (KNN) ROC:0.7435, precision @ rank n:0.3718, execution time: 0.0893s\n",
"Isolation Forest (IForest) ROC:0.9348, precision @ rank n:0.5641, execution time: 0.2443s\n",
"Local Outlier Factor (LOF) ROC:0.5193, precision @ rank n:0.1282, execution time: 0.043s\n",
"One-class SVM (OCSVM) ROC:0.9415, precision @ rank n:0.5769, execution time: 0.0961s\n",
"Principal Component Analysis(PCA) ROC:0.9543, precision @ rank n:0.6538, execution time: 0.0026s\n",
"Kernel PCA(KPCA) ROC:0.8249, precision @ rank n:0.3846, execution time: 0.4285s\n",
"Kernel PCA(KPCA) with samling ROC:0.8095, precision @ rank n:0.4231, execution time: 0.1398s\n",
"\n",
"... Processing glass.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.7287, precision @ rank n:0.0, execution time: 0.0225s\n",
"K Nearest Neighbors (KNN) ROC:0.811, precision @ rank n:0.0, execution time: 0.0058s\n",
"Isolation Forest (IForest) ROC:0.6067, precision @ rank n:0.0, execution time: 0.1852s\n",
"Local Outlier Factor (LOF) ROC:0.7287, precision @ rank n:0.0, execution time: 0.0017s\n",
"One-class SVM (OCSVM) ROC:0.4116, precision @ rank n:0.0, execution time: 0.0015s\n",
"Principal Component Analysis(PCA) ROC:0.5305, precision @ rank n:0.0, execution time: 0.001s\n",
"Kernel PCA(KPCA) ROC:0.8201, precision @ rank n:0.0, execution time: 0.0456s\n",
"Kernel PCA(KPCA) with samling ROC:0.6555, precision @ rank n:0.0, execution time: 0.0175s\n",
"\n",
"... Processing ionosphere.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.8585, precision @ rank n:0.7955, execution time: 0.0561s\n",
"K Nearest Neighbors (KNN) ROC:0.9407, precision @ rank n:0.8182, execution time: 0.0122s\n",
"Isolation Forest (IForest) ROC:0.8175, precision @ rank n:0.5682, execution time: 0.2141s\n",
"Local Outlier Factor (LOF) ROC:0.8648, precision @ rank n:0.75, execution time: 0.0113s\n",
"One-class SVM (OCSVM) ROC:0.7819, precision @ rank n:0.6364, execution time: 0.0053s\n",
"Principal Component Analysis(PCA) ROC:0.7505, precision @ rank n:0.5909, execution time: 0.0021s\n",
"Kernel PCA(KPCA) ROC:0.9112, precision @ rank n:0.7955, execution time: 0.0611s\n",
"Kernel PCA(KPCA) with samling ROC:0.9456, precision @ rank n:0.8182, execution time: 0.0274s\n",
"\n",
"... Processing letter.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.9104, precision @ rank n:0.3953, execution time: 0.2179s\n",
"K Nearest Neighbors (KNN) ROC:0.857, precision @ rank n:0.3488, execution time: 0.0782s\n",
"Isolation Forest (IForest) ROC:0.6048, precision @ rank n:0.1163, execution time: 0.2337s\n",
"Local Outlier Factor (LOF) ROC:0.896, precision @ rank n:0.4419, execution time: 0.0285s\n",
"One-class SVM (OCSVM) ROC:0.5712, precision @ rank n:0.1628, execution time: 0.0789s\n",
"Principal Component Analysis(PCA) ROC:0.5146, precision @ rank n:0.093, execution time: 0.0034s\n",
"Kernel PCA(KPCA) ROC:0.8478, precision @ rank n:0.2093, execution time: 0.3707s\n",
"Kernel PCA(KPCA) with samling ROC:0.8102, precision @ rank n:0.186, execution time: 0.1209s\n",
"\n",
"... Processing mnist.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.7824, precision @ rank n:0.3611, execution time: 2.2379s\n",
"K Nearest Neighbors (KNN) ROC:0.8506, precision @ rank n:0.4306, execution time: 1.4719s\n",
"Isolation Forest (IForest) ROC:0.7974, precision @ rank n:0.2604, execution time: 0.5817s\n",
"Local Outlier Factor (LOF) ROC:0.67, precision @ rank n:0.3264, execution time: 0.6347s\n",
"One-class SVM (OCSVM) ROC:0.8543, precision @ rank n:0.3854, execution time: 1.8226s\n",
"Principal Component Analysis(PCA) ROC:0.8518, precision @ rank n:0.3785, execution time: 0.0561s\n",
"Kernel PCA(KPCA) ROC:0.8588, precision @ rank n:0.3958, execution time: 14.8045s\n",
"Kernel PCA(KPCA) with samling ROC:0.8494, precision @ rank n:0.3785, execution time: 0.6482s\n",
"\n",
"... Processing musk.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.2741, precision @ rank n:0.0426, execution time: 0.7341s\n",
"K Nearest Neighbors (KNN) ROC:0.876, precision @ rank n:0.3404, execution time: 0.4263s\n",
"Isolation Forest (IForest) ROC:1.0, precision @ rank n:1.0, execution time: 0.3897s\n",
"Local Outlier Factor (LOF) ROC:0.668, precision @ rank n:0.2553, execution time: 0.1178s\n",
"One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.4749s\n",
"Principal Component Analysis(PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.0574s\n",
"Kernel PCA(KPCA) ROC:0.7274, precision @ rank n:0.4043, execution time: 0.9847s\n",
"Kernel PCA(KPCA) with samling ROC:0.8434, precision @ rank n:0.5106, execution time: 0.2369s\n",
"\n",
"... Processing optdigits.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.5204, precision @ rank n:0.0312, execution time: 1.0504s\n",
"K Nearest Neighbors (KNN) ROC:0.3974, precision @ rank n:0.0, execution time: 0.5476s\n",
"Isolation Forest (IForest) ROC:0.6907, precision @ rank n:0.0156, execution time: 0.4118s\n",
"Local Outlier Factor (LOF) ROC:0.5012, precision @ rank n:0.0781, execution time: 0.3103s\n",
"One-class SVM (OCSVM) ROC:0.5125, precision @ rank n:0.0, execution time: 0.6467s\n",
"Principal Component Analysis(PCA) ROC:0.5141, precision @ rank n:0.0, execution time: 0.0198s\n",
"Kernel PCA(KPCA) ROC:0.5284, precision @ rank n:0.0, execution time: 4.3066s\n",
"Kernel PCA(KPCA) with samling ROC:0.5023, precision @ rank n:0.0, execution time: 0.3975s\n",
"\n",
"... Processing pendigits.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.6728, precision @ rank n:0.06, execution time: 0.997s\n",
"K Nearest Neighbors (KNN) ROC:0.6995, precision @ rank n:0.08, execution time: 0.5151s\n",
"Isolation Forest (IForest) ROC:0.955, precision @ rank n:0.38, execution time: 0.3741s\n",
"Local Outlier Factor (LOF) ROC:0.4443, precision @ rank n:0.06, execution time: 0.4893s\n",
"One-class SVM (OCSVM) ROC:0.9293, precision @ rank n:0.34, execution time: 0.9349s\n",
"Principal Component Analysis(PCA) ROC:0.9426, precision @ rank n:0.28, execution time: 0.013s\n",
"Kernel PCA(KPCA) ROC:0.9207, precision @ rank n:0.14, execution time: 9.0731s\n",
"Kernel PCA(KPCA) with samling ROC:0.9661, precision @ rank n:0.34, execution time: 0.545s\n",
"\n",
"... Processing pima.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.6614, precision @ rank n:0.5048, execution time: 0.074s\n",
"K Nearest Neighbors (KNN) ROC:0.6909, precision @ rank n:0.5429, execution time: 0.0178s\n",
"Isolation Forest (IForest) ROC:0.6504, precision @ rank n:0.4857, execution time: 0.1928s\n",
"Local Outlier Factor (LOF) ROC:0.5148, precision @ rank n:0.3714, execution time: 0.0045s\n",
"One-class SVM (OCSVM) ROC:0.6184, precision @ rank n:0.4762, execution time: 0.011s\n",
"Principal Component Analysis(PCA) ROC:0.6351, precision @ rank n:0.4857, execution time: 0.001s\n",
"Kernel PCA(KPCA) ROC:0.6696, precision @ rank n:0.4952, execution time: 0.1347s\n",
"Kernel PCA(KPCA) with samling ROC:0.6753, precision @ rank n:0.5048, execution time: 0.0712s\n",
"\n",
"... Processing satellite.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.5645, precision @ rank n:0.3882, execution time: 0.9812s\n",
"K Nearest Neighbors (KNN) ROC:0.6792, precision @ rank n:0.4794, execution time: 0.5223s\n",
"Isolation Forest (IForest) ROC:0.6967, precision @ rank n:0.5656, execution time: 0.3981s\n",
"Local Outlier Factor (LOF) ROC:0.5468, precision @ rank n:0.3676, execution time: 0.4176s\n",
"One-class SVM (OCSVM) ROC:0.6607, precision @ rank n:0.5244, execution time: 0.9794s\n",
"Principal Component Analysis(PCA) ROC:0.5976, precision @ rank n:0.464, execution time: 0.0079s\n",
"Kernel PCA(KPCA) ROC:0.6878, precision @ rank n:0.5039, execution time: 7.7041s\n",
"Kernel PCA(KPCA) with samling ROC:0.6909, precision @ rank n:0.509, execution time: 0.5056s\n",
"\n",
"... Processing satimage-2.mat ...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Angle-based Outlier Detector (ABOD) ROC:0.8464, precision @ rank n:0.2647, execution time: 0.8863s\n",
"K Nearest Neighbors (KNN) ROC:0.9549, precision @ rank n:0.4412, execution time: 0.4757s\n",
"Isolation Forest (IForest) ROC:0.991, precision @ rank n:0.9118, execution time: 0.3707s\n",
"Local Outlier Factor (LOF) ROC:0.5507, precision @ rank n:0.0588, execution time: 0.3591s\n",
"One-class SVM (OCSVM) ROC:0.995, precision @ rank n:0.9412, execution time: 0.7805s\n",
"Principal Component Analysis(PCA) ROC:0.9713, precision @ rank n:0.8824, execution time: 0.0095s\n",
"Kernel PCA(KPCA) ROC:0.9841, precision @ rank n:0.5, execution time: 6.4915s\n",
"Kernel PCA(KPCA) with samling ROC:0.9589, precision @ rank n:0.4118, execution time: 0.4616s\n",
"\n",
"... Processing vertebral.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.5843, precision @ rank n:0.25, execution time: 0.0248s\n",
"K Nearest Neighbors (KNN) ROC:0.495, precision @ rank n:0.0833, execution time: 0.0058s\n",
"Isolation Forest (IForest) ROC:0.4246, precision @ rank n:0.0, execution time: 0.1979s\n",
"Local Outlier Factor (LOF) ROC:0.503, precision @ rank n:0.0833, execution time: 0.0024s\n",
"One-class SVM (OCSVM) ROC:0.495, precision @ rank n:0.0833, execution time: 0.0019s\n",
"Principal Component Analysis(PCA) ROC:0.3919, precision @ rank n:0.0, execution time: 0.001s\n",
"Kernel PCA(KPCA) ROC:0.497, precision @ rank n:0.0833, execution time: 0.0902s\n",
"Kernel PCA(KPCA) with samling ROC:0.5694, precision @ rank n:0.0833, execution time: 0.0225s\n",
"\n",
"... Processing vowels.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.956, precision @ rank n:0.4, execution time: 0.1645s\n",
"K Nearest Neighbors (KNN) ROC:0.9433, precision @ rank n:0.35, execution time: 0.0406s\n",
"Isolation Forest (IForest) ROC:0.6669, precision @ rank n:0.1, execution time: 0.2232s\n",
"Local Outlier Factor (LOF) ROC:0.9296, precision @ rank n:0.25, execution time: 0.014s\n",
"One-class SVM (OCSVM) ROC:0.7211, precision @ rank n:0.15, execution time: 0.0439s\n",
"Principal Component Analysis(PCA) ROC:0.4931, precision @ rank n:0.1, execution time: 0.0014s\n",
"Kernel PCA(KPCA) ROC:0.8834, precision @ rank n:0.2, execution time: 0.3537s\n",
"Kernel PCA(KPCA) with samling ROC:0.8444, precision @ rank n:0.2, execution time: 0.1124s\n",
"\n",
"... Processing wbc.mat ...\n",
"Angle-based Outlier Detector (ABOD) ROC:0.9456, precision @ rank n:0.2, execution time: 0.0593s\n",
"K Nearest Neighbors (KNN) ROC:0.9619, precision @ rank n:0.4, execution time: 0.013s\n",
"Isolation Forest (IForest) ROC:0.9728, precision @ rank n:0.6, execution time: 0.2198s\n",
"Local Outlier Factor (LOF) ROC:0.9361, precision @ rank n:0.2, execution time: 0.0036s\n",
"One-class SVM (OCSVM) ROC:0.966, precision @ rank n:0.4, execution time: 0.0067s\n",
"Principal Component Analysis(PCA) ROC:0.9646, precision @ rank n:0.4, execution time: 0.002s\n",
"Kernel PCA(KPCA) ROC:0.966, precision @ rank n:0.4, execution time: 0.0616s\n",
"Kernel PCA(KPCA) with samling ROC:0.9524, precision @ rank n:0.2, execution time: 0.0295s\n"
]
}
],
"source": [
"# Define data file and read X and y\n",
"mat_file_list = ['arrhythmia.mat',\n",
" 'cardio.mat',\n",
" 'glass.mat',\n",
" 'ionosphere.mat',\n",
" 'letter.mat',\n",
" 'mnist.mat',\n",
" 'musk.mat',\n",
" 'optdigits.mat',\n",
" 'pendigits.mat',\n",
" 'pima.mat',\n",
" 'satellite.mat',\n",
" 'satimage-2.mat',\n",
" # 'shuttle.mat',\n",
" 'vertebral.mat',\n",
" 'vowels.mat',\n",
" 'wbc.mat']\n",
"\n",
"# Define outlier detection tools to be compared\n",
"random_state = np.random.RandomState(42)\n",
"\n",
"df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',\n",
" 'ABOD', 'KNN', 'IForest', 'LOF', 'OCSVM', 'PCA', 'KPCA', 'KPCA(SP)']\n",
"roc_df = pd.DataFrame(columns=df_columns)\n",
"prn_df = pd.DataFrame(columns=df_columns)\n",
"time_df = pd.DataFrame(columns=df_columns)\n",
"\n",
"\n",
"for mat_file in mat_file_list:\n",
" print(\"\\n... Processing\", mat_file, '...')\n",
" mat = loadmat(os.path.join('data', mat_file))\n",
"\n",
" X = mat['X']\n",
" y = mat['y'].ravel()\n",
" outliers_fraction = np.count_nonzero(y) / len(y)\n",
" outliers_percentage = round(outliers_fraction * 100, ndigits=4)\n",
"\n",
" # construct containers for saving results\n",
" roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]\n",
" prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]\n",
" time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]\n",
"\n",
" # 60% data for training and 40% for testing\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,\n",
" random_state=random_state)\n",
"\n",
" # standardizing data for processing\n",
" X_train_norm, X_test_norm = standardizer(X_train, X_test)\n",
"\n",
" classifiers = {\n",
" 'Angle-based Outlier Detector (ABOD)': ABOD(\n",
" contamination=outliers_fraction),\n",
" 'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),\n",
" 'Isolation Forest (IForest)': IForest(\n",
" contamination=outliers_fraction,\n",
" random_state=random_state),\n",
" 'Local Outlier Factor (LOF)': LOF(\n",
" n_neighbors=10,\n",
" contamination=outliers_fraction),\n",
" 'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),\n",
" 'Principal Component Analysis(PCA)': PCA(\n",
" contamination=outliers_fraction, random_state=random_state),\n",
" 'Kernel PCA(KPCA)': KPCA(contamination=outliers_fraction, random_state=random_state),\n",
" 'Kernel PCA(KPCA) with samling': KPCA(sampling=True, subset_size=0.2,\n",
" contamination=outliers_fraction, random_state=random_state),\n",
" }\n",
"\n",
" for clf_name, clf in classifiers.items():\n",
" t0 = time()\n",
" clf.fit(X_train_norm)\n",
" test_scores = clf.decision_function(X_test_norm)\n",
" t1 = time()\n",
" duration = round(t1 - t0, ndigits=4)\n",
" time_list.append(duration)\n",
"\n",
" roc = round(roc_auc_score(y_test, test_scores), ndigits=4)\n",
" prn = round(precision_n_scores(y_test, test_scores), ndigits=4)\n",
"\n",
" print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '\n",
" 'execution time: {duration}s'.format(\n",
" clf_name=clf_name, roc=roc, prn=prn, duration=duration))\n",
"\n",
" roc_list.append(roc)\n",
" prn_list.append(prn)\n",
"\n",
" temp_df = pd.DataFrame(time_list).transpose()\n",
" temp_df.columns = df_columns\n",
" time_df = pd.concat([time_df, temp_df], axis=0)\n",
"\n",
" temp_df = pd.DataFrame(roc_list).transpose()\n",
" temp_df.columns = df_columns\n",
" roc_df = pd.concat([roc_df, temp_df], axis=0)\n",
"\n",
" temp_df = pd.DataFrame(prn_list).transpose()\n",
" temp_df.columns = df_columns\n",
" prn_df = pd.concat([prn_df, temp_df], axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time complexity\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Data</th>\n",
" <th>#Samples</th>\n",
" <th># Dimensions</th>\n",
" <th>Outlier Perc</th>\n",
" <th>ABOD</th>\n",
" <th>KNN</th>\n",
" <th>IForest</th>\n",
" <th>LOF</th>\n",
" <th>OCSVM</th>\n",
" <th>PCA</th>\n",
" <th>KPCA</th>\n",
" <th>KPCA(SP)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>arrhythmia</td>\n",
" <td>452</td>\n",
" <td>274</td>\n",
" <td>14.6018</td>\n",
" <td>0.8076</td>\n",
" <td>0.0292</td>\n",
" <td>0.247</td>\n",
" <td>0.0036</td>\n",
" <td>0.0226</td>\n",
" <td>0.0311</td>\n",
" <td>0.0816</td>\n",
" <td>0.0355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>cardio</td>\n",
" <td>1831</td>\n",
" <td>21</td>\n",
" <td>9.6122</td>\n",
" <td>0.2436</td>\n",
" <td>0.0893</td>\n",
" <td>0.2443</td>\n",
" <td>0.043</td>\n",
" <td>0.0961</td>\n",
" <td>0.0026</td>\n",
" <td>0.4285</td>\n",
" <td>0.1398</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>glass</td>\n",
" <td>214</td>\n",
" <td>9</td>\n",
" <td>4.2056</td>\n",
" <td>0.0225</td>\n",
" <td>0.0058</td>\n",
" <td>0.1852</td>\n",
" <td>0.0017</td>\n",
" <td>0.0015</td>\n",
" <td>0.001</td>\n",
" <td>0.0456</td>\n",
" <td>0.0175</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ionosphere</td>\n",
" <td>351</td>\n",
" <td>33</td>\n",
" <td>35.8974</td>\n",
" <td>0.0561</td>\n",
" <td>0.0122</td>\n",
" <td>0.2141</td>\n",
" <td>0.0113</td>\n",
" <td>0.0053</td>\n",
" <td>0.0021</td>\n",
" <td>0.0611</td>\n",
" <td>0.0274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>letter</td>\n",
" <td>1600</td>\n",
" <td>32</td>\n",
" <td>6.25</td>\n",
" <td>0.2179</td>\n",
" <td>0.0782</td>\n",
" <td>0.2337</td>\n",
" <td>0.0285</td>\n",
" <td>0.0789</td>\n",
" <td>0.0034</td>\n",
" <td>0.3707</td>\n",
" <td>0.1209</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>mnist</td>\n",
" <td>7603</td>\n",
" <td>100</td>\n",
" <td>9.2069</td>\n",
" <td>2.2379</td>\n",
" <td>1.4719</td>\n",
" <td>0.5817</td>\n",
" <td>0.6347</td>\n",
" <td>1.8226</td>\n",
" <td>0.0561</td>\n",
" <td>14.8045</td>\n",
" <td>0.6482</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>musk</td>\n",
" <td>3062</td>\n",
" <td>166</td>\n",
" <td>3.1679</td>\n",
" <td>0.7341</td>\n",
" <td>0.4263</td>\n",
" <td>0.3897</td>\n",
" <td>0.1178</td>\n",
" <td>0.4749</td>\n",
" <td>0.0574</td>\n",
" <td>0.9847</td>\n",
" <td>0.2369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>optdigits</td>\n",
" <td>5216</td>\n",
" <td>64</td>\n",
" <td>2.8758</td>\n",
" <td>1.0504</td>\n",
" <td>0.5476</td>\n",
" <td>0.4118</td>\n",
" <td>0.3103</td>\n",
" <td>0.6467</td>\n",
" <td>0.0198</td>\n",
" <td>4.3066</td>\n",
" <td>0.3975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pendigits</td>\n",
" <td>6870</td>\n",
" <td>16</td>\n",
" <td>2.2707</td>\n",
" <td>0.997</td>\n",
" <td>0.5151</td>\n",
" <td>0.3741</td>\n",
" <td>0.4893</td>\n",
" <td>0.9349</td>\n",
" <td>0.013</td>\n",
" <td>9.0731</td>\n",
" <td>0.545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pima</td>\n",
" <td>768</td>\n",
" <td>8</td>\n",
" <td>34.8958</td>\n",
" <td>0.074</td>\n",
" <td>0.0178</td>\n",
" <td>0.1928</td>\n",
" <td>0.0045</td>\n",
" <td>0.011</td>\n",
" <td>0.001</td>\n",
" <td>0.1347</td>\n",
" <td>0.0712</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satellite</td>\n",
" <td>6435</td>\n",
" <td>36</td>\n",
" <td>31.6395</td>\n",
" <td>0.9812</td>\n",
" <td>0.5223</td>\n",
" <td>0.3981</td>\n",
" <td>0.4176</td>\n",
" <td>0.9794</td>\n",
" <td>0.0079</td>\n",
" <td>7.7041</td>\n",
" <td>0.5056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satimage-2</td>\n",
" <td>5803</td>\n",
" <td>36</td>\n",
" <td>1.2235</td>\n",
" <td>0.8863</td>\n",
" <td>0.4757</td>\n",
" <td>0.3707</td>\n",
" <td>0.3591</td>\n",
" <td>0.7805</td>\n",
" <td>0.0095</td>\n",
" <td>6.4915</td>\n",
" <td>0.4616</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vertebral</td>\n",
" <td>240</td>\n",
" <td>6</td>\n",
" <td>12.5</td>\n",
" <td>0.0248</td>\n",
" <td>0.0058</td>\n",
" <td>0.1979</td>\n",
" <td>0.0024</td>\n",
" <td>0.0019</td>\n",
" <td>0.001</td>\n",
" <td>0.0902</td>\n",
" <td>0.0225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vowels</td>\n",
" <td>1456</td>\n",
" <td>12</td>\n",
" <td>3.4341</td>\n",
" <td>0.1645</td>\n",
" <td>0.0406</td>\n",
" <td>0.2232</td>\n",
" <td>0.014</td>\n",
" <td>0.0439</td>\n",
" <td>0.0014</td>\n",
" <td>0.3537</td>\n",
" <td>0.1124</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>wbc</td>\n",
" <td>378</td>\n",
" <td>30</td>\n",
" <td>5.5556</td>\n",
" <td>0.0593</td>\n",
" <td>0.013</td>\n",
" <td>0.2198</td>\n",
" <td>0.0036</td>\n",
" <td>0.0067</td>\n",
" <td>0.002</td>\n",
" <td>0.0616</td>\n",
" <td>0.0295</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Data #Samples # Dimensions Outlier Perc ABOD KNN IForest \\\n",
"0 arrhythmia 452 274 14.6018 0.8076 0.0292 0.247 \n",
"0 cardio 1831 21 9.6122 0.2436 0.0893 0.2443 \n",
"0 glass 214 9 4.2056 0.0225 0.0058 0.1852 \n",
"0 ionosphere 351 33 35.8974 0.0561 0.0122 0.2141 \n",
"0 letter 1600 32 6.25 0.2179 0.0782 0.2337 \n",
"0 mnist 7603 100 9.2069 2.2379 1.4719 0.5817 \n",
"0 musk 3062 166 3.1679 0.7341 0.4263 0.3897 \n",
"0 optdigits 5216 64 2.8758 1.0504 0.5476 0.4118 \n",
"0 pendigits 6870 16 2.2707 0.997 0.5151 0.3741 \n",
"0 pima 768 8 34.8958 0.074 0.0178 0.1928 \n",
"0 satellite 6435 36 31.6395 0.9812 0.5223 0.3981 \n",
"0 satimage-2 5803 36 1.2235 0.8863 0.4757 0.3707 \n",
"0 vertebral 240 6 12.5 0.0248 0.0058 0.1979 \n",
"0 vowels 1456 12 3.4341 0.1645 0.0406 0.2232 \n",
"0 wbc 378 30 5.5556 0.0593 0.013 0.2198 \n",
"\n",
" LOF OCSVM PCA KPCA KPCA(SP) \n",
"0 0.0036 0.0226 0.0311 0.0816 0.0355 \n",
"0 0.043 0.0961 0.0026 0.4285 0.1398 \n",
"0 0.0017 0.0015 0.001 0.0456 0.0175 \n",
"0 0.0113 0.0053 0.0021 0.0611 0.0274 \n",
"0 0.0285 0.0789 0.0034 0.3707 0.1209 \n",
"0 0.6347 1.8226 0.0561 14.8045 0.6482 \n",
"0 0.1178 0.4749 0.0574 0.9847 0.2369 \n",
"0 0.3103 0.6467 0.0198 4.3066 0.3975 \n",
"0 0.4893 0.9349 0.013 9.0731 0.545 \n",
"0 0.0045 0.011 0.001 0.1347 0.0712 \n",
"0 0.4176 0.9794 0.0079 7.7041 0.5056 \n",
"0 0.3591 0.7805 0.0095 6.4915 0.4616 \n",
"0 0.0024 0.0019 0.001 0.0902 0.0225 \n",
"0 0.014 0.0439 0.0014 0.3537 0.1124 \n",
"0 0.0036 0.0067 0.002 0.0616 0.0295 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('Time complexity')\n",
"time_df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 576x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# https://dajiro.com/entry/2020/06/06/113437\n",
"def autolabel(rects):\n",
" for rect in rects:\n",
" height = rect.get_height()\n",
" ax.annotate('{:.4g}sec'.format(height),\n",
" xy=(rect.get_x() + rect.get_width() / 2, height),\n",
" xytext=(0, 3),\n",
" textcoords=\"offset points\",\n",
" ha='center', va='bottom',\n",
" fontsize=10)\n",
"\n",
"fig, ax = plt.subplots(figsize = (8, 4))\n",
"label = [\"ABOD\", \"KNN\", \"LOF\", \"IForest\", \"OCSVM\", \"PCA\", \"KPCA\", \"KPCA(SP)\"]\n",
"time_ave = [time_df[\"ABOD\"].mean(), time_df[\"KNN\"].mean(), time_df[\"LOF\"].mean(), time_df[\"IForest\"].mean(), \n",
" time_df[\"OCSVM\"].mean(), time_df[\"PCA\"].mean(), time_df[\"KPCA\"].mean(), time_df[\"KPCA(SP)\"].mean()]\n",
"rect = ax.bar(label, time_ave, align=\"center\")\n",
"ax.set_ylabel(\"Time complexity (sec)\")\n",
"# ax.set_ylim(0, 9.0)\n",
"autolabel(rect)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Analyze the performance of ROC and Precision @ n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ROC Performance\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Data</th>\n",
" <th>#Samples</th>\n",
" <th># Dimensions</th>\n",
" <th>Outlier Perc</th>\n",
" <th>ABOD</th>\n",
" <th>KNN</th>\n",
" <th>IForest</th>\n",
" <th>LOF</th>\n",
" <th>OCSVM</th>\n",
" <th>PCA</th>\n",
" <th>KPCA</th>\n",
" <th>KPCA(SP)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>arrhythmia</td>\n",
" <td>452</td>\n",
" <td>274</td>\n",
" <td>14.6018</td>\n",
" <td>0.7687</td>\n",
" <td>0.782</td>\n",
" <td>0.8464</td>\n",
" <td>0.7619</td>\n",
" <td>0.7986</td>\n",
" <td>0.7997</td>\n",
" <td>0.7883</td>\n",
" <td>0.7775</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>cardio</td>\n",
" <td>1831</td>\n",
" <td>21</td>\n",
" <td>9.6122</td>\n",
" <td>0.5703</td>\n",
" <td>0.7435</td>\n",
" <td>0.9348</td>\n",
" <td>0.5193</td>\n",
" <td>0.9415</td>\n",
" <td>0.9543</td>\n",
" <td>0.8249</td>\n",
" <td>0.8095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>glass</td>\n",
" <td>214</td>\n",
" <td>9</td>\n",
" <td>4.2056</td>\n",
" <td>0.7287</td>\n",
" <td>0.811</td>\n",
" <td>0.6067</td>\n",
" <td>0.7287</td>\n",
" <td>0.4116</td>\n",
" <td>0.5305</td>\n",
" <td>0.8201</td>\n",
" <td>0.6555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ionosphere</td>\n",
" <td>351</td>\n",
" <td>33</td>\n",
" <td>35.8974</td>\n",
" <td>0.8585</td>\n",
" <td>0.9407</td>\n",
" <td>0.8175</td>\n",
" <td>0.8648</td>\n",
" <td>0.7819</td>\n",
" <td>0.7505</td>\n",
" <td>0.9112</td>\n",
" <td>0.9456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>letter</td>\n",
" <td>1600</td>\n",
" <td>32</td>\n",
" <td>6.25</td>\n",
" <td>0.9104</td>\n",
" <td>0.857</td>\n",
" <td>0.6048</td>\n",
" <td>0.896</td>\n",
" <td>0.5712</td>\n",
" <td>0.5146</td>\n",
" <td>0.8478</td>\n",
" <td>0.8102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>mnist</td>\n",
" <td>7603</td>\n",
" <td>100</td>\n",
" <td>9.2069</td>\n",
" <td>0.7824</td>\n",
" <td>0.8506</td>\n",
" <td>0.7974</td>\n",
" <td>0.67</td>\n",
" <td>0.8543</td>\n",
" <td>0.8518</td>\n",
" <td>0.8588</td>\n",
" <td>0.8494</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>musk</td>\n",
" <td>3062</td>\n",
" <td>166</td>\n",
" <td>3.1679</td>\n",
" <td>0.2741</td>\n",
" <td>0.876</td>\n",
" <td>1</td>\n",
" <td>0.668</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.7274</td>\n",
" <td>0.8434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>optdigits</td>\n",
" <td>5216</td>\n",
" <td>64</td>\n",
" <td>2.8758</td>\n",
" <td>0.5204</td>\n",
" <td>0.3974</td>\n",
" <td>0.6907</td>\n",
" <td>0.5012</td>\n",
" <td>0.5125</td>\n",
" <td>0.5141</td>\n",
" <td>0.5284</td>\n",
" <td>0.5023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pendigits</td>\n",
" <td>6870</td>\n",
" <td>16</td>\n",
" <td>2.2707</td>\n",
" <td>0.6728</td>\n",
" <td>0.6995</td>\n",
" <td>0.955</td>\n",
" <td>0.4443</td>\n",
" <td>0.9293</td>\n",
" <td>0.9426</td>\n",
" <td>0.9207</td>\n",
" <td>0.9661</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pima</td>\n",
" <td>768</td>\n",
" <td>8</td>\n",
" <td>34.8958</td>\n",
" <td>0.6614</td>\n",
" <td>0.6909</td>\n",
" <td>0.6504</td>\n",
" <td>0.5148</td>\n",
" <td>0.6184</td>\n",
" <td>0.6351</td>\n",
" <td>0.6696</td>\n",
" <td>0.6753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satellite</td>\n",
" <td>6435</td>\n",
" <td>36</td>\n",
" <td>31.6395</td>\n",
" <td>0.5645</td>\n",
" <td>0.6792</td>\n",
" <td>0.6967</td>\n",
" <td>0.5468</td>\n",
" <td>0.6607</td>\n",
" <td>0.5976</td>\n",
" <td>0.6878</td>\n",
" <td>0.6909</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satimage-2</td>\n",
" <td>5803</td>\n",
" <td>36</td>\n",
" <td>1.2235</td>\n",
" <td>0.8464</td>\n",
" <td>0.9549</td>\n",
" <td>0.991</td>\n",
" <td>0.5507</td>\n",
" <td>0.995</td>\n",
" <td>0.9713</td>\n",
" <td>0.9841</td>\n",
" <td>0.9589</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vertebral</td>\n",
" <td>240</td>\n",
" <td>6</td>\n",
" <td>12.5</td>\n",
" <td>0.5843</td>\n",
" <td>0.495</td>\n",
" <td>0.4246</td>\n",
" <td>0.503</td>\n",
" <td>0.495</td>\n",
" <td>0.3919</td>\n",
" <td>0.497</td>\n",
" <td>0.5694</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vowels</td>\n",
" <td>1456</td>\n",
" <td>12</td>\n",
" <td>3.4341</td>\n",
" <td>0.956</td>\n",
" <td>0.9433</td>\n",
" <td>0.6669</td>\n",
" <td>0.9296</td>\n",
" <td>0.7211</td>\n",
" <td>0.4931</td>\n",
" <td>0.8834</td>\n",
" <td>0.8444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>wbc</td>\n",
" <td>378</td>\n",
" <td>30</td>\n",
" <td>5.5556</td>\n",
" <td>0.9456</td>\n",
" <td>0.9619</td>\n",
" <td>0.9728</td>\n",
" <td>0.9361</td>\n",
" <td>0.966</td>\n",
" <td>0.9646</td>\n",
" <td>0.966</td>\n",
" <td>0.9524</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Data #Samples # Dimensions Outlier Perc ABOD KNN IForest \\\n",
"0 arrhythmia 452 274 14.6018 0.7687 0.782 0.8464 \n",
"0 cardio 1831 21 9.6122 0.5703 0.7435 0.9348 \n",
"0 glass 214 9 4.2056 0.7287 0.811 0.6067 \n",
"0 ionosphere 351 33 35.8974 0.8585 0.9407 0.8175 \n",
"0 letter 1600 32 6.25 0.9104 0.857 0.6048 \n",
"0 mnist 7603 100 9.2069 0.7824 0.8506 0.7974 \n",
"0 musk 3062 166 3.1679 0.2741 0.876 1 \n",
"0 optdigits 5216 64 2.8758 0.5204 0.3974 0.6907 \n",
"0 pendigits 6870 16 2.2707 0.6728 0.6995 0.955 \n",
"0 pima 768 8 34.8958 0.6614 0.6909 0.6504 \n",
"0 satellite 6435 36 31.6395 0.5645 0.6792 0.6967 \n",
"0 satimage-2 5803 36 1.2235 0.8464 0.9549 0.991 \n",
"0 vertebral 240 6 12.5 0.5843 0.495 0.4246 \n",
"0 vowels 1456 12 3.4341 0.956 0.9433 0.6669 \n",
"0 wbc 378 30 5.5556 0.9456 0.9619 0.9728 \n",
"\n",
" LOF OCSVM PCA KPCA KPCA(SP) \n",
"0 0.7619 0.7986 0.7997 0.7883 0.7775 \n",
"0 0.5193 0.9415 0.9543 0.8249 0.8095 \n",
"0 0.7287 0.4116 0.5305 0.8201 0.6555 \n",
"0 0.8648 0.7819 0.7505 0.9112 0.9456 \n",
"0 0.896 0.5712 0.5146 0.8478 0.8102 \n",
"0 0.67 0.8543 0.8518 0.8588 0.8494 \n",
"0 0.668 1 1 0.7274 0.8434 \n",
"0 0.5012 0.5125 0.5141 0.5284 0.5023 \n",
"0 0.4443 0.9293 0.9426 0.9207 0.9661 \n",
"0 0.5148 0.6184 0.6351 0.6696 0.6753 \n",
"0 0.5468 0.6607 0.5976 0.6878 0.6909 \n",
"0 0.5507 0.995 0.9713 0.9841 0.9589 \n",
"0 0.503 0.495 0.3919 0.497 0.5694 \n",
"0 0.9296 0.7211 0.4931 0.8834 0.8444 \n",
"0 0.9361 0.966 0.9646 0.966 0.9524 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('ROC Performance')\n",
"# roc_df's rows all share index label 0, so display a renumbered copy;\n",
"# roc_df itself is not modified and is reused by the next cell.\n",
"roc_df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Show average ROC over data\n",
"\n",
"# https://dajiro.com/entry/2020/06/06/113437\n",
"def autolabel(rects, ax=None):\n",
"    \"\"\"Write each bar's height, followed by '%', just above the bar.\n",
"\n",
"    The height is printed as-is (no rescaling), using 4 significant digits.\n",
"\n",
"    Args:\n",
"        rects: Bars to annotate, e.g. the container returned by ``ax.bar``.\n",
"        ax: Axes that receive the labels. When omitted, each label is drawn\n",
"            on the axes its bar belongs to (``rect.axes``), so the function\n",
"            does not depend on a global variable named ``ax``.\n",
"    \"\"\"\n",
"    for rect in rects:\n",
"        target = rect.axes if ax is None else ax\n",
"        height = rect.get_height()\n",
"        target.annotate('{:.4g}%'.format(height),\n",
"                        # anchor at the horizontal centre of the bar's top edge\n",
"                        xy=(rect.get_x() + rect.get_width() / 2, height),\n",
"                        xytext=(0, 3),  # lift the text 3 points above the anchor\n",
"                        textcoords=\"offset points\",\n",
"                        ha='center', va='bottom',\n",
"                        fontsize=10)\n",
"\n",
"fig, ax = plt.subplots(figsize=(6, 4))\n",
"# A single list drives both the bar labels and the column lookup,\n",
"# so a label can never be paired with another detector's score.\n",
"label = [\"ABOD\", \"KNN\", \"LOF\", \"IForest\", \"OCSVM\", \"PCA\", \"KPCA\", \"KPCA(SP)\"]\n",
"# Mean ROC of each detector over all datasets, rescaled to percent.\n",
"roc = np.array([roc_df[name].mean() for name in label]) * 100\n",
"rect = ax.bar(label, roc, align=\"center\")\n",
"ax.set_title(\"Average ROC across datasets\")\n",
"ax.set_ylabel(\"ROC Performance (%)\")\n",
"ax.set_ylim(0, 100.0)\n",
"autolabel(rect)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Precision @ n Performance\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Data</th>\n",
" <th>#Samples</th>\n",
" <th># Dimensions</th>\n",
" <th>Outlier Perc</th>\n",
" <th>ABOD</th>\n",
" <th>KNN</th>\n",
" <th>IForest</th>\n",
" <th>LOF</th>\n",
" <th>OCSVM</th>\n",
" <th>PCA</th>\n",
" <th>KPCA</th>\n",
" <th>KPCA(SP)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>arrhythmia</td>\n",
" <td>452</td>\n",
" <td>274</td>\n",
" <td>14.6018</td>\n",
" <td>0.3571</td>\n",
" <td>0.5</td>\n",
" <td>0.5714</td>\n",
" <td>0.4286</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.4643</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>cardio</td>\n",
" <td>1831</td>\n",
" <td>21</td>\n",
" <td>9.6122</td>\n",
" <td>0.1923</td>\n",
" <td>0.3718</td>\n",
" <td>0.5641</td>\n",
" <td>0.1282</td>\n",
" <td>0.5769</td>\n",
" <td>0.6538</td>\n",
" <td>0.3846</td>\n",
" <td>0.4231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>glass</td>\n",
" <td>214</td>\n",
" <td>9</td>\n",
" <td>4.2056</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ionosphere</td>\n",
" <td>351</td>\n",
" <td>33</td>\n",
" <td>35.8974</td>\n",
" <td>0.7955</td>\n",
" <td>0.8182</td>\n",
" <td>0.5682</td>\n",
" <td>0.75</td>\n",
" <td>0.6364</td>\n",
" <td>0.5909</td>\n",
" <td>0.7955</td>\n",
" <td>0.8182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>letter</td>\n",
" <td>1600</td>\n",
" <td>32</td>\n",
" <td>6.25</td>\n",
" <td>0.3953</td>\n",
" <td>0.3488</td>\n",
" <td>0.1163</td>\n",
" <td>0.4419</td>\n",
" <td>0.1628</td>\n",
" <td>0.093</td>\n",
" <td>0.2093</td>\n",
" <td>0.186</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>mnist</td>\n",
" <td>7603</td>\n",
" <td>100</td>\n",
" <td>9.2069</td>\n",
" <td>0.3611</td>\n",
" <td>0.4306</td>\n",
" <td>0.2604</td>\n",
" <td>0.3264</td>\n",
" <td>0.3854</td>\n",
" <td>0.3785</td>\n",
" <td>0.3958</td>\n",
" <td>0.3785</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>musk</td>\n",
" <td>3062</td>\n",
" <td>166</td>\n",
" <td>3.1679</td>\n",
" <td>0.0426</td>\n",
" <td>0.3404</td>\n",
" <td>1</td>\n",
" <td>0.2553</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.4043</td>\n",
" <td>0.5106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>optdigits</td>\n",
" <td>5216</td>\n",
" <td>64</td>\n",
" <td>2.8758</td>\n",
" <td>0.0312</td>\n",
" <td>0</td>\n",
" <td>0.0156</td>\n",
" <td>0.0781</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pendigits</td>\n",
" <td>6870</td>\n",
" <td>16</td>\n",
" <td>2.2707</td>\n",
" <td>0.06</td>\n",
" <td>0.08</td>\n",
" <td>0.38</td>\n",
" <td>0.06</td>\n",
" <td>0.34</td>\n",
" <td>0.28</td>\n",
" <td>0.14</td>\n",
" <td>0.34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pima</td>\n",
" <td>768</td>\n",
" <td>8</td>\n",
" <td>34.8958</td>\n",
" <td>0.5048</td>\n",
" <td>0.5429</td>\n",
" <td>0.4857</td>\n",
" <td>0.3714</td>\n",
" <td>0.4762</td>\n",
" <td>0.4857</td>\n",
" <td>0.4952</td>\n",
" <td>0.5048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satellite</td>\n",
" <td>6435</td>\n",
" <td>36</td>\n",
" <td>31.6395</td>\n",
" <td>0.3882</td>\n",
" <td>0.4794</td>\n",
" <td>0.5656</td>\n",
" <td>0.3676</td>\n",
" <td>0.5244</td>\n",
" <td>0.464</td>\n",
" <td>0.5039</td>\n",
" <td>0.509</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>satimage-2</td>\n",
" <td>5803</td>\n",
" <td>36</td>\n",
" <td>1.2235</td>\n",
" <td>0.2647</td>\n",
" <td>0.4412</td>\n",
" <td>0.9118</td>\n",
" <td>0.0588</td>\n",
" <td>0.9412</td>\n",
" <td>0.8824</td>\n",
" <td>0.5</td>\n",
" <td>0.4118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vertebral</td>\n",
" <td>240</td>\n",
" <td>6</td>\n",
" <td>12.5</td>\n",
" <td>0.25</td>\n",
" <td>0.0833</td>\n",
" <td>0</td>\n",
" <td>0.0833</td>\n",
" <td>0.0833</td>\n",
" <td>0</td>\n",
" <td>0.0833</td>\n",
" <td>0.0833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>vowels</td>\n",
" <td>1456</td>\n",
" <td>12</td>\n",
" <td>3.4341</td>\n",
" <td>0.4</td>\n",
" <td>0.35</td>\n",
" <td>0.1</td>\n",
" <td>0.25</td>\n",
" <td>0.15</td>\n",
" <td>0.1</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>wbc</td>\n",
" <td>378</td>\n",
" <td>30</td>\n",
" <td>5.5556</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.6</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.4</td>\n",
" <td>0.4</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Data #Samples # Dimensions Outlier Perc ABOD KNN IForest \\\n",
"0 arrhythmia 452 274 14.6018 0.3571 0.5 0.5714 \n",
"0 cardio 1831 21 9.6122 0.1923 0.3718 0.5641 \n",
"0 glass 214 9 4.2056 0 0 0 \n",
"0 ionosphere 351 33 35.8974 0.7955 0.8182 0.5682 \n",
"0 letter 1600 32 6.25 0.3953 0.3488 0.1163 \n",
"0 mnist 7603 100 9.2069 0.3611 0.4306 0.2604 \n",
"0 musk 3062 166 3.1679 0.0426 0.3404 1 \n",
"0 optdigits 5216 64 2.8758 0.0312 0 0.0156 \n",
"0 pendigits 6870 16 2.2707 0.06 0.08 0.38 \n",
"0 pima 768 8 34.8958 0.5048 0.5429 0.4857 \n",
"0 satellite 6435 36 31.6395 0.3882 0.4794 0.5656 \n",
"0 satimage-2 5803 36 1.2235 0.2647 0.4412 0.9118 \n",
"0 vertebral 240 6 12.5 0.25 0.0833 0 \n",
"0 vowels 1456 12 3.4341 0.4 0.35 0.1 \n",
"0 wbc 378 30 5.5556 0.2 0.4 0.6 \n",
"\n",
" LOF OCSVM PCA KPCA KPCA(SP) \n",
"0 0.4286 0.5 0.5 0.4643 0.5 \n",
"0 0.1282 0.5769 0.6538 0.3846 0.4231 \n",
"0 0 0 0 0 0 \n",
"0 0.75 0.6364 0.5909 0.7955 0.8182 \n",
"0 0.4419 0.1628 0.093 0.2093 0.186 \n",
"0 0.3264 0.3854 0.3785 0.3958 0.3785 \n",
"0 0.2553 1 1 0.4043 0.5106 \n",
"0 0.0781 0 0 0 0 \n",
"0 0.06 0.34 0.28 0.14 0.34 \n",
"0 0.3714 0.4762 0.4857 0.4952 0.5048 \n",
"0 0.3676 0.5244 0.464 0.5039 0.509 \n",
"0 0.0588 0.9412 0.8824 0.5 0.4118 \n",
"0 0.0833 0.0833 0 0.0833 0.0833 \n",
"0 0.25 0.15 0.1 0.2 0.2 \n",
"0 0.2 0.4 0.4 0.4 0.2 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('Precision @ n Performance')\n",
"# prn_df's rows all share index label 0, so display a renumbered copy;\n",
"# prn_df itself is not modified and is reused by the next cell.\n",
"prn_df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# https://dajiro.com/entry/2020/06/06/113437\n",
"def autolabel(rects, ax=None):\n",
"    \"\"\"Write each bar's height, followed by '%', just above the bar.\n",
"\n",
"    The height is printed as-is (no rescaling), using 4 significant digits.\n",
"\n",
"    Args:\n",
"        rects: Bars to annotate, e.g. the container returned by ``ax.bar``.\n",
"        ax: Axes that receive the labels. When omitted, each label is drawn\n",
"            on the axes its bar belongs to (``rect.axes``), so the function\n",
"            does not depend on a global variable named ``ax``.\n",
"    \"\"\"\n",
"    for rect in rects:\n",
"        target = rect.axes if ax is None else ax\n",
"        height = rect.get_height()\n",
"        target.annotate('{:.4g}%'.format(height),\n",
"                        # anchor at the horizontal centre of the bar's top edge\n",
"                        xy=(rect.get_x() + rect.get_width() / 2, height),\n",
"                        xytext=(0, 3),  # lift the text 3 points above the anchor\n",
"                        textcoords=\"offset points\",\n",
"                        ha='center', va='bottom',\n",
"                        fontsize=10)\n",
"\n",
"fig, ax = plt.subplots(figsize=(6, 4))\n",
"# A single list drives both the bar labels and the column lookup,\n",
"# so a label can never be paired with another detector's score.\n",
"label = [\"ABOD\", \"KNN\", \"LOF\", \"IForest\", \"OCSVM\", \"PCA\", \"KPCA\", \"KPCA(SP)\"]\n",
"# Mean precision @ n of each detector over all datasets, rescaled to percent.\n",
"prn = np.array([prn_df[name].mean() for name in label]) * 100\n",
"rect = ax.bar(label, prn, align=\"center\")\n",
"ax.set_title(\"Average precision @ n across datasets\")\n",
"ax.set_ylabel(\"Precision @ n Performance (%)\")\n",
"ax.set_ylim(0, 100.0)\n",
"autolabel(rect)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment