tam17aki/ocnn_example.ipynb

## ocnn_example.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "from pyod.utils.data import evaluate_print, generate_data\n",
    "\n",
    "from ocnn import OneClassNN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "まずは学習データ量が1000個の場合。異常データ量は10%とする。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "On Training Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n",
      "\n",
      "On Test Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n"
     ]
    }
   ],
   "source": [
    "contamination = 0.1  # proportion of outliers (0.1 = 10%)\n",
    "n_train = 1000  # number of training points\n",
    "n_test = 100  # number of testing points\n",
    "n_features = 300  # number of features\n",
    "\n",
    "# Generate sample data\n",
    "X_train, y_train, X_test, y_test = \\\n",
    "    generate_data(n_train=n_train,\n",
    "                  n_test=n_test,\n",
    "                  n_features=n_features,\n",
    "                  contamination=contamination,\n",
    "                  random_state=42)\n",
    "\n",
    "# train the OCNN (OneClassNN) detector\n",
    "clf_name = 'OCNN'\n",
    "clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
    "                 ocnn_neurons=[128, 1], epochs=50, batch_size=32, nu=0.1)\n",
    "clf.fit(X_train)\n",
    "\n",
    "# get the prediction labels and outlier scores of the training data\n",
    "y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)\n",
    "y_train_scores = clf.decision_scores_  # raw outlier scores\n",
    "\n",
    "# get the prediction on the test data\n",
    "y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)\n",
    "y_test_scores = clf.decision_function(X_test)  # outlier scores\n",
    "\n",
    "# evaluate and print the results\n",
    "print(\"\\nOn Training Data:\")\n",
    "evaluate_print(clf_name, y_train, y_train_scores)\n",
    "print(\"\\nOn Test Data:\")\n",
    "evaluate_print(clf_name, y_test, y_test_scores)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "学習データ量が3000個の場合。異常データ量は10%とする。バッチサイズを少し増やす（32→64）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "On Training Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n",
      "\n",
      "On Test Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n"
     ]
    }
   ],
   "source": [
    "contamination = 0.1  # proportion of outliers (0.1 = 10%)\n",
    "n_train = 3000  # number of training points\n",
    "n_test = 300  # number of testing points\n",
    "n_features = 300  # number of features\n",
    "\n",
    "# Generate sample data\n",
    "X_train, y_train, X_test, y_test = \\\n",
    "    generate_data(n_train=n_train,\n",
    "                  n_test=n_test,\n",
    "                  n_features=n_features,\n",
    "                  contamination=contamination,\n",
    "                  random_state=42)\n",
    "\n",
    "# train the OCNN (OneClassNN) detector\n",
    "clf_name = 'OCNN'\n",
    "clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
    "                 ocnn_neurons=[128, 1], epochs=50, batch_size=64, nu=0.1)\n",
    "clf.fit(X_train)\n",
    "\n",
    "# get the prediction labels and outlier scores of the training data\n",
    "y_train_pred = clf.labels_  # binary labels\n",
    "y_train_scores = clf.decision_scores_  # raw outlier scores\n",
    "\n",
    "# get the prediction on the test data\n",
    "y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)\n",
    "y_test_scores = clf.decision_function(X_test)  # outlier scores\n",
    "\n",
    "# evaluate and print the results\n",
    "print(\"\\nOn Training Data:\")\n",
    "evaluate_print(clf_name, y_train, y_train_scores)\n",
    "print(\"\\nOn Test Data:\")\n",
    "evaluate_print(clf_name, y_test, y_test_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "学習データ量が10000個の場合。異常データ量は10%とする。バッチサイズをさらに増やす（64→128）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "On Training Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n",
      "\n",
      "On Test Data:\n",
      "OCNN ROC:1.0, precision @ rank n:1.0\n"
     ]
    }
   ],
   "source": [
    "contamination = 0.1  # proportion of outliers (0.1 = 10%)\n",
    "n_train = 10000  # number of training points\n",
    "n_test = 1000  # number of testing points\n",
    "n_features = 300  # number of features\n",
    "\n",
    "# Generate sample data\n",
    "X_train, y_train, X_test, y_test = \\\n",
    "    generate_data(n_train=n_train,\n",
    "                  n_test=n_test,\n",
    "                  n_features=n_features,\n",
    "                  contamination=contamination,\n",
    "                  random_state=42)\n",
    "\n",
    "# train the OCNN (OneClassNN) detector\n",
    "clf_name = 'OCNN'\n",
    "clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
    "                 ocnn_neurons=[128, 1], epochs=50, batch_size=128, nu=0.1)\n",
    "clf.fit(X_train)\n",
    "\n",
    "# get the prediction labels and outlier scores of the training data\n",
    "y_train_pred = clf.labels_  # binary labels\n",
    "y_train_scores = clf.decision_scores_  # raw outlier scores\n",
    "\n",
    "# get the prediction on the test data\n",
    "y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)\n",
    "y_test_scores = clf.decision_function(X_test)  # outlier scores\n",
    "\n",
    "# evaluate and print the results\n",
    "print(\"\\nOn Training Data:\")\n",
    "evaluate_print(clf_name, y_train, y_train_scores)\n",
    "print(\"\\nOn Test Data:\")\n",
    "evaluate_print(clf_name, y_test, y_test_scores)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import sys\n",
	"\n",
	"from pyod.utils.data import evaluate_print, generate_data\n",
	"\n",
	"from ocnn import OneClassNN"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"まずは学習データ量が1000個の場合。異常データ量は10%とする。"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"On Training Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n",
	"\n",
	"On Test Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n"
	]
	}
	],
	"source": [
	"contamination = 0.1 # proportion of outliers (0.1 = 10%)\n",
	"n_train = 1000 # number of training points\n",
	"n_test = 100 # number of testing points\n",
	"n_features = 300 # number of features\n",
	"\n",
	"# Generate sample data\n",
	"X_train, y_train, X_test, y_test = \\\n",
	" generate_data(n_train=n_train,\n",
	" n_test=n_test,\n",
	" n_features=n_features,\n",
	" contamination=contamination,\n",
	" random_state=42)\n",
	"\n",
	"# train the OCNN (OneClassNN) detector\n",
	"clf_name = 'OCNN'\n",
	"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
	" ocnn_neurons=[128, 1], epochs=50, batch_size=32, nu=0.1)\n",
	"clf.fit(X_train)\n",
	"\n",
	"# get the prediction labels and outlier scores of the training data\n",
	"y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)\n",
	"y_train_scores = clf.decision_scores_ # raw outlier scores\n",
	"\n",
	"# get the prediction on the test data\n",
	"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n",
	"y_test_scores = clf.decision_function(X_test) # outlier scores\n",
	"\n",
	"# evaluate and print the results\n",
	"print(\"\\nOn Training Data:\")\n",
	"evaluate_print(clf_name, y_train, y_train_scores)\n",
	"print(\"\\nOn Test Data:\")\n",
	"evaluate_print(clf_name, y_test, y_test_scores)\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"学習データ量が3000個の場合。異常データ量は10%とする。バッチサイズを少し増やす（32→64）。"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"On Training Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n",
	"\n",
	"On Test Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n"
	]
	}
	],
	"source": [
	"contamination = 0.1 # proportion of outliers (0.1 = 10%)\n",
	"n_train = 3000 # number of training points\n",
	"n_test = 300 # number of testing points\n",
	"n_features = 300 # number of features\n",
	"\n",
	"# Generate sample data\n",
	"X_train, y_train, X_test, y_test = \\\n",
	" generate_data(n_train=n_train,\n",
	" n_test=n_test,\n",
	" n_features=n_features,\n",
	" contamination=contamination,\n",
	" random_state=42)\n",
	"\n",
	"# train the OCNN (OneClassNN) detector\n",
	"clf_name = 'OCNN'\n",
	"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
	" ocnn_neurons=[128, 1], epochs=50, batch_size=64, nu=0.1)\n",
	"clf.fit(X_train)\n",
	"\n",
	"# get the prediction labels and outlier scores of the training data\n",
	"y_train_pred = clf.labels_ # binary labels\n",
	"y_train_scores = clf.decision_scores_ # raw outlier scores\n",
	"\n",
	"# get the prediction on the test data\n",
	"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n",
	"y_test_scores = clf.decision_function(X_test) # outlier scores\n",
	"\n",
	"# evaluate and print the results\n",
	"print(\"\\nOn Training Data:\")\n",
	"evaluate_print(clf_name, y_train, y_train_scores)\n",
	"print(\"\\nOn Test Data:\")\n",
	"evaluate_print(clf_name, y_test, y_test_scores)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"学習データ量が10000個の場合。異常データ量は10%とする。バッチサイズをさらに増やす（64→128）。"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"On Training Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n",
	"\n",
	"On Test Data:\n",
	"OCNN ROC:1.0, precision @ rank n:1.0\n"
	]
	}
	],
	"source": [
	"contamination = 0.1 # proportion of outliers (0.1 = 10%)\n",
	"n_train = 10000 # number of training points\n",
	"n_test = 1000 # number of testing points\n",
	"n_features = 300 # number of features\n",
	"\n",
	"# Generate sample data\n",
	"X_train, y_train, X_test, y_test = \\\n",
	" generate_data(n_train=n_train,\n",
	" n_test=n_test,\n",
	" n_features=n_features,\n",
	" contamination=contamination,\n",
	" random_state=42)\n",
	"\n",
	"# train the OCNN (OneClassNN) detector\n",
	"clf_name = 'OCNN'\n",
	"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n",
	" ocnn_neurons=[128, 1], epochs=50, batch_size=128, nu=0.1)\n",
	"clf.fit(X_train)\n",
	"\n",
	"# get the prediction labels and outlier scores of the training data\n",
	"y_train_pred = clf.labels_ # binary labels\n",
	"y_train_scores = clf.decision_scores_ # raw outlier scores\n",
	"\n",
	"# get the prediction on the test data\n",
	"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n",
	"y_test_scores = clf.decision_function(X_test) # outlier scores\n",
	"\n",
	"# evaluate and print the results\n",
	"print(\"\\nOn Training Data:\")\n",
	"evaluate_print(clf_name, y_train, y_train_scores)\n",
	"print(\"\\nOn Test Data:\")\n",
	"evaluate_print(clf_name, y_test, y_test_scores)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.9"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}