Created
August 17, 2021 05:39
-
-
Save tam17aki/d0a272aa75e0844e658f26ee9af2608f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import sys\n", | |
"\n", | |
"from pyod.utils.data import evaluate_print, generate_data\n", | |
"\n", | |
"from ocnn import OneClassNN" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"まずは学習データ量が1000個の場合。異常データ量は10%とする。" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"On Training Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n", | |
"\n", | |
"On Test Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n" | |
] | |
} | |
], | |
"source": [ | |
"contamination = 0.1 # fraction of outliers (0.1 = 10%)\n", | |
"n_train = 1000 # number of training points\n", | |
"n_test = 100 # number of testing points\n", | |
"n_features = 300 # number of features\n", | |
"\n", | |
"# Generate sample data\n", | |
"X_train, y_train, X_test, y_test = \\\n", | |
" generate_data(n_train=n_train,\n", | |
" n_test=n_test,\n", | |
" n_features=n_features,\n", | |
" contamination=contamination,\n", | |
" random_state=42)\n", | |
"\n", | |
"# train the one-class neural network (OC-NN) detector\n", | |
"clf_name = 'OCNN'\n", | |
"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n", | |
" ocnn_neurons=[128, 1], epochs=50, batch_size=32, nu=0.1)\n", | |
"clf.fit(X_train)\n", | |
"\n", | |
"# get the prediction labels and outlier scores of the training data\n", | |
"y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)\n", | |
"y_train_scores = clf.decision_scores_ # raw outlier scores\n", | |
"\n", | |
"# get the prediction on the test data\n", | |
"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n", | |
"y_test_scores = clf.decision_function(X_test) # outlier scores\n", | |
"\n", | |
"# evaluate and print the results\n", | |
"print(\"\\nOn Training Data:\")\n", | |
"evaluate_print(clf_name, y_train, y_train_scores)\n", | |
"print(\"\\nOn Test Data:\")\n", | |
"evaluate_print(clf_name, y_test, y_test_scores)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"学習データ量が3000個の場合。異常データ量は10%とする。バッチサイズを少し増やす。" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"On Training Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n", | |
"\n", | |
"On Test Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n" | |
] | |
} | |
], | |
"source": [ | |
"contamination = 0.1 # fraction of outliers (0.1 = 10%)\n", | |
"n_train = 3000 # number of training points\n", | |
"n_test = 300 # number of testing points\n", | |
"n_features = 300 # number of features\n", | |
"\n", | |
"# Generate sample data\n", | |
"X_train, y_train, X_test, y_test = \\\n", | |
" generate_data(n_train=n_train,\n", | |
" n_test=n_test,\n", | |
" n_features=n_features,\n", | |
" contamination=contamination,\n", | |
" random_state=42)\n", | |
"\n", | |
"# train the one-class neural network (OC-NN) detector\n", | |
"clf_name = 'OCNN'\n", | |
"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n", | |
" ocnn_neurons=[128, 1], epochs=50, batch_size=64, nu=0.1)\n", | |
"clf.fit(X_train)\n", | |
"\n", | |
"# get the prediction labels and outlier scores of the training data\n", | |
"y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)\n", | |
"y_train_scores = clf.decision_scores_ # raw outlier scores\n", | |
"\n", | |
"# get the prediction on the test data\n", | |
"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n", | |
"y_test_scores = clf.decision_function(X_test) # outlier scores\n", | |
"\n", | |
"# evaluate and print the results\n", | |
"print(\"\\nOn Training Data:\")\n", | |
"evaluate_print(clf_name, y_train, y_train_scores)\n", | |
"print(\"\\nOn Test Data:\")\n", | |
"evaluate_print(clf_name, y_test, y_test_scores)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"学習データ量が10000個の場合。異常データ量は10%とする。バッチサイズを少し増やす。" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"On Training Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n", | |
"\n", | |
"On Test Data:\n", | |
"OCNN ROC:1.0, precision @ rank n:1.0\n" | |
] | |
} | |
], | |
"source": [ | |
"contamination = 0.1 # fraction of outliers (0.1 = 10%)\n", | |
"n_train = 10000 # number of training points\n", | |
"n_test = 1000 # number of testing points\n", | |
"n_features = 300 # number of features\n", | |
"\n", | |
"# Generate sample data\n", | |
"X_train, y_train, X_test, y_test = \\\n", | |
" generate_data(n_train=n_train,\n", | |
" n_test=n_test,\n", | |
" n_features=n_features,\n", | |
" contamination=contamination,\n", | |
" random_state=42)\n", | |
"\n", | |
"# train the one-class neural network (OC-NN) detector\n", | |
"clf_name = 'OCNN'\n", | |
"clf = OneClassNN(hidden_neurons=[256, 128], output_activation=\"linear\",\n", | |
" ocnn_neurons=[128, 1], epochs=50, batch_size=128, nu=0.1)\n", | |
"clf.fit(X_train)\n", | |
"\n", | |
"# get the prediction labels and outlier scores of the training data\n", | |
"y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)\n", | |
"y_train_scores = clf.decision_scores_ # raw outlier scores\n", | |
"\n", | |
"# get the prediction on the test data\n", | |
"y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)\n", | |
"y_test_scores = clf.decision_function(X_test) # outlier scores\n", | |
"\n", | |
"# evaluate and print the results\n", | |
"print(\"\\nOn Training Data:\")\n", | |
"evaluate_print(clf_name, y_train, y_train_scores)\n", | |
"print(\"\\nOn Test Data:\")\n", | |
"evaluate_print(clf_name, y_test, y_test_scores)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment