This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The code below is from the Keras Blogs | |
# https://blog.keras.io/building-autoencoders-in-keras.html | |
# Corrupt the images with additive Gaussian noise, then clamp back to [0, 1].
# NOTE(review): the clipping range assumes x_train / x_test are already scaled
# to [0, 1] -- confirm against the cell that prepares them.
noise_factor = 0.5

# Draw the training noise first, then the test noise, so the random stream is
# consumed in a fixed order.
train_noise = np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_train_noisy = np.clip(x_train + noise_factor * train_noise, 0., 1.)

test_noise = np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)
x_test_noisy = np.clip(x_test + noise_factor * test_noise, 0., 1.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
from keras.preprocessing import image | |
# The reconstruction error is only small for images that resemble the MNIST
# digits the model was trained on; any other picture should give a large error.
img = image.load_img("./img.png", target_size=(28, 28), color_mode="grayscale")
input_img = image.img_to_array(img)

# Flatten to a single 784-pixel row and scale to [0, 1]. The training images
# are divided by 255 (see train_x), so feeding raw 0-255 pixels would make
# every image look anomalous.
# NOTE(review): assumes the autoencoder was fit on train_x -- confirm.
inputs = input_img.reshape(1, 784) / 255

target_data = autoencoder.predict(inputs)

# Euclidean distance between the input and its reconstruction.
dist = np.linalg.norm(inputs - target_data, axis=-1)
print(dist)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import keras | |
from keras.datasets import mnist | |
from keras.models import Sequential, Model | |
from keras.layers import Dense, Input | |
from keras import optimizers | |
from keras.optimizers import Adam | |
# Load the MNIST train/test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten the 60000 training images into 784-value rows and scale the pixel
# values by 1/255.
train_x = np.reshape(x_train, (60000, 784)) / 255
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import IsolationForest | |
import numpy as np | |
# Fixed seed so the synthetic data set is reproducible.
np.random.seed(1)

# 50000 two-dimensional points drawn from a normal distribution with
# mean 20 and standard deviation 20.
random_data = np.random.randn(50000, 2) * 20 + 20

# `behaviour='new'` is intentionally not passed: the parameter was deprecated
# in scikit-learn 0.22 and removed in 0.24, where supplying it raises
# TypeError. The "new" behaviour is the only one in current releases.
clf = IsolationForest(max_samples=100, random_state=1, contamination='auto')

# fit_predict labels each sample 1 (inlier) or -1 (outlier).
preds = clf.fit_predict(random_data)
preds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import seaborn as sns

# Draw a box plot of random_data to inspect its spread visually.
sns.boxplot(data=random_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.cluster import DBSCAN

# Fixed seed so the synthetic data set is reproducible. (A bare `seed(1)` is
# an undefined name; the generator being seeded is NumPy's.)
np.random.seed(1)

# 50000 two-dimensional points drawn from a normal distribution with
# mean 20 and standard deviation 20.
random_data = np.random.randn(50000, 2) * 20 + 20

outlier_detection = DBSCAN(min_samples=2, eps=3)
clusters = outlier_detection.fit_predict(random_data)

# DBSCAN labels noise points -1; count them without building a Python list.
int((clusters == -1).sum())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# Fixed seed so the synthetic data set is reproducible. (A bare `seed(1)` is
# an undefined name; the generator being seeded is NumPy's.)
np.random.seed(1)

# Scale the standard-normal samples by 20 and shift them by 20, giving
# 50000 values with mean 20 and standard deviation 20.
data = np.random.randn(50000) * 20 + 20
# Function to detect outliers in one-dimensional datasets.
def find_anomalies(data): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from imblearn.ensemble import BalancedBaggingClassifier | |
from sklearn.tree import DecisionTreeClassifier | |
# Create the classifier.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in imbalanced-learn 0.10 and the old
# name was later removed, but it is the first positional parameter under
# either name.
bbc = BalancedBaggingClassifier(
    DecisionTreeClassifier(),
    sampling_strategy='auto',
    replacement=False,
    random_state=0,
)

# Target labels: the 'Class' column of credit_df.
y_train = credit_df['Class']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from imblearn.over_sampling import SMOTE | |
# Resample the minority class. You can change the strategy to 'auto' if you
# are not sure.
sm = SMOTE(sampling_strategy='minority', random_state=7)

# Generate the oversampled data. `fit_resample` replaces `fit_sample`, which
# was deprecated and then removed in imbalanced-learn 0.8.
oversampled_trainX, oversampled_trainY = sm.fit_resample(
    credit_df.drop('Class', axis=1),
    credit_df['Class'],
)

# Re-assemble a single frame with the label column first, then the features.
oversampled_train = pd.concat(
    [pd.DataFrame(oversampled_trainY), pd.DataFrame(oversampled_trainX)],
    axis=1,
)
# NOTE(review): assumes normalized_df has the same column order
# (label first) -- confirm against where normalized_df is built.
oversampled_train.columns = normalized_df.columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shuffle every row (frac=1) with a fixed seed so the result is repeatable.
shuffled_df = credit_df.sample(frac=1, random_state=4)

# Split the shuffled rows by label.
is_fraud = shuffled_df['Class'] == 1
is_non_fraud = shuffled_df['Class'] == 0

# Keep every fraud row.
fraud_df = shuffled_df.loc[is_fraud]

# Randomly keep 492 rows of the non-fraud (majority) class.
# NOTE(review): 492 presumably matches the number of fraud rows -- confirm.
non_fraud_df = shuffled_df.loc[is_non_fraud].sample(n=492, random_state=42)
# Concatenate both dataframes again |
NewerOlder