# Will Badr wmlba

## noise_images.py
# The code below is from the Keras Blogs
# https://blog.keras.io/building-autoencoders-in-keras.html

# Multiplier applied to the unit-variance Gaussian noise.
noise_factor = 0.5

# Corrupt each split with zero-mean Gaussian noise scaled by noise_factor and
# clamp the result to [0, 1].  The training noise is still drawn before the
# test noise, so the sequence of random draws is unchanged.
x_train_noisy = np.clip(
    x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape),
    0., 1.,
)
x_test_noisy = np.clip(
    x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape),
    0., 1.,
)

## predict_anomaly.autoencoder.py
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
from keras.preprocessing import image
# If img.png is not one of the MNIST images the model was trained on, the error will be very high.
# Load ./img.png resized to 28x28 in grayscale mode.
img = image.load_img("./img.png", target_size=(28, 28), color_mode = "grayscale")
# Convert the image to an array and flatten it into a single row of 784 values.
input_img = image.img_to_array(img)
inputs = input_img.reshape(1,784)
# NOTE(review): the pixel values are not rescaled here, whereas train_x elsewhere
# in this file is divided by 255 — confirm the model expects unscaled input.
# Run the input through the autoencoder (defined elsewhere) to get its reconstruction.
target_data = autoencoder.predict(inputs)
# Euclidean norm of the difference between input and reconstruction, taken over the last axis.
dist = np.linalg.norm(inputs - target_data, axis=-1)
print(dist)

## autoencoder_feedforward.py
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Input
from keras import optimizers
from keras.optimizers import Adam

# Load the MNIST train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten the training images into 60000 rows of 784 values and divide by 255.
train_x = np.reshape(x_train, (60000, 784)) / 255

## isolationForest.py
from sklearn.ensemble import IsolationForest
import numpy as np
# Seed NumPy's global RNG so the synthetic data is reproducible.
np.random.seed(1)
# 50,000 two-dimensional points: standard-normal draws scaled by 20 and shifted by 20.
random_data = np.random.randn(50000, 2) * 20 + 20

# The `behaviour` argument is no longer passed: it was deprecated in
# scikit-learn 0.22 and removed in 0.24, where supplying it raises TypeError.
clf = IsolationForest(max_samples=100, random_state=1, contamination='auto')
# Fit on the data and get one label per row (-1 marks an outlier, 1 an inlier).
preds = clf.fit_predict(random_data)
# Bare expression: shows the labels when run as a notebook cell.
preds

## boxplot.py
import seaborn as sns
import matplotlib.pyplot as plt

# Visualize the distribution of random_data (defined in an earlier snippet) as a box plot.
sns.boxplot(data=random_data)

## dbscan_outlier.py
from sklearn.cluster import DBSCAN
# Seed NumPy's global RNG for reproducibility.  The bare `seed(1)` used here
# before is not imported in this snippet and raised NameError.
np.random.seed(1)
# 50,000 two-dimensional points: standard-normal draws scaled by 20 and shifted by 20.
random_data = np.random.randn(50000, 2) * 20 + 20

# Cluster the points; DBSCAN assigns the label -1 to points it treats as noise.
outlier_detection = DBSCAN(min_samples=2, eps=3)
clusters = outlier_detection.fit_predict(random_data)
# Number of points labelled -1 (shown when run as a notebook cell).
list(clusters).count(-1)

## outlier_std.py
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global RNG for reproducibility.  The bare `seed(1)` used here
# before is not imported in this snippet and raised NameError.
np.random.seed(1)


# Scale and shift standard-normal samples (x * 20 + 20) to get some real-valued data.
data = np.random.randn(50000) * 20 + 20

# Function to detect outliers in one-dimensional datasets.
def find_anomalies(data):

## ensemble_imbalance.py
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Create the classifier object.
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in imbalanced-learn 0.10 (old name removed
# in 0.12), while the first positional argument is the base learner in both.
bbc = BalancedBaggingClassifier(DecisionTreeClassifier(),
                                sampling_strategy='auto',
                                replacement=False,
                                random_state=0)

# Target labels: the 'Class' column of credit_df (defined elsewhere).
y_train = credit_df['Class']

## oversampling.py
from imblearn.over_sampling import SMOTE

# Resample the minority class. You can change the strategy to 'auto' if you are not sure.
sm = SMOTE(sampling_strategy='minority', random_state=7)

# Fit the model to generate the data.
# fit_resample is used because fit_sample was deprecated and later removed
# from imbalanced-learn (0.8); fit_resample takes the same (X, y) arguments.
oversampled_trainX, oversampled_trainY = sm.fit_resample(credit_df.drop('Class', axis=1), credit_df['Class'])
# Put the labels first and the features after them, then reuse the column
# names of normalized_df (defined elsewhere).
oversampled_train = pd.concat([pd.DataFrame(oversampled_trainY), pd.DataFrame(oversampled_trainX)], axis=1)
oversampled_train.columns = normalized_df.columns

## undersampling.py
# Randomly reorder every row of credit_df (fixed seed).
shuffled_df = credit_df.sample(frac=1, random_state=4)

# Keep the rows whose 'Class' is 1 (fraud) in their own frame.
fraud_df = shuffled_df[shuffled_df['Class'].eq(1)]

# Draw 492 rows at random (fixed seed) from those whose 'Class' is 0 (non-fraud, the majority class).
non_fraud_df = shuffled_df[shuffled_df['Class'].eq(0)].sample(n=492, random_state=42)

# Concatenate both dataframes again
	# The code below is from the Keras Blogs
	# https://blog.keras.io/building-autoencoders-in-keras.html

	# Multiplier applied to the unit-variance Gaussian noise.
	noise_factor = 0.5

	# Corrupt each split with zero-mean Gaussian noise scaled by noise_factor and
	# clamp the result to [0, 1].  The training noise is still drawn before the
	# test noise, so the sequence of random draws is unchanged.
	x_train_noisy = np.clip(
		x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape),
		0., 1.,
	)
	x_test_noisy = np.clip(
		x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape),
		0., 1.,
	)
	# IPython magic: render matplotlib figures inline in the notebook.
	%matplotlib inline
	from keras.preprocessing import image
	# If img.png is not one of the MNIST images the model was trained on, the error will be very high.
	# Load ./img.png resized to 28x28 in grayscale mode.
	img = image.load_img("./img.png", target_size=(28, 28), color_mode = "grayscale")
	# Convert the image to an array and flatten it into a single row of 784 values.
	input_img = image.img_to_array(img)
	inputs = input_img.reshape(1,784)
	# NOTE(review): the pixel values are not rescaled here, whereas train_x elsewhere
	# in this file is divided by 255 — confirm the model expects unscaled input.
	# Run the input through the autoencoder (defined elsewhere) to get its reconstruction.
	target_data = autoencoder.predict(inputs)
	# Euclidean norm of the difference between input and reconstruction, taken over the last axis.
	dist = np.linalg.norm(inputs - target_data, axis=-1)
	print(dist)
	import numpy as np
	import keras
	from keras.datasets import mnist
	from keras.models import Sequential, Model
	from keras.layers import Dense, Input
	from keras import optimizers
	from keras.optimizers import Adam

	# Load the MNIST train and test splits as (images, labels) pairs.
	(x_train, y_train), (x_test, y_test) = mnist.load_data()

	# Flatten the training images into 60000 rows of 784 values and divide by 255.
	train_x = np.reshape(x_train, (60000, 784)) / 255
	from sklearn.ensemble import IsolationForest
	import numpy as np
	# Seed NumPy's global RNG so the synthetic data is reproducible.
	np.random.seed(1)
	# 50,000 two-dimensional points: standard-normal draws scaled by 20 and shifted by 20.
	random_data = np.random.randn(50000, 2) * 20 + 20

	# The `behaviour` argument is no longer passed: it was deprecated in
	# scikit-learn 0.22 and removed in 0.24, where supplying it raises TypeError.
	clf = IsolationForest(max_samples=100, random_state=1, contamination='auto')
	# Fit on the data and get one label per row (-1 marks an outlier, 1 an inlier).
	preds = clf.fit_predict(random_data)
	# Bare expression: shows the labels when run as a notebook cell.
	preds
	import seaborn as sns
	import matplotlib.pyplot as plt

	# Visualize the distribution of random_data (defined in an earlier snippet) as a box plot.
	sns.boxplot(data=random_data)
	from sklearn.cluster import DBSCAN
	# Seed NumPy's global RNG for reproducibility.  The bare `seed(1)` used here
	# before is not imported in this snippet and raised NameError.
	np.random.seed(1)
	# 50,000 two-dimensional points: standard-normal draws scaled by 20 and shifted by 20.
	random_data = np.random.randn(50000, 2) * 20 + 20

	# Cluster the points; DBSCAN assigns the label -1 to points it treats as noise.
	outlier_detection = DBSCAN(min_samples=2, eps=3)
	clusters = outlier_detection.fit_predict(random_data)
	# Number of points labelled -1 (shown when run as a notebook cell).
	list(clusters).count(-1)
	from imblearn.ensemble import BalancedBaggingClassifier
	from sklearn.tree import DecisionTreeClassifier

	# Create the classifier object.
	# The base learner is passed positionally: its keyword was renamed from
	# `base_estimator` to `estimator` in imbalanced-learn 0.10 (old name removed
	# in 0.12), while the first positional argument is the base learner in both.
	bbc = BalancedBaggingClassifier(DecisionTreeClassifier(),
		sampling_strategy='auto',
		replacement=False,
		random_state=0)

	# Target labels: the 'Class' column of credit_df (defined elsewhere).
	y_train = credit_df['Class']
	from imblearn.over_sampling import SMOTE

	# Resample the minority class. You can change the strategy to 'auto' if you are not sure.
	sm = SMOTE(sampling_strategy='minority', random_state=7)

	# Fit the model to generate the data.
	# fit_resample is used because fit_sample was deprecated and later removed
	# from imbalanced-learn (0.8); fit_resample takes the same (X, y) arguments.
	oversampled_trainX, oversampled_trainY = sm.fit_resample(credit_df.drop('Class', axis=1), credit_df['Class'])
	# Put the labels first and the features after them, then reuse the column
	# names of normalized_df (defined elsewhere).
	oversampled_train = pd.concat([pd.DataFrame(oversampled_trainY), pd.DataFrame(oversampled_trainX)], axis=1)
	oversampled_train.columns = normalized_df.columns
	# Randomly reorder every row of credit_df (fixed seed).
	shuffled_df = credit_df.sample(frac=1, random_state=4)

	# Keep the rows whose 'Class' is 1 (fraud) in their own frame.
	fraud_df = shuffled_df[shuffled_df['Class'].eq(1)]

	# Draw 492 rows at random (fixed seed) from those whose 'Class' is 0 (non-fraud, the majority class).
	non_fraud_df = shuffled_df[shuffled_df['Class'].eq(0)].sample(n=492, random_state=42)

	# Concatenate both dataframes again