Created
November 25, 2018 15:14
-
-
Save statcompute/c3ab5360481f6d66f4d24a4c8a5bf2a6 to your computer and use it in GitHub Desktop.
Autoencoder for Dimensionality Reduction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.layers import Input, Dense
from keras.models import Model
from numpy.random import seed
from pandas import read_csv, DataFrame
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
# Load the credit data set. read_csv is called with its defaults, so the file
# is expected to be comma-delimited with a header row naming the columns.
df = read_csv("credit_count.txt")

# Keep card holders only (CARDHLDR == 1). DEFAULTS is kept as Y for reference;
# the autoencoders below are trained on the features alone.
cardholders = df[df.CARDHLDR == 1]
Y = cardholders.DEFAULTS
# Feature columns are selected by position (columns 2 through 11).
# `.ix` was removed from pandas; `.iloc` is the positional equivalent.
X = cardholders.iloc[:, 2:12]

# SCALE EACH FEATURE INTO [0, 1] RANGE
sX = minmax_scale(X, axis=0)
ncol = sX.shape[1]

# seed() returns None, so passing its result as random_state only worked
# through the side effect of seeding NumPy's global RNG. Seed explicitly and
# hand train_test_split the integer so the split is reproducible on its own.
seed(2017)
X_train, X_test, Y_train, Y_test = train_test_split(
    sX, Y, train_size=0.5, random_state=2017
)
### AN EXAMPLE OF SIMPLE AUTOENCODER ###
# InputLayer (None, 10)
#      Dense (None, 3)
#      Dense (None, 10)
input_dim = Input(shape=(ncol,))

# DEFINE THE DIMENSION OF ENCODER ASSUMED 3
encoding_dim = 3

# DEFINE THE ENCODER LAYER
encoded = Dense(encoding_dim, activation='relu')(input_dim)

# DEFINE THE DECODER LAYER
# Sigmoid output matches the [0, 1] range of the min-max scaled inputs.
decoded = Dense(ncol, activation='sigmoid')(encoded)

# COMBINE ENCODER AND DECODER INTO AN AUTOENCODER MODEL
# Keras 2 names these keywords `inputs` / `outputs` (Keras 1: `input` / `output`).
autoencoder = Model(inputs=input_dim, outputs=decoded)

# CONFIGURE AND TRAIN THE AUTOENCODER
# The network is trained to reproduce its own input, hence X_train is both
# the features and the target. Keras 2 renamed `nb_epoch` to `epochs`.
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(
    X_train,
    X_train,
    epochs=50,
    batch_size=100,
    shuffle=True,
    validation_data=(X_test, X_test),
)

# THE ENCODER TO EXTRACT THE REDUCED DIMENSION FROM THE ABOVE AUTOENCODER
# Shares the trained encoder layer with `autoencoder`; no extra fitting needed.
encoder = Model(inputs=input_dim, outputs=encoded)
# NOTE(review): encoded_input is not used anywhere in the code shown here.
encoded_input = Input(shape=(encoding_dim,))
encoded_out = encoder.predict(X_test)
# Bare expression: displays the first two encoded rows in an interactive session.
encoded_out[0:2]
#array([[ 0.        ,  1.26510417,  1.62803197],
#       [ 2.32508397,  0.99735016,  2.06461048]], dtype=float32)
### AN EXAMPLE OF DEEP AUTOENCODER WITH MULTIPLE LAYERS
# InputLayer (None, 10)
#      Dense (None, 20)
#      Dense (None, 10)
#      Dense (None, 5)
#      Dense (None, 3)
#      Dense (None, 5)
#      Dense (None, 10)
#      Dense (None, 20)
#      Dense (None, 10)
input_dim = Input(shape=(ncol,))

# DEFINE THE DIMENSION OF ENCODER ASSUMED 3
encoding_dim = 3

# DEFINE THE ENCODER LAYERS
encoded1 = Dense(20, activation='relu')(input_dim)
encoded2 = Dense(10, activation='relu')(encoded1)
encoded3 = Dense(5, activation='relu')(encoded2)
encoded4 = Dense(encoding_dim, activation='relu')(encoded3)

# DEFINE THE DECODER LAYERS
# Mirror of the encoder stack; the final sigmoid matches the [0, 1] range of
# the min-max scaled inputs.
decoded1 = Dense(5, activation='relu')(encoded4)
decoded2 = Dense(10, activation='relu')(decoded1)
decoded3 = Dense(20, activation='relu')(decoded2)
decoded4 = Dense(ncol, activation='sigmoid')(decoded3)

# COMBINE ENCODER AND DECODER INTO AN AUTOENCODER MODEL
# Keras 2 names these keywords `inputs` / `outputs` (Keras 1: `input` / `output`).
autoencoder = Model(inputs=input_dim, outputs=decoded4)

# CONFIGURE AND TRAIN THE AUTOENCODER
# The network is trained to reproduce its own input, hence X_train is both
# the features and the target. Keras 2 renamed `nb_epoch` to `epochs`.
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=100,
    shuffle=True,
    validation_data=(X_test, X_test),
)

# THE ENCODER TO EXTRACT THE REDUCED DIMENSION FROM THE ABOVE AUTOENCODER
# Shares the trained encoder layers with `autoencoder`; no extra fitting needed.
encoder = Model(inputs=input_dim, outputs=encoded4)
# NOTE(review): encoded_input is not used anywhere in the code shown here.
encoded_input = Input(shape=(encoding_dim,))
encoded_out = encoder.predict(X_test)
# Bare expression: displays the first two encoded rows in an interactive session.
encoded_out[0:2]
#array([[ 3.74947715,  0.        ,  3.22947764],
#       [ 3.93903661,  0.17448257,  1.86618853]], dtype=float32)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could you provide an example of the .txt file being read in, i.e., "credit_count.txt"?