Skip to content

Instantly share code, notes, and snippets.

@statcompute
Created November 25, 2018 15:14
Show Gist options
  • Save statcompute/c3ab5360481f6d66f4d24a4c8a5bf2a6 to your computer and use it in GitHub Desktop.
Save statcompute/c3ab5360481f6d66f4d24a4c8a5bf2a6 to your computer and use it in GitHub Desktop.
Autoencoder for Dimensionality Reduction
from pandas import read_csv, DataFrame
from numpy.random import seed
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense
from keras.models import Model
df = read_csv("credit_count.txt")
Y = df[df.CARDHLDR == 1].DEFAULTS
X = df[df.CARDHLDR == 1].ix[:, 2:12]
# SCALE EACH FEATURE INTO [0, 1] RANGE
sX = minmax_scale(X, axis = 0)
ncol = sX.shape[1]
X_train, X_test, Y_train, Y_test = train_test_split(sX, Y, train_size = 0.5, random_state = seed(2017))
### AN EXAMPLE OF SIMPLE AUTOENCODER ###
# InputLayer (None, 10)
# Dense (None, 5)
# Dense (None, 10)
input_dim = Input(shape = (ncol, ))
# DEFINE THE DIMENSION OF ENCODER ASSUMED 3
encoding_dim = 3
# DEFINE THE ENCODER LAYER
encoded = Dense(encoding_dim, activation = 'relu')(input_dim)
# DEFINE THE DECODER LAYER
decoded = Dense(ncol, activation = 'sigmoid')(encoded)
# COMBINE ENCODER AND DECODER INTO AN AUTOENCODER MODEL
autoencoder = Model(input = input_dim, output = decoded)
# CONFIGURE AND TRAIN THE AUTOENCODER
autoencoder.compile(optimizer = 'adadelta', loss = 'binary_crossentropy')
autoencoder.fit(X_train, X_train, nb_epoch = 50, batch_size = 100, shuffle = True, validation_data = (X_test, X_test))
# THE ENCODER TO EXTRACT THE REDUCED DIMENSION FROM THE ABOVE AUTOENCODER
encoder = Model(input = input_dim, output = encoded)
encoded_input = Input(shape = (encoding_dim, ))
encoded_out = encoder.predict(X_test)
encoded_out[0:2]
#array([[ 0. , 1.26510417, 1.62803197],
# [ 2.32508397, 0.99735016, 2.06461048]], dtype=float32)
### AN EXAMPLE OF DEEP AUTOENCODER WITH MULTIPLE LAYERS
# InputLayer (None, 10)
# Dense (None, 20)
# Dense (None, 10)
# Dense (None, 5)
# Dense (None, 3)
# Dense (None, 5)
# Dense (None, 10)
# Dense (None, 20)
# Dense (None, 10)
input_dim = Input(shape = (ncol, ))
# DEFINE THE DIMENSION OF ENCODER ASSUMED 3
encoding_dim = 3
# DEFINE THE ENCODER LAYERS
encoded1 = Dense(20, activation = 'relu')(input_dim)
encoded2 = Dense(10, activation = 'relu')(encoded1)
encoded3 = Dense(5, activation = 'relu')(encoded2)
encoded4 = Dense(encoding_dim, activation = 'relu')(encoded3)
# DEFINE THE DECODER LAYERS
decoded1 = Dense(5, activation = 'relu')(encoded4)
decoded2 = Dense(10, activation = 'relu')(decoded1)
decoded3 = Dense(20, activation = 'relu')(decoded2)
decoded4 = Dense(ncol, activation = 'sigmoid')(decoded3)
# COMBINE ENCODER AND DECODER INTO AN AUTOENCODER MODEL
autoencoder = Model(input = input_dim, output = decoded4)
# CONFIGURE AND TRAIN THE AUTOENCODER
autoencoder.compile(optimizer = 'adadelta', loss = 'binary_crossentropy')
autoencoder.fit(X_train, X_train, nb_epoch = 100, batch_size = 100, shuffle = True, validation_data = (X_test, X_test))
# THE ENCODER TO EXTRACT THE REDUCED DIMENSION FROM THE ABOVE AUTOENCODER
encoder = Model(input = input_dim, output = encoded4)
encoded_input = Input(shape = (encoding_dim, ))
encoded_out = encoder.predict(X_test)
encoded_out[0:2]
#array([[ 3.74947715, 0. , 3.22947764],
# [ 3.93903661, 0.17448257, 1.86618853]], dtype=float32)
@antonioa19
Copy link

Could you provide an example of the .txt file being read in? i.e "credit_count.txt"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment