@adiamb
Created December 17, 2017 10:36
import pandas as pd
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import confusion_matrix
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
# fix random seed for reproducibility
np.random.seed(7)
### read in the training data
mnst_ = pd.read_csv('train_MINST.csv', header=0)
mnst_test= pd.read_csv('test_MINST.csv', header=0)
one_hot_labels = keras.utils.to_categorical(mnst_.label.values, num_classes=10)
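# e.g. keras.utils.to_categorical([3], num_classes=10) -> [[0,0,0,1,0,0,0,0,0,0]]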
train_n = mnst_.shape[0]
test_n= mnst_test.shape[0]
## concatenate the train and test frames so both get the same scaling
total=pd.concat([mnst_.iloc[:, 1:], mnst_test], axis=0, ignore_index=True)
### scale the values
std_sc=StandardScaler()
std_sc.fit(total.values)
total_sc=std_sc.transform(total.values)
### build a tf network
train_x, test_x, train_y, test_y = train_test_split(total_sc[:train_n], one_hot_labels,
                                                    test_size=0.33, random_state=42)
model = Sequential()
model.add(Dense(300, activation='sigmoid', input_dim=train_x.shape[1]))
model.add(Dropout(0.2))
model.add(Dense(150, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
his_ = model.fit(train_x, train_y,
                 epochs=20, batch_size=32,
                 validation_data=(test_x, test_y), shuffle=True)
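# Optional sketch: plot the fit history; this Keras version records the
# accuracy metric under the 'acc'/'val_acc' keys of his_.history.
plt.plot(his_.history['acc'], label='train')
plt.plot(his_.history['val_acc'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()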
test_ = total_sc[train_n:]
pre_=model.predict_classes(test_)
out=pd.DataFrame(index=range(1, len(pre_)+1), columns=['ImageID','Label'])
out.loc[:, 'ImageID'] = range(1, len(pre_)+1)
out.loc[:, 'Label'] = pre_.tolist()
out.to_csv('/Users/adityaambati/Desktop/sub3_minst.csv', index=False)
#train_x, test_x, train_y, test_y = train_test_split(mnst_.iloc[:, 1:].values, one_hot_labels,test_size=0.33, random_state=42)
fig, ax = plt.subplots(2, 1, figsize=(12,6))
ax[0].plot(train_x[0])
ax[0].set_title('784x1 data')
ax[1].imshow(train_x[0].reshape(28,28), cmap='gray')
ax[1].set_title('28x28 data')
train_cnn=train_x.reshape(-1, 28, 28, 1)
test_cnn=test_x.reshape(-1, 28, 28, 1)
test_cnn_f=total_sc[train_n:].reshape(-1, 28, 28, 1)
#train_cnn=train_cnn.astype('float32')/255
#test_cnn=test_cnn.astype('float32')/255
#test_cnn_f=test_cnn_f.astype('float32')/255
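# Sanity check: Conv2D expects input of shape (samples, height, width, channels)
print(train_cnn.shape, test_cnn.shape, test_cnn_f.shape)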
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
                 input_shape=(28, 28, 1), padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
#model.add(Conv2D(filters = 16, kernel_size = (3, 3), activation='relu'))
#model.add(BatchNormalization())
model.add(MaxPool2D(strides=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
#model.add(Conv2D(filters = 32, kernel_size = (3, 3), activation='relu'))
#model.add(BatchNormalization())
model.add(MaxPool2D(strides=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer = Adam(lr=1e-4), metrics=["accuracy"])
datagen = ImageDataGenerator(zoom_range=0.1,
                             height_shift_range=0.1,
                             width_shift_range=0.1,
                             rotation_range=10)
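# Optional sketch: preview a batch of augmented digits to eyeball the
# generator settings; datagen.flow yields (x_batch, y_batch) tuples.
aug_x, _ = next(datagen.flow(train_cnn, train_y, batch_size=9))
fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for img, a in zip(aug_x, axes.ravel()):
    a.imshow(img.reshape(28, 28), cmap='gray')
    a.axis('off')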
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.9 ** x)  # x is the zero-based epoch index; this overrides Adam's lr each epoch
hist = model.fit_generator(datagen.flow(train_cnn, train_y, batch_size=16),
                           steps_per_epoch=500,
                           epochs=100,
                           callbacks=[annealer],
                           validation_data=(test_cnn, test_y))
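# Optional sketch: confusion matrix on the held-out split, using the
# confusion_matrix import above (argmax recovers class indices from the
# one-hot labels).
val_pred = model.predict_classes(test_cnn)
print(confusion_matrix(test_y.argmax(axis=1), val_pred))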
pre_=model.predict_classes(test_cnn_f)
out=pd.DataFrame(index=range(1, len(pre_)+1), columns=['ImageID','Label'])
out.loc[:, 'ImageID'] = range(1, len(pre_)+1)
out.loc[:, 'Label'] = pre_.tolist()
out.to_csv('/Users/adityaambati/Desktop/sub3_cnn_minst.csv', index=False)
@duyqu-chan

Thank you for sharing this nice and clean code of yours. It is really helpful. I borrowed your learning rate annealing scheme while training a 50-layer ResNet on whale fluke images, but when I monitor the LR at each epoch I see no change! How exactly does your function lambda x: 1e-3 * 0.9 ** x behave? Is it something like f(eta) = (0.9^eta)/1000? If that is so, the LR should quickly converge to zero after a few epochs. I'm a bit confused here. Any help is appreciated.
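For reference, a minimal sketch of the schedule: Keras's LearningRateScheduler passes the zero-based epoch index to the function, so it does evaluate to f(epoch) = (0.9^epoch)/1000.

schedule = lambda epoch: 1e-3 * 0.9 ** epoch
for ep in (0, 1, 10, 50, 99):
    print(ep, schedule(ep))
# 0 -> 1e-3, 1 -> 9e-4, 10 -> ~3.5e-4, 50 -> ~5.2e-6, 99 -> ~3.0e-8

The LR shrinks by 10% per epoch, so it decays geometrically rather than dropping to zero within a few epochs.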
