@renyuanL · Created December 24, 2020
For the Final Term Test in CGU, 2020.12.25
#
# finalTerm2020_cnn.py
#
# # LeNet-5 MNIST Digits Classifier
# This notebook implements the classic LeNet-5 convolutional network [1] and applies it to MNIST digit classification. The basic architecture is shown in the figure below:
#
# ![](lenet-5_1.jpg)
#
#
# LeNet-5 is commonly regarded as a pioneer of convolutional neural networks, with a very simple architecture by modern standards. In total, LeNet-5 consists of only 7 layers: 3 of them are convolutional layers (C1, C3, C5), connected by two average pooling layers (S2 & S4); the penultimate layer is a fully connected layer (F6), which is followed by the final output layer. Additional details are summarized below (the resulting layer-by-layer shape arithmetic is sketched in the code cell after this list):
#
# - All convolutional layers use 5x5 kernels with stride 1.
# - The two average pooling (subsampling) layers use 2x2 windows with stride 2, halving the spatial dimensions. (**In this notebook, we replace average pooling with max pooling.**)
# - Throughout the network, tanh activation functions are used, following the scaled-tanh units of the original paper.
# - The original output layer uses 10 Euclidean radial basis function (RBF) neurons. (**In this notebook, we replace these with a linear layer followed by softmax.**)
# - The input size is 32x32; here, we rescale the MNIST images from 28x28 to 32x32 to match this input dimension. Alternatively, we would have to change the dimensions of the subsequent layers. LeNet-5 achieved an error rate below 1% on the MNIST data set, which was very close to the state of the art at the time (produced by a boosted ensemble of three LeNet-4 networks).
#
#
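# As a quick sanity check (an added sketch, not part of the original gist), the
# feature-map sizes above follow from the standard output-size formula for a
# 'valid' convolution/pooling window, (W - K) // S + 1:

def _conv_out(w, k=5, s=1):
    # output width of a 5x5 'valid' (no padding) convolution with stride 1
    return (w - k) // s + 1

def _pool_out(w, k=2, s=2):
    # output width of non-overlapping 2x2 pooling with stride 2
    return (w - k) // s + 1

_w = 32                  # input width/height
_w = _conv_out(_w)       # C1: 32 -> 28
_w = _pool_out(_w)       # S2: 28 -> 14
_w = _conv_out(_w)       # C3: 14 -> 10
_w = _pool_out(_w)       # S4: 10 -> 5
print('flattened features:', 16 * _w * _w)   # 16*5*5 = 400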
# ### References
#
# - [1] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, November 1998.
# ## Imports
# In[2]:
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import torchsummary
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
# ## Model Settings
# In[3]:
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 2   # reduced from the original 10 for a quick run
# Architecture
NUM_FEATURES= 32*32
NUM_CLASSES= 10
# Other
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
GRAYSCALE = True
# ### MNIST Dataset
# In[4]:
##########################
### MNIST DATASET
##########################
resize_transform = transforms.Compose([transforms.Resize((32, 32)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5,), (0.5,))])
# Note transforms.ToTensor() scales input images
# to 0-1 range
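# Quick illustration (added, not in the original gist): Normalize((0.5,), (0.5,))
# then maps that [0, 1] range to [-1, 1] via (x - 0.5) / 0.5:
_px = torch.tensor([0.0, 0.5, 1.0])
print((_px - 0.5) / 0.5)   # tensor([-1.,  0.,  1.])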
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=resize_transform,
                               download=True)
test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=resize_transform)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)
# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
# In[5]:
device = torch.device(DEVICE)
torch.manual_seed(0)
for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        x = x.to(device)
        y = y.to(device)
        break
# In[6]:
##########################
### MODEL
##########################
class LeNet5(nn.Module):

    def __init__(self, num_classes, grayscale=False):
        super(LeNet5, self).__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        # MNIST is grayscale (1 channel); RGB inputs would use 3 channels
        if self.grayscale:
            in_channels = 1
        else:
            in_channels = 3

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5),   # C1: 32x32 -> 28x28
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),                # S2: 28x28 -> 14x14
            nn.Conv2d(6, 16, kernel_size=5),            # C3: 14x14 -> 10x10
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2)                 # S4: 10x10 -> 5x5
        )

        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),                     # C5 (as a linear layer)
            nn.Tanh(),
            nn.Linear(120, 84),                         # F6
            nn.Tanh(),
            nn.Linear(84, num_classes),                 # output layer
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)            # 16x5x5 -> 400
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
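# Illustrative shape check (added, not in the original gist): a dummy forward
# pass confirms that a 1x32x32 input flattens to 16*5*5 = 400 features and
# yields one logit/probability pair per class.
_tmp_model = LeNet5(num_classes=10, grayscale=True)
_logits, _probas = _tmp_model(torch.zeros(1, 1, 32, 32))
print(_logits.shape, _probas.shape)   # both torch.Size([1, 10])
del _tmp_model, _logits, _probas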
# In[7]:
torch.manual_seed(RANDOM_SEED)
model= LeNet5(NUM_CLASSES, GRAYSCALE)
model.to(DEVICE)
print(f'\n model= {model} \n')
torchsummary.summary(model, (1,32,32))
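# Cross-check (added, not in the original gist): count the parameters directly;
# the total should match torchsummary's 61,706.
print('total params:', sum(p.numel() for p in model.parameters()))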
#%%
optimizer= torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# ## Training
# In[8]:
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        # softmax is monotonic, so argmax over probas equals argmax over logits
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):   # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
            epoch+1, NUM_EPOCHS,
            compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
# ## Evaluation
model.eval()
# In[9]:
with torch.set_grad_enabled(False):   # save memory during inference
    acc = compute_accuracy(model, test_loader, device=DEVICE)
    print(f'Test accuracy= {acc}')
# In[10]:
# Grab the first test batch for a qualitative check
features, targets = next(iter(test_loader))
#%%
randIndex = np.random.randint(0, features.shape[0])   # randint's high bound is exclusive
print(f'randIndex= {randIndex}')
print('the test image is shown in the plot ...\n\n')
nhwc_img = features[randIndex].permute(1, 2, 0)   # CHW -> HWC
nhw_img = nhwc_img.numpy().squeeze(axis=2)        # drop the channel axis
plt.imshow(nhw_img, cmap='Greys');
logits, probas= model(features.to(device)[randIndex, None])
print(f'probas= {probas}')
print(f'probas.max= {probas.max()}')
print(f'probas.argmax= {probas.argmax()}')
print('-'*10)
print(f'the recognition result= 【{probas.argmax()}】')
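#%%
# Sketch (added, not in the original gist): how one might classify an external
# digit image with the same preprocessing. 'my_digit.png' is a hypothetical
# file path, so the lines are left commented out.
# img = Image.open('my_digit.png').convert('L')       # grayscale PIL image
# x = resize_transform(img).unsqueeze(0).to(device)   # 1x1x32x32 tensor
# _, p = model(x)
# print(f'predicted digit= {p.argmax().item()}')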
#%%
'''
Image batch dimensions: torch.Size([128, 1, 32, 32])
Image label dimensions: torch.Size([128])
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128
model= LeNet5(
(features): Sequential(
(0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(1): Tanh()
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(4): Tanh()
(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(classifier): Sequential(
(0): Linear(in_features=400, out_features=120, bias=True)
(1): Tanh()
(2): Linear(in_features=120, out_features=84, bias=True)
(3): Tanh()
(4): Linear(in_features=84, out_features=10, bias=True)
)
)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 156
Tanh-2 [-1, 6, 28, 28] 0
MaxPool2d-3 [-1, 6, 14, 14] 0
Conv2d-4 [-1, 16, 10, 10] 2,416
Tanh-5 [-1, 16, 10, 10] 0
MaxPool2d-6 [-1, 16, 5, 5] 0
Linear-7 [-1, 120] 48,120
Tanh-8 [-1, 120] 0
Linear-9 [-1, 84] 10,164
Tanh-10 [-1, 84] 0
Linear-11 [-1, 10] 850
================================================================
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.24
Estimated Total Size (MB): 0.35
----------------------------------------------------------------
Epoch: 001/002 | Batch 0000/0469 | Cost: 2.2980
Epoch: 001/002 | Batch 0050/0469 | Cost: 0.5383
Epoch: 001/002 | Batch 0100/0469 | Cost: 0.3024
Epoch: 001/002 | Batch 0150/0469 | Cost: 0.2452
Epoch: 001/002 | Batch 0200/0469 | Cost: 0.0898
Epoch: 001/002 | Batch 0250/0469 | Cost: 0.1244
Epoch: 001/002 | Batch 0300/0469 | Cost: 0.0960
Epoch: 001/002 | Batch 0350/0469 | Cost: 0.1397
Epoch: 001/002 | Batch 0400/0469 | Cost: 0.1625
Epoch: 001/002 | Batch 0450/0469 | Cost: 0.0974
Epoch: 001/002 | Train: 97.862%
Time elapsed: 0.27 min
Epoch: 002/002 | Batch 0000/0469 | Cost: 0.0366
Epoch: 002/002 | Batch 0050/0469 | Cost: 0.0841
Epoch: 002/002 | Batch 0100/0469 | Cost: 0.0931
Epoch: 002/002 | Batch 0150/0469 | Cost: 0.0443
Epoch: 002/002 | Batch 0200/0469 | Cost: 0.0939
Epoch: 002/002 | Batch 0250/0469 | Cost: 0.1425
Epoch: 002/002 | Batch 0300/0469 | Cost: 0.0286
Epoch: 002/002 | Batch 0350/0469 | Cost: 0.0589
Epoch: 002/002 | Batch 0400/0469 | Cost: 0.0372
Epoch: 002/002 | Batch 0450/0469 | Cost: 0.0244
Epoch: 002/002 | Train: 98.630%
Time elapsed: 0.53 min
Total Training Time: 0.53 min
Test accuracy= 98.49999237060547
probas= tensor([[7.0871e-06, 1.9202e-04, 3.6732e-03, 4.6215e-03, 9.8744e-05, 7.9395e-05,
7.5230e-07, 9.9080e-01, 1.1051e-04, 4.1845e-04]], device='cuda:0',
grad_fn=<SoftmaxBackward>)
probas.max= 0.9907983541488647
probas.argmax= 7
----------
the recognition result= 【7】
'''