# renyuanL/finalTerm2020_cnn.py

## finalTerm2020_cnn.py
#
# finalTerm2020_cnn.py
#

# # LeNet-5 MNIST Digits Classifier

# This notebook implements the classic LeNet-5 convolutional network [1] and applies it to MNIST digit classification. The basic architecture is shown in the figure below:
#
# ![](lenet-5_1.jpg)

#
#
# LeNet-5 is commonly regarded as the pioneer of convolutional neural networks, consisting of a very simple architecture (by modern standards). In total, LeNet-5 consists of only 7 layers. 3 out of these 7 layers are convolutional layers (C1, C3, C5), which are connected by two average pooling layers (S2 & S4). The penultimate layer is a fully connected layer (F6), which is followed by the final output layer. The additional details are summarized below:
#
# - All convolutional layers use 5x5 kernels with stride 1.
# - The two average pooling (subsampling) layers are 2x2 pixels wide with stride 2. (**In this script, they are implemented as max pooling via `nn.MaxPool2d(kernel_size=2)`**)
# - Throughout the network, tanh sigmoid activation functions are used. (**This script keeps them: every hidden layer is followed by `nn.Tanh`**)
# - The output layer uses 10 custom Euclidean Radial Basis Function neurons for the output layer. (**In this notebook, we replace these with softmax activations**)
# - The input size is 32x32; here, we rescale the MNIST images from 28x28 to 32x32 to match this input dimension. Alternatively, we would have to change the input size of the first fully connected layer (16*5*5 in this script).
#
# LeNet-5 was able to achieve an error rate below 1% on the MNIST data set, which was very close to the state of the art at the time (produced by a boosted ensemble of three LeNet-4 networks).
#
#
# ### References
#
# - [1] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, november 1998.

# ## Imports

# In[2]:


import os
import time

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms

import torchsummary

import matplotlib.pyplot as plt
from PIL import Image


# Request deterministic cuDNN kernels whenever a CUDA device is present.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True


# ## Model Settings

# In[3]:


##########################
### SETTINGS
##########################

# Hyperparameters
RANDOM_SEED = 1        # seed for torch.manual_seed
LEARNING_RATE = 0.001  # step size handed to the optimizer
BATCH_SIZE = 128       # mini-batch size used by the data loaders

NUM_EPOCHS = 2  #### 10

# Architecture
NUM_FEATURES = 32 * 32  # 32x32 input resolution
NUM_CLASSES = 10        # number of output classes

# Other
# Use the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

GRAYSCALE = True  # selects the single-channel variant of the network


# ### MNIST Dataset

# In[4]:


##########################
### MNIST DATASET
##########################

# Preprocessing applied to every image: resize to 32x32, convert to a tensor
# (transforms.ToTensor() scales input images to the 0-1 range), then
# normalize with mean 0.5 and standard deviation 0.5.
resize_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split; fetched into ./data if it is not present yet.
train_dataset = datasets.MNIST(
    root='data', train=True, transform=resize_transform, download=True)

# Test split, read from the same root directory.
test_dataset = datasets.MNIST(
    root='data', train=False, transform=resize_transform)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Checking the dataset: report the shapes of the first training batch only.
images, labels = next(iter(train_loader))
print('Image batch dimensions:', images.shape)
print('Image label dimensions:', labels.shape)


# In[5]:


device = torch.device(DEVICE)
torch.manual_seed(0)

# Demonstrate batch iteration: on each of two passes over the training
# loader, report the first batch, move it to the target device, and stop.
for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size(0))

        x, y = x.to(device), y.to(device)
        break


# In[6]:


##########################
### MODEL
##########################


class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv/tanh/max-pool stages plus a three-layer head.

    Args:
        num_classes: Number of logits produced by the last linear layer.
        grayscale: When true the first convolution takes 1 input channel,
            otherwise 3.
    """

    def __init__(self, num_classes, grayscale=False):
        super().__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        in_channels = 1 if self.grayscale else 3

        # Convolutional feature extractor (5x5 kernels, 2x2 max pooling).
        conv_stack = [
            nn.Conv2d(in_channels, 6, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fully connected head. Its first layer takes 16*5*5 inputs, the
        # flattened extractor output for a 32x32 image (16 maps of 5x5).
        head = [
            nn.Linear(16*5*5, 120),
            nn.Tanh(),
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of images.

        ``probas`` is the softmax of ``logits`` taken along dimension 1.
        """
        flat = self.features(x).flatten(start_dim=1)
        logits = self.classifier(flat)
        return logits, F.softmax(logits, dim=1)


# In[7]:


# Seed right before construction so the initial weights are reproducible.
torch.manual_seed(RANDOM_SEED)

# Build the network and move its parameters to the selected device.
model = LeNet5(NUM_CLASSES, GRAYSCALE).to(DEVICE)

print(f'\n model= {model} \n')

# Layer-by-layer report for a single-channel 32x32 input.
torchsummary.summary(model, (1,32,32))


#%%


# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


# ## Training

# In[8]:


def compute_accuracy(model, data_loader, device):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    Args:
        model: Callable returning a ``(logits, probas)`` pair for a batch of
            features.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        A zero-dimensional float tensor holding ``correct / total * 100``.

    Raises:
        ValueError: If ``data_loader`` yields no examples, because the
            accuracy is undefined in that case.
    """
    correct_pred, num_examples = 0, 0
    # Scoring never needs gradients; disabling them here keeps the function
    # cheap even when the caller has not done so.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            _, probas = model(features)
            predicted_labels = probas.argmax(dim=1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError(
            'data_loader yielded no examples; accuracy is undefined')
    return correct_pred.float() / num_examples * 100


start_time = time.time()
for epoch in range(NUM_EPOCHS):

    # One optimisation pass over the whole training set.
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features, targets = features.to(DEVICE), targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()  # clear gradients left from the previous step
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING (every 50th batch, starting with batch 0)
        if batch_idx % 50 == 0:
            progress = (epoch+1, NUM_EPOCHS, batch_idx,
                        len(train_loader), cost)
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f' % progress)

    # Training-set accuracy after the epoch, computed without autograd.
    model.eval()
    with torch.no_grad():  # save memory during inference
        train_acc = compute_accuracy(model, train_loader, device=DEVICE)
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS, train_acc))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))


# ## Evaluation

model.eval()

# In[9]:


# Score the test split with autograd switched off.
with torch.no_grad():  # save memory during inference
    acc = compute_accuracy(model, test_loader, device=DEVICE)
    print(f'Test accuracy= {acc}')


# In[10]:


# Take the first batch of the test set as the pool of candidate images.
features, targets = next(iter(test_loader))

#%%
# np.random.randint excludes its upper bound, so the batch size itself is
# passed to make every image in the batch selectable.
randIndex = np.random.randint(0, features.shape[0])

print(f'randIndex= {randIndex}')
print(f'the testing figure is as shown in plots ...\n\n')
# Reorder the chosen image from (C, H, W) to (H, W, C), then drop the single
# channel axis so imshow receives a 2-D array.
nhwc_img = features[randIndex].permute(1, 2, 0)
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys')


# Classify just that image; indexing with None keeps a batch dimension of 1.
# Inference only, so no autograd graph is built.
with torch.no_grad():
    logits, probas = model(features.to(device)[randIndex, None])

print(f'probas=        {probas}')
print(f'probas.max=    {probas.max()}')
print(f'probas.argmax= {probas.argmax()}')
print('-'*10)
print(f'the recognition result= 【{probas.argmax()}】')


#%%
'''
import torchsummary

torchsummary.summary(model, (1,32,32))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 6, 28, 28]             156
              Tanh-2            [-1, 6, 28, 28]               0
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              Tanh-5           [-1, 16, 10, 10]               0
         MaxPool2d-6             [-1, 16, 5, 5]               0
            Linear-7                  [-1, 120]          48,120
              Tanh-8                  [-1, 120]               0
            Linear-9                   [-1, 84]          10,164
             Tanh-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
================================================================
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.24
Estimated Total Size (MB): 0.35
----------------------------------------------------------------
'''

'''

Image batch dimensions: torch.Size([128, 1, 32, 32])
Image label dimensions: torch.Size([128])
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128

 model= LeNet5(
  (features): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Tanh()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Tanh()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): Tanh()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Tanh()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 6, 28, 28]             156
              Tanh-2            [-1, 6, 28, 28]               0
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              Tanh-5           [-1, 16, 10, 10]               0
         MaxPool2d-6             [-1, 16, 5, 5]               0
            Linear-7                  [-1, 120]          48,120
              Tanh-8                  [-1, 120]               0
            Linear-9                   [-1, 84]          10,164
             Tanh-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
================================================================
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.24
Estimated Total Size (MB): 0.35
----------------------------------------------------------------
Epoch: 001/002 | Batch 0000/0469 | Cost: 2.2980
Epoch: 001/002 | Batch 0050/0469 | Cost: 0.5383
Epoch: 001/002 | Batch 0100/0469 | Cost: 0.3024
Epoch: 001/002 | Batch 0150/0469 | Cost: 0.2452
Epoch: 001/002 | Batch 0200/0469 | Cost: 0.0898
Epoch: 001/002 | Batch 0250/0469 | Cost: 0.1244
Epoch: 001/002 | Batch 0300/0469 | Cost: 0.0960
Epoch: 001/002 | Batch 0350/0469 | Cost: 0.1397
Epoch: 001/002 | Batch 0400/0469 | Cost: 0.1625
Epoch: 001/002 | Batch 0450/0469 | Cost: 0.0974
Epoch: 001/002 | Train: 97.862%
Time elapsed: 0.27 min
Epoch: 002/002 | Batch 0000/0469 | Cost: 0.0366
Epoch: 002/002 | Batch 0050/0469 | Cost: 0.0841
Epoch: 002/002 | Batch 0100/0469 | Cost: 0.0931
Epoch: 002/002 | Batch 0150/0469 | Cost: 0.0443
Epoch: 002/002 | Batch 0200/0469 | Cost: 0.0939
Epoch: 002/002 | Batch 0250/0469 | Cost: 0.1425
Epoch: 002/002 | Batch 0300/0469 | Cost: 0.0286
Epoch: 002/002 | Batch 0350/0469 | Cost: 0.0589
Epoch: 002/002 | Batch 0400/0469 | Cost: 0.0372
Epoch: 002/002 | Batch 0450/0469 | Cost: 0.0244
Epoch: 002/002 | Train: 98.630%
Time elapsed: 0.53 min
Total Training Time: 0.53 min
Test accuracy= 98.49999237060547
probas= tensor([[7.0871e-06, 1.9202e-04, 3.6732e-03, 4.6215e-03, 9.8744e-05, 7.9395e-05,
         7.5230e-07, 9.9080e-01, 1.1051e-04, 4.1845e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
probas.argmax= 7
probas.gmax= 0.9907983541488647
'''
	#
	# finalTerm2020_cnn.py
	#

	# # LeNet-5 MNIST Digits Classifier

	# This notebook implements the classic LeNet-5 convolutional network [1] and applies it to MNIST digit classification. The basic architecture is shown in the figure below:
	#
	# ![](lenet-5_1.jpg)

	#
	#
	# LeNet-5 is commonly regarded as the pioneer of convolutional neural networks, consisting of a very simple architecture (by modern standards). In total, LeNet-5 consists of only 7 layers. 3 out of these 7 layers are convolutional layers (C1, C3, C5), which are connected by two average pooling layers (S2 & S4). The penultimate layer is a fully connected layer (F6), which is followed by the final output layer. The additional details are summarized below:
	#
	# - All convolutional layers use 5x5 kernels with stride 1.
	# - The two average pooling (subsampling) layers are 2x2 pixels wide with stride 2. (In this script, they are implemented as max pooling via nn.MaxPool2d(kernel_size=2))
	# - Throughout the network, tanh sigmoid activation functions are used. (This script keeps them: every hidden layer is followed by nn.Tanh)
	# - The output layer uses 10 custom Euclidean Radial Basis Function neurons for the output layer. (In this notebook, we replace these with softmax activations)
	# - The input size is 32x32; here, we rescale the MNIST images from 28x28 to 32x32 to match this input dimension. Alternatively, we would have to change the input size of the first fully connected layer (16*5*5 in this script).
	#
	# LeNet-5 was able to achieve an error rate below 1% on the MNIST data set, which was very close to the state of the art at the time (produced by a boosted ensemble of three LeNet-4 networks).
	#
	#
	# ### References
	#
	# - [1] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, november 1998.

	# ## Imports

	# In[2]:


	import os
	import time

	import numpy as np
	import pandas as pd

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torch.utils.data import DataLoader

	from torchvision import datasets
	from torchvision import transforms

	import torchsummary

	import matplotlib.pyplot as plt
	from PIL import Image


	# Request deterministic cuDNN kernels whenever a CUDA device is present.
	if torch.cuda.is_available():
	    torch.backends.cudnn.deterministic = True


	# ## Model Settings

	# In[3]:


	##########################
	### SETTINGS
	##########################

	# Hyperparameters
	RANDOM_SEED = 1        # seed for torch.manual_seed
	LEARNING_RATE = 0.001  # step size handed to the optimizer
	BATCH_SIZE = 128       # mini-batch size used by the data loaders

	NUM_EPOCHS = 2  #### 10

	# Architecture
	NUM_FEATURES = 32 * 32  # 32x32 input resolution
	NUM_CLASSES = 10        # number of output classes

	# Other
	# Use the first CUDA device when available, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda:0"
	else:
	    DEVICE = "cpu"

	GRAYSCALE = True  # selects the single-channel variant of the network


	# ### MNIST Dataset

	# In[4]:


	##########################
	### MNIST DATASET
	##########################

	# Preprocessing applied to every image: resize to 32x32, convert to a tensor
	# (transforms.ToTensor() scales input images to the 0-1 range), then
	# normalize with mean 0.5 and standard deviation 0.5.
	resize_transform = transforms.Compose([transforms.Resize((32, 32)),
	                                       transforms.ToTensor(),
	                                       transforms.Normalize((0.5,), (0.5,))])

	# Training split; fetched into ./data if it is not present yet.
	train_dataset = datasets.MNIST(root='data',
	                               train=True,
	                               transform=resize_transform,
	                               download=True)

	# Test split, read from the same root directory.
	test_dataset = datasets.MNIST(root='data',
	                              train=False,
	                              transform=resize_transform)


	# Training batches are shuffled; test batches keep the dataset order.
	train_loader = DataLoader(dataset=train_dataset,
	                          batch_size=BATCH_SIZE,
	                          shuffle=True)

	test_loader = DataLoader(dataset=test_dataset,
	                         batch_size=BATCH_SIZE,
	                         shuffle=False)

	# Checking the dataset: report the shapes of the first training batch only.
	for images, labels in train_loader:
	    print('Image batch dimensions:', images.shape)
	    print('Image label dimensions:', labels.shape)
	    break


	# In[5]:


	device = torch.device(DEVICE)
	torch.manual_seed(0)

	# Demonstrate batch iteration: on each of two passes over the training
	# loader, report the first batch, move it to the target device, and stop.
	for epoch in range(2):

	    for batch_idx, (x, y) in enumerate(train_loader):

	        # Plain '|' separators: a backslash before the pipe would be an
	        # invalid escape sequence and would be printed literally.
	        print('Epoch:', epoch+1, end='')
	        print(' | Batch index:', batch_idx, end='')
	        print(' | Batch size:', y.size()[0])

	        x = x.to(device)
	        y = y.to(device)
	        break


	# In[6]:


	##########################
	### MODEL
	##########################


	class LeNet5(nn.Module):
	    """LeNet-5 style CNN: two conv/tanh/max-pool stages plus a three-layer head.

	    Args:
	        num_classes: Number of logits produced by the last linear layer.
	        grayscale: When true the first convolution takes 1 input channel,
	            otherwise 3.
	    """

	    def __init__(self, num_classes, grayscale=False):
	        super(LeNet5, self).__init__()

	        self.grayscale = grayscale
	        self.num_classes = num_classes

	        if self.grayscale:
	            in_channels = 1
	        else:
	            in_channels = 3

	        # Convolutional feature extractor (5x5 kernels, 2x2 max pooling).
	        self.features = nn.Sequential(
	            nn.Conv2d(in_channels, 6, kernel_size=5),
	            nn.Tanh(),
	            nn.MaxPool2d(kernel_size=2),
	            nn.Conv2d(6, 16, kernel_size=5),
	            nn.Tanh(),
	            nn.MaxPool2d(kernel_size=2)
	        )

	        # Fully connected head. The first layer must take 16*5*5 = 400
	        # inputs, the flattened extractor output for a 32x32 image
	        # (16 maps of 5x5); any other width fails at the first forward pass.
	        self.classifier = nn.Sequential(
	            nn.Linear(16*5*5, 120),
	            nn.Tanh(),
	            nn.Linear(120, 84),
	            nn.Tanh(),
	            nn.Linear(84, num_classes),
	        )

	    def forward(self, x):
	        """Return ``(logits, probas)`` for a batch of images.

	        ``probas`` is the softmax of ``logits`` taken along dimension 1.
	        """
	        x = self.features(x)
	        x = torch.flatten(x, 1)
	        logits = self.classifier(x)
	        probas = F.softmax(logits, dim=1)
	        return logits, probas


	# In[7]:


	# Seed right before construction so the initial weights are reproducible.
	torch.manual_seed(RANDOM_SEED)

	# Build the network and move its parameters to the selected device.
	model = LeNet5(NUM_CLASSES, GRAYSCALE).to(DEVICE)

	print(f'\n model= {model} \n')

	# Layer-by-layer report for a single-channel 32x32 input.
	torchsummary.summary(model, (1,32,32))


	#%%


	# Adam over all model parameters with the configured learning rate.
	optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


	# ## Training

	# In[8]:


	def compute_accuracy(model, data_loader, device):
	    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

	    Args:
	        model: Callable returning a ``(logits, probas)`` pair for a batch of
	            features.
	        data_loader: Iterable yielding ``(features, targets)`` batches.
	        device: Device every batch is moved to before the forward pass.

	    Returns:
	        A zero-dimensional float tensor holding ``correct / total * 100``.

	    Raises:
	        ValueError: If ``data_loader`` yields no examples, because the
	            accuracy is undefined in that case.
	    """
	    correct_pred, num_examples = 0, 0
	    # Scoring never needs gradients; disabling them here keeps the function
	    # cheap even when the caller has not done so.
	    with torch.no_grad():
	        for features, targets in data_loader:
	            features = features.to(device)
	            targets = targets.to(device)

	            _, probas = model(features)
	            predicted_labels = probas.argmax(dim=1)
	            num_examples += targets.size(0)
	            correct_pred += (predicted_labels == targets).sum()

	    if num_examples == 0:
	        raise ValueError(
	            'data_loader yielded no examples; accuracy is undefined')
	    return correct_pred.float() / num_examples * 100


	start_time = time.time()
	for epoch in range(NUM_EPOCHS):

	    # One optimisation pass over the whole training set.
	    model.train()
	    for batch_idx, (features, targets) in enumerate(train_loader):

	        features = features.to(DEVICE)
	        targets = targets.to(DEVICE)

	        ### FORWARD AND BACK PROP
	        logits, probas = model(features)
	        cost = F.cross_entropy(logits, targets)
	        optimizer.zero_grad()  # clear gradients left from the previous step

	        cost.backward()

	        ### UPDATE MODEL PARAMETERS
	        optimizer.step()

	        ### LOGGING (every 50th batch, starting with batch 0)
	        # Plain '|' separators: a backslash before the pipe would be an
	        # invalid escape sequence and would be printed literally.
	        if not batch_idx % 50:
	            print ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
	                   %(epoch+1, NUM_EPOCHS, batch_idx,
	                     len(train_loader), cost))

	    # Training-set accuracy after the epoch, computed without autograd.
	    model.eval()
	    with torch.set_grad_enabled(False): # save memory during inference
	        print('Epoch: %03d/%03d | Train: %.3f%%' % (
	              epoch+1, NUM_EPOCHS,
	              compute_accuracy(model, train_loader, device=DEVICE)))

	    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

	print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))


	# ## Evaluation

	model.eval()

	# In[9]:


	# Score the test split with autograd switched off.
	with torch.set_grad_enabled(False): # save memory during inference
	    acc = compute_accuracy(model, test_loader, device=DEVICE)
	    print(f'Test accuracy= {acc}')


	# In[10]:


	# Take the first batch of the test set as the pool of candidate images.
	features, targets = next(iter(test_loader))

	#%%
	# np.random.randint excludes its upper bound, so the batch size itself is
	# passed to make every image in the batch selectable.
	randIndex = np.random.randint(0, features.shape[0])

	print(f'randIndex= {randIndex}')
	print(f'the testing figure is as shown in plots ...\n\n')
	# Reorder the chosen image from (C, H, W) to (H, W, C), then drop the single
	# channel axis so imshow receives a 2-D array.
	nhwc_img = features[randIndex].permute(1, 2, 0)
	nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
	plt.imshow(nhw_img, cmap='Greys')


	# Classify just that image; indexing with None keeps a batch dimension of 1.
	# Inference only, so no autograd graph is built.
	with torch.no_grad():
	    logits, probas = model(features.to(device)[randIndex, None])

	print(f'probas= {probas}')
	print(f'probas.max= {probas.max()}')
	print(f'probas.argmax= {probas.argmax()}')
	print('-'*10)
	print(f'the recognition result= 【{probas.argmax()}】')



	#%%
	'''
	import torchsummary

	torchsummary.summary(model, (1,32,32))
	----------------------------------------------------------------
	Layer (type) Output Shape Param #
	================================================================
	Conv2d-1 [-1, 6, 28, 28] 156
	Tanh-2 [-1, 6, 28, 28] 0
	MaxPool2d-3 [-1, 6, 14, 14] 0
	Conv2d-4 [-1, 16, 10, 10] 2,416
	Tanh-5 [-1, 16, 10, 10] 0
	MaxPool2d-6 [-1, 16, 5, 5] 0
	Linear-7 [-1, 120] 48,120
	Tanh-8 [-1, 120] 0
	Linear-9 [-1, 84] 10,164
	Tanh-10 [-1, 84] 0
	Linear-11 [-1, 10] 850
	================================================================
	Total params: 61,706
	Trainable params: 61,706
	Non-trainable params: 0
	----------------------------------------------------------------
	Input size (MB): 0.00
	Forward/backward pass size (MB): 0.11
	Params size (MB): 0.24
	Estimated Total Size (MB): 0.35
	----------------------------------------------------------------
	'''

	'''

	Image batch dimensions: torch.Size([128, 1, 32, 32])
	Image label dimensions: torch.Size([128])
	Epoch: 1 \| Batch index: 0 \| Batch size: 128
	Epoch: 2 \| Batch index: 0 \| Batch size: 128

	model= LeNet5(
	(features): Sequential(
	(0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
	(1): Tanh()
	(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
	(3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
	(4): Tanh()
	(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
	)
	(classifier): Sequential(
	(0): Linear(in_features=400, out_features=120, bias=True)
	(1): Tanh()
	(2): Linear(in_features=120, out_features=84, bias=True)
	(3): Tanh()
	(4): Linear(in_features=84, out_features=10, bias=True)
	)
	)

	----------------------------------------------------------------
	Layer (type) Output Shape Param #
	================================================================
	Conv2d-1 [-1, 6, 28, 28] 156
	Tanh-2 [-1, 6, 28, 28] 0
	MaxPool2d-3 [-1, 6, 14, 14] 0
	Conv2d-4 [-1, 16, 10, 10] 2,416
	Tanh-5 [-1, 16, 10, 10] 0
	MaxPool2d-6 [-1, 16, 5, 5] 0
	Linear-7 [-1, 120] 48,120
	Tanh-8 [-1, 120] 0
	Linear-9 [-1, 84] 10,164
	Tanh-10 [-1, 84] 0
	Linear-11 [-1, 10] 850
	================================================================
	Total params: 61,706
	Trainable params: 61,706
	Non-trainable params: 0
	----------------------------------------------------------------
	Input size (MB): 0.00
	Forward/backward pass size (MB): 0.11
	Params size (MB): 0.24
	Estimated Total Size (MB): 0.35
	----------------------------------------------------------------
	Epoch: 001/002 \| Batch 0000/0469 \| Cost: 2.2980
	Epoch: 001/002 \| Batch 0050/0469 \| Cost: 0.5383
	Epoch: 001/002 \| Batch 0100/0469 \| Cost: 0.3024
	Epoch: 001/002 \| Batch 0150/0469 \| Cost: 0.2452
	Epoch: 001/002 \| Batch 0200/0469 \| Cost: 0.0898
	Epoch: 001/002 \| Batch 0250/0469 \| Cost: 0.1244
	Epoch: 001/002 \| Batch 0300/0469 \| Cost: 0.0960
	Epoch: 001/002 \| Batch 0350/0469 \| Cost: 0.1397
	Epoch: 001/002 \| Batch 0400/0469 \| Cost: 0.1625
	Epoch: 001/002 \| Batch 0450/0469 \| Cost: 0.0974
	Epoch: 001/002 \| Train: 97.862%
	Time elapsed: 0.27 min
	Epoch: 002/002 \| Batch 0000/0469 \| Cost: 0.0366
	Epoch: 002/002 \| Batch 0050/0469 \| Cost: 0.0841
	Epoch: 002/002 \| Batch 0100/0469 \| Cost: 0.0931
	Epoch: 002/002 \| Batch 0150/0469 \| Cost: 0.0443
	Epoch: 002/002 \| Batch 0200/0469 \| Cost: 0.0939
	Epoch: 002/002 \| Batch 0250/0469 \| Cost: 0.1425
	Epoch: 002/002 \| Batch 0300/0469 \| Cost: 0.0286
	Epoch: 002/002 \| Batch 0350/0469 \| Cost: 0.0589
	Epoch: 002/002 \| Batch 0400/0469 \| Cost: 0.0372
	Epoch: 002/002 \| Batch 0450/0469 \| Cost: 0.0244
	Epoch: 002/002 \| Train: 98.630%
	Time elapsed: 0.53 min
	Total Training Time: 0.53 min
	Test accuracy= 98.49999237060547
	probas= tensor([[7.0871e-06, 1.9202e-04, 3.6732e-03, 4.6215e-03, 9.8744e-05, 7.9395e-05,
	7.5230e-07, 9.9080e-01, 1.1051e-04, 4.1845e-04]], device='cuda:0',
	grad_fn=<SoftmaxBackward>)
	probas.argmax= 7
	probas.gmax= 0.9907983541488647
	'''