Last active
July 16, 2019 00:50
-
-
Save f0nzie/90749827f43583638b5c9cf382424f9b to your computer and use it in GitHub Desktop.
code for mnist_digits_python.Rmd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Digits recognition on PNG images. Python code | |
Source: https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_logistic_regression/ | |
> This is a modified version of the original article. | |
1. added utility functions | |
2. Introspection on dim=2 | |
3. Converting code to R | |
4. Read PNG images instead of MNIST standard dataset | |
## Objective | |
Read PNG images instead of MNIST standard dataset. | |
## Load PyTorch libraries | |
```{r, echo=FALSE} | |
# Force reticulate to use the local (project) Python environment.
# The location passed to use_python() depends on the operating system.
if (identical(.Platform$OS.type, "windows")) {
  reticulate::use_python(
    python = file.path(script_path, "..", "conda"),
    require = TRUE
  )
} else if (identical(.Platform$OS.type, "unix")) {
  reticulate::use_python(
    python = file.path(script_path, "/..", "conda", "bin", "python3"),
    require = TRUE
  )
}

# Folder holding the PyQt platform plugins (Library/plugins/platforms).
qt_plugins <- file.path(
  find_rsuite_root(), "conda", "Library", "plugins", "platforms"
)
# Hand the folder to Python through an environment variable;
# `r.qt_plugins` reads the R variable from the Python side.
reticulate::py_run_string("
import os;
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = r.qt_plugins")
``` | |
```{python} | |
import torch | |
import torchvision | |
import torch.nn as nn | |
import torchvision.transforms as transforms | |
import torchvision.datasets as dsets | |
import numpy as np | |
``` | |
## Initial settings | |
```{python} | |
# Mini-batch sizes for the training and the test datasets.
batch_size_train, batch_size_test = 100, 1000

# Folders where the training and the testing images are located.
train_data_path = '~/mnist_png_full/training/'
test_data_path = '~/mnist_png_full/testing/'
``` | |
## Read datasets | |
```{python} | |
# Load both image folders as they are: only ToTensor, no normalization.
train_dataset = dsets.ImageFolder(
    root=train_data_path,
    transform=transforms.ToTensor()
)
test_dataset = dsets.ImageFolder(
    root=test_data_path,
    transform=transforms.ToTensor()
)

# number of data points in each dataset
print(len(train_dataset), len(test_dataset))
len(train_dataset)

type(train_dataset)     # <class 'torchvision.datasets.folder.ImageFolder'>

# a single data point of the dataset is a tuple
type(train_dataset[0])  # <class 'tuple'>

train_dataset
# Dataset ImageFolder
#     Number of datapoints: 60000
#     Root location: /home/msfz751/mnist_png_full/training/
``` | |
## Read image and label for a data point | |
```{python} | |
# Retrieve a random data point from the dataset.
import random

# randrange(n) draws from 0 .. n-1, so every index (including the last
# one) can be selected.
i_rand = random.randrange(len(train_dataset))
i_rand

# Read the tuple and unpack it into two objects: the image and its label.
# Any index works here; per the original note, digits 0-9 were read in order.
image, lbl = train_dataset[i_rand]
type(image)
type(lbl)
lbl
``` | |
The size of the image tensor is `torch.Size([3, 28, 28])` but it should really be `torch.Size([1, 28, 28])`. | |
```{python} | |
# size of the image tensor that was just read
image.shape    # torch.Size([3, 28, 28])
``` | |
### Introspection of train_dataset | |
```{python} | |
# a data point is a tuple ...
type(train_dataset[0])

# ... whose first member holds the image
print("\tfirst member of the tuple is a tensor for the image")
type(train_dataset[0][0])

# ... and whose second member holds the label
print("\tsecond member of the tuple is an integer for the label")
type(train_dataset[0][1])
``` | |
```{python} | |
# Check the size of the image part of a data point.
train_dataset[0][0].size()                         # first object
# index the last object from the dataset length instead of a hard-coded
# 59999, so the chunk also works for datasets of a different size
train_dataset[len(train_dataset) - 1][0].size()    # last object
image.size()
# it is torch.Size([3, 28, 28]) but it should be [1, 28, 28];
# we will take only one slice along the first dimension
``` | |
### Size of tensor on dim=2 | |
The three slices along the first dimension of the tensor (dim=0 in PyTorch's 0-based indexing) seem to be identical. We will confirm in the following chunk: | |
```{python} | |
# The first dimension of the image has three layers; we only need one.
# First find out whether all three are the same.

# size of each individual layer
image[0].size()
image[1].size()
image[2].size()

# equal sums suggest that the three layers are virtually the same
image[0].sum()
image[1].sum()
image[2].sum()
``` | |
### Take one slice of dim=2 | |
```{python} | |
# Keep a single layer of the tensor; 28x28 is the grid space.
# The slice 0:1 is used here, but 1:2 or 2:3 would do as well.
image[:1].shape
image[:1].numpy().shape
``` | |
```{python} | |
import matplotlib.pyplot as plt

# label that belongs to the tensor
print(lbl)

# keep one layer, convert it to a numpy array and reshape it to a 28x28 grid
img_np_rs = image[:1].numpy().reshape(28, 28)

plt.imshow(img_np_rs, cmap='gray')
plt.show()
``` | |
### Retrieve a second data point | |
```{python} | |
# Second random data point.
# randrange(n) draws from 0 .. n-1, so the last index can be selected too.
i_rand = random.randrange(len(train_dataset))
image, lbl = train_dataset[i_rand]    # read the (image, label) tuple
type(image)
type(lbl)

# the label for the corresponding tensor
print(lbl)

# keep one layer, convert it to a numpy array and reshape it to 28x28
img_np_rs = image[0:1, :, :].numpy().reshape(28, 28)
plt.imshow(img_np_rs, cmap='gray')
plt.show()
``` | |
## Reduce the number of layers for dim=1 in the image | |
```{python} | |
# this class is used to get rid of the two duplicate layers in the image
class PickLayerTransform:
    """Transform that keeps a single layer of an image tensor.

    ``layer`` is a 1-based position along the first dimension of the
    tensor: ``1`` selects ``img[0:1]``, ``2`` selects ``img[1:2]`` and so
    on. The selection is done with a slice, so the first dimension is kept
    with size 1.

    Raises:
        RuntimeError: at construction if ``layer`` is smaller than 1, or
            when called if ``layer`` exceeds the size of the first
            dimension of the image.
    """

    def __init__(self, layer):
        # A position below 1 would turn into the empty slice img[-1:0]
        # (or a meaningless negative slice), so reject it up front.
        if layer < 1:
            raise RuntimeError(
                "Layer index {} must be 1 or greater".format(layer))
        self.layer = layer

    def __call__(self, img):
        """Return layer ``self.layer`` of ``img`` as a size-1 slice."""
        # len(img) is the size of the first dimension, which is the last
        # valid 1-based position.
        if self.layer > len(img):
            raise RuntimeError(
                "Layer index {} incompatible with dimension size {}".format(
                    self.layer, len(img)))
        return img[(self.layer - 1):self.layer, :, :]
# Read the datasets again, this time resizing every image tensor to
# [1, 28, 28]: convert to a tensor first, then keep a single layer.
train_dataset = dsets.ImageFolder(
    root=train_data_path,
    transform=transforms.Compose([
        torchvision.transforms.ToTensor(),
        PickLayerTransform(1),
    ])
)

test_dataset = dsets.ImageFolder(
    root=test_data_path,
    transform=transforms.Compose([
        torchvision.transforms.ToTensor(),
        PickLayerTransform(1),
    ])
)
``` | |
```{python} | |
# Check the size of the image part of a data point after the transform.
train_dataset[0][0].size()                         # first object
# index the last object from the dataset length instead of a hard-coded
# 59999, so the chunk also works for datasets of a different size
train_dataset[len(train_dataset) - 1][0].size()    # last object
print("previous image size")
image.size()
# `image` was read before the datasets were rebuilt, so it still has all
# its layers; the data points read above come from the rebuilt datasets
``` | |
### Show a random image | |
```{python} | |
# Get a random data point.
# randrange(n) draws from 0 .. n-1, so the last index can be selected too.
i_rand = random.randrange(len(train_dataset))
image_, lbl = train_dataset[i_rand]    # read the (image, label) tuple
type(image_)
image_.size()
type(lbl)

# the label for the corresponding tensor
print(lbl)

# Convert to a numpy array and reshape to 28x28.
# We don't need to specify the layer index anymore; slicing everything
# (image_[:, :, :]) or using the tensor directly gives the same array.
img_np_rs = image_.numpy().reshape(28, 28)
plt.imshow(img_np_rs, cmap='gray')
plt.show()
``` | |
### Apply data loader and batch size | |
We use a data loader to avoid losing features that a simple for loop over the data does not provide. In particular, a plain loop would miss out on: | |
- Batching the data | |
- Shuffling the data | |
- Loading the data in parallel using multiprocessing workers | |
```{python} | |
# Wrap the training images in a loader that shuffles them and serves
# them in batches of batch_size_train.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size_train, shuffle=True)

# Same for the test images, in batches of batch_size_test.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size_test, shuffle=True)

# number of batches in each loader
print(len(train_loader), len(test_loader))
``` | |
```{python} | |
# Confirm that the dataset loaders are iterable objects.
# The abstract base classes live in collections.abc; the old
# collections.Iterable alias was removed in Python 3.10.
import collections.abc

isinstance(train_loader, collections.abc.Iterable)
isinstance(test_loader, collections.abc.Iterable)
``` | |
## Build the model | |
```{python} | |
# Build the model
# Same as linear regression!
class LogisticRegressionModel(nn.Module):
    """Logistic regression model made of a single linear layer.

    Args:
        input_dim: number of input features of the linear layer.
        output_dim: number of output features of the linear layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the output of the linear layer for ``x``.

        No activation is applied here; the raw output of the layer is
        returned.
        """
        out = self.linear(x)
        return out
``` | |
```{python} | |
# the model is fed with 28x28 images ...
input_dim = 28 * 28
# ... and classifies the digits 0-9, a total of 10 classes
output_dim = 10

# instantiate the model and show it
model = LogisticRegressionModel(input_dim=input_dim, output_dim=output_dim)
model
``` | |
```{python} | |
### Instantiate the Cross Entropy Loss class
# the loss has to be calculated before we can backpropagate
criterion = nn.CrossEntropyLoss()

### Instantiate the Optimizer class
# the optimizer updates the parameters from their gradients,
# using this learning rate
learning_rate = 0.001
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
``` | |
```{python} | |
# What kind of object holds the parameters
print(model.parameters())

# How many parameter tensors there are
print(len(tuple(model.parameters())))

# Size of the FC 1 parameters
print(tuple(model.parameters())[0].size())

# Size of the FC 1 bias parameters
print(tuple(model.parameters())[1].size())
``` | |
We arbitrarily set 3000 iterations here which means the model would update 3000 times. | |
```{python} | |
# total number of iterations, i.e. how many times the model is updated
n_iters = 3000
``` | |
One epoch consists of 60,000 / 100 = 600 iterations. Because we would like to go through 3000 iterations, this implies we would have 3000 / 600 = 5 epochs as each epoch has 600 iterations. | |
```{python} | |
# iterations per epoch = dataset size / batch size;
# epochs = total iterations / iterations per epoch, truncated to an integer
num_epochs = int(n_iters / (len(train_dataset) / batch_size_train))
num_epochs
``` | |
## Training the model | |
```{python, py_training_model} | |
# Train the model. Every 500 parameter updates, measure the accuracy on the
# whole test set and print it together with the latest training loss.
# NOTE(review): `iter` shadows the Python builtin of the same name; the name
# is kept because it is a chunk-level variable that other chunks may use.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape the batch to rows of 28*28 values. The inputs do not need
        # requires_grad: only the model parameters are being optimized.
        images = images.view(-1, 28*28)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate accuracy over the test dataset
            correct = 0
            total = 0
            # Evaluation needs no gradients; no_grad() avoids building the
            # autograd graph for every test batch.
            with torch.no_grad():
                for images, labels in test_loader:
                    images = images.view(-1, 28*28)

                    # Forward pass only to get logits/output
                    outputs = model(images)

                    # Get predictions from the maximum value
                    _, predicted = torch.max(outputs, 1)

                    # Total number of labels
                    total += labels.size(0)

                    # Total correct predictions, as a plain Python int so
                    # the accuracy below is not an integer tensor division
                    correct += (predicted == labels).sum().item()

            accuracy = 100.0 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment