# CNN dogscats
# dataset can be obtained at https://www.floydhub.com/swaroopgrs/datasets/dogscats
# In[2]:
# In[3]:
from tqdm.notebook import tqdm
import numpy as np
import torch
import PIL
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# In[4]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, ToTensor, Normalize, Compose
# freeze_support() is needed for multiprocessing on Windows
# (e.g. when DataLoader uses num_workers > 0)
def run():
    torch.multiprocessing.freeze_support()
    print('loop')

if __name__ == '__main__':
    run()
root_dir = '/path_to/dogscats/train'
target_size = (32, 32)
transforms = Compose([
    Resize(target_size),  # resize the image
    ToTensor(),           # convert to a float tensor in [0, 1] (from [0, 255] ints)
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # rescale to [-1.0, 1.0]
])
train_dataset_ = ImageFolder(root_dir, transform=transforms)
# In[5]:
print(len(train_dataset_))
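# Quick sanity check (a sketch, not in the original gist): ImageFolder derives
# labels from subfolder names, so class_to_idx shows the mapping, and each
# sample should be a (3, 32, 32) tensor after the transforms above.
print(train_dataset_.class_to_idx)      # e.g. {'cats': 0, 'dogs': 1}
_img, _label = train_dataset_[0]
print(_img.shape, _label)               # torch.Size([3, 32, 32]) 0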
# In[6]:
#get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
# Undo the Normalize transform (x * 0.5 + 0.5 maps [-1, 1] back to [0, 1])
# and move channels last, as imshow expects (H, W, C)
plt.imshow((train_dataset_[101][0] * 0.5 + 0.5).numpy().transpose(1, 2, 0))
# In[7]:
class RAMDatasetWrapper(torch.utils.data.Dataset):
    """Loads every sample of the wrapped dataset into RAM once, so later
    epochs avoid disk I/O and image decoding."""
    def __init__(self, dataset):
        data = []
        count = 0
        for sample in tqdm(dataset):
            if count % 5000 == 0:  # progress marker every 5000 samples
                print("sample N: " + str(count))
            data.append(sample)
            count += 1
        self.n = len(dataset)
        self.data = data

    def __getitem__(self, ind):
        return self.data[ind]

    def __len__(self):
        return self.n
print("building train_dataset")
train_dataset = RAMDatasetWrapper(train_dataset_)
print("building train_dataset: done!")
# In[ ]:
from torch.utils.data import DataLoader
batch_size = 32
print("building train_dataloader")
# num_workers=32 assumes a machine with many CPU cores; reduce if needed
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=32)
print("building train_dataloader: done!")
# In[9]:
# Same for validation dataset
val_root_dir = '/path_to/dogscats/valid'
val_dataset_ = ImageFolder(val_root_dir, transform=transforms)
print("building val_dataset")
val_dataset = RAMDatasetWrapper(val_dataset_)
print("building val_dataset: done!")
print("building val_dataloader")
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
print("building val_dataloader: done!")
# In[10]:
print(len(val_dataset))
# In[ ]:
import torch.nn as nn
class MLPModel(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(MLPModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 2)  # two output logits: cat vs dog
        )

    def forward(self, input):
        # Flatten (batch, 3, 32, 32) images into (batch, 3*32*32) vectors
        input = input.view(input.size(0), -1)
        return self.layers(input)
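# Shape check (a sketch with hypothetical names, not in the original gist):
# a batch of 4 random "images" should yield logits of shape (4, 2).
_mlp = MLPModel(32 * 32 * 3, 128)
print(_mlp(torch.randn(4, 3, 32, 32)).shape)  # torch.Size([4, 2])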
# In[ ]:
def train_epoch(model, train_dataloader, optimizer, loss_fn):
    losses = []
    correct_predictions = 0
    # Iterate mini-batches over the training dataset
    for images, labels in tqdm(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        # Run predictions
        output = model(images)
        # Set gradients to zero
        optimizer.zero_grad()
        # Compute loss
        loss = loss_fn(output, labels)
        # Backpropagate (compute gradients)
        loss.backward()
        # Make an optimization step (update parameters)
        optimizer.step()
        # Log metrics
        losses.append(loss.item())
        predicted_labels = output.argmax(dim=1)
        correct_predictions += (predicted_labels == labels).sum().item()
    accuracy = 100.0 * correct_predictions / len(train_dataloader.dataset)
    # Return the mean loss over the epoch and the accuracy
    mean_loss = np.array(losses).mean()
    return mean_loss, accuracy
def evaluate(model, dataloader, loss_fn):
    losses = []
    correct_predictions = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            # Run predictions
            output = model(images)
            # Compute loss
            loss = loss_fn(output, labels)
            # Save metrics
            predicted_labels = output.argmax(dim=1)
            correct_predictions += (predicted_labels == labels).sum().item()
            losses.append(loss.item())
    mean_loss = np.array(losses).mean()
    accuracy = 100.0 * correct_predictions / len(dataloader.dataset)
    # Return mean loss and accuracy
    return mean_loss, accuracy
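# Toy check of the accuracy logic above (a sketch, not in the original gist):
# argmax over the logit dimension picks the predicted class per sample.
_logits = torch.tensor([[2.0, -1.0], [0.1, 0.3]])
_targets = torch.tensor([0, 1])
print((_logits.argmax(dim=1) == _targets).sum().item())  # 2 correct predictions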
def train(model, train_dataloader, val_dataloader, optimizer, n_epochs, loss_fn):
    # Monitor losses and accuracies as the training progresses
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    for epoch in range(n_epochs):
        model.train()
        train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
        model.eval()
        val_loss, val_accuracy = evaluate(model, val_dataloader, loss_fn)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_accuracy)
        val_accuracies.append(val_accuracy)
        print('Epoch {}/{}: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}'.format(
            epoch + 1, n_epochs,
            train_losses[-1],
            train_accuracies[-1],
            val_losses[-1],
            val_accuracies[-1]))
    return train_losses, val_losses, train_accuracies, val_accuracies
# In[ ]:
def plot(train_losses, val_losses, train_accuracies, val_accuracies, title):
    plt.figure()
    plt.plot(np.arange(len(train_losses)), train_losses)
    plt.plot(np.arange(len(val_losses)), val_losses)
    plt.legend(['train_loss', 'val_loss'])
    plt.xlabel('epoch')
    plt.ylabel('loss value')
    plt.title('{}: Train/val loss'.format(title))
    plt.figure()
    plt.plot(np.arange(len(train_accuracies)), train_accuracies)
    plt.plot(np.arange(len(val_accuracies)), val_accuracies)
    plt.legend(['train_acc', 'val_acc'])
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('{}: Train/val accuracy'.format(title))
# In[ ]:
print("training model")
model = MLPModel(32*32*3, 128)
model = model.to(device)
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
n_epochs = 25
loss_fn = nn.CrossEntropyLoss()
# In[15]:
train_losses, val_losses, train_acc, val_acc = train(model, train_dataloader, val_dataloader, optimizer, n_epochs, loss_fn)
print("training model: done")
# In[18]:
plot(train_losses, val_losses, train_acc, val_acc, title='no_regularization')
# In[19]:
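# The gist is titled "CNN dogscats", so a convolutional variant is sketched
# here as a hedged follow-up (layer sizes are assumptions); it is drop-in
# compatible with the train() loop above, producing the same two logits.
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),   # 3x32x32 -> 16x32x32
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 16x16x16
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # -> 32x16x16
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 32x8x8
        )
        self.classifier = nn.Linear(32 * 8 * 8, 2)

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x.view(x.size(0), -1))

# Example usage (hypothetical hyperparameters):
# cnn = CNNModel().to(device)
# cnn_optimizer = torch.optim.SGD(cnn.parameters(), lr=0.01)
# train(cnn, train_dataloader, val_dataloader, cnn_optimizer, n_epochs, loss_fn)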