Skip to content

Instantly share code, notes, and snippets.

@ecsplendid
Last active May 19, 2024 10:24
Show Gist options
  • Save ecsplendid/49a3761d452934170e5b3599d4d7d158 to your computer and use it in GitHub Desktop.
Save ecsplendid/49a3761d452934170e5b3599d4d7d158 to your computer and use it in GitHub Desktop.
Hinton example with CNN and ~10^8 params, validation acc gets to about 30% for me
// generated with gpt4-o, probably still buggy
// testing what Hinton spoke about here https://youtu.be/tP-4njhyGvo?si=9JCVwyiftFayc6mA&t=857
// i.e. 50% label noise on train
// CNN, ~10^8 params i.e. in overparam regime for MNIST, tried adding regularisation
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import os
# Define transformations for the training and validation sets
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
# Define hyperparameters
batch_size = 64
validation_split = 0.1
# Download and prepare the datasets
mnist_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
# Split the dataset into training and validation sets
train_size = int((1 - validation_split) * len(mnist_dataset))
val_size = len(mnist_dataset) - train_size
train_dataset, val_dataset = random_split(mnist_dataset, [train_size, val_size])
# Preprocess data
train_images = train_dataset.dataset.data[train_dataset.indices].view(-1, 28*28).float() / 255.0
train_labels = train_dataset.dataset.targets[train_dataset.indices]
# Shuffle the training set
indices = torch.randperm(len(train_labels))
train_images_shuffled = train_images[indices]
train_labels_shuffled = train_labels[indices]
# Create a dataset with shuffled images and partially shuffled labels
shuffled_train_dataset = TensorDataset(train_images_shuffled, train_labels_shuffled)
# Data loaders
train_loader = DataLoader(shuffled_train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Define the CNN model with Dropout
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
self.fc1 = nn.Linear(256*3*3, 1024)
self.fc2 = nn.Linear(1024, 1024)
self.fc3 = nn.Linear(1024, 10)
self.dropout = nn.Dropout(0.3) # Add dropout with 30% probability
def forward(self, x):
x = torch.relu(self.conv1(x))
x = torch.max_pool2d(x, 2)
x = torch.relu(self.conv2(x))
x = torch.max_pool2d(x, 2)
x = torch.relu(self.conv3(x))
x = torch.max_pool2d(x, 2)
x = x.view(-1, 256*3*3)
x = torch.relu(self.fc1(x))
x = self.dropout(x) # Apply dropout
x = torch.relu(self.fc2(x))
x = self.dropout(x) # Apply dropout
x = self.fc3(x)
return x
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
# Define loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4) # Add weight decay for L2 regularization
num_epochs = 2000 # Reduce the number of epochs for testing purposes
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
def calculate_accuracy(loader, model):
correct = 0
total = 0
with torch.no_grad():
for images, labels in loader:
images = images.view(-1, 1, 28, 28).to(device) # Reshape to original dimensions
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
return 100 * correct / total
# Open a log file to write the loss data incrementally
with open('training_log_cnn.txt', 'w') as log_file:
log_file.write('Epoch,Train Loss,Validation Loss,Train Accuracy,Validation Accuracy\n')
for epoch in range(num_epochs):
scaler = GradScaler()
model.train()
train_loss = 0
for images, labels in train_loader:
images = images.view(-1, 1, 28, 28).to(device) # Reshape to original dimensions
labels = labels.to(device)
optimizer.zero_grad()
with autocast():
outputs = model(images)
loss = criterion(outputs, labels)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
train_loss += loss.item()
train_loss /= len(train_loader)
train_losses.append(train_loss)
train_accuracy = calculate_accuracy(train_loader, model)
train_accuracies.append(train_accuracy)
model.eval()
val_loss = 0
with torch.no_grad():
for images, labels in val_loader:
images = images.view(-1, 1, 28, 28).to(device) # Reshape to original dimensions
labels = labels.to(device)
outputs = model(images)
loss = criterion(outputs, labels)
val_loss += loss.item()
val_loss /= len(val_loader)
val_losses.append(val_loss)
val_accuracy = calculate_accuracy(val_loader, model)
val_accuracies.append(val_accuracy)
log_file.write(f'{epoch + 1},{train_loss:.4f},{val_loss:.4f},{train_accuracy:.2f},{val_accuracy:.2f}\n')
log_file.flush() # Ensure data is written incrementally
print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Validation Accuracy: {val_accuracy:.2f}%')
# Plotting losses and accuracies
fig, ax1 = plt.subplots()
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.plot(range(1, num_epochs + 1), train_losses, label='Train Loss', color='tab:blue')
ax1.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss', color='tab:orange')
ax1.tick_params(axis='y')
ax2 = ax1.twinx()
ax2.set_ylabel('Accuracy')
ax2.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy', color='tab:green')
ax2.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy', color='tab:red')
ax2.tick_params(axis='y')
fig.tight_layout()
fig.legend(loc='upper right', bbox_to_anchor=(1,1), bbox_transform=ax1.transAxes)
plt.title('Loss and Accuracy over Epochs')
# Save the plot as a JPG file
plt.savefig('training_plot.jpg', format='jpg', dpi=300)
plt.show()
# Calculate test accuracy
test_accuracy = calculate_accuracy(test_loader, model)
# Log the test accuracy
with open('training_log_cnn.txt', 'a') as log_file: # Append to the log file
log_file.write(f'Test Accuracy: {test_accuracy:.2f}%\n')
print(f'Test Accuracy: {test_accuracy:.2f}%')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment