# Created January 2, 2023 08:54
import argparse
import gzip
import os
import struct

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
# Parse the command-line arguments supplied by the SageMaker SDK.
# Every default is read from the matching SM_* environment variable. When a
# variable is absent (for example when the script is run outside the training
# container) the flag becomes required, so the script fails with a clear
# argparse message instead of a KeyError raised before the command line is read.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_dir",
    type=str,
    default=os.environ.get("SM_MODEL_DIR"),
    required="SM_MODEL_DIR" not in os.environ,
)
# Map the training and test data locations from S3 to the training container
# environment.
parser.add_argument(
    "--train",
    type=str,
    default=os.environ.get("SM_CHANNEL_TRAIN"),
    required="SM_CHANNEL_TRAIN" not in os.environ,
)
parser.add_argument(
    "--test",
    type=str,
    default=os.environ.get("SM_CHANNEL_TEST"),
    required="SM_CHANNEL_TEST" not in os.environ,
)
args = parser.parse_args()

# Number of samples per batch served by the data loaders.
batch_size = 64
# Load, parse and convert one split of the dataset into Torch tensor objects.
def convert_to_tensor(path, images_file, labels_file):
    """Read a gzip-compressed image/label file pair into tensors.

    Args:
        path: Directory that contains both files.
        images_file: Name of the gzip-compressed images file inside ``path``.
        labels_file: Name of the gzip-compressed labels file inside ``path``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a float32 tensor of shape
        ``(N, 28, 28, 1)`` with values divided by 255, and ``labels`` is an
        int64 tensor of shape ``(N,)``.
    """
    # Open the images file and decompress it, skipping its 16 leading bytes.
    with gzip.open(os.path.join(path, images_file), "rb") as f:
        images = np.frombuffer(f.read(), np.uint8, offset=16)

    # Open the labels file and decompress it, skipping its 8 leading bytes.
    with gzip.open(os.path.join(path, labels_file), "rb") as f:
        labels = np.frombuffer(f.read(), np.uint8, offset=8)

    # Convert the images and labels to tensors. ``astype`` copies the data out
    # of the read-only buffer returned by ``np.frombuffer``, so the tensors own
    # writable memory. The float32 cast is done once, not twice.
    images = images.reshape(-1, 28, 28, 1).astype(np.float32) / 255.0
    labels = labels.astype(np.int64)
    return torch.from_numpy(images), torch.from_numpy(labels)
# Class to hold the raw dataset objects, extending the torch.utils.data.Dataset
# class.
class FashionMNIST(Dataset):
    """In-memory dataset backed by one images/labels file pair.

    The whole split is loaded by ``convert_to_tensor`` at construction time;
    items are then served by plain tensor indexing.
    """

    # (images file, labels file) for each split, keyed by the ``train`` flag.
    # Keeping both pairs in one table means the test file names can no longer
    # overwrite the training ones unconditionally.
    _SPLIT_FILES = {
        True: ("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz"),
        False: ("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"),
    }

    def __init__(self, path, train=True):
        """Load one split from ``path``.

        Args:
            path: Directory that contains the gzip-compressed data files.
            train: Truthy to load the ``train-*`` files, falsy to load the
                ``t10k-*`` files.
        """
        images_file, labels_file = self._SPLIT_FILES[bool(train)]
        self.images, self.labels = convert_to_tensor(path, images_file, labels_file)

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]
# Build the PyTorch data loaders: one over the training split, one over the
# test split, both batched with the module-level batch size.
train_dataloader = DataLoader(
    dataset=FashionMNIST(path=args.train, train=True),
    batch_size=batch_size,
)
test_dataloader = DataLoader(
    dataset=FashionMNIST(path=args.test, train=False),
    batch_size=batch_size,
)

# Select the device used for training: CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier producing 10 logits per 28x28 input.

    The input is flattened to 784 features and passed through
    ``Linear(784, 512) -> ReLU -> Linear(512, 512) -> ReLU -> Linear(512, 10)``.
    """

    def __init__(self):
        # nn.Module must be initialised before any sub-module is assigned to
        # ``self``; otherwise the assignments below raise AttributeError.
        super().__init__()
        self.flatten = nn.Flatten()
        # The ReLU layers give the stack its non-linearity: without them the
        # three Linear layers collapse into a single affine map.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return the raw class logits for the batch ``x``.

        Args:
            x: Batch tensor whose non-batch dimensions flatten to 784 values.

        Returns:
            Tensor of shape ``(batch, 10)`` holding unnormalised scores.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# Instantiate the network and move it to the selected device, then create the
# loss function and the SGD optimiser over the network's parameters.
model = NeuralNetwork()
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len()`` (used only for progress reporting).
        model: The ``nn.Module`` being trained; updated in place.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimiser bound to ``model``'s parameters.

    Prints the loss and the number of samples seen every 100 batches.
    """
    size = len(dataloader.dataset)
    # Batches are moved to wherever the model's parameters live, so the
    # function does not depend on a module-level device variable.
    first_param = next(model.parameters(), None)
    # Put layers such as dropout / batch-norm into training mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if first_param is not None:
            X, y = X.to(first_param.device), y.to(first_param.device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation: clear stale gradients, compute new ones, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``len(dataloader)`` is the
            number of batches and ``len(dataloader.dataset)`` the number of
            samples.
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.

    Raises:
        ZeroDivisionError: If the data loader yields no batches or its dataset
            is empty.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    # Batches are moved to wherever the model's parameters live, so the
    # function does not depend on a module-level device variable.
    first_param = next(model.parameters(), None)
    # Put layers such as dropout / batch-norm into inference mode.
    model.eval()
    test_loss, correct = 0, 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for X, y in dataloader:
            if first_param is not None:
                X, y = X.to(first_param.device), y.to(first_param.device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            # Count predictions whose highest-scoring class equals the label.
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches  # mean of the per-batch losses
    correct /= size  # fraction of correctly classified samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


# The name matches pytest's default ``test*`` collection pattern; mark it so
# that importing this function into a test module does not collect it.
test.__test__ = False
# Train for a fixed number of epochs, evaluating on the test split after each.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

# Persist the learned weights (the model's state dict) as model.pth inside the
# model directory given on the command line.
path = os.path.join(args.model_dir, "model.pth")
torch.save(model.state_dict(), path)
print("Saved PyTorch Model State to model.pth")
