ganindu7/.gitignore

## .gitignore
./data/*
logs
runs
./img/*
tiny-imagenet-200
*.pth

## alexnet_replica.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we will implment alexnet!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install fastai # to help run test datasets quicly \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from torchvision import datasets, transforms\n",
    "from torch.utils.data import DataLoader, random_split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![Alexnet aarch](img/alexnet_aarch.png)\n",
    "<br/>\n",
    "Image: from [Andrew NG: Convolutional Neural Networks Slides](https://www.coursera.org/lecture/convolutional-neural-networks/classic-networks-MmYe2)\n",
    "\n",
    "```shell\n",
    " AlexNet Architecture\n",
    "\n",
    " Input: 3x227x227\n",
    "  |\n",
    " Conv1: {96 filters, 11x11 ksize with Stride=4, Padding=No} -> ReLU -> MaxPool: 3x3, Stride=2\n",
    "  |\n",
    " Conv2: {256 filters, 5x5 ksize  with Stride-1, Padding=2} -> ReLU -> MaxPool: 3x3, Stride=2\n",
    "  |\n",
    " Conv3: 384x3x3, Padding=1 -> ReLU\n",
    "  |\n",
    " Conv4: 256x3x3, Padding=1 -> ReLU\n",
    "  |\n",
    " Conv5: 256x3x3, Padding=1 -> ReLU -> MaxPool: 3x3, Stride=2\n",
    "  |\n",
    " AdaptiveAvgPool: Output Size=6x6 -> Flatten\n",
    "  |\n",
    " Dropout -> FC1: 4096 -> ReLU\n",
    "  |\n",
    " Dropout -> FC2: 4096 -> ReLU\n",
    "  |\n",
    " FC3: (num_classes)\n",
    "  |\n",
    " Softmax \n",
    "\n",
    " source: https://pytorch.org/vision/main/_modules/torchvision/models/alexnet.html#AlexNet_Weights\n",
    "\n",
    "Note:\n",
    "Something that is slightly frustrating when it comes to implementing AlexNet from the paper (that suggest \n",
    "imput image dims of 224x224) is the getting the first Conv2d output to match the figure in the tha paper. \n",
    "\n",
    "the equation for the output of the first Conv2d operation should follow this format. (simplified equation)\n",
    "output_width = (input_width - kern_width + 2 * padding)/stride + 1 \n",
    "             = (227 - 11)/4 + 1\n",
    "             = 55\n",
    "\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class AlexNet(nn.Module):\n",
    "\n",
    "    def __init__(self, num_classes=10):\n",
    "        super().__init__()\n",
    "        # Note: Tested on Nvidia DGX station A100 (please change size appropriately to suit your compute)\n",
    "        # In the paper authors have split the model to two GPUs, here it's using just one. \n",
    "        # input: 227x227 RGB(3ch) image\n",
    "\n",
    "        # First layer: conv 96 kernels with ksize 11, stride 4 -> ReLU -> MaxPool\n",
    "        '''\n",
    "        Discussion: wrt. to the implementaiton on the paper.\n",
    "        In the paper this is shown as two filter sets (one for each gpu).\n",
    "        These primary featue filters have an input depth of 3 (to fit the image dims) and filters \n",
    "        in either gpu (48 per each gpu) these are associated with the full image surface as they usualy are \n",
    "        in convolution networks. they are just two stacks of 48 independent filters allocated per gpu. \n",
    "\n",
    "        we can think of this approach(Figure 2, in the paper) as a form of parallelisation \n",
    "        to reduce the number of operations happening in series (because in this case we create 96 filters\n",
    "        in about the same time to create 48 fiters)\n",
    "\n",
    "        having said that as I mentioned in this implementation we create the lump 96 filters without explicily optimising\n",
    "        as done in the paper. \n",
    "        '''\n",
    "        self.num_output_classes = num_classes \n",
    "\n",
    "        self.conv1 = nn.Sequential(\n",
    "            nn.Conv2d(3, 96, kernel_size=11, stride=4), # no padding\n",
    "            nn.ReLU(inplace=False), # this results in a 55x55x96 output \n",
    "\n",
    "            # in the paper: After ReLU'ing Local response normalisation is used, however I have not seen this in \n",
    "            # many implmentations (including the one at the pytorch website). and some research suggests that. \n",
    "            # Local response normalise is less favoured over techniques such as batch normalisation \n",
    "            \n",
    "            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),\n",
    "            nn.MaxPool2d(kernel_size=3, stride=2) # this results in a 27x27x96 output. \n",
    "        ) \n",
    "\n",
    "        # Second layer: conv 256 kernels with ksize 5 -> ReLU -> MaxPool\n",
    "        self.conv2 = nn.Sequential(\n",
    "            nn.Conv2d(96, 256, kernel_size=5, padding=2, stride=1), # same padding\n",
    "            nn.ReLU(inplace=False), # result size unchanges as this is as same padding\n",
    "            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),\n",
    "            nn.MaxPool2d(kernel_size=3, stride=2) # this results in a 13x13x256 output.\n",
    "        )\n",
    "\n",
    "        '''\n",
    "        Note:\n",
    "        \"Third fourth and fifth convolutional layers are conected to one another without any interveaning \n",
    "        pooling or normalisation layers\"\n",
    "        '''\n",
    "        # Third layer: conv 384 kernels with ksize 3 -> ReLU \n",
    "        self.conv3 = nn.Sequential(\n",
    "            nn.Conv2d(256, 384, kernel_size=3, padding='same'), # s=1, p=1\n",
    "            nn.ReLU(inplace=False) # output: 13x13x384\n",
    "        )\n",
    "\n",
    "        # Fourth layer: conv 384 kernels with ksize 3 -> ReLU \n",
    "        self.conv4 = nn.Sequential(\n",
    "            nn.Conv2d(384, 384, kernel_size=3, padding='same'), # s=1, p=1\n",
    "            nn.ReLU(inplace=False) # output 13x13x384\n",
    "        )\n",
    "\n",
    "        # Fifth layer: conv 256 kernels with ksize 3 -> ReLU\n",
    "        '''\n",
    "        As mentioned in the Note above there are no pooling or normalising between layers 3, 4, 5.\n",
    "        From layer 5 onwards we can resume pooling \n",
    "        '''\n",
    "        self.conv5 = nn.Sequential(\n",
    "            nn.Conv2d(384, 256, kernel_size=3, padding='same'), # s=1, p=1\n",
    "            nn.ReLU(inplace=False),\n",
    "            nn.MaxPool2d(kernel_size=3, stride=2) # output: 6x6x256\n",
    "        )\n",
    "\n",
    "        # pack these layers into a stage: stage 1, Feature extractor\n",
    "        self.feature_extractor = nn.Sequential(\n",
    "                                                self.conv1, \n",
    "                                                self.conv2, \n",
    "                                                self.conv3,\n",
    "                                                self.conv4, \n",
    "                                                self.conv5\n",
    "                                            )\n",
    "        \n",
    "        '''\n",
    "        At this point the paper moves on to having FC layers but the pytorch implementaion \n",
    "        seem to do avgpooling, I will not followthat here and try to stick to the paper as much as \n",
    "        possible.\n",
    "\n",
    "        so as the paper suggests the next step is using dropout and building the fully conected layer.\n",
    "\n",
    "        droput: This turns off all activations below 0.5 (the aim is to encourage higher indepent feature learning)\n",
    "                \n",
    "\n",
    "        building the Fully Connected layers:\n",
    "            The last convolution output has dims 6x6x256, This means we have 9216 values arranges in matrix form that\n",
    "            needs to be packed in to a single dimension for the fully connected layer. We do this reshaping \n",
    "            in the forward pass. In this constructor we will just declare the layer. \n",
    "        '''\n",
    "\n",
    "        self.prepare_and_fc1 = nn.Sequential(\n",
    "               nn.Dropout(p= 0.5, inplace=False),\n",
    "               nn.Linear(6*6*256, 4096), \n",
    "               nn.ReLU(inplace=False)\n",
    "        )\n",
    "\n",
    "        self.fc2 = nn.Sequential(\n",
    "            nn.Dropout(p=0.5, inplace=False),\n",
    "            nn.Linear(4096, 4096),\n",
    "            nn.ReLU(inplace=False)\n",
    "        )\n",
    "\n",
    "        self.output = nn.Sequential(\n",
    "            nn.Linear(4096, self.num_output_classes),\n",
    "        )\n",
    "\n",
    "        self.fc_layers = nn.Sequential(\n",
    "            self.prepare_and_fc1,\n",
    "            self.fc2,\n",
    "            self.output\n",
    "\n",
    "        )\n",
    "        \n",
    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
    "        x = self.feature_extractor(x)\n",
    "        x = torch.flatten(x, 1) # flattening to create FC along dim 1\n",
    "        x = self.fc_layers(x)\n",
    "\n",
    "        return x \n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Now let's try if this works at all,  \n",
    "To make them fit without distortion I will upscale and do a centercrop.\n",
    "\n",
    "the dataset we will use here first is MNIST (I know this is not the best approach but, I just wanted to make sure that pretty much anything sensible should work)\n",
    "later we can try any other dataset (I have added an example of using imagenette below, I think that is way more suitable becasue It has all three channels and a good image size compared \n",
    "to MNIST's tiny images)\n",
    "'''\n",
    "\n",
    "from torchvision import datasets, transforms\n",
    "from torch.utils.data import DataLoader, random_split\n",
    "\n",
    "\n",
    "# Standard preprocessing\n",
    "preprocess_1 = transforms.Compose([\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Resize(252), \n",
    "    transforms.CenterCrop(227),\n",
    "    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # Convert grayscale to RGB by repeating the single channel 3 times\n",
    "    # transforms.ToTensor(),\n",
    "    transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
    "                         std=[0.229, 0.224, 0.225])\n",
    "])\n",
    "\n",
    "\n",
    "# Preprocessing with augmentation\n",
    "# preprocess_with_augmentation = transforms.Compose([\n",
    "#     transforms.RandomHorizontalFlip(),\n",
    "#     transforms.RandomRotation(10),  # Rotate the image by a random angle between -10 and 10 degrees\n",
    "#     transforms.RandomResizedCrop(227, scale=(0.8, 1.0)),  # Randomly crop and resize\n",
    "#     transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter\n",
    "#     transforms.ToTensor(),\n",
    "#     transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
    "#                          std=[0.229, 0.224, 0.225])\n",
    "# ])\n",
    "\n",
    "# Load Tiny ImageNet dataset\n",
    "# can be downloaded with `wget http://cs231n.stanford.edu/tiny-imagenet-200.zip`\n",
    "# and then extract to folder\n",
    "# train_dataset_all = datasets.ImageFolder(root='./tiny-imagenet-200/train', transform=preprocess_1)\n",
    "\n",
    "# Download and load the Oxford-IIIT Pet dataset\n",
    "# train_dataset_all = datasets.OxfordIIITPet(root='./data', \n",
    "#                                            split='trainval', \n",
    "#                                            target_types='category', \n",
    "#                                            download=True, \n",
    "#                                            transform=preprocess_1)\n",
    "\n",
    "train_dataset_all = datasets.MNIST(root='data', train=True, download=True, transform=preprocess_1)\n",
    "\n",
    "\n",
    "# Split into train and validation\n",
    "train_size = int(0.8 * len(train_dataset_all))\n",
    "val_size = len(train_dataset_all) - train_size\n",
    "\n",
    "train_dataset, val_dataset = random_split(train_dataset_all, [train_size, val_size])\n",
    "\n",
    "train_dataset_loader = DataLoader(train_dataset_all, batch_size=12, shuffle=True, num_workers=4)\n",
    "val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# testing on imagenette \n",
    "\n",
    "''' \n",
    "I have downloaded the imagenette dataset into a folder `data/imagenette2` \n",
    "imagenette2 folder has train and val subdirectories. This is the format expected by \n",
    "Pytorch's imagefolder class.  \n",
    "\n",
    "I this format classes are in folders and the pytorch's torchvison.datasets.ImageFolder can import them in a compatible way and associate with loss functions for example.\n",
    "'''\n",
    "preprocess = transforms.Compose([\n",
    "    transforms.Resize(256),  # Resize the shortest side to 256 pixels\n",
    "    transforms.CenterCrop(227),  # Crop to 227x227 for AlexNet\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
    "])\n",
    "\n",
    "# Load the imagenette dataset\n",
    "train_dataset_all = datasets.ImageFolder(root='./data/imagenette2/train', transform=preprocess)\n",
    "\n",
    "# train_dataset_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)\n",
    "\n",
    "# Split into train and validation\n",
    "train_size = int(0.8 * len(train_dataset_all))\n",
    "val_size = len(train_dataset_all) - train_size\n",
    "\n",
    "train_dataset, val_dataset = random_split(train_dataset_all, [train_size, val_size])\n",
    "\n",
    "train_dataset_loader = DataLoader(train_dataset_all, batch_size=64, shuffle=True, num_workers=4)\n",
    "val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "using cuda\n",
      "Train Epoch: 0: \tLoss: 2.315335\n",
      "Train Epoch: 1: \tLoss: 2.297037\n",
      "Train Epoch: 2: \tLoss: 2.305285\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Set device to GPU if available, otherwise fallback to CPU\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "print(f'using {device}')\n",
    "\n",
    "\n",
    "model = AlexNet(num_classes=10).to(device)\n",
    "\n",
    "# define loss function and optimizer\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# in this toy example we try only a few epochs\n",
    "\n",
    "n_epochs = 50\n",
    "\n",
    "# initlaise a list to store the loss values for plotting. \n",
    "train_losses = []\n",
    "\n",
    "\n",
    "\n",
    "torch.autograd.set_detect_anomaly(True) # enable anomaly detection in graph to probe for issue in gradient flow \n",
    "\n",
    "for epoch in range(n_epochs):\n",
    "    model.train() # set the mode to training so during the forward pass, the gradient graph is formed \n",
    "    running_loss = 0.0 # reset running loss agt start of epoch\n",
    "    for batch_idx, (data, target) in enumerate(train_dataset_loader):\n",
    "        \n",
    "        data, target = data.to(device), target.to(device)\n",
    "        \n",
    "        optimizer.zero_grad() # reset gradients befor the forward pass \n",
    "        output = model(data)\n",
    "        loss = criterion(output, target)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        running_loss += loss.item() # we add up the loss of all batches in the epoch\n",
    "        # if batch_idx % 100 == 0:\n",
    "        #     print(f'Train Epoch: {epoch}: processed [{batch_idx *len(data)}/{len(train_dataset_loader.dataset)}'\n",
    "        #           f'[({100. * batch_idx / len(train_dataset_loader):.0f})]\\tLoss: {loss.item():.6f}')\n",
    "            # print(f'loss = {loss.item():.6f}')\n",
    "\n",
    "    #loss per epoch is running_loss/batch_size\n",
    "    epoch_loss = running_loss/len(train_dataset_loader)\n",
    "    train_losses.append(epoch_loss)\n",
    "\n",
    "    print(f'Train Epoch: {epoch}: \\tLoss: {loss.item():.6f}')\n",
    "\n",
    "        \n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import  matplotlib.pyplot as plt \n",
    "%matplotlib inline \n",
    "\n",
    "# plot the training loss \n",
    "plt.plot(range(1, n_epochs+1), train_losses, marker='o', label='Training Loss')\n",
    "plt.xlabel('Epoch')\n",
    "plt.ylabel('Loss')\n",
    "plt.title('Training loss over epochs')\n",
    "plt.legend()\n",
    "plt.show()\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## alexnet_with_tensorboard.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              alexnet_with_tensorboard.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## custom_datasets.py
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset

class CustomImageDataset(Dataset):
    def __init__(self, annotation_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotation_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[index, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


## data
../../data

## dataloader.py
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

training_data = datasets.FashionMNIST(root="data",
                                      train="True",
                                      download=True,
                                      transform=ToTensor()
                                     )

labels_map = {
    0: "Tshirt",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle boot",
}

figure = plt.figure(figsize=(8,8))
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = int(train_labels[0])
print(f"Label: {labels_map[label]}")
plt.title(labels_map[label])
plt.imshow(img, cmap="gray")
plt.show()


## eager_model.py
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork().to(device)
print(model)

X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_prob = nn.Softmax(dim=1)(logits)
y_pred = pred_prob.argmax(1)
print(f"Prediction: {y_pred}")

## img
../../img/

## loading_models.py
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from quickstart import NeuralNetwork

# the process of loading models includes re-creating the model structure and loading the state dictionary into it

model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))

model.eval()

classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

test_data = datasets.FashionMNIST(
                                    root="data",
                                    train=False,
                                    download=True,
                                    transform=ToTensor(),
    )

x, y = test_data[0][0], test_data[0][1]

with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f"Predicted: {predicted}, Actual: {actual}")

## model_params.py
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor

from torchvision import datasets, transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

'''
Define a NN
'''

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


'''
acquire some training data
'''
training_data = datasets.FashionMNIST(root="data",
                                      train="True",
                                      download=True,
                                      transform=ToTensor()
                                     )

'''
get some image tensors from the dataset
'''
listsize = 3
image_list = []
for i in range(1, listsize + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, *_  = training_data[sample_idx]
    image_list.append(img)


'''
instantiate the model and pass an image through it
Note: The model is still untrained
'''
model = NeuralNetwork().to(device)
image = image_list[0].to(device)
logits = model(image)
pred_prob = nn.Softmax(dim=1)(logits)
y_pred = pred_prob.argmax(1)
print(f"Prediction: {y_pred}")

print(f"Model Structure: {model} ")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

## network_components.py
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
# import matplotlib.pyplot as plt
# import  numpy as np

training_data = datasets.FashionMNIST(root="data",
                                      train="True",
                                      download=True,
                                      transform=ToTensor()
                                     )

listsize = 3
image_list = []
for i in range(1, listsize + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    image_list.append(img)

'''
The image pulled from the training data has dimensions  [1=channels, 28=dim1(width?), 28=dim2(height?)]
the n image stack now has dimensions [n, 1, 28, 28]
the following step will squeeze out the dimension with one resulting in a [n=3?, 28, 28] tensor.
you can uncomment the two lines below to verify this behavour.
'''
# intermediate_raw_stack = torch.stack(image_list,0)
# print(f"intermediate dimaneions {intermediate_raw_stack.size()}")
images = torch.squeeze(torch.stack(image_list,0), 1) # image tensor
print(f"Images are now stacked into a tensor of dims: {images.size()}")

'''
Flattten layer conditions the 2D input by reducing the dimensions
'''
flatten = nn.Flatten()
flat_images = flatten(images)

print(f"Now we have a tensor of 1D values (flattened 2D values) representing images: {flat_images.size()}")

layer1 = nn.Linear(in_features=28*28, out_features=512)
hidden1 = layer1(flat_images)
print(f"Size of the hidden linear layer [1] = {hidden1.size()}")

print(f"Before ReLU: {hidden1}")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

'''
sequential net made from the modules above.
'''

seq_modules = nn.Sequential(flatten,  # flattened (from 28x28/2D to 784/1D) feature tensors
                            layer1,   # input: flattened features, output 512 units
                            nn.ReLU(),
                            nn.Linear(512, 10)
                            )

logits = seq_modules(images)

print(f"logits after seqential network = {logits}")

softmax = nn.Softmax(dim=1) # 'dim' parameter indicates the dimension along the values must sum to 1
predicted_probabilities = softmax(logits)

print(f"predicted probababilities = {predicted_probabilities}")


## nn_refresher.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              nn_refresher.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## quickstart.py
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt


'''
Model creation
'''

# Model definition
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(self.parameters(), lr=1e-3)


    def forward(self, x):
            x = self.flatten(x)
            logits = self.linear_relu_stack(x)
            return logits


def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y ) in enumerate(dataloader):
        X, y = X.to(model.device), y.to(model.device)

        # compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(model.device), y.to(model.device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


if __name__ == "__main__":

    # download training data from open datasets
    training_data = datasets.FashionMNIST(
                                       root="data",
                                       train=True,
                                       download=True,
                                       transform=ToTensor(),
    )

    # download test data from open datsets
    test_data = datasets.FashionMNIST(
                                    root="data",
                                    train=False,
                                    download=True,
                                    transform=ToTensor(),
    )

    batch_size = 64


    '''
    pass the data to a Dataloader, the Dataloader wraps an iterable over the dataset
    and support automatic  batching, random sampling, then loads the data into our training functions
    Here our batch size is 64 each elemement will trturn a batch size of 64 features and labels.
    '''

    train_dataloader = DataLoader(training_data, batch_size=batch_size)
    test_dataloader = DataLoader(test_data, batch_size=batch_size)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = NeuralNetwork().to(device)

    epochs = 5
    for t in range(epochs):
        print(f"Epoch {t+1}\n-----------------------")
        train(train_dataloader, model, model.loss_fn, model.optimizer)
        test(test_dataloader, model, model.loss_fn)
    print("Done")

    torch.save(model.state_dict(), "model.pth")
    print("saved model")


## visualise_data.py
import torch
# from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

training_data = datasets.FashionMNIST(root="data",
                                      train="True",
                                      download=True,
                                      transform=ToTensor()
                                     )

labels_map = {
    0: "Tshirt",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle boot",
}

figure = plt.figure(figsize=(8,8))
cols, rows = 3, 3
for i in range(1, cols*rows + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(labels_map[label])
    plt.axis("off")
    # print(f"image shape before squeeze = {img.shape}")
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Now we will implment alexnet!"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"#!pip install fastai # to help run test datasets quicly \n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import torch\n",
	"import torch.nn as nn\n",
	"import torch.optim as optim\n",
	"from torchvision import datasets, transforms\n",
	"from torch.utils.data import DataLoader, random_split"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"![Alexnet aarch](img/alexnet_aarch.png)\n",
	"<br/>\n",
	"Image: from [Andrew NG: Convolutional Neural Networks Slides](https://www.coursera.org/lecture/convolutional-neural-networks/classic-networks-MmYe2)\n",
	"\n",
	"```shell\n",
	" AlexNet Architecture\n",
	"\n",
	" Input: 3x227x227\n",
	" \|\n",
	" Conv1: {96 filters, 11x11 ksize with Stride=4, Padding=No} -> ReLU -> MaxPool: 3x3, Stride=2\n",
	" \|\n",
	" Conv2: {256 filters, 5x5 ksize with Stride-1, Padding=2} -> ReLU -> MaxPool: 3x3, Stride=2\n",
	" \|\n",
	" Conv3: 384x3x3, Padding=1 -> ReLU\n",
	" \|\n",
	" Conv4: 256x3x3, Padding=1 -> ReLU\n",
	" \|\n",
	" Conv5: 256x3x3, Padding=1 -> ReLU -> MaxPool: 3x3, Stride=2\n",
	" \|\n",
	" AdaptiveAvgPool: Output Size=6x6 -> Flatten\n",
	" \|\n",
	" Dropout -> FC1: 4096 -> ReLU\n",
	" \|\n",
	" Dropout -> FC2: 4096 -> ReLU\n",
	" \|\n",
	" FC3: (num_classes)\n",
	" \|\n",
	" Softmax \n",
	"\n",
	" source: https://pytorch.org/vision/main/_modules/torchvision/models/alexnet.html#AlexNet_Weights\n",
	"\n",
	"Note:\n",
	"Something that is slightly frustrating when it comes to implementing AlexNet from the paper (that suggest \n",
	"imput image dims of 224x224) is the getting the first Conv2d output to match the figure in the tha paper. \n",
	"\n",
	"the equation for the output of the first Conv2d operation should follow this format. (simplified equation)\n",
	"output_width = (input_width - kern_width + 2 * padding)/stride + 1 \n",
	" = (227 - 11)/4 + 1\n",
	" = 55\n",
	"\n",
	"```\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"\n",
	"class AlexNet(nn.Module):\n",
	"\n",
	" def __init__(self, num_classes=10):\n",
	" super().__init__()\n",
	" # Note: Tested on Nvidia DGX station A100 (please change size appropriately to suit your compute)\n",
	" # In the paper authors have split the model to two GPUs, here it's using just one. \n",
	" # input: 227x227 RGB(3ch) image\n",
	"\n",
	" # First layer: conv 96 kernels with ksize 11, stride 4 -> ReLU -> MaxPool\n",
	" '''\n",
	" Discussion: wrt. to the implementaiton on the paper.\n",
	" In the paper this is shown as two filter sets (one for each gpu).\n",
	" These primary featue filters have an input depth of 3 (to fit the image dims) and filters \n",
	" in either gpu (48 per each gpu) these are associated with the full image surface as they usualy are \n",
	" in convolution networks. they are just two stacks of 48 independent filters allocated per gpu. \n",
	"\n",
	" we can think of this approach(Figure 2, in the paper) as a form of parallelisation \n",
	" to reduce the number of operations happening in series (because in this case we create 96 filters\n",
	" in about the same time to create 48 fiters)\n",
	"\n",
	" having said that as I mentioned in this implementation we create the lump 96 filters without explicily optimising\n",
	" as done in the paper. \n",
	" '''\n",
	" self.num_output_classes = num_classes \n",
	"\n",
	" self.conv1 = nn.Sequential(\n",
	" nn.Conv2d(3, 96, kernel_size=11, stride=4), # no padding\n",
	" nn.ReLU(inplace=False), # this results in a 55x55x96 output \n",
	"\n",
	" # in the paper: After ReLU'ing Local response normalisation is used, however I have not seen this in \n",
	" # many implmentations (including the one at the pytorch website). and some research suggests that. \n",
	" # Local response normalise is less favoured over techniques such as batch normalisation \n",
	" \n",
	" nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),\n",
	" nn.MaxPool2d(kernel_size=3, stride=2) # this results in a 27x27x96 output. \n",
	" ) \n",
	"\n",
	" # Second layer: conv 256 kernels with ksize 5 -> ReLU -> MaxPool\n",
	" self.conv2 = nn.Sequential(\n",
	" nn.Conv2d(96, 256, kernel_size=5, padding=2, stride=1), # same padding\n",
	" nn.ReLU(inplace=False), # result size unchanges as this is as same padding\n",
	" nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),\n",
	" nn.MaxPool2d(kernel_size=3, stride=2) # this results in a 13x13x256 output.\n",
	" )\n",
	"\n",
	" '''\n",
	" Note:\n",
	" \"Third fourth and fifth convolutional layers are conected to one another without any interveaning \n",
	" pooling or normalisation layers\"\n",
	" '''\n",
	" # Third layer: conv 384 kernels with ksize 3 -> ReLU \n",
	" self.conv3 = nn.Sequential(\n",
	" nn.Conv2d(256, 384, kernel_size=3, padding='same'), # s=1, p=1\n",
	" nn.ReLU(inplace=False) # output: 13x13x384\n",
	" )\n",
	"\n",
	" # Fourth layer: conv 384 kernels with ksize 3 -> ReLU \n",
	" self.conv4 = nn.Sequential(\n",
	" nn.Conv2d(384, 384, kernel_size=3, padding='same'), # s=1, p=1\n",
	" nn.ReLU(inplace=False) # output 13x13x384\n",
	" )\n",
	"\n",
	" # Fifth layer: conv 256 kernels with ksize 3 -> ReLU\n",
	" '''\n",
	" As mentioned in the Note above there are no pooling or normalising between layers 3, 4, 5.\n",
	" From layer 5 onwards we can resume pooling \n",
	" '''\n",
	" self.conv5 = nn.Sequential(\n",
	" nn.Conv2d(384, 256, kernel_size=3, padding='same'), # s=1, p=1\n",
	" nn.ReLU(inplace=False),\n",
	" nn.MaxPool2d(kernel_size=3, stride=2) # output: 6x6x256\n",
	" )\n",
	"\n",
	" # pack these layers into a stage: stage 1, Feature extractor\n",
	" self.feature_extractor = nn.Sequential(\n",
	" self.conv1, \n",
	" self.conv2, \n",
	" self.conv3,\n",
	" self.conv4, \n",
	" self.conv5\n",
	" )\n",
	" \n",
	" '''\n",
	" At this point the paper moves on to having FC layers but the pytorch implementaion \n",
	" seem to do avgpooling, I will not followthat here and try to stick to the paper as much as \n",
	" possible.\n",
	"\n",
	" so as the paper suggests the next step is using dropout and building the fully conected layer.\n",
	"\n",
	" droput: This turns off all activations below 0.5 (the aim is to encourage higher indepent feature learning)\n",
	" \n",
	"\n",
	" building the Fully Connected layers:\n",
	" The last convolution output has dims 6x6x256, This means we have 9216 values arranges in matrix form that\n",
	" needs to be packed in to a single dimension for the fully connected layer. We do this reshaping \n",
	" in the forward pass. In this constructor we will just declare the layer. \n",
	" '''\n",
	"\n",
	" self.prepare_and_fc1 = nn.Sequential(\n",
	" nn.Dropout(p= 0.5, inplace=False),\n",
	" nn.Linear(66256, 4096), \n",
	" nn.ReLU(inplace=False)\n",
	" )\n",
	"\n",
	" self.fc2 = nn.Sequential(\n",
	" nn.Dropout(p=0.5, inplace=False),\n",
	" nn.Linear(4096, 4096),\n",
	" nn.ReLU(inplace=False)\n",
	" )\n",
	"\n",
	" self.output = nn.Sequential(\n",
	" nn.Linear(4096, self.num_output_classes),\n",
	" )\n",
	"\n",
	" self.fc_layers = nn.Sequential(\n",
	" self.prepare_and_fc1,\n",
	" self.fc2,\n",
	" self.output\n",
	"\n",
	" )\n",
	" \n",
	" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
	" x = self.feature_extractor(x)\n",
	" x = torch.flatten(x, 1) # flattening to create FC along dim 1\n",
	" x = self.fc_layers(x)\n",
	"\n",
	" return x \n",
	" \n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"'''\n",
	"Now let's try if this works at all, \n",
	"To make them fit without distortion I will upscale and do a centercrop.\n",
	"\n",
	"the dataset we will use here first is MNIST (I know this is not the best approach but, I just wanted to make sure that pretty much anything sensible should work)\n",
	"later we can try any other dataset (I have added an example of using imagenette below, I think that is way more suitable becasue It has all three channels and a good image size compared \n",
	"to MNIST's tiny images)\n",
	"'''\n",
	"\n",
	"from torchvision import datasets, transforms\n",
	"from torch.utils.data import DataLoader, random_split\n",
	"\n",
	"\n",
	"# Standard preprocessing\n",
	"preprocess_1 = transforms.Compose([\n",
	" transforms.ToTensor(),\n",
	" transforms.Resize(252), \n",
	" transforms.CenterCrop(227),\n",
	" transforms.Lambda(lambda x: x.repeat(3, 1, 1)), # Convert grayscale to RGB by repeating the single channel 3 times\n",
	" # transforms.ToTensor(),\n",
	" transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
	" std=[0.229, 0.224, 0.225])\n",
	"])\n",
	"\n",
	"\n",
	"# Preprocessing with augmentation\n",
	"# preprocess_with_augmentation = transforms.Compose([\n",
	"# transforms.RandomHorizontalFlip(),\n",
	"# transforms.RandomRotation(10), # Rotate the image by a random angle between -10 and 10 degrees\n",
	"# transforms.RandomResizedCrop(227, scale=(0.8, 1.0)), # Randomly crop and resize\n",
	"# transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # Random color jitter\n",
	"# transforms.ToTensor(),\n",
	"# transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
	"# std=[0.229, 0.224, 0.225])\n",
	"# ])\n",
	"\n",
	"# Load Tiny ImageNet dataset\n",
	"# can be downloaded with `wget http://cs231n.stanford.edu/tiny-imagenet-200.zip`\n",
	"# and then extract to folder\n",
	"# train_dataset_all = datasets.ImageFolder(root='./tiny-imagenet-200/train', transform=preprocess_1)\n",
	"\n",
	"# Download and load the Oxford-IIIT Pet dataset\n",
	"# train_dataset_all = datasets.OxfordIIITPet(root='./data', \n",
	"# split='trainval', \n",
	"# target_types='category', \n",
	"# download=True, \n",
	"# transform=preprocess_1)\n",
	"\n",
	"train_dataset_all = datasets.MNIST(root='data', train=True, download=True, transform=preprocess_1)\n",
	"\n",
	"\n",
	"# Split into train and validation\n",
	"train_size = int(0.8 * len(train_dataset_all))\n",
	"val_size = len(train_dataset_all) - train_size\n",
	"\n",
	"train_dataset, val_dataset = random_split(train_dataset_all, [train_size, val_size])\n",
	"\n",
	"train_dataset_loader = DataLoader(train_dataset_all, batch_size=12, shuffle=True, num_workers=4)\n",
	"val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# testing on imagenette \n",
	"\n",
	"''' \n",
	"I have downloaded the imagenette dataset into a folder `data/imagenette2` \n",
	"imagenette2 folder has train and val subdirectories. This is the format expected by \n",
	"Pytorch's imagefolder class. \n",
	"\n",
	"I this format classes are in folders and the pytorch's torchvison.datasets.ImageFolder can import them in a compatible way and associate with loss functions for example.\n",
	"'''\n",
	"preprocess = transforms.Compose([\n",
	" transforms.Resize(256), # Resize the shortest side to 256 pixels\n",
	" transforms.CenterCrop(227), # Crop to 227x227 for AlexNet\n",
	" transforms.ToTensor(),\n",
	" transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
	"])\n",
	"\n",
	"# Load the imagenette dataset\n",
	"train_dataset_all = datasets.ImageFolder(root='./data/imagenette2/train', transform=preprocess)\n",
	"\n",
	"# train_dataset_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)\n",
	"\n",
	"# Split into train and validation\n",
	"train_size = int(0.8 * len(train_dataset_all))\n",
	"val_size = len(train_dataset_all) - train_size\n",
	"\n",
	"train_dataset, val_dataset = random_split(train_dataset_all, [train_size, val_size])\n",
	"\n",
	"train_dataset_loader = DataLoader(train_dataset_all, batch_size=64, shuffle=True, num_workers=4)\n",
	"val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"using cuda\n",
	"Train Epoch: 0: \tLoss: 2.315335\n",
	"Train Epoch: 1: \tLoss: 2.297037\n",
	"Train Epoch: 2: \tLoss: 2.305285\n"
	]
	}
	],
	"source": [
	"\n",
	"# Set device to GPU if available, otherwise fallback to CPU\n",
	"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
	"\n",
	"print(f'using {device}')\n",
	"\n",
	"\n",
	"model = AlexNet(num_classes=10).to(device)\n",
	"\n",
	"# define loss function and optimizer\n",
	"criterion = nn.CrossEntropyLoss()\n",
	"optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
	"\n",
	"# in this toy example we try only a few epochs\n",
	"\n",
	"n_epochs = 50\n",
	"\n",
	"# initlaise a list to store the loss values for plotting. \n",
	"train_losses = []\n",
	"\n",
	"\n",
	"\n",
	"torch.autograd.set_detect_anomaly(True) # enable anomaly detection in graph to probe for issue in gradient flow \n",
	"\n",
	"for epoch in range(n_epochs):\n",
	" model.train() # set the mode to training so during the forward pass, the gradient graph is formed \n",
	" running_loss = 0.0 # reset running loss agt start of epoch\n",
	" for batch_idx, (data, target) in enumerate(train_dataset_loader):\n",
	" \n",
	" data, target = data.to(device), target.to(device)\n",
	" \n",
	" optimizer.zero_grad() # reset gradients befor the forward pass \n",
	" output = model(data)\n",
	" loss = criterion(output, target)\n",
	" loss.backward()\n",
	" optimizer.step()\n",
	"\n",
	" running_loss += loss.item() # we add up the loss of all batches in the epoch\n",
	" # if batch_idx % 100 == 0:\n",
	" # print(f'Train Epoch: {epoch}: processed [{batch_idx *len(data)}/{len(train_dataset_loader.dataset)}'\n",
	" # f'[({100. * batch_idx / len(train_dataset_loader):.0f})]\\tLoss: {loss.item():.6f}')\n",
	" # print(f'loss = {loss.item():.6f}')\n",
	"\n",
	" #loss per epoch is running_loss/batch_size\n",
	" epoch_loss = running_loss/len(train_dataset_loader)\n",
	" train_losses.append(epoch_loss)\n",
	"\n",
	" print(f'Train Epoch: {epoch}: \\tLoss: {loss.item():.6f}')\n",
	"\n",
	" \n",
	"\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import matplotlib.pyplot as plt \n",
	"%matplotlib inline \n",
	"\n",
	"# plot the training loss \n",
	"plt.plot(range(1, n_epochs+1), train_losses, marker='o', label='Training Loss')\n",
	"plt.xlabel('Epoch')\n",
	"plt.ylabel('Loss')\n",
	"plt.title('Training loss over epochs')\n",
	"plt.legend()\n",
	"plt.show()\n",
	"\n"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
	import os
	import pandas as pd
	from torchvision.io import read_image
	from torch.utils.data import Dataset

	class CustomImageDataset(Dataset):
	def __init__(self, annotation_file, img_dir, transform=None, target_transform=None):
	self.img_labels = pd.read_csv(annotation_file)
	self.img_dir = img_dir
	self.transform = transform
	self.target_transform = target_transform

	def __len__(self):
	return len(self.img_labels)

	def __getitem__(self, index):
	img_path = os.path.join(self.img_dir, self.img_labels.iloc[index, 0])
	image = read_image(img_path)
	label = self.img_labels.iloc[index, 1]
	if self.transform:
	image = self.transform(image)
	if self.target_transform:
	label = self.target_transform(label)
	return image, label
	from torchvision import datasets
	from torchvision.transforms import ToTensor
	import matplotlib.pyplot as plt
	from torch.utils.data import DataLoader

	training_data = datasets.FashionMNIST(root="data",
	train="True",
	download=True,
	transform=ToTensor()
	)

	labels_map = {
	0: "Tshirt",
	1: "Trouser",
	2: "Pullover",
	3: "Dress",
	4: "Coat",
	5: "Sandal",
	6: "Shirt",
	7: "Sneaker",
	8: "Bag",
	9: "Ankle boot",
	}

	figure = plt.figure(figsize=(8,8))
	train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
	train_features, train_labels = next(iter(train_dataloader))
	print(f"Feature batch shape: {train_features.size()}")
	print(f"Labels batch shape: {train_labels.size()}")
	img = train_features[0].squeeze()
	label = int(train_labels[0])
	print(f"Label: {labels_map[label]}")
	plt.title(labels_map[label])
	plt.imshow(img, cmap="gray")
	plt.show()
	import os
	import torch
	from torch import nn
	from torch.utils.data import DataLoader
	from torchvision import datasets, transforms

	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	print(f'Using {device} device')

	class NeuralNetwork(nn.Module):
	def __init__(self):
	super(NeuralNetwork, self).__init__()
	self.flatten = nn.Flatten()
	self.linear_relu_stack = nn.Sequential(
	nn.Linear(28*28, 512),
	nn.ReLU(),
	nn.Linear(512, 512),
	nn.ReLU(),
	nn.Linear(512, 10),
	)

	def forward(self, x):
	x = self.flatten(x)
	logits = self.linear_relu_stack(x)
	return logits


	model = NeuralNetwork().to(device)
	print(model)

	X = torch.rand(1, 28, 28, device=device)
	logits = model(X)
	pred_prob = nn.Softmax(dim=1)(logits)
	y_pred = pred_prob.argmax(1)
	print(f"Prediction: {y_pred}")
	import torch
	# from torch.utils.data import Dataset
	from torchvision import datasets
	from torchvision.transforms import ToTensor
	import matplotlib.pyplot as plt

	training_data = datasets.FashionMNIST(root="data",
	train="True",
	download=True,
	transform=ToTensor()
	)

	labels_map = {
	0: "Tshirt",
	1: "Trouser",
	2: "Pullover",
	3: "Dress",
	4: "Coat",
	5: "Sandal",
	6: "Shirt",
	7: "Sneaker",
	8: "Bag",
	9: "Ankle boot",
	}

	figure = plt.figure(figsize=(8,8))
	cols, rows = 3, 3
	for i in range(1, cols*rows + 1):
	sample_idx = torch.randint(len(training_data), size=(1,)).item()
	img, label = training_data[sample_idx]
	figure.add_subplot(rows, cols, i)
	plt.title(labels_map[label])
	plt.axis("off")
	# print(f"image shape before squeeze = {img.shape}")
	plt.imshow(img.squeeze(), cmap="gray")
	plt.show()