Skip to content

Instantly share code, notes, and snippets.

@georgehc
Last active April 18, 2022 20:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save georgehc/9369b7af1df2e60502b663bccc389def to your computer and use it in GitHub Desktop.
Save georgehc/9369b7af1df2e60502b663bccc389def to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 94-775/95-865: Handwritten Digit Recognition with Neural Nets\n",
"\n",
"Author: George H. Chen (georgechen [at symbol] cmu.edu)\n",
"\n",
"This demo shows how to train and evaluate four neural net models using PyTorch:\n",
"\n",
"1. Flatten -> fully connected -> softmax activation*\n",
"\n",
"2. Flatten -> fully connected -> ReLU -> fully connected -> softmax activation*\n",
"\n",
"3. Conv2d -> ReLU -> MaxPool2d -> flatten -> fully connected -> softmax activation*\n",
"\n",
"4. Conv2d -> ReLU -> MaxPool2d -> Conv2d -> ReLU -> MaxPool2d -> flatten -> fully connected -> softmax activation*\n",
"\n",
"*In PyTorch, the softmax activation is applied automatically as part of the cross entropy loss (`nn.CrossEntropyLoss`), so the models do not include an explicit softmax layer."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import random\n",
"\n",
"# the next two lines are needed on my Intel-based MacBook Air to get the code to run; you likely don't need these two lines...\n",
"# (in fact I used to not need these two lines)\n",
"import os\n",
"os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n",
"\n",
"import torch\n",
"torch.use_deterministic_algorithms(True)\n",
"torch.backends.cudnn.benchmark = False\n",
"import torch.nn as nn\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from torchsummaryX import summary\n",
"\n",
"from UDA_pytorch_utils import UDA_pytorch_classifier_fit, \\\n",
" UDA_plot_train_val_accuracy_vs_epoch, UDA_pytorch_classifier_predict, \\\n",
" UDA_compute_accuracy\n",
"\n",
"np.random.seed(0)\n",
"torch.manual_seed(0)\n",
"random.seed(0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading in the data and a quick data inspection"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"train_dataset = torchvision.datasets.MNIST(root='data/',\n",
" train=True,\n",
" transform=transforms.ToTensor(),\n",
" download=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"train_images = torch.tensor(np.array([image.numpy() for image, label in train_dataset]))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"train_labels = torch.tensor([label for image, label in train_dataset])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([60000, 1, 28, 28])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_images.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([60000])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_labels.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We first take a look at the data."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x7f8834e5ec70>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAN9ElEQVR4nO3df6hc9ZnH8c/HbIv4A2MqXoLGxC2JbFncdBUT0KxZpCUbwVihNf4hygqpUCVqdFe7f1RcFsJuXAX/qFyp9K40lqiRiKiNRDGrQvH6Y01stjXrj/R6gyEqNlWkmjz7xz0p13jnO9eZOXPG+7xfcJmZ89zvOQ+TfO45M2fOfB0RAjDzHdV0AwD6g7ADSRB2IAnCDiRB2IEk/qKfG7PNW/9AzSLCUy3vas9ue4Xt39rebfvmbtYFoF7u9Dy77VmSfifpO5LGJL0g6bKI+E1hDHt2oGZ17NnPkbQ7It6IiD9J+qWkVV2sD0CNugn7KZJ+P+nxWLXsc2yvsT1qe7SLbQHoUjdv0E11qPCFw/SIGJY0LHEYDzSpmz37mKR5kx6fKmm8u3YA1KWbsL8gaaHt021/XdJqSY/0pi0AvdbxYXxEfGb7Gkm/kjRL0r0R8VrPOgPQUx2feutoY7xmB2pXy4dqAHx1EHYgCcIOJEHYgSQIO5AEYQeS6Ov17Mhn0aJFLWtPPPFEceysWbOK9fnz53fUU1bs2YEkCDuQBGEHkiDsQBKEHUiCsANJcOoNXbnrrruK9UsvvbRlbc6cOcWxjz76aEc9YWrs2YEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCb5dNrmhoaFiffPmzcX60qVLi/XS/6+dO3cWx15wwQXF+nvvvVesZ8W3ywLJEXYgCcIOJEHYgSQIO5AEYQeSIOxAElzPPsOVvspZkjZs2FCsL1mypKvt33LLLS1ro6OjxbGcR++trsJu+y1JByQdlPRZRJzdi6YA9F4v9ux/HxH7e7AeADXiNTuQRLdhD0lbbb9oe81Uv2B7je1R2+UXaABq1e1h/LkRMW77ZElP2v7fiNg++RciYljSsMSFMECTutqzR8R4dbtP0sOSzulFUwB6r+Ow2z7W9vGH70v6rqTyNYsAGtPNYfyQpIdtH17Pxogoz8GLvmv33ewrV66sdftjY2Mta08//XSt28bndRz2iHhD0t/0sBcANeLUG5AEYQeSIOxAEoQdSIKwA0lwiesMULqMdePGjcWx1anTjl1yySXF+pYtW7paP3qHPTuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJMF59hng8ssvb1k77bTTimMfe+yxYv3qq68u1t95551iHYODPTuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJOGI/k3SwowwnXn++eeL9cWLF7esjY+PF8euWLGiWN+9e3exjsETEVN+SQF7diAJwg4kQdiBJAg7kARhB5Ig7EAShB1IguvZB8CqVauK9SVLlhTrpc9KPPDAA8Wxn3zySbGOmaPtnt32vbb32d45adkc20/afr26PbHeNgF0azqH8T+XdOTHrG6WtC0iFkraVj0GMMDahj0itkt6/4jFqySNVPdHJF3c27YA9Fqnr9mHImKvJEXEXtsnt/pF22skrelwOwB6pPY36CJiWNKwxIUwQJM6PfX2ru25klTd7utdSwDq0GnYH5F0RXX/CknMywsMuLaH8bbvl7Rc0km2xyT9RNJ6SZtsXyVpj6Tv19nkV93s2bOL9WXLltW27Q8++KBYHxsbq23b7axdu7ZYnzdvXlfrv/HGG7saP9O0DXtEXNaidEGPewFQIz4uCyRB2IEkCDuQBGEHkiDsQBJc4toHBw8eLNbPOuusYv2oo8p/kw8dOtSytn379uLYbl1//fUdj7322muL9fnz53e8bklat25dy9qpp55aHDsTp6Jmzw4kQdiBJAg7kARhB5Ig7EAShB1IgrADSXCevQ/OP//8Yr3dJa6l8+iStG
fPnpa1/fv3F8e2U5oOWmrf+0UXXdTxtj/66KNivd3luWeccUbL2oMPPlgcu3r16mL97bffLtYHEXt2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiC8+w9cPzxxxfrp59+elfrHx8fL9bvu+++lrXdu3cXxy5atKhYv+mmm4r1dtNNl87zb926tTj29ttvL9ZPOOGEYv2pp57qeOxMxJ4dSIKwA0kQdiAJwg4kQdiBJAg7kARhB5LgPHsPnHfeecX6HXfc0dX677nnnmL9tttua1kbGhoqjt2wYUOxvnLlymL9wIEDxfqmTZta1tpNqbxw4cJi/e677y7WS71t27atOPareL16O2337Lbvtb3P9s5Jy261/Y7tV6qf8v8IAI2bzmH8zyWtmGL5HRGxuPp5rLdtAei1tmGPiO2S3u9DLwBq1M0bdNfYfrU6zD+x1S/ZXmN71PZoF9sC0KVOw/5TSd+UtFjSXkktr1iIiOGIODsizu5wWwB6oKOwR8S7EXEwIg5JukfSOb1tC0CvdRR223MnPfyepJ2tfhfAYGh7nt32/ZKWSzrJ9pikn0habnuxpJD0lqQf1tfi4DvzzDNrXX/pPHo7mzdvLtaXLFnS8bql9tezP/PMMy1rS5cuLY599tlnO+rpsDvvvLNlrd05/pmobdgj4rIpFv+shl4A1IiPywJJEHYgCcIOJEHYgSQIO5AEl7j2wOzZs4t128X6li1butp+aVrlBQsWFMe2623dunXFeunUmlT+quqNGzcWx3bbW+nUW0bs2YEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCc6z90FEdFXvxqFDh7radrvLd/fs2VOsH3300S1rb775ZnHssmXLivUPP/ywWMfnsWcHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSRc5zneL2zM7t/G+qjur0RuNyV06Xr29evXF8ced9xxnbT0Z+2uOd+/f3/L2pVXXlkc+/jjj3fSUnoRMeU/Cnt2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiC69l74NNPPy3WP/7442L9mGOOKdafe+65Yr2fn5U40oEDB4r1TZs2taxxHr2/2u7Zbc+z/bTtXbZfs722Wj7H9pO2X69uT6y/XQCdms5h/GeS1kXEX0laKulHtr8l6WZJ2yJioaRt1WMAA6pt2CNib0S8VN0/IGmXpFMkrZI0Uv3aiKSLa+oRQA98qdfsthdI+rakX0saioi90sQfBNsntxizRtKaLvsE0KVph932cZIeknRdRPyh3QUQh0XEsKThah0z8kIY4KtgWqfebH9NE0H/RURsrha/a3tuVZ8raV89LQLohbaXuHpiFz4i6f2IuG7S8v+Q9F5ErLd9s6Q5EfFPbdaVcs9+4YUXFus33HBDsb58+fJivZtTbyMjI8X6jh07ivWXX365WG83pTN6r9UlrtM5jD9X0uWSdth+pVr2Y0nrJW2yfZWkPZK+34M+AdSkbdgj4llJrV6gX9DbdgDUhY/LAkkQdiAJwg4kQdiBJAg7kARfJQ3MMHyVNJAcYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJNE27Lbn2X7a9i7br9leWy2/1fY7tl+pflbW3y6ATrWdJML2XElzI+Il28dLelHSxZJ+IOmPEbFh2htjkgigdq0miZjO/Ox7Je2t7h+wvUvSKb1tD0DdvtRrdtsLJH1b0q+rRdfYftX2vbZPbDFmje1R26PdtQqgG9Oe6832cZKekfRvEbHZ9pCk/ZJC0r9q4lD/H9usg8N4oGatDuOnFXbbX5P0qKRfRcR/TlFfIOnRiPjrNush7EDNOp7Y0bYl/UzSrslBr964O+x7knZ22ySA+kzn3fjzJP23pB2SDlWLfyzpMkmLNXEY/5akH1Zv5pXWxZ4dqFlXh/G9QtiB+jE/O5AcYQeSIOxAEo
QdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IIm2XzjZY/slvT3p8UnVskE0qL0Nal8SvXWql73Nb1Xo6/XsX9i4PRoRZzfWQMGg9jaofUn01ql+9cZhPJAEYQeSaDrsww1vv2RQexvUviR661Rfemv0NTuA/ml6zw6gTwg7kEQjYbe9wvZvbe+2fXMTPbRi+y3bO6ppqBudn66aQ2+f7Z2Tls2x/aTt16vbKefYa6i3gZjGuzDNeKPPXdPTn/f9NbvtWZJ+J+k7ksYkvSDpsoj4TV8bacH2W5LOjojGP4Bh++8k/VHSfx2eWsv2v0t6PyLWV38oT4yIfx6Q3m7Vl5zGu6beWk0zfqUafO56Of15J5rYs58jaXdEvBERf5L0S0mrGuhj4EXEdknvH7F4laSR6v6IJv6z9F2L3gZCROyNiJeq+wckHZ5mvNHnrtBXXzQR9lMk/X7S4zEN1nzvIWmr7Rdtr2m6mSkMHZ5mq7o9ueF+jtR2Gu9+OmKa8YF57jqZ/rxbTYR9qqlpBun837kR8beS/kHSj6rDVUzPTyV9UxNzAO6VdHuTzVTTjD8k6bqI+EOTvUw2RV99ed6aCPuYpHmTHp8qabyBPqYUEePV7T5JD2viZccgeffwDLrV7b6G+/mziHg3Ig5GxCFJ96jB566aZvwhSb+IiM3V4safu6n66tfz1kTYX5C00Pbptr8uabWkRxro4wtsH1u9cSLbx0r6rgZvKupHJF1R3b9C0pYGe/mcQZnGu9U042r4uWt8+vOI6PuPpJWaeEf+/yT9SxM9tOjrLyX9T/XzWtO9SbpfE4d1n2riiOgqSd+QtE3S69XtnAHq7T5NTO39qiaCNbeh3s7TxEvDVyW9Uv2sbPq5K/TVl+eNj8sCSfAJOiAJwg4kQdiBJAg7kARhB5Ig7EAShB1I4v8B7GlapTyox0wAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.imshow(train_images[13][0], cmap='gray')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_labels[13]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basics of working with neural nets"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"simple_model = nn.Sequential(nn.Flatten(),\n",
" nn.Linear(in_features=784, out_features=10))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==================================================\n",
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 - [5, 784] - -\n",
"1_1 [784, 10] [5, 10] 7.85k 7.84k\n",
"--------------------------------------------------\n",
" Totals\n",
"Total params 7.85k\n",
"Trainable params 7.85k\n",
"Non-trainable params 0.0\n",
"Mult-Adds 7.84k\n",
"==================================================\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/georgehc/opt/anaconda3/lib/python3.9/site-packages/torchsummaryX/torchsummaryX.py:101: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" df_sum = df.sum()\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Kernel Shape</th>\n",
" <th>Output Shape</th>\n",
" <th>Params</th>\n",
" <th>Mult-Adds</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Layer</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0_0</th>\n",
" <td>-</td>\n",
" <td>[5, 784]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1_1</th>\n",
" <td>[784, 10]</td>\n",
" <td>[5, 10]</td>\n",
" <td>7850.0</td>\n",
" <td>7840.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 - [5, 784] NaN NaN\n",
"1_1 [784, 10] [5, 10] 7850.0 7840.0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"summary(simple_model, torch.zeros((5, 1, 28, 28))) # (batch size, num channels, height, width)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"proper_train_size = int(0.8 * len(train_dataset))\n",
"val_size = len(train_dataset) - proper_train_size\n",
"proper_train_dataset, val_dataset = torch.utils.data.random_split(train_dataset,\n",
" [proper_train_size,\n",
" val_size])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 [==================================================] 48000/48000\n",
" Train accuracy: 0.8913\n",
" Validation accuracy: 0.8873\n",
"Epoch 2 [==================================================] 48000/48000\n",
" Train accuracy: 0.9075\n",
" Validation accuracy: 0.9046\n",
"Epoch 3 [==================================================] 48000/48000\n",
" Train accuracy: 0.9125\n",
" Validation accuracy: 0.9107\n",
"Epoch 4 [==================================================] 48000/48000\n",
" Train accuracy: 0.9179\n",
" Validation accuracy: 0.9127\n",
"Epoch 5 [==================================================] 48000/48000\n",
" Train accuracy: 0.9194\n",
" Validation accuracy: 0.9147\n",
"Epoch 6 [==================================================] 48000/48000\n",
" Train accuracy: 0.9229\n",
" Validation accuracy: 0.9166\n",
"Epoch 7 [==================================================] 48000/48000\n",
" Train accuracy: 0.9254\n",
" Validation accuracy: 0.9178\n",
"Epoch 8 [==================================================] 48000/48000\n",
" Train accuracy: 0.9266\n",
" Validation accuracy: 0.9188\n",
"Epoch 9 [==================================================] 48000/48000\n",
" Train accuracy: 0.9278\n",
" Validation accuracy: 0.9197\n",
"Epoch 10 [==================================================] 48000/48000\n",
" Train accuracy: 0.9278\n",
" Validation accuracy: 0.9187\n"
]
}
],
"source": [
"num_epochs = 10 # during optimization, how many times we look at training data\n",
"batch_size = 128 # during optimization, how many training data to use at each step\n",
"learning_rate = 0.001 # during optimization, how much we nudge our solution at each step\n",
"\n",
"train_accuracies, val_accuracies = \\\n",
" UDA_pytorch_classifier_fit(simple_model,\n",
" torch.optim.Adam(simple_model.parameters(),\n",
" lr=learning_rate),\n",
" nn.CrossEntropyLoss(), # includes softmax\n",
" proper_train_dataset, val_dataset,\n",
" num_epochs, batch_size)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"UDA_plot_train_val_accuracy_vs_epoch(train_accuracies, val_accuracies)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"====================================================\n",
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 - [1, 784] - -\n",
"1_1 [784, 512] [1, 512] 401.92k 401.408k\n",
"2_2 - [1, 512] - -\n",
"3_3 [512, 10] [1, 10] 5.13k 5.12k\n",
"----------------------------------------------------\n",
" Totals\n",
"Total params 407.05k\n",
"Trainable params 407.05k\n",
"Non-trainable params 0.0\n",
"Mult-Adds 406.528k\n",
"====================================================\n",
"Epoch 1 [= ] 1024/48000"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/georgehc/opt/anaconda3/lib/python3.9/site-packages/torchsummaryX/torchsummaryX.py:101: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" df_sum = df.sum()\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 [==================================================] 48000/48000\n",
" Train accuracy: 0.9521\n",
" Validation accuracy: 0.9438\n",
"Epoch 2 [==================================================] 48000/48000\n",
" Train accuracy: 0.9719\n",
" Validation accuracy: 0.9650\n",
"Epoch 3 [==================================================] 48000/48000\n",
" Train accuracy: 0.9805\n",
" Validation accuracy: 0.9706\n",
"Epoch 4 [==================================================] 48000/48000\n",
" Train accuracy: 0.9864\n",
" Validation accuracy: 0.9748\n",
"Epoch 5 [==================================================] 48000/48000\n",
" Train accuracy: 0.9902\n",
" Validation accuracy: 0.9765\n",
"Epoch 6 [==================================================] 48000/48000\n",
" Train accuracy: 0.9921\n",
" Validation accuracy: 0.9754\n",
"Epoch 7 [==================================================] 48000/48000\n",
" Train accuracy: 0.9943\n",
" Validation accuracy: 0.9779\n",
"Epoch 8 [==================================================] 48000/48000\n",
" Train accuracy: 0.9943\n",
" Validation accuracy: 0.9762\n",
"Epoch 9 [==================================================] 48000/48000\n",
" Train accuracy: 0.9959\n",
" Validation accuracy: 0.9771\n",
"Epoch 10 [==================================================] 48000/48000\n",
" Train accuracy: 0.9961\n",
" Validation accuracy: 0.9775\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"deeper_model = nn.Sequential(nn.Flatten(),\n",
" nn.Linear(in_features=784, out_features=512),\n",
" nn.ReLU(),\n",
" nn.Linear(in_features=512, out_features=10))\n",
"summary(deeper_model, torch.zeros((1, 1, 28, 28))) # (batch size, num channels, height, width)\n",
"\n",
"train_accuracies, val_accuracies = \\\n",
" UDA_pytorch_classifier_fit(deeper_model,\n",
" torch.optim.Adam(deeper_model.parameters(),\n",
" lr=learning_rate),\n",
" nn.CrossEntropyLoss(), # includes softmax\n",
" proper_train_dataset, val_dataset,\n",
" num_epochs, batch_size)\n",
"\n",
"UDA_plot_train_val_accuracy_vs_epoch(train_accuracies, val_accuracies)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convnets"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=========================================================\n",
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 [1, 32, 3, 3] [1, 32, 26, 26] 320.0 194.688k\n",
"1_1 - [1, 32, 26, 26] - -\n",
"2_2 - [1, 32, 13, 13] - -\n",
"3_3 - [1, 5408] - -\n",
"4_4 [5408, 10] [1, 10] 54.09k 54.08k\n",
"---------------------------------------------------------\n",
" Totals\n",
"Total params 54.41k\n",
"Trainable params 54.41k\n",
"Non-trainable params 0.0\n",
"Mult-Adds 248.768k\n",
"=========================================================\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/georgehc/opt/anaconda3/lib/python3.9/site-packages/torchsummaryX/torchsummaryX.py:101: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" df_sum = df.sum()\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Kernel Shape</th>\n",
" <th>Output Shape</th>\n",
" <th>Params</th>\n",
" <th>Mult-Adds</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Layer</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0_0</th>\n",
" <td>[1, 32, 3, 3]</td>\n",
" <td>[1, 32, 26, 26]</td>\n",
" <td>320.0</td>\n",
" <td>194688.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1_1</th>\n",
" <td>-</td>\n",
" <td>[1, 32, 26, 26]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2_2</th>\n",
" <td>-</td>\n",
" <td>[1, 32, 13, 13]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3_3</th>\n",
" <td>-</td>\n",
" <td>[1, 5408]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4_4</th>\n",
" <td>[5408, 10]</td>\n",
" <td>[1, 10]</td>\n",
" <td>54090.0</td>\n",
" <td>54080.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 [1, 32, 3, 3] [1, 32, 26, 26] 320.0 194688.0\n",
"1_1 - [1, 32, 26, 26] NaN NaN\n",
"2_2 - [1, 32, 13, 13] NaN NaN\n",
"3_3 - [1, 5408] NaN NaN\n",
"4_4 [5408, 10] [1, 10] 54090.0 54080.0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"simple_convnet = nn.Sequential(nn.Conv2d(1, 32, 3),\n",
" nn.ReLU(),\n",
" nn.MaxPool2d(2),\n",
" nn.Flatten(),\n",
" nn.Linear(in_features=5408, out_features=10))\n",
"summary(simple_convnet, torch.zeros((1, 1, 28, 28))) # (batch size, num channels, height, width)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 [==================================================] 48000/48000\n",
" Train accuracy: 0.9540\n",
" Validation accuracy: 0.9507\n",
"Epoch 2 [==================================================] 48000/48000\n",
" Train accuracy: 0.9722\n",
" Validation accuracy: 0.9683\n",
"Epoch 3 [==================================================] 48000/48000\n",
" Train accuracy: 0.9797\n",
" Validation accuracy: 0.9758\n",
"Epoch 4 [==================================================] 48000/48000\n",
" Train accuracy: 0.9847\n",
" Validation accuracy: 0.9802\n",
"Epoch 5 [==================================================] 48000/48000\n",
" Train accuracy: 0.9872\n",
" Validation accuracy: 0.9816\n",
"Epoch 6 [==================================================] 48000/48000\n",
" Train accuracy: 0.9880\n",
" Validation accuracy: 0.9816\n",
"Epoch 7 [==================================================] 48000/48000\n",
" Train accuracy: 0.9896\n",
" Validation accuracy: 0.9822\n",
"Epoch 8 [==================================================] 48000/48000\n",
" Train accuracy: 0.9911\n",
" Validation accuracy: 0.9828\n",
"Epoch 9 [==================================================] 48000/48000\n",
" Train accuracy: 0.9921\n",
" Validation accuracy: 0.9825\n",
"Epoch 10 [==================================================] 48000/48000\n",
" Train accuracy: 0.9918\n",
" Validation accuracy: 0.9822\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"train_accuracies, val_accuracies = \\\n",
" UDA_pytorch_classifier_fit(simple_convnet,\n",
" torch.optim.Adam(simple_convnet.parameters(),\n",
" lr=learning_rate),\n",
" nn.CrossEntropyLoss(), # includes softmax\n",
" proper_train_dataset, val_dataset,\n",
" num_epochs, batch_size)\n",
"\n",
"UDA_plot_train_val_accuracy_vs_epoch(train_accuracies, val_accuracies)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=========================================================\n",
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 [1, 32, 3, 3] [1, 32, 26, 26] 320.0 194.688k\n",
"1_1 - [1, 32, 26, 26] - -\n",
"2_2 - [1, 32, 13, 13] - -\n",
"3_3 [32, 16, 3, 3] [1, 16, 11, 11] 4.624k 557.568k\n",
"4_4 - [1, 16, 11, 11] - -\n",
"5_5 - [1, 16, 5, 5] - -\n",
"6_6 - [1, 400] - -\n",
"7_7 [400, 10] [1, 10] 4.01k 4.0k\n",
"---------------------------------------------------------\n",
" Totals\n",
"Total params 8.954k\n",
"Trainable params 8.954k\n",
"Non-trainable params 0.0\n",
"Mult-Adds 756.256k\n",
"=========================================================\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/georgehc/opt/anaconda3/lib/python3.9/site-packages/torchsummaryX/torchsummaryX.py:101: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" df_sum = df.sum()\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Kernel Shape</th>\n",
" <th>Output Shape</th>\n",
" <th>Params</th>\n",
" <th>Mult-Adds</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Layer</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0_0</th>\n",
" <td>[1, 32, 3, 3]</td>\n",
" <td>[1, 32, 26, 26]</td>\n",
" <td>320.0</td>\n",
" <td>194688.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1_1</th>\n",
" <td>-</td>\n",
" <td>[1, 32, 26, 26]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2_2</th>\n",
" <td>-</td>\n",
" <td>[1, 32, 13, 13]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3_3</th>\n",
" <td>[32, 16, 3, 3]</td>\n",
" <td>[1, 16, 11, 11]</td>\n",
" <td>4624.0</td>\n",
" <td>557568.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4_4</th>\n",
" <td>-</td>\n",
" <td>[1, 16, 11, 11]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5_5</th>\n",
" <td>-</td>\n",
" <td>[1, 16, 5, 5]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6_6</th>\n",
" <td>-</td>\n",
" <td>[1, 400]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7_7</th>\n",
" <td>[400, 10]</td>\n",
" <td>[1, 10]</td>\n",
" <td>4010.0</td>\n",
" <td>4000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Kernel Shape Output Shape Params Mult-Adds\n",
"Layer \n",
"0_0 [1, 32, 3, 3] [1, 32, 26, 26] 320.0 194688.0\n",
"1_1 - [1, 32, 26, 26] NaN NaN\n",
"2_2 - [1, 32, 13, 13] NaN NaN\n",
"3_3 [32, 16, 3, 3] [1, 16, 11, 11] 4624.0 557568.0\n",
"4_4 - [1, 16, 11, 11] NaN NaN\n",
"5_5 - [1, 16, 5, 5] NaN NaN\n",
"6_6 - [1, 400] NaN NaN\n",
"7_7 [400, 10] [1, 10] 4010.0 4000.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"deeper_convnet = nn.Sequential(nn.Conv2d(1, 32, 3),\n",
" nn.ReLU(),\n",
" nn.MaxPool2d(2),\n",
" nn.Conv2d(32, 16, 3),\n",
" nn.ReLU(),\n",
" nn.MaxPool2d(2),\n",
" nn.Flatten(),\n",
" nn.Linear(in_features=400, out_features=10))\n",
"summary(deeper_convnet, torch.zeros((1, 1, 28, 28))) # (batch size, num channels, height, width)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 [==================================================] 48000/48000\n",
" Train accuracy: 0.9564\n",
" Validation accuracy: 0.9556\n",
"Epoch 2 [==================================================] 48000/48000\n",
" Train accuracy: 0.9711\n",
" Validation accuracy: 0.9712\n",
"Epoch 3 [==================================================] 48000/48000\n",
" Train accuracy: 0.9771\n",
" Validation accuracy: 0.9754\n",
"Epoch 4 [==================================================] 48000/48000\n",
" Train accuracy: 0.9810\n",
" Validation accuracy: 0.9802\n",
"Epoch 5 [==================================================] 48000/48000\n",
" Train accuracy: 0.9829\n",
" Validation accuracy: 0.9820\n",
"Epoch 6 [==================================================] 48000/48000\n",
" Train accuracy: 0.9843\n",
" Validation accuracy: 0.9822\n",
"Epoch 7 [==================================================] 48000/48000\n",
" Train accuracy: 0.9841\n",
" Validation accuracy: 0.9827\n",
"Epoch 8 [==================================================] 48000/48000\n",
" Train accuracy: 0.9873\n",
" Validation accuracy: 0.9847\n",
"Epoch 9 [==================================================] 48000/48000\n",
" Train accuracy: 0.9880\n",
" Validation accuracy: 0.9852\n",
"Epoch 10 [==================================================] 48000/48000\n",
" Train accuracy: 0.9868\n",
" Validation accuracy: 0.9843\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"train_accuracies, val_accuracies = \\\n",
" UDA_pytorch_classifier_fit(deeper_convnet,\n",
" torch.optim.Adam(deeper_convnet.parameters(),\n",
" lr=learning_rate),\n",
" nn.CrossEntropyLoss(), # includes softmax\n",
" proper_train_dataset, val_dataset,\n",
" num_epochs, batch_size)\n",
"\n",
"UDA_plot_train_val_accuracy_vs_epoch(train_accuracies, val_accuracies)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finally evaluate on test data"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"test_dataset = torchvision.datasets.MNIST(root='data/',\n",
" train=False,\n",
" transform=transforms.ToTensor(),\n",
" download=True)\n",
"test_images = torch.tensor(np.array([image.numpy() for image, label in test_dataset]))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.9241\n"
]
}
],
"source": [
"predicted_test_labels = UDA_pytorch_classifier_predict(simple_model, test_images)\n",
"print('Test accuracy:', UDA_compute_accuracy(predicted_test_labels, test_dataset.targets))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.9767\n"
]
}
],
"source": [
"predicted_test_labels = UDA_pytorch_classifier_predict(deeper_model, test_images)\n",
"print('Test accuracy:', UDA_compute_accuracy(predicted_test_labels, test_dataset.targets))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.9816\n"
]
}
],
"source": [
"predicted_test_labels = UDA_pytorch_classifier_predict(simple_convnet, test_images)\n",
"print('Test accuracy:', UDA_compute_accuracy(predicted_test_labels, test_dataset.targets))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test accuracy: 0.9838\n"
]
}
],
"source": [
"predicted_test_labels = UDA_pytorch_classifier_predict(deeper_convnet, test_images)\n",
"print('Test accuracy:', UDA_compute_accuracy(predicted_test_labels, test_dataset.targets))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"""
Helper code for Carnegie Mellon University's Unstructured Data Analytics course
Author: George H. Chen (georgechen [at symbol] cmu.edu)
I wrote this code for my class to make teaching how to use PyTorch as simple as
using Keras. Note that this code has only been tested using categorical cross
entropy loss.
"""
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch
import torch.nn as nn
from matplotlib.ticker import MaxNLocator
from torchnlp.encoders.text import stack_and_pad_tensors
from torchnlp.samplers import BucketBatchSampler
from torchnlp.utils import collate_tensors
def UDA_pytorch_classifier_fit(model, optimizer, loss,
                               proper_train_dataset, val_dataset,
                               num_epochs, batch_size, device=None,
                               sequence=False,
                               save_epoch_checkpoint_prefix=None):
    """
    Trains a neural net classifier `model` using an `optimizer` such as Adam or
    stochastic gradient descent. We specifically minimize the given `loss`
    using the data given by `proper_train_dataset` using the number of epochs
    given by `num_epochs` and a batch size given by `batch_size`.

    Accuracies on the (proper) training data (`proper_train_dataset`) and
    validation data (`val_dataset`) are computed at the end of each epoch;
    `val_dataset` can be set to None if you don't want to use a validation set.

    The function returns the pair `(train_accuracies, val_accuracies)`, each a
    1D NumPy array with one entry per epoch. If `val_dataset` is None, the
    validation accuracies are never computed and the second array stays all
    zeros.

    You can manually set which device (CPU or GPU) to use with the optional
    `device` argument (e.g., setting `device=torch.device('cpu')` or
    `device=torch.device('cuda')`). By default, the code tries to use a GPU if
    it is available.

    The boolean argument `sequence` says whether we are looking at time series
    data (set this True for working with recurrent neural nets).

    Lastly, if `save_epoch_checkpoint_prefix` is a string prefix, then each
    epoch's model is saved to a filename with format
    '<save_epoch_checkpoint_prefix>_epoch<epoch number>.pt'.

    An `Exception` is raised if `loss` is not a cross entropy loss, since that
    is the only loss this helper supports.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    if loss._get_name() != 'CrossEntropyLoss':
        raise Exception('Unsupported loss: ' + loss._get_name())

    # Only the training batches are needed here: the per-epoch accuracy
    # computations below do their own batching of the datasets.
    if not sequence:
        # PyTorch uses DataLoader to load data in batches
        proper_train_loader = \
            torch.utils.data.DataLoader(dataset=proper_train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True)
    else:
        proper_train_loader = \
            UDA_get_batches_sequence(proper_train_dataset,
                                     batch_size,
                                     shuffle=True,
                                     device=device)

    proper_train_size = len(proper_train_dataset)
    train_accuracies = np.zeros(num_epochs)
    val_accuracies = np.zeros(num_epochs)

    def _draw_progress_bar(epoch_number, bar, num_examples_done):
        # '\r' moves back to the start of the line so the bar redraws in place
        sys.stdout.write('\r')
        sys.stdout.write("Epoch %d [%-50s] %d/%d"
                         % (epoch_number, bar,
                            num_examples_done, proper_train_size))
        sys.stdout.flush()

    for epoch_idx in range(num_epochs):
        # make sure layers such as dropout behave as they should in training,
        # even if the caller handed us a model that was in evaluation mode
        model.train()

        # go through training data
        num_training_examples_so_far = 0
        for batch_features, batch_labels in proper_train_loader:
            # make sure the data are stored on the right device
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)

            # make predictions for current batch and compute loss
            batch_outputs = model(batch_features)
            batch_loss = loss(batch_outputs, batch_labels)

            # update model parameters
            optimizer.zero_grad()  # reset which direction optimizer is going
            batch_loss.backward()  # compute new direction optimizer should go
            optimizer.step()  # move the optimizer

            # draw fancy progress bar
            num_training_examples_so_far += batch_features.shape[0]
            _draw_progress_bar(epoch_idx + 1,
                               '=' * int(num_training_examples_so_far
                                         / proper_train_size * 50),
                               num_training_examples_so_far)

        # draw fancy progress bar at 100%
        _draw_progress_bar(epoch_idx + 1, '=' * 50,
                           num_training_examples_so_far)
        sys.stdout.write('\n')
        sys.stdout.flush()

        # compute proper training and validation set raw accuracies
        model.eval()  # turn on evaluation mode
        train_accuracy = \
            UDA_pytorch_classifier_evaluate(model,
                                            proper_train_dataset,
                                            device=device,
                                            batch_size=batch_size,
                                            sequence=sequence)
        print('  Train accuracy: %.4f' % train_accuracy, flush=True)
        train_accuracies[epoch_idx] = train_accuracy

        if val_dataset is not None:
            val_accuracy = \
                UDA_pytorch_classifier_evaluate(model,
                                                val_dataset,
                                                device=device,
                                                batch_size=batch_size,
                                                sequence=sequence)
            print('  Validation accuracy: %.4f' % val_accuracy, flush=True)
            val_accuracies[epoch_idx] = val_accuracy
        model.train()  # turn off evaluation mode

        if save_epoch_checkpoint_prefix is not None:
            torch.save(model.state_dict(),
                       '%s_epoch%d.pt'
                       % (save_epoch_checkpoint_prefix, epoch_idx + 1))

    return train_accuracies, val_accuracies
def UDA_pytorch_model_transform(model, inputs, device=None, batch_size=128,
                                sequence=False):
    """
    Given a neural net `model`, evaluate the model given `inputs`, which should
    *not* be already batched. This helper function automatically batches the
    data, feeds each batch through the neural net, and then unbatches the
    outputs. The outputs are stored as a PyTorch tensor (the per-batch outputs
    concatenated along axis 0), so `inputs` should contain at least one
    example.

    The model is temporarily switched to evaluation mode while the outputs are
    computed (so that, e.g., dropout is not applied), and every submodule's
    original training/evaluation mode is restored before returning.

    You can manually set which device (CPU or GPU) to use with the optional
    `device` argument (e.g., setting `device=torch.device('cpu')` or
    `device=torch.device('cuda')`). By default, the code tries to use a GPU if
    it is available.

    You can also manually set `batch_size`; this is less critical than in
    training since we are, at this point, just evaluating the model without
    updating its parameters.

    Lastly, the boolean argument `sequence` says whether we are looking at time
    series data (set this True for working with recurrent neural nets).
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # batch the inputs
    if not sequence:
        feature_loader = torch.utils.data.DataLoader(dataset=inputs,
                                                     batch_size=batch_size,
                                                     shuffle=False)
    else:
        feature_loader = \
            UDA_get_batches_from_encoded_text(inputs,
                                              None,
                                              batch_size,
                                              shuffle=False,
                                              device=device)

    # remember each submodule's mode so that a model with a mix of training
    # and evaluation mode submodules is handed back exactly as it came in
    original_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        with torch.no_grad():
            outputs = [model(batch_features.to(device))
                       for batch_features in feature_loader]
    finally:
        for module, was_training in original_modes:
            module.training = was_training

    return torch.cat(outputs, 0)
def UDA_pytorch_classifier_predict(model, inputs, device=None, batch_size=128,
                                   sequence=False):
    """
    Predicts labels for `inputs` using the neural net classifier `model`. The
    `inputs` should *not* be batched already: batching and running the model
    are delegated to `UDA_pytorch_model_transform()`, and the predicted label
    of each input is the argmax of its row of model outputs (taken along
    axis 1). The predicted labels are returned as a flattened PyTorch tensor.

    The optional `device` argument picks the device (CPU or GPU) to use, e.g.,
    `device=torch.device('cpu')` or `device=torch.device('cuda')`; it is
    passed along unchanged, as are `batch_size` and `sequence`.

    The `batch_size` matters less here than in training since the model
    parameters are not being updated.

    Set the boolean argument `sequence` to True when working with time series
    data (i.e., with recurrent neural nets).
    """
    raw_outputs = UDA_pytorch_model_transform(model, inputs,
                                              device=device,
                                              batch_size=batch_size,
                                              sequence=sequence)
    with torch.no_grad():
        highest_scoring_classes = raw_outputs.argmax(axis=1)
        return highest_scoring_classes.view(-1)
def UDA_pytorch_classifier_evaluate(model, dataset, device=None,
                                    batch_size=128, sequence=False):
    """
    Evaluate the raw accuracy of a neural net classifier `model` for a
    `dataset`, which should be a list of pairs of the format (input, label).
    The returned accuracy is the number of correctly predicted labels divided
    by `len(dataset)`.

    The model is temporarily switched to evaluation mode while predictions are
    made (so that, e.g., dropout is not applied), and every submodule's
    original training/evaluation mode is restored before returning.

    You can manually set which device (CPU or GPU) to use with the optional
    `device` argument (e.g., setting `device=torch.device('cpu')` or
    `device=torch.device('cuda')`). By default, the code tries to use a GPU if
    it is available.

    You can also manually set `batch_size`; this is less critical than in
    training since we are, at this point, just evaluating the model without
    updating its parameters.

    Lastly, the boolean argument `sequence` says whether we are looking at time
    series data (set this True for working with recurrent neural nets).
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    if not sequence:
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=False)
    else:
        loader = UDA_get_batches_sequence(dataset,
                                          batch_size,
                                          shuffle=False,
                                          device=device)

    # remember each submodule's mode so that a model with a mix of training
    # and evaluation mode submodules is handed back exactly as it came in
    original_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    num_correct = 0.
    try:
        with torch.no_grad():
            for batch_features, batch_labels in loader:
                batch_outputs = model(batch_features.to(device))
                batch_predicted_labels = batch_outputs.argmax(axis=1).view(-1)
                if isinstance(batch_labels, np.ndarray):
                    # NumPy labels live on the CPU, so compare there
                    num_correct += \
                        (batch_predicted_labels.cpu().numpy()
                         == batch_labels).sum()
                else:
                    num_correct += \
                        (batch_predicted_labels
                         == batch_labels.to(device).view(-1)).sum().item()
    finally:
        for module, was_training in original_modes:
            module.training = was_training

    return num_correct / len(dataset)
def UDA_plot_train_val_accuracy_vs_epoch(train_accuracies, val_accuracies):
    """
    Plots the (proper) training and validation accuracies against the epoch
    number (starting from 1) in a new figure. Both `train_accuracies` and
    `val_accuracies` should have one entry per epoch and so should be of the
    same length.
    """
    axes = plt.figure().gca()
    epoch_numbers = np.arange(1, len(train_accuracies) + 1)

    curves = ((train_accuracies, '-o', 'Training'),
              (val_accuracies, '-+', 'Validation'))
    for accuracies, line_style, legend_label in curves:
        plt.plot(epoch_numbers, accuracies, line_style, label=legend_label)

    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')

    # epochs are whole numbers, so only put x-axis ticks at integers
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
def UDA_compute_accuracy(labels1, labels2):
    """
    Computes the raw accuracy of two label sequences `labels1` and `labels2`
    agreeing. This helper function coerces both label sequences to be on the
    CPU, flattened, and stored as 1D NumPy arrays before computing the average
    agreement.

    Each label sequence can be a PyTorch tensor (including tensor subclasses
    such as `nn.Parameter`, and tensors that track gradients or are not
    contiguous in memory), a NumPy array, or anything that `np.asarray()`
    accepts, such as a list.
    """
    def _as_flat_numpy(labels):
        # coerce one label sequence into a flattened 1D NumPy array on the CPU
        if isinstance(labels, torch.Tensor):
            # detach first: NumPy conversion fails for tensors tracking
            # gradients; reshape (unlike view) also handles tensors that are
            # not contiguous in memory
            return labels.detach().reshape(-1).cpu().numpy()
        return np.asarray(labels).flatten()

    return np.mean(_as_flat_numpy(labels1) == _as_flat_numpy(labels2))
class UDA_LSTMforSequential(nn.Module):
    """
    Wraps an LSTM so that it can be used as a layer inside nn.Sequential():
    the forward pass returns only an output tensor rather than the
    (outputs, state) pair that nn.LSTM returns.

    If `return_sequences` is True, the outputs at all time steps are returned;
    otherwise only the last time step's output is returned.
    """

    def __init__(self, input_size, hidden_size, return_sequences=False):
        super().__init__()
        self.return_sequences = return_sequences
        # batch_first=True means axis 0 indexes the data points in the batch
        self.model = nn.LSTM(input_size=input_size,
                             hidden_size=hidden_size,
                             batch_first=True)

    def forward(self, x):
        # x should be of shape (batch size, sequence length, feature dimension)
        all_time_step_outputs = self.model(x)[0]  # discard the LSTM state
        if not self.return_sequences:
            # keep only the last time step's output
            return all_time_step_outputs[:, -1, :]
        return all_time_step_outputs
def UDA_get_batches_sequence(dataset, batch_size, shuffle=True, device=None):
    """
    Convenience wrapper around `UDA_get_batches_from_encoded_text()` for when
    the data come as a single `dataset` of (encoded text, label) pairs rather
    than as separate lists of encoded texts and labels. The pairs are split
    into those two lists, which are then handed to
    `UDA_get_batches_from_encoded_text()` along with `batch_size`, `shuffle`,
    and `device`; see that function's documentation for what these arguments
    mean and for what gets returned.
    """
    # go through the dataset only once, then split the pairs apart
    pairs = [(text, label) for text, label in dataset]
    text_encoded = [text for text, _ in pairs]
    labels = [label for _, label in pairs]
    return UDA_get_batches_from_encoded_text(text_encoded, labels,
                                             batch_size,
                                             shuffle=shuffle,
                                             device=device)
def UDA_get_batches_from_encoded_text(text_encoded, labels, batch_size,
                                      shuffle=True, device=None):
    """
    Splits sequence data into batches of (at most) `batch_size` sequences.
    Sequences in the same batch can have unequal lengths, so each batch is
    padded into a single tensor that can be fed to the neural net. The input
    text `text_encoded` should already be encoded, i.e., each text sequence
    should consist of word indices into a vocabulary.

    The i-th element of `text_encoded` has the label given by the i-th entry
    of `labels`. If `labels` is None, the function returns a list of padded
    feature tensors, one per batch. Otherwise, it returns a list of pairs
    (padded feature tensor, label tensor), where the labels are stored as a
    flattened PyTorch tensor of type long.

    If `shuffle` is set to True, batches are formed using a bucket batch
    sampler keyed on sequence length, which is meant to reduce how much
    padding is needed in different batches while injecting some randomness.
    Otherwise, batches are formed from consecutive sequences in order.

    The optional `device` argument picks the device (CPU or GPU) that the
    batches are stored on, e.g., `device=torch.device('cpu')` or
    `device=torch.device('cuda')`. By default, the code tries to use a GPU if
    it is available.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # work out which sequence indices go into which batch
    if shuffle:
        # use bucket sampling strategy to reduce the amount of padding needed
        index_batches = BucketBatchSampler(
            torch.utils.data.sampler.SequentialSampler(text_encoded),
            batch_size=batch_size, drop_last=False,
            sort_key=lambda i: text_encoded[i].shape[0])
    else:
        index_batches = \
            torch.utils.data.DataLoader(
                dataset=list(range(len(text_encoded))),
                batch_size=batch_size,
                shuffle=False)

    def pad_and_stack(index_batch):
        # gather one batch's sequences and pad them into a single tensor
        sequences = [text_encoded[i] for i in index_batch]
        stacked = collate_tensors(sequences,
                                  stack_tensors=stack_and_pad_tensors)
        return stacked.tensor.to(device)

    batches = []
    for index_batch in index_batches:
        batch_features = pad_and_stack(index_batch)
        if labels is None:
            batches.append(batch_features)
            continue
        batch_labels = torch.tensor([labels[i] for i in index_batch],
                                    dtype=torch.long)
        batches.append((batch_features, batch_labels.to(device).view(-1)))
    return batches
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment