Skip to content

Instantly share code, notes, and snippets.

@CookieBox26
Last active August 17, 2022 14:34
Show Gist options
  • Save CookieBox26/61451c79d7d7e54e180df47183cd43f1 to your computer and use it in GitHub Desktop.
Save CookieBox26/61451c79d7d7e54e180df47183cd43f1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "e71f743f-5d4b-44da-8282-2fbc5d8bc838",
"metadata": {},
"source": [
"## LeNet 編\n",
"\n",
"#### 参考文献\n",
"- [1] [Probabilistic Machine Learning: An Introduction](https://probml.github.io/pml-book/book1.html) (テキスト) \n",
"- [2] [Yann LeCun, Leon Bottou, Yoshua Bengio, Patrick Haffner. Gradient-Based Learning Applied to Document Recognition. in proc of the IEEE, 1998.](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) (原論文)\n",
"    - 7ページに LeNet-5 の節がある。原論文通りの実装は複雑なので今回の実装は [1] にしたがうが、どの活性化関数を使うべきかがわからない(以下の実装では ReLU を用いる)。\n",
"\n",
"#### LeNet とは [1][2]\n",
"最初期の畳み込みニューラルネットであり、1998 年に Yann LeCun らによって提案された。  \n",
"MNIST を訓練すると1エポックでテストデータ正解率が 98.8% になる。 \n",
"訓練を続けるともはやラベル誤差というレベルまで正解率が上昇する。 \n",
"原論文はそもそも文章認識に取り組んでおり、LeNet はシステム中の文字認識のパーツとなっている。"
]
},
{
"cell_type": "markdown",
"id": "5633ca2b-2e4c-4b2d-8ac1-88a14e5eb5a6",
"metadata": {},
"source": [
"#### そういうわけで MNIST を用意する\n",
"訓練データの最初の 5 からなんか雑である。"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "34952295-671a-4680-b726-9b550424faa2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"◆ データのサイズ(データ数、縦方向ピクセル数、横方向ピクセル数)\n",
"訓練データ torch.Size([60000, 28, 28])\n",
"テストデータ torch.Size([10000, 28, 28])\n",
"\n",
"◆ 訓練データの最初の4枚を描画\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1200x300 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import warnings\n",
"warnings.simplefilter('ignore')\n",
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"from pylab import rcParams\n",
"rcParams['figure.figsize'] = 12, 3\n",
"rcParams['font.size'] = 14\n",
"rcParams['font.family']='Ume Hy Gothic O5'\n",
"\n",
"# Per-image preprocessing pipeline, applied in this order:\n",
"#   1. ToTensor: convert to a Tensor (this also rescales [0, 255] to [0, 1]).\n",
"#   2. Normalize: subtract 0.5, then divide by 0.5 (rescales [0, 1] to [-1, 1]).\n",
"transform = transforms.Compose([\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize(mean=(0.5,), std=(0.5,)),\n",
"])\n",
"\n",
"# The first execution triggers a download of the dataset into `root`.\n",
"root = '../data'\n",
"batch_size = 4\n",
"trainset = torchvision.datasets.MNIST(\n",
"    root=root, train=True, download=True, transform=transform)\n",
"trainloader = torch.utils.data.DataLoader(\n",
"    trainset, batch_size=batch_size, shuffle=True, num_workers=2)\n",
"testset = torchvision.datasets.MNIST(\n",
"    root=root, train=False, download=True, transform=transform)\n",
"testloader = torch.utils.data.DataLoader(\n",
"    testset, batch_size=batch_size, shuffle=False, num_workers=2)\n",
"\n",
"print('◆ データのサイズ(データ数、縦方向ピクセル数、横方向ピクセル数)')\n",
"print('訓練データ', trainset.data.shape)\n",
"print('テストデータ', testset.data.shape)\n",
"\n",
"print('\\n◆ 訓練データの最初の4枚を描画')\n",
"# Draw the first few training images side by side, one subplot per image.\n",
"# int() turns each 0-dim label tensor into a plain number so the title reads\n",
"# e.g. '5' rather than the tensor repr 'tensor(5)'.\n",
"num_shown = 4\n",
"fig, axes = plt.subplots(1, num_shown)\n",
"for ax, image, label in zip(axes, trainset.data[:num_shown], trainset.targets[:num_shown]):\n",
"    ax.imshow(image)\n",
"    ax.set_title(int(label))\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "890e83a4-c2a6-42a2-9c52-2823a295c9f2",
"metadata": {},
"source": [
"#### テキスト同様のモデルを実現するのに必要な層を特定する"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "947a4447-cf6f-4d1c-99a0-d99e91f1479d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"◆ テキスト474ページの図14.15と図14.16に合うようなモデルを突き止める\n",
"※ バッチサイズ4, 1チャネル, 28x28ピクセルのダミーデータを流す。\n",
"入力直後\n",
"torch.Size([4, 1, 28, 28])\n",
"1番目の畳み込み後\n",
"torch.Size([4, 6, 28, 28])\n",
"1番目のプール後\n",
"torch.Size([4, 6, 14, 14])\n",
"2番目の畳み込み後\n",
"torch.Size([4, 16, 10, 10])\n",
"2番目のプール後\n",
"torch.Size([4, 16, 5, 5])\n",
"リシェイプした後\n",
"torch.Size([4, 400])\n",
"1番目の全結合した後\n",
"torch.Size([4, 120])\n",
"2番目の全結合した後\n",
"torch.Size([4, 84])\n",
"3番目の全結合した後\n",
"torch.Size([4, 10])\n"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"print('◆ テキスト474ページの図14.15と図14.16に合うようなモデルを突き止める')\n",
"print(f'※ バッチサイズ{batch_size}, 1チャネル, 28x28ピクセルのダミーデータを流す。')\n",
"\n",
"# Push a random dummy batch through freshly constructed layers and print the\n",
"# tensor shape after every stage.\n",
"x = torch.randn(batch_size, 1, 28, 28)\n",
"print(f'入力直後\\n{x.shape}')\n",
"\n",
"# Convolution / pooling stages.\n",
"x = F.relu(nn.Conv2d(1, 6, 5, padding=2)(x))\n",
"print(f'1番目の畳み込み後\\n{x.shape}')\n",
"x = F.avg_pool2d(x, 2)\n",
"print(f'1番目のプール後\\n{x.shape}')\n",
"x = F.relu(nn.Conv2d(6, 16, 5)(x))\n",
"print(f'2番目の畳み込み後\\n{x.shape}')\n",
"x = F.avg_pool2d(x, 2)\n",
"print(f'2番目のプール後\\n{x.shape}')\n",
"\n",
"# Flatten everything except the batch dimension; the element count of a single\n",
"# sample is the number of input features of the first fully connected layer.\n",
"num_features = x[0].numel()\n",
"x = x.view(-1, num_features)\n",
"print(f'リシェイプした後\\n{x.shape}')\n",
"\n",
"# Fully connected stages.\n",
"x = F.relu(nn.Linear(num_features, 120)(x))\n",
"print(f'1番目の全結合した後\\n{x.shape}')\n",
"x = F.relu(nn.Linear(120, 84)(x))\n",
"print(f'2番目の全結合した後\\n{x.shape}')\n",
"x = nn.Linear(84, 10)(x)\n",
"print(f'3番目の全結合した後\\n{x.shape}')"
]
},
{
"cell_type": "markdown",
"id": "25ec0a82-2747-452b-ab94-16b0dfca4454",
"metadata": {},
"source": [
"#### モデルを実装する"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5bd5eee6-98e8-4439-b31f-bcb3061fa466",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"◆ 今回学習するネットワーク\n",
"LeNet5_MNIST(\n",
" (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
" (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
" (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
" (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
" (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
")\n",
"\n",
"◆ 今回学習するパラメータたち\n",
"conv1.weight torch.Size([6, 1, 5, 5])\n",
"conv1.bias torch.Size([6])\n",
"conv2.weight torch.Size([16, 6, 5, 5])\n",
"conv2.bias torch.Size([16])\n",
"fc1.weight torch.Size([120, 400])\n",
"fc1.bias torch.Size([120])\n",
"fc2.weight torch.Size([84, 120])\n",
"fc2.bias torch.Size([84])\n",
"fc3.weight torch.Size([10, 84])\n",
"fc3.bias torch.Size([10])\n"
]
}
],
"source": [
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"class LeNet5_MNIST(nn.Module):\n",
"\n",
" def __init__(self):\n",
" super(LeNet5_MNIST, self).__init__()\n",
" self.conv1 = nn.Conv2d(1, 6, 5, padding=2)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" self.fc1 = nn.Linear(400, 120) # TODO: ここが 28x28 ピクセル決め打ちになっている\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.fc3 = nn.Linear(84, 10)\n",
"\n",
" def forward(self, x):\n",
" x = nn.AvgPool2d(2)(F.relu(self.conv1(x)))\n",
" x = nn.AvgPool2d(2)(F.relu(self.conv2(x)))\n",
" x = x.view(-1, 400)\n",
" x = F.relu(self.fc1(x))\n",
" x = F.relu(self.fc2(x))\n",
" x = self.fc3(x)\n",
" return x\n",
"\n",
"net = LeNet5_MNIST()\n",
"\n",
"# Show the module tree first, then every parameter tensor with its shape.\n",
"print('◆ 今回学習するネットワーク')\n",
"print(net)\n",
"print('\\n◆ 今回学習するパラメータたち')\n",
"for param_name, tensor in net.named_parameters():\n",
"    print(f'{param_name:<14}', tensor.shape)"
]
},
{
"cell_type": "markdown",
"id": "a3723955-316e-48fb-9df4-97d43e79a68a",
"metadata": {},
"source": [
"#### 1エポック学習する"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "61a185f9-95b7-427d-a2d8-f77bf8d11d3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2000バッチまで学習済み, 現在の1バッチあたり損失:2.025\n",
"4000バッチまで学習済み, 現在の1バッチあたり損失:0.399\n",
"6000バッチまで学習済み, 現在の1バッチあたり損失:0.185\n",
"8000バッチまで学習済み, 現在の1バッチあたり損失:0.144\n",
"10000バッチまで学習済み, 現在の1バッチあたり損失:0.126\n",
"12000バッチまで学習済み, 現在の1バッチあたり損失:0.111\n",
"14000バッチまで学習済み, 現在の1バッチあたり損失:0.104\n",
"テストデータ 10000 枚に対する正解率:97.94%\n"
]
}
],
"source": [
"import torch.optim as optim\n",
"\n",
"def train(model, trainloader, criterion, optimizer, print_interval=-1):\n",
" running_loss = 0.0 # プリント用\n",
" for i, data in enumerate(trainloader):\n",
" inputs, labels = data\n",
" optimizer.zero_grad()\n",
" outputs = model(inputs)\n",
" loss = criterion(outputs, labels)\n",
" loss.backward()\n",
" optimizer.step()\n",
" running_loss += loss.item() # プリント用\n",
"\n",
" if print_interval > 0:\n",
" if i % print_interval == (print_interval - 1):\n",
" print(f'{i + 1}バッチまで学習済み, '\n",
" f'現在の1バッチあたり損失:{running_loss / print_interval:.3f}')\n",
" running_loss = 0.0\n",
"\n",
"def test(model, testloader):\n",
" correct = 0\n",
" total = 0\n",
" with torch.no_grad():\n",
" for data in testloader:\n",
" images, labels = data\n",
" outputs = model(images)\n",
" _, predicted = torch.max(outputs.data, 1)\n",
" total += labels.size(0)\n",
" correct += (predicted == labels).sum().item()\n",
" return correct / total\n",
"\n",
"# Cross-entropy loss, minimised by SGD with momentum.\n",
"criterion = nn.CrossEntropyLoss()\n",
"optimizer = optim.SGD(params=net.parameters(), lr=1e-3, momentum=0.9)\n",
"\n",
"# One pass over the training loader, then measure accuracy on the test loader.\n",
"train(model=net, trainloader=trainloader, criterion=criterion,\n",
"      optimizer=optimizer, print_interval=2000)\n",
"accuracy = test(model=net, testloader=testloader)\n",
"print(f'テストデータ 10000 枚に対する正解率:{accuracy:.2%}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ebec1fa-1758-4acf-ac28-2acfa40ad8ff",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment