Created
April 26, 2019 02:38
-
-
Save Whamp/c2e2836666b86027740a6003ed1a844f to your computer and use it in GitHub Desktop.
Imagewoof Training with Learned ReLU size 192
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%load_ext autoreload\n", | |
"%autoreload 2\n", | |
"\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"from exp.nb_10c import *" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# LearnedRelu" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class LearnedRelu(nn.Module):\n", | |
" def __init__(self, leak=0.05, sub=0.25, maxv=10):\n", | |
" super().__init__()\n", | |
" self.leak = nn.Parameter(torch.ones(1)*leak)\n", | |
" self.sub = nn.Parameter(torch.zeros(1)+sub)\n", | |
" self.maxv = nn.Parameter(torch.ones(1)*maxv)\n", | |
" \n", | |
" def forward(self, x):\n", | |
" if self.training:\n", | |
" with torch.no_grad():#Set some limits \n", | |
" self.leak.clamp_(0,.5) #Don't have a leak of more than 0.5 or less than 0\n", | |
" self.sub.clamp_(0,1) #don't subtract more than 1 and not less than 0\n", | |
" self.maxv.clamp_(5,100) #don't let maxv go above 10 and not below -10\n", | |
" x = F.leaky_relu(x,self.leak.item())\n", | |
" x.sub_(self.sub)\n", | |
" x.clamp_max_(self.maxv.item()) \n", | |
" return x" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## XResNet" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"def noop(x): return x\n", | |
"\n", | |
"class Flatten(nn.Module):\n", | |
" def forward(self, x): return x.view(x.size(0), -1)\n", | |
"\n", | |
"def conv(ni, nf, ks=3, stride=1, bias=False):\n", | |
" return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=bias)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Use LearnedRelu instead of nn.ReLU with starting params of 0leak and 0.25 sub and maxv of 10" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"#act_fn = nn.ReLU(inplace=True)\n", | |
"\n", | |
"def init_cnn(m):\n", | |
" if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n", | |
" if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n", | |
" for l in m.children(): init_cnn(l)\n", | |
"\n", | |
"def conv_layer(ni, nf, ks=3, stride=1, zero_bn=False, act=True):\n", | |
" bn = nn.BatchNorm2d(nf)\n", | |
" nn.init.constant_(bn.weight, 0. if zero_bn else 1.)\n", | |
" layers = [conv(ni, nf, ks, stride=stride), bn]\n", | |
" if act: layers.append(act_fn)\n", | |
" return nn.Sequential(*layers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"class ResBlock(nn.Module):\n", | |
" def __init__(self, expansion, ni, nh, stride=1):\n", | |
" super().__init__()\n", | |
" nf,ni = nh*expansion,ni*expansion\n", | |
" layers = [conv_layer(ni, nh, 1)]\n", | |
" layers += [\n", | |
" conv_layer(nh, nf, 3, stride=stride, zero_bn=True, act=False)\n", | |
" ] if expansion==1 else [\n", | |
" conv_layer(nh, nh, 3, stride=stride),\n", | |
" conv_layer(nh, nf, 1, zero_bn=True, act=False)\n", | |
" ]\n", | |
" self.convs = nn.Sequential(*layers)\n", | |
" self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", | |
" self.pool = noop if stride==1 else nn.AvgPool2d(2)\n", | |
"\n", | |
" def forward(self, x): return act_fn(self.convs(x) + self.idconv(self.pool(x)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"class XResNet(nn.Sequential):\n", | |
" @classmethod\n", | |
" def create(cls, expansion, layers, c_in=3, c_out=1000):\n", | |
" nfs = [c_in, (c_in+1)*8, 64, 64]\n", | |
" stem = [conv_layer(nfs[i], nfs[i+1], stride=2 if i==0 else 1)\n", | |
" for i in range(3)]\n", | |
"\n", | |
" nfs = [64//expansion,64,128,256,512]\n", | |
" res_layers = [cls._make_layer(expansion, nfs[i], nfs[i+1],\n", | |
" n_blocks=l, stride=1 if i==0 else 2)\n", | |
" for i,l in enumerate(layers)]\n", | |
" res = cls(\n", | |
" *stem,\n", | |
" nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", | |
" *res_layers,\n", | |
" nn.AdaptiveAvgPool2d(1), Flatten(),\n", | |
" nn.Linear(nfs[-1]*expansion, c_out),\n", | |
" )\n", | |
" init_cnn(res)\n", | |
" return res\n", | |
"\n", | |
" @staticmethod\n", | |
" def _make_layer(expansion, ni, nf, n_blocks, stride):\n", | |
" return nn.Sequential(\n", | |
" *[ResBlock(expansion, ni if i==0 else nf, nf, stride if i==0 else 1)\n", | |
" for i in range(n_blocks)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"def xresnet18_LR (**kwargs): return XResNet.create(1, [2, 2, 2, 2], **kwargs)\n", | |
"def xresnet34_LR (**kwargs): return XResNet.create(1, [3, 4, 6, 3], **kwargs)\n", | |
"def xresnet50_LR (**kwargs): return XResNet.create(4, [3, 4, 6, 3], **kwargs)\n", | |
"def xresnet101_LR(**kwargs): return XResNet.create(4, [3, 4, 23, 3], **kwargs)\n", | |
"def xresnet152_LR(**kwargs): return XResNet.create(4, [3, 8, 36, 3], **kwargs)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cbfs = [partial(AvgStatsCallback,accuracy), ProgressCallback, CudaCallback,\n", | |
" partial(BatchTransformXCallback, norm_imagenette),\n", | |
" partial(MixUp, 0.2)\n", | |
" ]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"loss_func = LabelSmoothingCrossEntropy()\n", | |
"opt_func = adam_opt(mom=0.9, mom_sqr=0.99, eps=1e-6, wd=1e-2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"def get_batch(dl, learn):\n", | |
" learn.xb,learn.yb = next(iter(dl))\n", | |
" learn.do_begin_fit(0)\n", | |
" learn('begin_batch')\n", | |
" learn('after_fit')\n", | |
" return learn.xb,learn.yb" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We need to replace the old `model_summary` since it used to take a `Runner`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# export\n", | |
"def model_summary(model, data, find_all=False, print_mod=False):\n", | |
" xb,yb = get_batch(data.valid_dl, learn)\n", | |
" mods = find_modules(model, is_lin_layer) if find_all else model.children()\n", | |
" f = lambda hook,mod,inp,out: print(f\"====\\n{mod}\\n\" if print_mod else \"\", out.shape)\n", | |
" with Hooks(mods, f) as hooks: learn.model(xb)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"def create_phases(phases):\n", | |
" phases = listify(phases)\n", | |
" return phases + [1-sum(phases)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[0.3, 0.7]\n", | |
"[0.3, 0.2, 0.5]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(create_phases(0.3))\n", | |
"print(create_phases([0.3,0.2]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lr =3e-3\n", | |
"pct_start = 0.5\n", | |
"phases = create_phases(pct_start)\n", | |
"sched_lr = combine_scheds(phases, cos_1cycle_anneal(lr/10., lr, lr/1e5))\n", | |
"sched_mom = combine_scheds(phases, cos_1cycle_anneal(0.95,0.85, 0.95))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cbsched = [\n", | |
" ParamScheduler('lr', sched_lr),\n", | |
" ParamScheduler('mom', sched_mom)]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## cnn_learner" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#export\n", | |
"def cnn_learner(arch, data, loss_func, opt_func, c_in=None, c_out=None,\n", | |
" lr=3e-3, cuda=True, norm=None, progress=True, mixup=0, xtra_cb=None, **kwargs):\n", | |
" cbfs = [partial(AvgStatsCallback,accuracy)]+listify(xtra_cb)\n", | |
" if progress: cbfs.append(ProgressCallback)\n", | |
" if cuda: cbfs.append(CudaCallback)\n", | |
" if norm: cbfs.append(partial(BatchTransformXCallback, norm))\n", | |
" if mixup: cbfs.append(partial(MixUp, mixup))\n", | |
" arch_args = {}\n", | |
" if not c_in : c_in = data.c_in\n", | |
" if not c_out: c_out = data.c_out\n", | |
" if c_in: arch_args['c_in' ]=c_in\n", | |
" if c_out: arch_args['c_out']=c_out\n", | |
" return Learner(arch(**arch_args), data, loss_func, opt_func=opt_func, lr=lr, cb_funcs=cbfs, **kwargs)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Imagewoof training" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"path = datasets.untar_data(datasets.URLs.IMAGEWOOF_320)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = 192\n", | |
"tfms = [make_rgb, RandomResizedCrop(192,scale=(0.35,1)), np_to_float, PilRandomFlip()]\n", | |
"bs = 48\n", | |
"\n", | |
"il = ImageList.from_files(path, tfms=tfms)\n", | |
"sd = SplitData.split_by_func(il, partial(grandparent_splitter, valid_name='val'))\n", | |
"ll = label_by_func(sd, parent_labeler, proc_y=CategoryProcessor())\n", | |
"\n", | |
"ll.valid.x.tfms = [make_rgb, CenterCrop(size), np_to_float]\n", | |
"\n", | |
"data = ll.to_databunch(bs, c_in=3, c_out=10, num_workers=16)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Start with 5 epochs per arch" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet18 with act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.176271</td>\n", | |
" <td>0.233499</td>\n", | |
" <td>2.184551</td>\n", | |
" <td>0.214000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.065433</td>\n", | |
" <td>0.309941</td>\n", | |
" <td>2.013440</td>\n", | |
" <td>0.322000</td>\n", | |
" <td>00:29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.942753</td>\n", | |
" <td>0.391681</td>\n", | |
" <td>1.820060</td>\n", | |
" <td>0.382000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.807206</td>\n", | |
" <td>0.477758</td>\n", | |
" <td>1.593475</td>\n", | |
" <td>0.498000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.669652</td>\n", | |
" <td>0.562229</td>\n", | |
" <td>1.496681</td>\n", | |
" <td>0.564000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(5, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"xresnet34 act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.239583</td>\n", | |
" <td>0.186928</td>\n", | |
" <td>2.196563</td>\n", | |
" <td>0.226000</td>\n", | |
" <td>00:39</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.042328</td>\n", | |
" <td>0.331861</td>\n", | |
" <td>2.284073</td>\n", | |
" <td>0.294000</td>\n", | |
" <td>00:39</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.901500</td>\n", | |
" <td>0.417777</td>\n", | |
" <td>1.749918</td>\n", | |
" <td>0.414000</td>\n", | |
" <td>00:39</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.752035</td>\n", | |
" <td>0.509876</td>\n", | |
" <td>1.484071</td>\n", | |
" <td>0.572000</td>\n", | |
" <td>00:39</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.604641</td>\n", | |
" <td>0.601975</td>\n", | |
" <td>1.368960</td>\n", | |
" <td>0.630000</td>\n", | |
" <td>00:39</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(5, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet50 act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet50_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.235829</td>\n", | |
" <td>0.204513</td>\n", | |
" <td>2.272176</td>\n", | |
" <td>0.270000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.044194</td>\n", | |
" <td>0.334672</td>\n", | |
" <td>2.116261</td>\n", | |
" <td>0.314000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.932448</td>\n", | |
" <td>0.400353</td>\n", | |
" <td>1.758810</td>\n", | |
" <td>0.400000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.770677</td>\n", | |
" <td>0.494058</td>\n", | |
" <td>1.490581</td>\n", | |
" <td>0.576000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.608710</td>\n", | |
" <td>0.594668</td>\n", | |
" <td>1.321973</td>\n", | |
" <td>0.662000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(5, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Now try 20 epochs" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet18 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.170229</td>\n", | |
" <td>0.243135</td>\n", | |
" <td>2.128347</td>\n", | |
" <td>0.248000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.073682</td>\n", | |
" <td>0.306889</td>\n", | |
" <td>2.030547</td>\n", | |
" <td>0.320000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.969655</td>\n", | |
" <td>0.378031</td>\n", | |
" <td>1.942254</td>\n", | |
" <td>0.368000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.887025</td>\n", | |
" <td>0.430223</td>\n", | |
" <td>1.810817</td>\n", | |
" <td>0.400000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.834445</td>\n", | |
" <td>0.463385</td>\n", | |
" <td>1.735087</td>\n", | |
" <td>0.436000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>1.781669</td>\n", | |
" <td>0.493175</td>\n", | |
" <td>1.935316</td>\n", | |
" <td>0.436000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>1.740426</td>\n", | |
" <td>0.521760</td>\n", | |
" <td>1.589024</td>\n", | |
" <td>0.498000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>1.682010</td>\n", | |
" <td>0.558616</td>\n", | |
" <td>1.548017</td>\n", | |
" <td>0.510000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>1.641817</td>\n", | |
" <td>0.578609</td>\n", | |
" <td>1.485235</td>\n", | |
" <td>0.564000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>1.596934</td>\n", | |
" <td>0.615465</td>\n", | |
" <td>1.340996</td>\n", | |
" <td>0.630000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>1.546046</td>\n", | |
" <td>0.633130</td>\n", | |
" <td>1.316688</td>\n", | |
" <td>0.644000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>11</td>\n", | |
" <td>1.502797</td>\n", | |
" <td>0.659306</td>\n", | |
" <td>1.297992</td>\n", | |
" <td>0.674000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>12</td>\n", | |
" <td>1.460568</td>\n", | |
" <td>0.688775</td>\n", | |
" <td>1.189224</td>\n", | |
" <td>0.716000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>13</td>\n", | |
" <td>1.406436</td>\n", | |
" <td>0.718886</td>\n", | |
" <td>1.133647</td>\n", | |
" <td>0.758000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>14</td>\n", | |
" <td>1.365329</td>\n", | |
" <td>0.737434</td>\n", | |
" <td>1.097194</td>\n", | |
" <td>0.766000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>15</td>\n", | |
" <td>1.331212</td>\n", | |
" <td>0.759756</td>\n", | |
" <td>1.037644</td>\n", | |
" <td>0.778000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>16</td>\n", | |
" <td>1.290638</td>\n", | |
" <td>0.780713</td>\n", | |
" <td>0.997188</td>\n", | |
" <td>0.802000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>17</td>\n", | |
" <td>1.261216</td>\n", | |
" <td>0.798860</td>\n", | |
" <td>0.970057</td>\n", | |
" <td>0.814000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>18</td>\n", | |
" <td>1.250530</td>\n", | |
" <td>0.807371</td>\n", | |
" <td>0.963768</td>\n", | |
" <td>0.824000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>19</td>\n", | |
" <td>1.240110</td>\n", | |
" <td>0.815722</td>\n", | |
" <td>0.963476</td>\n", | |
" <td>0.814000</td>\n", | |
" <td>00:30</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(20, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet34 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.204060</td>\n", | |
" <td>0.213185</td>\n", | |
" <td>2.099484</td>\n", | |
" <td>0.262000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.052038</td>\n", | |
" <td>0.319496</td>\n", | |
" <td>1.974819</td>\n", | |
" <td>0.332000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.944317</td>\n", | |
" <td>0.390959</td>\n", | |
" <td>1.845240</td>\n", | |
" <td>0.412000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.861098</td>\n", | |
" <td>0.446764</td>\n", | |
" <td>1.766139</td>\n", | |
" <td>0.434000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.776837</td>\n", | |
" <td>0.495102</td>\n", | |
" <td>1.769971</td>\n", | |
" <td>0.432000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>1.715599</td>\n", | |
" <td>0.536053</td>\n", | |
" <td>1.545579</td>\n", | |
" <td>0.564000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>1.666961</td>\n", | |
" <td>0.561105</td>\n", | |
" <td>1.527621</td>\n", | |
" <td>0.554000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>1.610517</td>\n", | |
" <td>0.599647</td>\n", | |
" <td>1.433473</td>\n", | |
" <td>0.606000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>1.556512</td>\n", | |
" <td>0.626064</td>\n", | |
" <td>1.608577</td>\n", | |
" <td>0.508000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>1.525477</td>\n", | |
" <td>0.644130</td>\n", | |
" <td>1.291797</td>\n", | |
" <td>0.656000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>1.472974</td>\n", | |
" <td>0.676329</td>\n", | |
" <td>1.385635</td>\n", | |
" <td>0.592000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>11</td>\n", | |
" <td>1.435359</td>\n", | |
" <td>0.696483</td>\n", | |
" <td>1.143334</td>\n", | |
" <td>0.732000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>12</td>\n", | |
" <td>1.389284</td>\n", | |
" <td>0.722659</td>\n", | |
" <td>1.144489</td>\n", | |
" <td>0.730000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>13</td>\n", | |
" <td>1.355664</td>\n", | |
" <td>0.739521</td>\n", | |
" <td>1.124210</td>\n", | |
" <td>0.758000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>14</td>\n", | |
" <td>1.309906</td>\n", | |
" <td>0.765457</td>\n", | |
" <td>1.042107</td>\n", | |
" <td>0.762000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>15</td>\n", | |
" <td>1.277276</td>\n", | |
" <td>0.781757</td>\n", | |
" <td>0.979578</td>\n", | |
" <td>0.798000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>16</td>\n", | |
" <td>1.226820</td>\n", | |
" <td>0.809941</td>\n", | |
" <td>0.958027</td>\n", | |
" <td>0.804000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>17</td>\n", | |
" <td>1.193602</td>\n", | |
" <td>0.824956</td>\n", | |
" <td>0.920326</td>\n", | |
" <td>0.826000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>18</td>\n", | |
" <td>1.175790</td>\n", | |
" <td>0.835635</td>\n", | |
" <td>0.914901</td>\n", | |
" <td>0.824000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>19</td>\n", | |
" <td>1.167698</td>\n", | |
" <td>0.840854</td>\n", | |
" <td>0.922976</td>\n", | |
" <td>0.826000</td>\n", | |
" <td>00:40</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(20, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet50 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet50_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>epoch</th>\n", | |
" <th>train_loss</th>\n", | |
" <th>train_accuracy</th>\n", | |
" <th>valid_loss</th>\n", | |
" <th>valid_accuracy</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.236692</td>\n", | |
" <td>0.213506</td>\n", | |
" <td>2.161840</td>\n", | |
" <td>0.250000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.041531</td>\n", | |
" <td>0.333387</td>\n", | |
" <td>1.928734</td>\n", | |
" <td>0.334000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.921252</td>\n", | |
" <td>0.415770</td>\n", | |
" <td>1.834021</td>\n", | |
" <td>0.416000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>1.838308</td>\n", | |
" <td>0.461057</td>\n", | |
" <td>1.719596</td>\n", | |
" <td>0.446000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.783118</td>\n", | |
" <td>0.499117</td>\n", | |
" <td>1.671353</td>\n", | |
" <td>0.434000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>1.708833</td>\n", | |
" <td>0.543761</td>\n", | |
" <td>1.485957</td>\n", | |
" <td>0.544000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>1.644760</td>\n", | |
" <td>0.574594</td>\n", | |
" <td>1.554471</td>\n", | |
" <td>0.558000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>1.590069</td>\n", | |
" <td>0.608238</td>\n", | |
" <td>1.465984</td>\n", | |
" <td>0.606000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>1.554413</td>\n", | |
" <td>0.625181</td>\n", | |
" <td>1.374534</td>\n", | |
" <td>0.608000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>1.505342</td>\n", | |
" <td>0.660190</td>\n", | |
" <td>1.505629</td>\n", | |
" <td>0.560000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>1.463908</td>\n", | |
" <td>0.679621</td>\n", | |
" <td>1.253468</td>\n", | |
" <td>0.698000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>11</td>\n", | |
" <td>1.416193</td>\n", | |
" <td>0.702907</td>\n", | |
" <td>1.225318</td>\n", | |
" <td>0.690000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>12</td>\n", | |
" <td>1.374655</td>\n", | |
" <td>0.735186</td>\n", | |
" <td>1.325135</td>\n", | |
" <td>0.668000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>13</td>\n", | |
" <td>1.331353</td>\n", | |
" <td>0.751887</td>\n", | |
" <td>1.029575</td>\n", | |
" <td>0.792000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>14</td>\n", | |
" <td>1.287370</td>\n", | |
" <td>0.778224</td>\n", | |
" <td>0.958926</td>\n", | |
" <td>0.826000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>15</td>\n", | |
" <td>1.240000</td>\n", | |
" <td>0.798780</td>\n", | |
" <td>0.969717</td>\n", | |
" <td>0.808000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>16</td>\n", | |
" <td>1.206745</td>\n", | |
" <td>0.817649</td>\n", | |
" <td>0.899490</td>\n", | |
" <td>0.842000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>17</td>\n", | |
" <td>1.176220</td>\n", | |
" <td>0.839489</td>\n", | |
" <td>0.877147</td>\n", | |
" <td>0.846000</td>\n", | |
" <td>01:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>18</td>\n", | |
" <td>1.136527</td>\n", | |
" <td>0.852176</td>\n", | |
" <td>0.882169</td>\n", | |
" <td>0.848000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>19</td>\n", | |
" <td>1.131456</td>\n", | |
" <td>0.857957</td>\n", | |
" <td>0.874610</td>\n", | |
" <td>0.852000</td>\n", | |
" <td>01:15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"learn.fit(20, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Now try 80 Epochs" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet18 for 80 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn.fit(80, cbsched)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try xresnet34 for 80 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn.fit(80, cbsched)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment