Imagewoof Training with Learned ReLU size 192
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"from exp.nb_10c import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LearnedRelu"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class LearnedRelu(nn.Module):\n",
" def __init__(self, leak=0.05, sub=0.25, maxv=10):\n",
" super().__init__()\n",
" self.leak = nn.Parameter(torch.ones(1)*leak)\n",
" self.sub = nn.Parameter(torch.zeros(1)+sub)\n",
" self.maxv = nn.Parameter(torch.ones(1)*maxv)\n",
" \n",
" def forward(self, x):\n",
" if self.training:\n",
" with torch.no_grad():#Set some limits \n",
" self.leak.clamp_(0,.5) #Don't have a leak of more than 0.5 or less than 0\n",
" self.sub.clamp_(0,1) #don't subtract more than 1 and not less than 0\n",
" self.maxv.clamp_(5,100) #don't let maxv go above 10 and not below -10\n",
" x = F.leaky_relu(x,self.leak.item())\n",
" x.sub_(self.sub)\n",
" x.clamp_max_(self.maxv.item()) \n",
" return x"
]
},
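{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `F.leaky_relu(x, self.leak.item())` and `x.clamp_max_(self.maxv.item())` pass Python floats to the ops, so no gradient ever reaches `leak` or `maxv`; only `sub` stays in the autograd graph. The sketch below is a hypothetical fully-differentiable variant (not the version trained in this notebook) that keeps all three parameters learnable."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"class LearnedReluFullGrad(nn.Module):\n",
"    # hypothetical variant: leak, sub and maxv all receive gradients\n",
"    def __init__(self, leak=0.05, sub=0.25, maxv=10.):\n",
"        super().__init__()\n",
"        self.leak = nn.Parameter(torch.tensor(float(leak)))\n",
"        self.sub  = nn.Parameter(torch.tensor(float(sub)))\n",
"        self.maxv = nn.Parameter(torch.tensor(float(maxv)))\n",
"\n",
"    def forward(self, x):\n",
"        # leaky relu written with tensor ops so the gradient w.r.t. leak exists\n",
"        x = torch.where(x >= 0, x, x * self.leak)\n",
"        x = x - self.sub\n",
"        # elementwise min keeps the ceiling differentiable w.r.t. maxv\n",
"        return torch.min(x, self.maxv)"
]
},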
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## XResNet"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"def noop(x): return x\n",
"\n",
"class Flatten(nn.Module):\n",
" def forward(self, x): return x.view(x.size(0), -1)\n",
"\n",
"def conv(ni, nf, ks=3, stride=1, bias=False):\n",
" return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=bias)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Use LearnedRelu instead of nn.ReLU with starting params of 0leak and 0.25 sub and maxv of 10"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
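{
"cell_type": "markdown",
"metadata": {},
"source": [
"This builds a single `LearnedRelu` instance; the `conv_layer` defined below and `ResBlock.forward` reuse this same `act_fn` object everywhere, so one set of leak/sub/maxv parameters is shared across the whole network rather than learned per layer. A hypothetical per-layer variant (not what is trained below) would construct a fresh activation inside the layer factory:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical per-layer variant: a new LearnedRelu per conv_layer, so every\n",
"# layer learns its own leak/sub/maxv instead of sharing the global act_fn.\n",
"def conv_layer_per_act(ni, nf, ks=3, stride=1, zero_bn=False, act=True):\n",
"    bn = nn.BatchNorm2d(nf)\n",
"    nn.init.constant_(bn.weight, 0. if zero_bn else 1.)\n",
"    layers = [conv(ni, nf, ks, stride=stride), bn]\n",
"    if act: layers.append(LearnedRelu(leak=0, sub=0.25, maxv=10))\n",
"    return nn.Sequential(*layers)"
]
},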
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"#act_fn = nn.ReLU(inplace=True)\n",
"\n",
"def init_cnn(m):\n",
" if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n",
" if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n",
" for l in m.children(): init_cnn(l)\n",
"\n",
"def conv_layer(ni, nf, ks=3, stride=1, zero_bn=False, act=True):\n",
" bn = nn.BatchNorm2d(nf)\n",
" nn.init.constant_(bn.weight, 0. if zero_bn else 1.)\n",
" layers = [conv(ni, nf, ks, stride=stride), bn]\n",
" if act: layers.append(act_fn)\n",
" return nn.Sequential(*layers)"
]
},
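{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustrative check of `conv_layer` (assuming the cells above have been run): stride 2 halves the spatial size, and with `act=True` the block ends in the shared `LearnedRelu`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sanity check of conv_layer's structure and output shape\n",
"import torch\n",
"\n",
"layer = conv_layer(3, 32, stride=2)\n",
"print(layer)                                   # Conv2d -> BatchNorm2d -> LearnedRelu\n",
"print(layer(torch.randn(2, 3, 64, 64)).shape)  # torch.Size([2, 32, 32, 32])"
]
},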
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"class ResBlock(nn.Module):\n",
" def __init__(self, expansion, ni, nh, stride=1):\n",
" super().__init__()\n",
" nf,ni = nh*expansion,ni*expansion\n",
" layers = [conv_layer(ni, nh, 1)]\n",
" layers += [\n",
" conv_layer(nh, nf, 3, stride=stride, zero_bn=True, act=False)\n",
" ] if expansion==1 else [\n",
" conv_layer(nh, nh, 3, stride=stride),\n",
" conv_layer(nh, nf, 1, zero_bn=True, act=False)\n",
" ]\n",
" self.convs = nn.Sequential(*layers)\n",
" self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n",
" self.pool = noop if stride==1 else nn.AvgPool2d(2)\n",
"\n",
" def forward(self, x): return act_fn(self.convs(x) + self.idconv(self.pool(x)))"
]
},
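{
"cell_type": "markdown",
"metadata": {},
"source": [
"Illustrative check of a stride-2 `ResBlock` (assuming the cells above): the main path downsamples with the strided conv, the identity path is matched with `AvgPool2d(2)` plus a 1x1 `conv_layer` (no activation), and both are summed before the final `act_fn`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative: a stride-2 block halves the spatial size and changes channels\n",
"import torch\n",
"\n",
"blk = ResBlock(expansion=1, ni=64, nh=128, stride=2)\n",
"print(blk(torch.randn(2, 64, 32, 32)).shape)  # torch.Size([2, 128, 16, 16])"
]
},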
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"class XResNet(nn.Sequential):\n",
" @classmethod\n",
" def create(cls, expansion, layers, c_in=3, c_out=1000):\n",
" nfs = [c_in, (c_in+1)*8, 64, 64]\n",
" stem = [conv_layer(nfs[i], nfs[i+1], stride=2 if i==0 else 1)\n",
" for i in range(3)]\n",
"\n",
" nfs = [64//expansion,64,128,256,512]\n",
" res_layers = [cls._make_layer(expansion, nfs[i], nfs[i+1],\n",
" n_blocks=l, stride=1 if i==0 else 2)\n",
" for i,l in enumerate(layers)]\n",
" res = cls(\n",
" *stem,\n",
" nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n",
" *res_layers,\n",
" nn.AdaptiveAvgPool2d(1), Flatten(),\n",
" nn.Linear(nfs[-1]*expansion, c_out),\n",
" )\n",
" init_cnn(res)\n",
" return res\n",
"\n",
" @staticmethod\n",
" def _make_layer(expansion, ni, nf, n_blocks, stride):\n",
" return nn.Sequential(\n",
" *[ResBlock(expansion, ni if i==0 else nf, nf, stride if i==0 else 1)\n",
" for i in range(n_blocks)])"
]
},
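{
"cell_type": "markdown",
"metadata": {},
"source": [
"Illustrative check of the constructor (assuming the cells above): `create(1, [2, 2, 2, 2])` is the 18-layer configuration; the stem runs 3 -> 32 -> 64 -> 64 channels and the head produces `c_out` logits."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative: build the 18-layer configuration and run a dummy batch through it\n",
"import torch\n",
"\n",
"m = XResNet.create(1, [2, 2, 2, 2], c_in=3, c_out=10)\n",
"n_params = sum(p.numel() for p in m.parameters())\n",
"print(f'{n_params:,} parameters')\n",
"print(m(torch.randn(2, 3, 128, 128)).shape)  # torch.Size([2, 10])"
]
},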
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"def xresnet18_LR (**kwargs): return XResNet.create(1, [2, 2, 2, 2], **kwargs)\n",
"def xresnet34_LR (**kwargs): return XResNet.create(1, [3, 4, 6, 3], **kwargs)\n",
"def xresnet50_LR (**kwargs): return XResNet.create(4, [3, 4, 6, 3], **kwargs)\n",
"def xresnet101_LR(**kwargs): return XResNet.create(4, [3, 4, 23, 3], **kwargs)\n",
"def xresnet152_LR(**kwargs): return XResNet.create(4, [3, 8, 36, 3], **kwargs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"cbfs = [partial(AvgStatsCallback,accuracy), ProgressCallback, CudaCallback,\n",
" partial(BatchTransformXCallback, norm_imagenette),\n",
" partial(MixUp, 0.2)\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"loss_func = LabelSmoothingCrossEntropy()\n",
"opt_func = adam_opt(mom=0.9, mom_sqr=0.99, eps=1e-6, wd=1e-2)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"def get_batch(dl, learn):\n",
" learn.xb,learn.yb = next(iter(dl))\n",
" learn.do_begin_fit(0)\n",
" learn('begin_batch')\n",
" learn('after_fit')\n",
" return learn.xb,learn.yb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We need to replace the old `model_summary` since it used to take a `Runner`."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# export\n",
"def model_summary(model, data, find_all=False, print_mod=False):\n",
" xb,yb = get_batch(data.valid_dl, learn)\n",
" mods = find_modules(model, is_lin_layer) if find_all else model.children()\n",
" f = lambda hook,mod,inp,out: print(f\"====\\n{mod}\\n\" if print_mod else \"\", out.shape)\n",
" with Hooks(mods, f) as hooks: learn.model(xb)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"def create_phases(phases):\n",
" phases = listify(phases)\n",
" return phases + [1-sum(phases)]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.3, 0.7]\n",
"[0.3, 0.2, 0.5]\n"
]
}
],
"source": [
"print(create_phases(0.3))\n",
"print(create_phases([0.3,0.2]))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"lr =3e-3\n",
"pct_start = 0.5\n",
"phases = create_phases(pct_start)\n",
"sched_lr = combine_scheds(phases, cos_1cycle_anneal(lr/10., lr, lr/1e5))\n",
"sched_mom = combine_scheds(phases, cos_1cycle_anneal(0.95,0.85, 0.95))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"cbsched = [\n",
" ParamScheduler('lr', sched_lr),\n",
" ParamScheduler('mom', sched_mom)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## cnn_learner"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"def cnn_learner(arch, data, loss_func, opt_func, c_in=None, c_out=None,\n",
" lr=3e-3, cuda=True, norm=None, progress=True, mixup=0, xtra_cb=None, **kwargs):\n",
" cbfs = [partial(AvgStatsCallback,accuracy)]+listify(xtra_cb)\n",
" if progress: cbfs.append(ProgressCallback)\n",
" if cuda: cbfs.append(CudaCallback)\n",
" if norm: cbfs.append(partial(BatchTransformXCallback, norm))\n",
" if mixup: cbfs.append(partial(MixUp, mixup))\n",
" arch_args = {}\n",
" if not c_in : c_in = data.c_in\n",
" if not c_out: c_out = data.c_out\n",
" if c_in: arch_args['c_in' ]=c_in\n",
" if c_out: arch_args['c_out']=c_out\n",
" return Learner(arch(**arch_args), data, loss_func, opt_func=opt_func, lr=lr, cb_funcs=cbfs, **kwargs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imagewoof training"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"path = datasets.untar_data(datasets.URLs.IMAGEWOOF_320)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"size = 192\n",
"tfms = [make_rgb, RandomResizedCrop(192,scale=(0.35,1)), np_to_float, PilRandomFlip()]\n",
"bs = 48\n",
"\n",
"il = ImageList.from_files(path, tfms=tfms)\n",
"sd = SplitData.split_by_func(il, partial(grandparent_splitter, valid_name='val'))\n",
"ll = label_by_func(sd, parent_labeler, proc_y=CategoryProcessor())\n",
"\n",
"ll.valid.x.tfms = [make_rgb, CenterCrop(size), np_to_float]\n",
"\n",
"data = ll.to_databunch(bs, c_in=3, c_out=10, num_workers=16)"
]
},
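{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quick look at one batch (illustrative; assumes the `DataBunch` exposes `train_dl` as in the course notebooks): expect a float image batch of roughly `[bs, 3, 192, 192]` after the crop and a label tensor of shape `[bs]`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative: inspect the shapes of one training batch\n",
"xb, yb = next(iter(data.train_dl))\n",
"print(xb.shape, yb.shape)"
]
},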
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Start with 5 epochs per arch"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet18 with act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.176271</td>\n",
" <td>0.233499</td>\n",
" <td>2.184551</td>\n",
" <td>0.214000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.065433</td>\n",
" <td>0.309941</td>\n",
" <td>2.013440</td>\n",
" <td>0.322000</td>\n",
" <td>00:29</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.942753</td>\n",
" <td>0.391681</td>\n",
" <td>1.820060</td>\n",
" <td>0.382000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.807206</td>\n",
" <td>0.477758</td>\n",
" <td>1.593475</td>\n",
" <td>0.498000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.669652</td>\n",
" <td>0.562229</td>\n",
" <td>1.496681</td>\n",
" <td>0.564000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(5, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"xresnet34 act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.239583</td>\n",
" <td>0.186928</td>\n",
" <td>2.196563</td>\n",
" <td>0.226000</td>\n",
" <td>00:39</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.042328</td>\n",
" <td>0.331861</td>\n",
" <td>2.284073</td>\n",
" <td>0.294000</td>\n",
" <td>00:39</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.901500</td>\n",
" <td>0.417777</td>\n",
" <td>1.749918</td>\n",
" <td>0.414000</td>\n",
" <td>00:39</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.752035</td>\n",
" <td>0.509876</td>\n",
" <td>1.484071</td>\n",
" <td>0.572000</td>\n",
" <td>00:39</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.604641</td>\n",
" <td>0.601975</td>\n",
" <td>1.368960</td>\n",
" <td>0.630000</td>\n",
" <td>00:39</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(5, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet50 act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet50_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.235829</td>\n",
" <td>0.204513</td>\n",
" <td>2.272176</td>\n",
" <td>0.270000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.044194</td>\n",
" <td>0.334672</td>\n",
" <td>2.116261</td>\n",
" <td>0.314000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.932448</td>\n",
" <td>0.400353</td>\n",
" <td>1.758810</td>\n",
" <td>0.400000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.770677</td>\n",
" <td>0.494058</td>\n",
" <td>1.490581</td>\n",
" <td>0.576000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.608710</td>\n",
" <td>0.594668</td>\n",
" <td>1.321973</td>\n",
" <td>0.662000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(5, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Now try 20 epochs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet18 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.170229</td>\n",
" <td>0.243135</td>\n",
" <td>2.128347</td>\n",
" <td>0.248000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.073682</td>\n",
" <td>0.306889</td>\n",
" <td>2.030547</td>\n",
" <td>0.320000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.969655</td>\n",
" <td>0.378031</td>\n",
" <td>1.942254</td>\n",
" <td>0.368000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.887025</td>\n",
" <td>0.430223</td>\n",
" <td>1.810817</td>\n",
" <td>0.400000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.834445</td>\n",
" <td>0.463385</td>\n",
" <td>1.735087</td>\n",
" <td>0.436000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>1.781669</td>\n",
" <td>0.493175</td>\n",
" <td>1.935316</td>\n",
" <td>0.436000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>1.740426</td>\n",
" <td>0.521760</td>\n",
" <td>1.589024</td>\n",
" <td>0.498000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>1.682010</td>\n",
" <td>0.558616</td>\n",
" <td>1.548017</td>\n",
" <td>0.510000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>1.641817</td>\n",
" <td>0.578609</td>\n",
" <td>1.485235</td>\n",
" <td>0.564000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>1.596934</td>\n",
" <td>0.615465</td>\n",
" <td>1.340996</td>\n",
" <td>0.630000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>1.546046</td>\n",
" <td>0.633130</td>\n",
" <td>1.316688</td>\n",
" <td>0.644000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>1.502797</td>\n",
" <td>0.659306</td>\n",
" <td>1.297992</td>\n",
" <td>0.674000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>1.460568</td>\n",
" <td>0.688775</td>\n",
" <td>1.189224</td>\n",
" <td>0.716000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>1.406436</td>\n",
" <td>0.718886</td>\n",
" <td>1.133647</td>\n",
" <td>0.758000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>1.365329</td>\n",
" <td>0.737434</td>\n",
" <td>1.097194</td>\n",
" <td>0.766000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>1.331212</td>\n",
" <td>0.759756</td>\n",
" <td>1.037644</td>\n",
" <td>0.778000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>1.290638</td>\n",
" <td>0.780713</td>\n",
" <td>0.997188</td>\n",
" <td>0.802000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>1.261216</td>\n",
" <td>0.798860</td>\n",
" <td>0.970057</td>\n",
" <td>0.814000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>1.250530</td>\n",
" <td>0.807371</td>\n",
" <td>0.963768</td>\n",
" <td>0.824000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>1.240110</td>\n",
" <td>0.815722</td>\n",
" <td>0.963476</td>\n",
" <td>0.814000</td>\n",
" <td>00:30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(20, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet34 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.204060</td>\n",
" <td>0.213185</td>\n",
" <td>2.099484</td>\n",
" <td>0.262000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.052038</td>\n",
" <td>0.319496</td>\n",
" <td>1.974819</td>\n",
" <td>0.332000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.944317</td>\n",
" <td>0.390959</td>\n",
" <td>1.845240</td>\n",
" <td>0.412000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.861098</td>\n",
" <td>0.446764</td>\n",
" <td>1.766139</td>\n",
" <td>0.434000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.776837</td>\n",
" <td>0.495102</td>\n",
" <td>1.769971</td>\n",
" <td>0.432000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>1.715599</td>\n",
" <td>0.536053</td>\n",
" <td>1.545579</td>\n",
" <td>0.564000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>1.666961</td>\n",
" <td>0.561105</td>\n",
" <td>1.527621</td>\n",
" <td>0.554000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>1.610517</td>\n",
" <td>0.599647</td>\n",
" <td>1.433473</td>\n",
" <td>0.606000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>1.556512</td>\n",
" <td>0.626064</td>\n",
" <td>1.608577</td>\n",
" <td>0.508000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>1.525477</td>\n",
" <td>0.644130</td>\n",
" <td>1.291797</td>\n",
" <td>0.656000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>1.472974</td>\n",
" <td>0.676329</td>\n",
" <td>1.385635</td>\n",
" <td>0.592000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>1.435359</td>\n",
" <td>0.696483</td>\n",
" <td>1.143334</td>\n",
" <td>0.732000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>1.389284</td>\n",
" <td>0.722659</td>\n",
" <td>1.144489</td>\n",
" <td>0.730000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>1.355664</td>\n",
" <td>0.739521</td>\n",
" <td>1.124210</td>\n",
" <td>0.758000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>1.309906</td>\n",
" <td>0.765457</td>\n",
" <td>1.042107</td>\n",
" <td>0.762000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>1.277276</td>\n",
" <td>0.781757</td>\n",
" <td>0.979578</td>\n",
" <td>0.798000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>1.226820</td>\n",
" <td>0.809941</td>\n",
" <td>0.958027</td>\n",
" <td>0.804000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>1.193602</td>\n",
" <td>0.824956</td>\n",
" <td>0.920326</td>\n",
" <td>0.826000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>1.175790</td>\n",
" <td>0.835635</td>\n",
" <td>0.914901</td>\n",
" <td>0.824000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>1.167698</td>\n",
" <td>0.840854</td>\n",
" <td>0.922976</td>\n",
" <td>0.826000</td>\n",
" <td>00:40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(20, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet50 for 20 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet50_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>train_accuracy</th>\n",
" <th>valid_loss</th>\n",
" <th>valid_accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.236692</td>\n",
" <td>0.213506</td>\n",
" <td>2.161840</td>\n",
" <td>0.250000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.041531</td>\n",
" <td>0.333387</td>\n",
" <td>1.928734</td>\n",
" <td>0.334000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.921252</td>\n",
" <td>0.415770</td>\n",
" <td>1.834021</td>\n",
" <td>0.416000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.838308</td>\n",
" <td>0.461057</td>\n",
" <td>1.719596</td>\n",
" <td>0.446000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.783118</td>\n",
" <td>0.499117</td>\n",
" <td>1.671353</td>\n",
" <td>0.434000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>1.708833</td>\n",
" <td>0.543761</td>\n",
" <td>1.485957</td>\n",
" <td>0.544000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>1.644760</td>\n",
" <td>0.574594</td>\n",
" <td>1.554471</td>\n",
" <td>0.558000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>1.590069</td>\n",
" <td>0.608238</td>\n",
" <td>1.465984</td>\n",
" <td>0.606000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>1.554413</td>\n",
" <td>0.625181</td>\n",
" <td>1.374534</td>\n",
" <td>0.608000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>1.505342</td>\n",
" <td>0.660190</td>\n",
" <td>1.505629</td>\n",
" <td>0.560000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>1.463908</td>\n",
" <td>0.679621</td>\n",
" <td>1.253468</td>\n",
" <td>0.698000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>1.416193</td>\n",
" <td>0.702907</td>\n",
" <td>1.225318</td>\n",
" <td>0.690000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>1.374655</td>\n",
" <td>0.735186</td>\n",
" <td>1.325135</td>\n",
" <td>0.668000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>1.331353</td>\n",
" <td>0.751887</td>\n",
" <td>1.029575</td>\n",
" <td>0.792000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>1.287370</td>\n",
" <td>0.778224</td>\n",
" <td>0.958926</td>\n",
" <td>0.826000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>1.240000</td>\n",
" <td>0.798780</td>\n",
" <td>0.969717</td>\n",
" <td>0.808000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>1.206745</td>\n",
" <td>0.817649</td>\n",
" <td>0.899490</td>\n",
" <td>0.842000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>1.176220</td>\n",
" <td>0.839489</td>\n",
" <td>0.877147</td>\n",
" <td>0.846000</td>\n",
" <td>01:16</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>1.136527</td>\n",
" <td>0.852176</td>\n",
" <td>0.882169</td>\n",
" <td>0.848000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>1.131456</td>\n",
" <td>0.857957</td>\n",
" <td>0.874610</td>\n",
" <td>0.852000</td>\n",
" <td>01:15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit(20, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Now try 80 Epochs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet18 for 80 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet18_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learn.fit(80, cbsched)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try xresnet34 for 80 epochs act_fn = LearnedRelu(leak=0,sub=0.25,maxv=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learn = cnn_learner(xresnet34_LR, data, loss_func, opt_func, lr=3e-3, norm=norm_imagenette, mixup=0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learn.fit(80, cbsched)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}