znewsham/object_detection.ipynb

## object_detection.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Object detection: largest-bounding-box regression on the COCO sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastai.vision import *\n",
    "from fastai.callbacks.hooks import *\n",
    "from sklearn.metrics import jaccard_similarity_score as jsc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[PosixPath('/home/ubuntu/.fastai/data/coco_sample/train_sample'),\n",
       " PosixPath('/home/ubuntu/.fastai/data/coco_sample/annotations')]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path = untar_data(URLs.COCO_SAMPLE)\n",
    "path.ls()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_train_sample = path/'train_sample/'\n",
    "# Load the annotation file; `images` and `lbl_bbox` are paired element-wise below.\n",
    "images, lbl_bbox = get_annotations(path/'annotations/train_sample.json')\n",
    "img2bbox = dict(zip(images, lbl_bbox))\n",
    "\n",
    "classes = [\"chair\", \"couch\", \"tv\", \"remote\", \"book\", \"vase\"]\n",
    "\n",
    "\n",
    "def get_lrg(b):\n",
    "    \"\"\"Return the box in `b` with the largest area.\n",
    "\n",
    "    The area of a box is the product of (last two entries - first two entries).\n",
    "    When several boxes share the largest area, the earliest one in `b` is returned.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `b` is empty.\n",
    "    \"\"\"\n",
    "    if not b:\n",
    "        raise ValueError('get_lrg() needs at least one bounding box')\n",
    "    # np.prod replaces np.product, a deprecated alias that NumPy 2.0 removed.\n",
    "    return max(b, key=lambda x: np.prod(np.array(x[-2:]) - np.array(x[:2])))\n",
    "\n",
    "\n",
    "# Map every annotated image name to its single largest box.\n",
    "trn_lrg_anno = {a: get_lrg(img2bbox[a][0]) for a in images}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "fnames = get_image_files(path_train_sample)\n",
    "# Use fastai's open_image to load the picture; the built-in open() would only\n",
    "# return a text-mode file handle that is never closed.\n",
    "img = open_image(fnames[0])\n",
    "# Keep the preview as the last expression so the notebook actually displays it.\n",
    "fnames[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_y_func(o):\n",
    "    \"\"\"Build the regression target for image file `o`.\n",
    "\n",
    "    Looks up the box stored under `o.name` in `trn_lrg_anno` and returns its\n",
    "    first four values as a float tensor of two points: [[v0, v1], [v2, v3]].\n",
    "    \"\"\"\n",
    "    box = trn_lrg_anno[o.name]\n",
    "    first_corner = [box[0], box[1]]\n",
    "    second_corner = [box[2], box[3]]\n",
    "    return tensor([first_corner, second_corner]).float()\n",
    "\n",
    "# Example: get_y_func(path_train_sample/\"000000066154.jpg\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21838"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "size = 224\n",
    "bs=16\n",
    "len(path_train_sample.ls())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(2)\n",
    "data = (PointsItemList.from_folder(path_train_sample)\n",
    "       .random_split_by_pct(0.95)\n",
    "       .label_from_func(get_y_func)\n",
    "       .transform(get_transforms(max_zoom=0., max_rotate=3.),tfm_y=True, resize_method=ResizeMethod.SQUISH, size=size, remove_out=False)\n",
    "    )\n",
    "\n",
    "data.lists[1].y = data.lists[1].y[0:1000]\n",
    "data.lists[1].x = data.lists[1].x[0:1000]\n",
    "data = data.databunch(bs=64).normalize(imagenet_stats)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.show_batch(2, figsize=(10,7))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom head: flatten the backbone output and regress 4 values (two points).\n",
    "head_reg4 = nn.Sequential(Flatten(), nn.Linear(25088, 4))\n",
    "lmse = MSELossFlat()\n",
    "l1loss = nn.L1Loss()\n",
    "\n",
    "\n",
    "def accuracy1(input, targs):\n",
    "    \"\"\"Metric: square root of the flat MSE after mapping both tensors through (v + 1) * 112.\n",
    "\n",
    "    NOTE(review): 112 is presumably size / 2 for the 224-px images, i.e. a\n",
    "    conversion from [-1, 1] coordinates to pixels -- confirm.\n",
    "    \"\"\"\n",
    "    scaled_pred = (input.view(-1) + 1) * 112\n",
    "    scaled_targ = (targs.view(-1) + 1) * 112\n",
    "    return lmse(scaled_pred, scaled_targ).sqrt()\n",
    "\n",
    "\n",
    "def loss_func(input, targs):\n",
    "    \"\"\"Training loss: L1 distance between the flattened predictions and targets.\"\"\"\n",
    "    flat_pred, flat_targ = input.view(-1), targs.view(-1)\n",
    "    return l1loss(flat_pred, flat_targ)\n",
    "\n",
    "\n",
    "learn = create_cnn(data, models.resnet34, custom_head=head_reg4, metrics=accuracy1, loss_func=loss_func)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_find(learn)\n",
    "learn.recorder.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "        <style>\n",
       "            /* Turns off some styling */\n",
       "            progress {\n",
       "                /* gets rid of default border in Firefox and Opera. */\n",
       "                border: none;\n",
       "                /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
       "                background-size: auto;\n",
       "            }\n",
       "            .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
       "                background: #F44336;\n",
       "            }\n",
       "        </style>\n",
       "      <progress value='2' class='' max='5', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      40.00% [2/5 00:47<01:11]\n",
       "    </div>\n",
       "    \n",
       "<table style='width:300px; margin-bottom:10px'>\n",
       "  <tr>\n",
       "    <th>epoch</th>\n",
       "    <th>train_loss</th>\n",
       "    <th>valid_loss</th>\n",
       "    <th>accuracy1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>1</th>\n",
       "    <th>5.595885</th>\n",
       "    <th>14.827637</th>\n",
       "    <th>2016.135132</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>2</th>\n",
       "    <th>9.422756</th>\n",
       "    <th>1.568463</th>\n",
       "    <th>217.355057</th>\n",
       "  </tr>\n",
       "</table>\n",
       "\n",
       "\n",
       "    <div>\n",
       "        <style>\n",
       "            /* Turns off some styling */\n",
       "            progress {\n",
       "                /* gets rid of default border in Firefox and Opera. */\n",
       "                border: none;\n",
       "                /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
       "                background-size: auto;\n",
       "            }\n",
       "            .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
       "                background: #F44336;\n",
       "            }\n",
       "        </style>\n",
       "      <progress value='8' class='' max='16', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      50.00% [8/16 00:06<00:06]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lr=1e-2\n",
    "learn.fit_one_cycle(5, lr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.save('stage-1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.load('stage-1')\n",
    "lr=5e-3\n",
    "learn.freeze_to(-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Total time: 02:04 <p><table style='width:300px; margin-bottom:10px'>\n",
       "  <tr>\n",
       "    <th>epoch</th>\n",
       "    <th>train_loss</th>\n",
       "    <th>valid_loss</th>\n",
       "    <th>accuracy1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>1</th>\n",
       "    <th>0.386974</th>\n",
       "    <th>0.439564</th>\n",
       "    <th>74.194801</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>2</th>\n",
       "    <th>0.389389</th>\n",
       "    <th>0.996556</th>\n",
       "    <th>111.589348</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>3</th>\n",
       "    <th>0.390334</th>\n",
       "    <th>1.768569</th>\n",
       "    <th>146.334457</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>4</th>\n",
       "    <th>0.384820</th>\n",
       "    <th>0.308937</th>\n",
       "    <th>62.223328</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>5</th>\n",
       "    <th>0.372513</th>\n",
       "    <th>0.309802</th>\n",
       "    <th>62.309986</th>\n",
       "  </tr>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learn.fit_one_cycle(5, slice(lr/10, lr/1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.save(\"stage-1a\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "learn.load('mseloss-adamopt-l1crit');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "learn.show_results(rows=10, figsize=(10,40))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.unfreeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "lrs = slice(lr/400,lr/4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.fit_one_cycle(12, lrs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.save('stage-2');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Go big - nothing below here is currently in use due to the bad predictions above"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You may have to restart your kernel and come back to this stage if you run out of memory, and may also need to decrease `bs`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = (PointsItemList.from_folder(path_train_sample)\n",
    "       .random_split_by_pct(0.2)\n",
    "       .label_from_func(get_y_func)\n",
    "       .transform(get_transforms(), tfm_y=True, size=256, remove_out=False)\n",
    "       .databunch(bs=8)\n",
    "       .normalize(imagenet_stats))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `wd` is not assigned anywhere else in this notebook, so this cell raised\n",
    "# NameError on a fresh kernel. 1e-2 is assumed to be the intended weight decay\n",
    "# (it should match fastai's default) -- adjust if another value was meant.\n",
    "wd = 1e-2\n",
    "learn = create_cnn(data, models.resnet50, wd=wd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.load('stage-2');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_find(learn)\n",
    "learn.recorder.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr=1e-4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Total time: 00:45 <p><table style='width:300px; margin-bottom:10px'>\n",
       "  <tr>\n",
       "    <th>epoch</th>\n",
       "    <th>train_loss</th>\n",
       "    <th>valid_loss</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>1</th>\n",
       "    <th>0.609223</th>\n",
       "    <th>0.234268</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>2</th>\n",
       "    <th>0.612361</th>\n",
       "    <th>0.225444</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>3</th>\n",
       "    <th>0.626123</th>\n",
       "    <th>0.225538</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>4</th>\n",
       "    <th>0.634189</th>\n",
       "    <th>0.223056</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>5</th>\n",
       "    <th>0.626173</th>\n",
       "    <th>0.232538</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>6</th>\n",
       "    <th>0.614644</th>\n",
       "    <th>0.236454</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>7</th>\n",
       "    <th>0.616741</th>\n",
       "    <th>0.236577</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>8</th>\n",
       "    <th>0.617492</th>\n",
       "    <th>0.242800</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>9</th>\n",
       "    <th>0.611753</th>\n",
       "    <th>0.239526</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>10</th>\n",
       "    <th>0.599041</th>\n",
       "    <th>0.239952</th>\n",
       "  </tr>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learn.fit_one_cycle(10, slice(lr), pct_start=0.8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.save('stage-1-big')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.load('stage-1-big');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.unfreeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "lrs = slice(1e-6,lr/10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Total time: 01:02 <p><table style='width:300px; margin-bottom:10px'>\n",
       "  <tr>\n",
       "    <th>epoch</th>\n",
       "    <th>train_loss</th>\n",
       "    <th>valid_loss</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>1</th>\n",
       "    <th>0.531507</th>\n",
       "    <th>0.236299</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>2</th>\n",
       "    <th>0.555349</th>\n",
       "    <th>0.236082</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>3</th>\n",
       "    <th>0.548737</th>\n",
       "    <th>0.235379</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>4</th>\n",
       "    <th>0.574503</th>\n",
       "    <th>0.239024</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>5</th>\n",
       "    <th>0.568524</th>\n",
       "    <th>0.237507</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>6</th>\n",
       "    <th>0.570499</th>\n",
       "    <th>0.234831</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>7</th>\n",
       "    <th>0.563644</th>\n",
       "    <th>0.239454</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>8</th>\n",
       "    <th>0.571189</th>\n",
       "    <th>0.238208</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>9</th>\n",
       "    <th>0.566935</th>\n",
       "    <th>0.240294</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th>10</th>\n",
       "    <th>0.588694</th>\n",
       "    <th>0.239558</th>\n",
       "  </tr>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "learn.fit_one_cycle(10, lrs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.save('stage-2-big')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.load('stage-2-big');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn.show_results(rows=3, figsize=(10,10))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Object detection: largest-bounding-box regression on the COCO sample"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"%reload_ext autoreload\n",
	"%autoreload 2\n",
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"from fastai.vision import *\n",
	"from fastai.callbacks.hooks import *\n",
	"from sklearn.metrics import jaccard_similarity_score as jsc"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[PosixPath('/home/ubuntu/.fastai/data/coco_sample/train_sample'),\n",
	" PosixPath('/home/ubuntu/.fastai/data/coco_sample/annotations')]"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"path = untar_data(URLs.COCO_SAMPLE)\n",
	"path.ls()\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"path_train_sample = path/'train_sample/'\n",
	"# Load the annotation file; `images` and `lbl_bbox` are paired element-wise below.\n",
	"images, lbl_bbox = get_annotations(path/'annotations/train_sample.json')\n",
	"img2bbox = dict(zip(images, lbl_bbox))\n",
	"\n",
	"classes = [\"chair\", \"couch\", \"tv\", \"remote\", \"book\", \"vase\"]\n",
	"\n",
	"\n",
	"def get_lrg(b):\n",
	"    \"\"\"Return the box in `b` with the largest area.\n",
	"\n",
	"    The area of a box is the product of (last two entries - first two entries).\n",
	"    When several boxes share the largest area, the earliest one in `b` is returned.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if `b` is empty.\n",
	"    \"\"\"\n",
	"    if not b:\n",
	"        raise ValueError('get_lrg() needs at least one bounding box')\n",
	"    # np.prod replaces np.product, a deprecated alias that NumPy 2.0 removed.\n",
	"    return max(b, key=lambda x: np.prod(np.array(x[-2:]) - np.array(x[:2])))\n",
	"\n",
	"\n",
	"# Map every annotated image name to its single largest box.\n",
	"trn_lrg_anno = {a: get_lrg(img2bbox[a][0]) for a in images}\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Data"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"fnames = get_image_files(path_train_sample)\n",
	"# Use fastai's open_image to load the picture; the built-in open() would only\n",
	"# return a text-mode file handle that is never closed.\n",
	"img = open_image(fnames[0])\n",
	"# Keep the preview as the last expression so the notebook actually displays it.\n",
	"fnames[:3]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_y_func(o):\n",
	"    \"\"\"Build the regression target for image file `o`.\n",
	"\n",
	"    Looks up the box stored under `o.name` in `trn_lrg_anno` and returns its\n",
	"    first four values as a float tensor of two points: [[v0, v1], [v2, v3]].\n",
	"    \"\"\"\n",
	"    box = trn_lrg_anno[o.name]\n",
	"    first_corner = [box[0], box[1]]\n",
	"    second_corner = [box[2], box[3]]\n",
	"    return tensor([first_corner, second_corner]).float()\n",
	"\n",
	"# Example: get_y_func(path_train_sample/\"000000066154.jpg\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Datasets"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"21838"
	]
	},
	"execution_count": 29,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"size = 224\n",
	"bs=16\n",
	"len(path_train_sample.ls())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 37,
	"metadata": {},
	"outputs": [],
	"source": [
	"np.random.seed(2)\n",
	"data = (PointsItemList.from_folder(path_train_sample)\n",
	" .random_split_by_pct(0.95)\n",
	" .label_from_func(get_y_func)\n",
	" .transform(get_transforms(max_zoom=0., max_rotate=3.),tfm_y=True, resize_method=ResizeMethod.SQUISH, size=size, remove_out=False)\n",
	" )\n",
	"\n",
	"data.lists[1].y = data.lists[1].y[0:1000]\n",
	"data.lists[1].x = data.lists[1].x[0:1000]\n",
	"data = data.databunch(bs=64).normalize(imagenet_stats)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"data.show_batch(2, figsize=(10,7))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 41,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Custom head: flatten the backbone output and regress 4 values (two points).\n",
	"head_reg4 = nn.Sequential(Flatten(), nn.Linear(25088, 4))\n",
	"lmse = MSELossFlat()\n",
	"l1loss = nn.L1Loss()\n",
	"\n",
	"\n",
	"def accuracy1(input, targs):\n",
	"    \"\"\"Metric: square root of the flat MSE after mapping both tensors through (v + 1) * 112.\n",
	"\n",
	"    NOTE(review): 112 is presumably size / 2 for the 224-px images, i.e. a\n",
	"    conversion from [-1, 1] coordinates to pixels -- confirm.\n",
	"    \"\"\"\n",
	"    scaled_pred = (input.view(-1) + 1) * 112\n",
	"    scaled_targ = (targs.view(-1) + 1) * 112\n",
	"    return lmse(scaled_pred, scaled_targ).sqrt()\n",
	"\n",
	"\n",
	"def loss_func(input, targs):\n",
	"    \"\"\"Training loss: L1 distance between the flattened predictions and targets.\"\"\"\n",
	"    flat_pred, flat_targ = input.view(-1), targs.view(-1)\n",
	"    return l1loss(flat_pred, flat_targ)\n",
	"\n",
	"\n",
	"learn = create_cnn(data, models.resnet34, custom_head=head_reg4, metrics=accuracy1, loss_func=loss_func)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"lr_find(learn)\n",
	"learn.recorder.plot()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <div>\n",
	" <style>\n",
	" /* Turns off some styling */\n",
	" progress {\n",
	" /* gets rid of default border in Firefox and Opera. */\n",
	" border: none;\n",
	" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
	" background-size: auto;\n",
	" }\n",
	" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
	" background: #F44336;\n",
	" }\n",
	" </style>\n",
	" <progress value='2' class='' max='5', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
	" 40.00% [2/5 00:47<01:11]\n",
	" </div>\n",
	" \n",
	"<table style='width:300px; margin-bottom:10px'>\n",
	" <tr>\n",
	" <th>epoch</th>\n",
	" <th>train_loss</th>\n",
	" <th>valid_loss</th>\n",
	" <th>accuracy1</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <th>5.595885</th>\n",
	" <th>14.827637</th>\n",
	" <th>2016.135132</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <th>9.422756</th>\n",
	" <th>1.568463</th>\n",
	" <th>217.355057</th>\n",
	" </tr>\n",
	"</table>\n",
	"\n",
	"\n",
	" <div>\n",
	" <style>\n",
	" /* Turns off some styling */\n",
	" progress {\n",
	" /* gets rid of default border in Firefox and Opera. */\n",
	" border: none;\n",
	" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
	" background-size: auto;\n",
	" }\n",
	" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
	" background: #F44336;\n",
	" }\n",
	" </style>\n",
	" <progress value='8' class='' max='16', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
	" 50.00% [8/16 00:06<00:06]\n",
	" </div>\n",
	" "
	],
	"text/plain": [
	"<IPython.core.display.HTML object>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"lr=1e-2\n",
	"learn.fit_one_cycle(5, lr)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 179,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.save('stage-1')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 180,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.load('stage-1')\n",
	"lr=5e-3\n",
	"learn.freeze_to(-2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 181,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"Total time: 02:04 <p><table style='width:300px; margin-bottom:10px'>\n",
	" <tr>\n",
	" <th>epoch</th>\n",
	" <th>train_loss</th>\n",
	" <th>valid_loss</th>\n",
	" <th>accuracy1</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <th>0.386974</th>\n",
	" <th>0.439564</th>\n",
	" <th>74.194801</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <th>0.389389</th>\n",
	" <th>0.996556</th>\n",
	" <th>111.589348</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <th>0.390334</th>\n",
	" <th>1.768569</th>\n",
	" <th>146.334457</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <th>0.384820</th>\n",
	" <th>0.308937</th>\n",
	" <th>62.223328</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <th>0.372513</th>\n",
	" <th>0.309802</th>\n",
	" <th>62.309986</th>\n",
	" </tr>\n",
	"</table>\n"
	],
	"text/plain": [
	"<IPython.core.display.HTML object>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"learn.fit_one_cycle(5, slice(lr/10, lr/1))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 139,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.save(\"stage-1a\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 64,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"learn.load('mseloss-adamopt-l1crit');"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": false
	},
	"outputs": [],
	"source": [
	"learn.show_results(rows=10, figsize=(10,40))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 107,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.unfreeze()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 108,
	"metadata": {},
	"outputs": [],
	"source": [
	"lrs = slice(lr/400,lr/4)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.fit_one_cycle(12, lrs)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 98,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.save('stage-2');"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Go big - nothing below here is currently in use due to the bad predictions above"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"You may have to restart your kernel and come back to this stage if you run out of memory, and may also need to decrease `bs`."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 136,
	"metadata": {},
	"outputs": [],
	"source": [
	"data = (PointsItemList.from_folder(path_train_sample)\n",
	" .random_split_by_pct(0.2)\n",
	" .label_from_func(get_y_func)\n",
	" .transform(get_transforms(), tfm_y=True, size=256, remove_out=False)\n",
	" .databunch(bs=8)\n",
	" .normalize(imagenet_stats))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 142,
	"metadata": {},
	"outputs": [],
	"source": [
	"# `wd` is not assigned anywhere else in this notebook, so this cell raised\n",
	"# NameError on a fresh kernel. 1e-2 is assumed to be the intended weight decay\n",
	"# (it should match fastai's default) -- adjust if another value was meant.\n",
	"wd = 1e-2\n",
	"learn = create_cnn(data, models.resnet50, wd=wd)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 143,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.load('stage-2');"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"lr_find(learn)\n",
	"learn.recorder.plot()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 145,
	"metadata": {},
	"outputs": [],
	"source": [
	"lr=1e-4"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 146,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"Total time: 00:45 <p><table style='width:300px; margin-bottom:10px'>\n",
	" <tr>\n",
	" <th>epoch</th>\n",
	" <th>train_loss</th>\n",
	" <th>valid_loss</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <th>0.609223</th>\n",
	" <th>0.234268</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <th>0.612361</th>\n",
	" <th>0.225444</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <th>0.626123</th>\n",
	" <th>0.225538</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <th>0.634189</th>\n",
	" <th>0.223056</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <th>0.626173</th>\n",
	" <th>0.232538</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <th>0.614644</th>\n",
	" <th>0.236454</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <th>0.616741</th>\n",
	" <th>0.236577</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <th>0.617492</th>\n",
	" <th>0.242800</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <th>0.611753</th>\n",
	" <th>0.239526</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <th>0.599041</th>\n",
	" <th>0.239952</th>\n",
	" </tr>\n",
	"</table>\n"
	],
	"text/plain": [
	"<IPython.core.display.HTML object>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"learn.fit_one_cycle(10, slice(lr), pct_start=0.8)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 147,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.save('stage-1-big')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 148,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.load('stage-1-big');"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 149,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.unfreeze()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 150,
	"metadata": {},
	"outputs": [],
	"source": [
	"lrs = slice(1e-6,lr/10)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 151,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"Total time: 01:02 <p><table style='width:300px; margin-bottom:10px'>\n",
	" <tr>\n",
	" <th>epoch</th>\n",
	" <th>train_loss</th>\n",
	" <th>valid_loss</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <th>0.531507</th>\n",
	" <th>0.236299</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <th>0.555349</th>\n",
	" <th>0.236082</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <th>0.548737</th>\n",
	" <th>0.235379</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <th>0.574503</th>\n",
	" <th>0.239024</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <th>0.568524</th>\n",
	" <th>0.237507</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <th>0.570499</th>\n",
	" <th>0.234831</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <th>0.563644</th>\n",
	" <th>0.239454</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <th>0.571189</th>\n",
	" <th>0.238208</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <th>0.566935</th>\n",
	" <th>0.240294</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <th>0.588694</th>\n",
	" <th>0.239558</th>\n",
	" </tr>\n",
	"</table>\n"
	],
	"text/plain": [
	"<IPython.core.display.HTML object>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"learn.fit_one_cycle(10, lrs)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 152,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.save('stage-2-big')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 153,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.load('stage-2-big');"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"learn.show_results(rows=3, figsize=(10,10))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}