ThomasDelteil/1_gluon_cv.ipynb

## 1_gluon_cv.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GluonCV overview\n",
    "https://gluon-cv.mxnet.io/index.html\n",
    "\n",
    "- Image Classification\n",
    "- Object detection\n",
    "- Instance Segmentation\n",
    "- Pose Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "import mxnet as mx\n",
    "import numpy as np\n",
    "import gluoncv as gcv\n",
    "import matplotlib.image as mpimg\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from mxnet import nd, image\n",
    "from mxnet.gluon.data.vision import transforms\n",
    "from mxnet.test_utils import download\n",
    "from gluoncv.model_zoo import get_model\n",
    "from gluoncv.utils import viz, download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "outputs": [],
   "source": [
    "plt.rcParams['figure.figsize'] = (15, 9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Image Classification"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "With a single line of code, we can load a pre-trained **ResNet50_v1b** model for classification."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "ctx = mx.cpu(0)\n",
    "net = gcv.model_zoo.resnet50_v1b(pretrained=True, ctx=ctx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "With one more line of code, we can get our prediction."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -p keeps the cell re-runnable: no error when support/ already exists\n",
    "!mkdir -p support\n",
    "download('https://d1u4oo4rb13yy8.cloudfront.net/article/sgkvwimkyw-1532534963.jpg', 'support/goal.jpg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "x, img = gcv.data.transforms.presets.ssd.load_test('support/goal.jpg', short=500)\n",
    "viz.plot_image(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pred = net(x.as_in_context(ctx))\n",
    "nd.waitall()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "Let's review the top 5 prediction results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "outputs": [],
   "source": [
    "# Fetch the ImageNet label list (one class name per line)\n",
    "classes_fname = download('https://raw.githubusercontent.com/hetong007/gluon-cv/master/scripts/classification/imagenet/imagenet_labels.txt',\n",
    "                         'support/imagenet_labels.txt')\n",
    "# Open the path returned by download() rather than repeating the literal\n",
    "with open(classes_fname, 'r') as f:\n",
    "    class_names = [line.strip('\\n') for line in f]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "topK = 5\n",
    "# Apply softmax to the network output once instead of once per printed row\n",
    "probs = nd.softmax(pred)[0]\n",
    "ind = pred.topk(k=topK).astype('int')[0]\n",
    "for i in range(topK):\n",
    "    print('[%s], with probability %.1f%%'%\n",
    "         (class_names[ind[i].asscalar()], probs[ind[i]].asscalar()*100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Object Detection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "Image classification models are trained to recognize the main object in the scene. What if there are multiple subjects?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "We can instead predict with an object detection model **SSD**.\n",
    "![](https://cdn-images-1.medium.com/max/2000/1*pPxrkm4Urz04Ez65mwWE9Q.png)\n",
    "![](https://i.stack.imgur.com/Z3cIS.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "ctx = mx.cpu(0)\n",
    "net = get_model('ssd_512_resnet50_v1_coco', pretrained=True, ctx=ctx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "Now we predict. Notice we have three output variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true,
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n",
    "nd.waitall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "viz.plot_bbox(img, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes, thresh=0.8)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "Object detection can also be used to count the people in a picture."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "download('https://hadinur1969.files.wordpress.com/2012/09/solvay_conference_1927_crop.jpg', 'support/scientists.jpg')\n",
    "x, img = gcv.data.transforms.presets.ssd.load_test('support/scientists.jpg', short=800)\n",
    "viz.plot_image(img)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n",
    "\n",
    "# Copy the detections to NumPy once; column 0 of each row holds the class id / score\n",
    "ids_np = class_IDs[0].asnumpy()[:, 0]\n",
    "scores_np = scores[0].asnumpy()[:, 0]\n",
    "\n",
    "person_ind = net.classes.index('person')\n",
    "# Row indices of every 'person' detection, whatever its score\n",
    "ind = np.nonzero(ids_np == person_ind)[0]\n",
    "\n",
    "threshold = 0.4\n",
    "# Count only the person detections whose score clears the threshold\n",
    "total = int(np.count_nonzero((ids_np == person_ind) & (scores_np > threshold)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "new_class_IDs = class_IDs[0][ind]\n",
    "new_scores = scores[0][ind]\n",
    "new_bounding_boxes = bounding_boxes[0][ind]\n",
    "\n",
    "viz.plot_bbox(img, new_bounding_boxes, new_scores, new_class_IDs, class_names=net.classes, thresh=threshold)\n",
    "\n",
    "print('There are %d people in this photo.\\nActually, it should be 29.'%total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Another scenario, on a hot topic: automated driving."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "download('https://images.pexels.com/photos/378570/pexels-photo-378570.jpeg?dl&fit=crop&crop=entropy&w=1280&h=818', 'support/rue.jpg', overwrite=True)\n",
    "x, img = gcv.data.transforms.presets.ssd.load_test('support/rue.jpg', short=512)\n",
    "viz.plot_image(img)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "class_IDs, scores, bounding_boxs = net(x.as_in_context(ctx))\n",
    "# Wait for the computation to finish so %%time covers the forward pass, like the other timed cells\n",
    "nd.waitall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "viz.plot_bbox(img, bounding_boxs[0], scores[0], class_IDs[0], class_names=net.classes)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Semantic Segmentation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "notes"
    }
   },
   "source": [
    "Semantic segmentation takes the concept further, by predicting the class for every pixel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "ctx = mx.cpu(0)\n",
    "net = get_model('fcn_resnet50_ade', pretrained=True, ctx=ctx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "output = net.demo(x.as_in_context(ctx))\n",
    "pred = output.argmax(1).asnumpy().squeeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "mask = viz.get_color_pallete(pred, 'ade20k')\n",
    "# np.int was an alias of the builtin int and no longer exists in current NumPy\n",
    "mask = np.array(mask.convert('RGB'), dtype=int)\n",
    "\n",
    "# Blend the colour mask and the photo 50/50\n",
    "combined = (mask+img)/2\n",
    "\n",
    "plt.imshow(combined.astype(np.uint8))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "download('https://user-images.githubusercontent.com/3716307/53995326-8f76dc80-40e9-11e9-98a0-102562fbf884.png', 'support/rue3.png', overwrite=True)\n",
    "x, img = gcv.data.transforms.presets.ssd.load_test('support/rue3.png', short=500)\n",
    "viz.plot_image(img)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "output = net.demo(x.as_in_context(ctx))\n",
    "pred = output.argmax(1).asnumpy().squeeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "mask = viz.get_color_pallete(pred, 'ade20k')\n",
    "# np.int was an alias of the builtin int and no longer exists in current NumPy\n",
    "mask = np.array(mask.convert('RGB'), dtype=int)\n",
    "\n",
    "# Blend the colour mask and the photo 50/50\n",
    "combined = (mask+img)/2\n",
    "plt.imshow(combined.astype(np.uint8))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ctx = mx.cpu()\n",
    "net = get_model('mask_rcnn_resnet50_v1b_coco', pretrained=True, ctx=ctx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x, orig_img = gcv.data.transforms.presets.rcnn.load_test('support/rue3.png', short=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ids, scores, bboxes, masks = [xx[0].asnumpy() for xx in net(x.as_in_context(ctx))]\n",
    "\n",
    "# paint segmentation masks; keep the result under its own name so that\n",
    "# re-running this cell does not paint over an already-painted orig_img\n",
    "width, height = orig_img.shape[1], orig_img.shape[0]\n",
    "masks = viz.expand_mask(masks, bboxes, (width, height), scores)\n",
    "masked_img = viz.plot_mask(orig_img, masks)\n",
    "\n",
    "# identical to Faster RCNN object detection\n",
    "fig = plt.figure(figsize=(15, 15))\n",
    "ax = fig.add_subplot(1, 1, 1)\n",
    "ax = viz.plot_bbox(masked_img, bboxes, scores, ids,\n",
    "                   class_names=net.classes, ax=ax)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pose Estimation\n",
    "\n",
    "(Available in gluon-cv 0.4.0 released next week)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For pose estimation, there are two approaches: bottom-up and top-down.\n",
    "- Top-down approaches use a first pass to detect the humans in a picture, and then run a pose-estimation algorithm on the crops.\n",
    "- Bottom-up approaches use heuristics or neural networks to create unique IDs from the detected keypoints."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "from gluoncv import model_zoo, data, utils\n",
    "from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In its latest release, GluonCV introduced an implementation of the [Simple Baseline for Pose Estimation](https://arxiv.org/abs/1804.06208) paper."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)\n",
    "pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True)\n",
    "\n",
    "# Note that we can reset the classes of the detector to only include\n",
    "# human, so that the NMS process is faster.\n",
    "\n",
    "detector.reset_class([\"person\"], reuse_weights=['person'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x, img = data.transforms.presets.ssd.load_test('support/scientists.jpg', short=512)\n",
    "print('Shape of pre-processed image:', x.shape)\n",
    "\n",
    "class_IDs, scores, bounding_boxs = detector(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_heatmap = pose_net(pose_input)\n",
    "pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = utils.viz.plot_keypoints(img, pred_coords, confidence,\n",
    "                              class_IDs, bounding_boxs, scores,\n",
    "                              box_thresh=0.3, keypoint_thresh=0.1)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "conda_mxnet_p36",
   "language": "python",
   "name": "conda_mxnet_p36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## 2_gluon_nlp_embed_lm.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              2_gluon_nlp_embed_lm.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## get_data.sh

#!/bin/bash
# Prepare the mxnet_p36 conda environment with pre-release MXNet, GluonNLP and
# GluonCV, then clone the gist and unpack the sentence-embedding example into
# /home/ec2-user/SageMaker/GRT_gluon_toolkits.

CONDA_INIT=". /home/ec2-user/anaconda3/etc/profile.d/conda.sh"
# Append the conda hook only once, so repeated runs do not keep growing ~/.bashrc
grep -qxF "$CONDA_INIT" ~/.bashrc 2>/dev/null || echo "$CONDA_INIT" >> ~/.bashrc
source ~/.bashrc
source activate mxnet_p36
sudo chown -R ec2-user /tmp
pip uninstall mxnet-cu90mkl -y
pip install mxnet-cu90mkl --user --pre --upgrade
pip install gluonnlp --user  --pre --upgrade
pip install gluoncv --user  --pre --upgrade

# Stop if the target directory is missing; otherwise the clone, download and
# rm below would run in whatever directory the script was started from
cd /home/ec2-user/SageMaker || exit 1
git clone https://gist.github.com/ThomasDelteil/63a37d87bb14c7b98f0b4cd9a4167d32 GRT_gluon_toolkits
wget http://gluon-nlp.mxnet.io/_downloads/sentence_embedding.zip
unzip sentence_embedding.zip -d GRT_gluon_toolkits
rm sentence_embedding.zip
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# GluonCV overview\n",
	"https://gluon-cv.mxnet.io/index.html\n",
	"\n",
	"- Image Classification\n",
	"- Object detection\n",
	"- Instance Segmentation\n",
	"- Pose Estimation"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"outputs": [],
	"source": [
	"import warnings\n",
	"warnings.filterwarnings('ignore')\n",
	"\n",
	"import mxnet as mx\n",
	"import numpy as np\n",
	"import gluoncv as gcv\n",
	"import matplotlib.image as mpimg\n",
	"import matplotlib.pyplot as plt\n",
	"\n",
	"from mxnet import nd, image\n",
	"from mxnet.gluon.data.vision import transforms\n",
	"from mxnet.test_utils import download\n",
	"from gluoncv.model_zoo import get_model\n",
	"from gluoncv.utils import viz, download"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"outputs": [],
	"source": [
	"plt.rcParams['figure.figsize'] = (15, 9)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"# Image Classification"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"With a single line of code, we can load a pre-trained ResNet50_v1b model for classification."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"ctx = mx.cpu(0)\n",
	"net = gcv.model_zoo.resnet50_v1b(pretrained=True, ctx=ctx)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"With one more line of code, we can get our prediction."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# -p keeps the cell re-runnable: no error when support/ already exists\n",
	"!mkdir -p support\n",
	"download('https://d1u4oo4rb13yy8.cloudfront.net/article/sgkvwimkyw-1532534963.jpg', 'support/goal.jpg')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"x, img = gcv.data.transforms.presets.ssd.load_test('support/goal.jpg', short=500)\n",
	"viz.plot_image(img)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"%%time\n",
	"pred = net(x.as_in_context(ctx))\n",
	"nd.waitall()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"Let's review the top 5 prediction results."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"outputs": [],
	"source": [
	"# Fetch the ImageNet label list (one class name per line)\n",
	"classes_fname = download('https://raw.githubusercontent.com/hetong007/gluon-cv/master/scripts/classification/imagenet/imagenet_labels.txt',\n",
	"                         'support/imagenet_labels.txt')\n",
	"# Open the path returned by download() rather than repeating the literal\n",
	"with open(classes_fname, 'r') as f:\n",
	"    class_names = [line.strip('\\n') for line in f]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"topK = 5\n",
	"# Apply softmax to the network output once instead of once per printed row\n",
	"probs = nd.softmax(pred)[0]\n",
	"ind = pred.topk(k=topK).astype('int')[0]\n",
	"for i in range(topK):\n",
	"    print('[%s], with probability %.1f%%'%\n",
	"         (class_names[ind[i].asscalar()], probs[ind[i]].asscalar()*100))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"# Object Detection"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"Image classification models are trained to recognize the main object in the scene. What if there are multiple subjects?"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"We can instead predict with an object detection model SSD.\n",
	"![](https://cdn-images-1.medium.com/max/2000/1*pPxrkm4Urz04Ez65mwWE9Q.png)\n",
	"![](https://i.stack.imgur.com/Z3cIS.png)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"ctx = mx.cpu(0)\n",
	"net = get_model('ssd_512_resnet50_v1_coco', pretrained=True, ctx=ctx)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"Now we predict. Notice we have three output variables."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true,
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"%%time\n",
	"class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n",
	"nd.waitall()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"viz.plot_bbox(img, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes, thresh=0.8)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"Object detection can also be used to count the people in a picture."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"download('https://hadinur1969.files.wordpress.com/2012/09/solvay_conference_1927_crop.jpg', 'support/scientists.jpg')\n",
	"x, img = gcv.data.transforms.presets.ssd.load_test('support/scientists.jpg', short=800)\n",
	"viz.plot_image(img)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n",
	"\n",
	"# Copy the detections to NumPy once; column 0 of each row holds the class id / score\n",
	"ids_np = class_IDs[0].asnumpy()[:, 0]\n",
	"scores_np = scores[0].asnumpy()[:, 0]\n",
	"\n",
	"person_ind = net.classes.index('person')\n",
	"# Row indices of every 'person' detection, whatever its score\n",
	"ind = np.nonzero(ids_np == person_ind)[0]\n",
	"\n",
	"threshold = 0.4\n",
	"# Count only the person detections whose score clears the threshold\n",
	"total = int(np.count_nonzero((ids_np == person_ind) & (scores_np > threshold)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"new_class_IDs = class_IDs[0][ind]\n",
	"new_scores = scores[0][ind]\n",
	"new_bounding_boxes = bounding_boxes[0][ind]\n",
	"\n",
	"viz.plot_bbox(img, new_bounding_boxes, new_scores, new_class_IDs, class_names=net.classes, thresh=threshold)\n",
	"\n",
	"print('There are %d people in this photo.\\nActually, it should be 29.'%total)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"Another scenario, on a hot topic: automated driving."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"download('https://images.pexels.com/photos/378570/pexels-photo-378570.jpeg?dl&fit=crop&crop=entropy&w=1280&h=818', 'support/rue.jpg', overwrite=True)\n",
	"x, img = gcv.data.transforms.presets.ssd.load_test('support/rue.jpg', short=512)\n",
	"viz.plot_image(img)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"%%time\n",
	"class_IDs, scores, bounding_boxs = net(x.as_in_context(ctx))\n",
	"# Wait for the computation to finish so %%time covers the forward pass, like the other timed cells\n",
	"nd.waitall()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"viz.plot_bbox(img, bounding_boxs[0], scores[0], class_IDs[0], class_names=net.classes)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"source": [
	"# Semantic Segmentation"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"slideshow": {
	"slide_type": "notes"
	}
	},
	"source": [
	"Semantic segmentation takes the concept further, by predicting the class for every pixel."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"ctx = mx.cpu(0)\n",
	"net = get_model('fcn_resnet50_ade', pretrained=True, ctx=ctx)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"%%time\n",
	"output = net.demo(x.as_in_context(ctx))\n",
	"pred = output.argmax(1).asnumpy().squeeze()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"mask = viz.get_color_pallete(pred, 'ade20k')\n",
	"# np.int was an alias of the builtin int and no longer exists in current NumPy\n",
	"mask = np.array(mask.convert('RGB'), dtype=int)\n",
	"\n",
	"# Blend the colour mask and the photo 50/50\n",
	"combined = (mask+img)/2\n",
	"\n",
	"plt.imshow(combined.astype(np.uint8))\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"download('https://user-images.githubusercontent.com/3716307/53995326-8f76dc80-40e9-11e9-98a0-102562fbf884.png', 'support/rue3.png', overwrite=True)\n",
	"x, img = gcv.data.transforms.presets.ssd.load_test('support/rue3.png', short=500)\n",
	"viz.plot_image(img)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "slide"
	}
	},
	"outputs": [],
	"source": [
	"%%time\n",
	"output = net.demo(x.as_in_context(ctx))\n",
	"pred = output.argmax(1).asnumpy().squeeze()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"slideshow": {
	"slide_type": "fragment"
	}
	},
	"outputs": [],
	"source": [
	"mask = viz.get_color_pallete(pred, 'ade20k')\n",
	"# np.int was an alias of the builtin int and no longer exists in current NumPy\n",
	"mask = np.array(mask.convert('RGB'), dtype=int)\n",
	"\n",
	"# Blend the colour mask and the photo 50/50\n",
	"combined = (mask+img)/2\n",
	"plt.imshow(combined.astype(np.uint8))\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"ctx = mx.cpu()\n",
	"net = get_model('mask_rcnn_resnet50_v1b_coco', pretrained=True, ctx=ctx)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"x, orig_img = gcv.data.transforms.presets.rcnn.load_test('support/rue3.png', short=600)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"ids, scores, bboxes, masks = [xx[0].asnumpy() for xx in net(x.as_in_context(ctx))]\n",
	"\n",
	"# paint segmentation masks; keep the result under its own name so that\n",
	"# re-running this cell does not paint over an already-painted orig_img\n",
	"width, height = orig_img.shape[1], orig_img.shape[0]\n",
	"masks = viz.expand_mask(masks, bboxes, (width, height), scores)\n",
	"masked_img = viz.plot_mask(orig_img, masks)\n",
	"\n",
	"# identical to Faster RCNN object detection\n",
	"fig = plt.figure(figsize=(15, 15))\n",
	"ax = fig.add_subplot(1, 1, 1)\n",
	"ax = viz.plot_bbox(masked_img, bboxes, scores, ids,\n",
	"                   class_names=net.classes, ax=ax)\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Pose Estimation\n",
	"\n",
	"(Available in gluon-cv 0.4.0 released next week)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"For pose estimation, there are two approaches: bottom-up and top-down.\n",
	"- Top-down approaches use a first pass to detect the humans in a picture, and then run a pose-estimation algorithm on the crops.\n",
	"- Bottom-up approaches use heuristics or neural networks to create unique IDs from the detected keypoints."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from matplotlib import pyplot as plt\n",
	"from gluoncv import model_zoo, data, utils\n",
	"from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"In its latest release, GluonCV introduced an implementation of the [Simple Baseline for Pose Estimation](https://arxiv.org/abs/1804.06208) paper."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)\n",
	"pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True)\n",
	"\n",
	"# Note that we can reset the classes of the detector to only include\n",
	"# human, so that the NMS process is faster.\n",
	"\n",
	"detector.reset_class([\"person\"], reuse_weights=['person'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"x, img = data.transforms.presets.ssd.load_test('support/scientists.jpg', short=512)\n",
	"print('Shape of pre-processed image:', x.shape)\n",
	"\n",
	"class_IDs, scores, bounding_boxs = detector(x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"predicted_heatmap = pose_net(pose_input)\n",
	"pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"ax = utils.viz.plot_keypoints(img, pred_coords, confidence,\n",
	" class_IDs, bounding_boxs, scores,\n",
	" box_thresh=0.3, keypoint_thresh=0.1)\n",
	"plt.show()"
	]
	}
	],
	"metadata": {
	"celltoolbar": "Slideshow",
	"kernelspec": {
	"display_name": "conda_mxnet_p36",
	"language": "python",
	"name": "conda_mxnet_p36"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}

	#!/bin/bash
	# Prepare the mxnet_p36 conda environment with pre-release MXNet, GluonNLP and
	# GluonCV, then clone the gist and unpack the sentence-embedding example into
	# /home/ec2-user/SageMaker/GRT_gluon_toolkits.

	CONDA_INIT=". /home/ec2-user/anaconda3/etc/profile.d/conda.sh"
	# Append the conda hook only once, so repeated runs do not keep growing ~/.bashrc
	grep -qxF "$CONDA_INIT" ~/.bashrc 2>/dev/null || echo "$CONDA_INIT" >> ~/.bashrc
	source ~/.bashrc
	source activate mxnet_p36
	sudo chown -R ec2-user /tmp
	pip uninstall mxnet-cu90mkl -y
	pip install mxnet-cu90mkl --user --pre --upgrade
	pip install gluonnlp --user --pre --upgrade
	pip install gluoncv --user --pre --upgrade

	# Stop if the target directory is missing; otherwise the clone, download and
	# rm below would run in whatever directory the script was started from
	cd /home/ec2-user/SageMaker || exit 1
	git clone https://gist.github.com/ThomasDelteil/63a37d87bb14c7b98f0b4cd9a4167d32 GRT_gluon_toolkits
	wget http://gluon-nlp.mxnet.io/_downloads/sentence_embedding.zip
	unzip sentence_embedding.zip -d GRT_gluon_toolkits
	rm sentence_embedding.zip