Last active
March 8, 2019 18:10
-
-
Save ThomasDelteil/63a37d87bb14c7b98f0b4cd9a4167d32 to your computer and use it in GitHub Desktop.
GRT_Amazon_08
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# GluonCV overview\n", | |
"https://gluon-cv.mxnet.io/index.html\n", | |
"\n", | |
"- Image Classification\n", | |
"- Object detection\n", | |
"- Instance Segmentation\n", | |
"- Pose Estimation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import warnings\n", | |
"warnings.filterwarnings('ignore')\n", | |
"\n", | |
"import mxnet as mx\n", | |
"import numpy as np\n", | |
"import gluoncv as gcv\n", | |
"import matplotlib.image as mpimg\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"from mxnet import nd, image\n", | |
"from mxnet.gluon.data.vision import transforms\n", | |
"from mxnet.test_utils import download\n", | |
"from gluoncv.model_zoo import get_model\n", | |
"from gluoncv.utils import viz, download" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"plt.rcParams['figure.figsize'] = (15, 9)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"# Image Classification" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"With one line of code, we can load a pre-trained **ResNet50_v1b** model for classification." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"ctx = mx.cpu(0)\n", | |
"net = gcv.model_zoo.resnet50_v1b(pretrained=True, ctx=ctx)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"With one more line of code, we can get our prediction." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!mkdir support\n", | |
"download('https://d1u4oo4rb13yy8.cloudfront.net/article/sgkvwimkyw-1532534963.jpg', 'support/goal.jpg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"x, img = gcv.data.transforms.presets.ssd.load_test('support/goal.jpg', short=500)\n", | |
"viz.plot_image(img)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%%time\n", | |
"pred = net(x.as_in_context(ctx))\n", | |
"nd.waitall()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"Let's review the top 5 prediction results." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"classes_fname = download('https://raw.githubusercontent.com/hetong007/gluon-cv/master/scripts/classification/imagenet/imagenet_labels.txt',\n", | |
" 'support/imagenet_labels.txt')\n", | |
"with open('support/imagenet_labels.txt', 'r') as f:\n", | |
" class_names = [l.strip('\\n') for l in f.readlines()]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"topK = 5\n", | |
"ind = pred.topk(k=topK).astype('int')[0]\n", | |
"for i in range(topK):\n", | |
" print('[%s], with probability %.1f%%'%\n", | |
" (class_names[ind[i].asscalar()], nd.softmax(pred)[0][ind[i]].asscalar()*100))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"# Object Detection" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"Image classification models are trained to recognize the main object in the scene. What if we have multiple subjects?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"We can instead predict with an object detection model **SSD**.\n", | |
"![](https://cdn-images-1.medium.com/max/2000/1*pPxrkm4Urz04Ez65mwWE9Q.png)\n", | |
"![](https://i.stack.imgur.com/Z3cIS.png)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"ctx = mx.cpu(0)\n", | |
"net = get_model('ssd_512_resnet50_v1_coco', pretrained=True, ctx=ctx)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"Now we predict. Notice we have three output variables." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true, | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%%time\n", | |
"class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n", | |
"nd.waitall()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"viz.plot_bbox(img, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes, thresh=0.8)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"Object detection can also be used to count people in a picture." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"download('https://hadinur1969.files.wordpress.com/2012/09/solvay_conference_1927_crop.jpg', 'support/scientists.jpg')\n", | |
"x, img = gcv.data.transforms.presets.ssd.load_test('support/scientists.jpg', short=800)\n", | |
"viz.plot_image(img)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"class_IDs, scores, bounding_boxes = net(x.as_in_context(ctx))\n", | |
"person_ind = [i for i, cls in enumerate(net.classes) if cls == 'person'][0]\n", | |
"ind = np.nonzero(class_IDs[0].asnumpy() == person_ind)[0]\n", | |
"\n", | |
"total = 0\n", | |
"threshold = 0.4\n", | |
"for class_id, score in zip(class_IDs[0].asnumpy(), scores[0].asnumpy()):\n", | |
" if class_id[0] == person_ind and score[0] > threshold: \n", | |
" total += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"new_class_IDs = class_IDs[0][ind]\n", | |
"new_scores = scores[0][ind]\n", | |
"new_bounding_boxes = bounding_boxes[0][ind]\n", | |
"\n", | |
"viz.plot_bbox(img, new_bounding_boxes, new_scores, new_class_IDs, class_names=net.classes, thresh=threshold)\n", | |
"\n", | |
"print('There are %d people in this photo.\\nActually, it should be 29.'%total)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"Another scenario, on a hot topic: automated driving." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"download('https://images.pexels.com/photos/378570/pexels-photo-378570.jpeg?dl&fit=crop&crop=entropy&w=1280&h=818', 'support/rue.jpg', overwrite=True)\n", | |
"x, img = gcv.data.transforms.presets.ssd.load_test('support/rue.jpg', short=512)\n", | |
"viz.plot_image(img)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%%time\n", | |
"class_IDs, scores, bounding_boxs = net(x.as_in_context(ctx))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"viz.plot_bbox(img, bounding_boxs[0], scores[0], class_IDs[0], class_names=net.classes)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"source": [ | |
"# Semantic Segmentation" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"slideshow": { | |
"slide_type": "notes" | |
} | |
}, | |
"source": [ | |
"Semantic segmentation takes the concept further, by predicting the class for every pixel." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"ctx = mx.cpu(0)\n", | |
"net = get_model('fcn_resnet50_ade', pretrained=True, ctx=ctx)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%%time\n", | |
"output = net.demo(x.as_in_context(ctx))\n", | |
"pred = output.argmax(1).asnumpy().squeeze()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"mask = viz.get_color_pallete(pred, 'ade20k')\n", | |
"mask = np.array(mask.convert('RGB'), dtype=np.int)\n", | |
"\n", | |
"combined = (mask+img)/2\n", | |
"\n", | |
"plt.imshow(combined.astype(np.uint8))\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"download('https://user-images.githubusercontent.com/3716307/53995326-8f76dc80-40e9-11e9-98a0-102562fbf884.png', 'support/rue3.png', overwrite=True)\n", | |
"x, img = gcv.data.transforms.presets.ssd.load_test('support/rue3.png', short=500)\n", | |
"viz.plot_image(img)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "slide" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"%%time\n", | |
"output = net.demo(x.as_in_context(ctx))\n", | |
"pred = output.argmax(1).asnumpy().squeeze()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"slideshow": { | |
"slide_type": "fragment" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"mask = viz.get_color_pallete(pred, 'ade20k')\n", | |
"mask = np.array(mask.convert('RGB'), dtype=np.int)\n", | |
"\n", | |
"combined = (mask+img)/2\n", | |
"plt.imshow(combined.astype(np.uint8))\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ctx = mx.cpu()\n", | |
"net = get_model('mask_rcnn_resnet50_v1b_coco', pretrained=True, ctx=ctx)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x, orig_img = gcv.data.transforms.presets.rcnn.load_test('support/rue3.png', short=600)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ids, scores, bboxes, masks = [xx[0].asnumpy() for xx in net(x.as_in_context(ctx))]\n", | |
"\n", | |
"# paint segmentation mask on images directly\n", | |
"width, height = orig_img.shape[1], orig_img.shape[0]\n", | |
"masks = viz.expand_mask(masks, bboxes, (width, height), scores)\n", | |
"orig_img = viz.plot_mask(orig_img, masks)\n", | |
"\n", | |
"# identical to Faster RCNN object detection\n", | |
"fig = plt.figure(figsize=(15, 15))\n", | |
"ax = fig.add_subplot(1, 1, 1)\n", | |
"ax = viz.plot_bbox(orig_img, bboxes, scores, ids,\n", | |
" class_names=net.classes, ax=ax)\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pose Estimation\n", | |
"\n", | |
"(Available in gluon-cv 0.4.0 released next week)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"For pose estimation, there are two approaches: bottom-up and top-down. \n", | |
"- Top-down approaches use a first pass to detect the humans in a picture, and then run a pose-estimation algorithm on the crops. \n", | |
"- Bottom-up approaches use heuristics or neural-network approaches to create unique IDs from the detected keypoints." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from matplotlib import pyplot as plt\n", | |
"from gluoncv import model_zoo, data, utils\n", | |
"from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"In its latest release, GluonCV introduced an implementation of the [Simple Baseline for Pose Estimation](https://arxiv.org/abs/1804.06208) paper." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)\n", | |
"pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True)\n", | |
"\n", | |
"# Note that we can reset the classes of the detector to only include\n", | |
"# human, so that the NMS process is faster.\n", | |
"\n", | |
"detector.reset_class([\"person\"], reuse_weights=['person'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x, img = data.transforms.presets.ssd.load_test('support/scientists.jpg', short=512)\n", | |
"print('Shape of pre-processed image:', x.shape)\n", | |
"\n", | |
"class_IDs, scores, bounding_boxs = detector(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"predicted_heatmap = pose_net(pose_input)\n", | |
"pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ax = utils.viz.plot_keypoints(img, pred_coords, confidence,\n", | |
" class_IDs, bounding_boxs, scores,\n", | |
" box_thresh=0.3, keypoint_thresh=0.1)\n", | |
"plt.show()" | |
] | |
} | |
], | |
"metadata": { | |
"celltoolbar": "Slideshow", | |
"kernelspec": { | |
"display_name": "conda_mxnet_p36", | |
"language": "python", | |
"name": "conda_mxnet_p36" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Prepares the mxnet_p36 conda environment with pre-release MXNet / GluonNLP / GluonCV | |
# and fetches the workshop material into /home/ec2-user/SageMaker. | |
 | |
# Register conda's shell hook in ~/.bashrc only once, so repeated runs do not keep appending it. | |
CONDA_INIT=". /home/ec2-user/anaconda3/etc/profile.d/conda.sh" | |
grep -qxF "$CONDA_INIT" ~/.bashrc 2>/dev/null || echo "$CONDA_INIT" >> ~/.bashrc | |
source ~/.bashrc | |
source activate mxnet_p36 | |
sudo chown -R ec2-user /tmp | |
 | |
# Replace the installed MXNet build with the latest pre-release and upgrade the toolkits. | |
pip uninstall mxnet-cu90mkl -y | |
pip install mxnet-cu90mkl --user --pre --upgrade | |
pip install gluonnlp --user --pre --upgrade | |
pip install gluoncv --user --pre --upgrade | |
 | |
# Stop here if the target directory is missing rather than cloning into the wrong place. | |
cd /home/ec2-user/SageMaker || exit 1 | |
# Clone only when the checkout does not exist yet; git clone fails on an existing directory. | |
[ -d GRT_gluon_toolkits ] || git clone https://gist.github.com/ThomasDelteil/63a37d87bb14c7b98f0b4cd9a4167d32 GRT_gluon_toolkits | |
wget http://gluon-nlp.mxnet.io/_downloads/sentence_embedding.zip | |
unzip sentence_embedding.zip -d GRT_gluon_toolkits | |
rm sentence_embedding.zip | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment