Skip to content

Instantly share code, notes, and snippets.

@cosmincatalin
Last active August 18, 2020 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cosmincatalin/7f6d79236b5c9ce824ee1c19f147d9fe to your computer and use it in GitHub Desktop.
Save cosmincatalin/7f6d79236b5c9ce824ee1c19f147d9fe to your computer and use it in GitHub Desktop.
The SageMaker attached notebook that allows building an MXNet model that counts shapes in an image
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# You can modify these ones\n",
"dataset_size = 12000\n",
"img_size = 128\n",
"shape_size = 8\n",
"max_rows = 14\n",
"max_cols = 14\n",
"target_shape = \"circle\"\n",
"\n",
"# Do not modify these one\n",
"shapes = [\"circle\", \"triangle\", \"square\"]\n",
"img_width = img_height = img_size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from random import randint, seed\n",
"\n",
"# Make sure the required folder structure is in place\n",
"rmtree(\"data/raw\", True)\n",
"os.makedirs(\"data/raw\")\n",
" \n",
"# Make sure data generation is reproducible\n",
"seed(42)\n",
"\n",
"with open(\"data/dataset.csv\", \"w+\") as dataset:\n",
" dataset.write(\"img_path,target\\n\")\n",
" for i in range(1, dataset_size + 1):\n",
" target_number_of_shapes = 0\n",
"\n",
" filename = str(i).zfill(6) + \".shape\"\n",
"\n",
" # Generate Shaper DSL source code\n",
" file_content = \"img_dim:{},shp_dim:{}>>>\".format(img_size, shape_size)\n",
" for x in range(0, randint(1, max_rows)):\n",
" for y in range(0, randint(1, max_cols)):\n",
" shape_idx = randint(0, len(shapes) - 1)\n",
" shape = shapes[shape_idx]\n",
" if shape == target_shape:\n",
" target_number_of_shapes += 1\n",
" file_content = file_content + shape + \",\"\n",
" file_content = file_content[:-1]\n",
" file_content += \"|\"\n",
" file_content = file_content[:-1]\n",
" file_content += \"<<<\"\n",
"\n",
" with open(\"./data/raw/{}\".format(filename), \"w\") as shape_file:\n",
" shape_file.write(file_content)\n",
"\n",
" dataset.write(\"./raw/{}.png,{:d}\\n\".format(filename, target_number_of_shapes))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"\n",
"subprocess.call(\"\"\"\n",
" java -cp bin/shaper-all.jar com.cosminsanda.shaper.compiler.Shaper2Image \\\n",
" --source-dir data/raw\"\"\", shell=True);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandas import read_csv\n",
"import numpy as np\n",
"\n",
"df = read_csv(\"data/dataset.csv\")\n",
"\n",
"train = df.sample(frac=.8333, random_state=42)\n",
"validation = df.loc[~df.index.isin(train.index), :] \\\n",
" .sample(frac=.5, random_state=42)\n",
"test = df.loc[np.logical_not(\n",
" np.logical_xor(\n",
" ~df.index.isin(train.index),\n",
" ~df.index.isin(validation.index))), :]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test.size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import mxnet as mx\n",
"\n",
"def transform(row):\n",
" img = cv2.imread(\"./data/{}\".format(row[\"img_path\"]))\n",
" img = mx.nd.array(img)\n",
" img = img.astype(np.float32)\n",
" img = mx.nd.transpose(img, (2, 0, 1))\n",
" img = img / 255\n",
" label = np.float32(row[\"target\"])\n",
" return img, label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_nd = [transform(row) for _, row in train.iterrows()]\n",
"validation_nd = [transform(row) for _, row in validation.iterrows()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pickle import dump\n",
"\n",
"def save_to_disk(data, type):\n",
" os.makedirs(\"data/pickles/{}\".format(type))\n",
" with open(\"data/pickles/{}/data.p\".format(type), \"wb\") as out:\n",
" dump(data, out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_to_disk(train_nd, \"train\")\n",
"save_to_disk(validation_nd, \"validation\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sagemaker\n",
"\n",
"sagemaker_session = sagemaker.Session()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"inputs = sagemaker_session.upload_data(path=\"data/pickles\", bucket=\"adt-adhoc\", key_prefix=\"cosmin/sagemaker/demo\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from shutil import rmtree, copy2\n",
"\n",
"rmtree(\"./test\", True)\n",
"os.makedirs(\"./test\")\n",
"for _, row in test.iterrows():\n",
" os.makedirs(\"test/{}\".format(row[\"target\"]), exist_ok=True)\n",
" copy2(\"./data/{}\".format(row[\"img_path\"]), \"./test/{}\".format(row[\"target\"]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rmtree(\"data\", True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"estimator = sagemaker.mxnet.MXNet(\"object-counting-sagemaker-script.py\", \n",
" role=sagemaker.get_execution_role(), \n",
" train_instance_count=1, \n",
" train_instance_type=\"ml.p2.xlarge\",\n",
" hyperparameters={\"epochs\": 5},\n",
" py_version=\"py3\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"estimator.fit(inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictor = estimator.deploy(1, \"ml.m4.xlarge\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"\n",
"sagemaker_runtime_client = boto3.client(\"sagemaker-runtime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"import json\n",
"\n",
"for directory in os.listdir(\"./test\"):\n",
" batch = []\n",
" for file in os.listdir(\"./test/{}\".format(directory)):\n",
" with open(\"./test/{}/{}\".format(directory, file), \"rb\") as image_file:\n",
" batch.append(base64.b64encode(image_file.read()).decode(\"utf-8\"))\n",
" binary_json = json.dumps(batch).encode(\"utf-8\")\n",
" response = sagemaker_runtime_client.invoke_endpoint(\n",
" EndpointName=predictor.endpoint,\n",
" Body=binary_json,\n",
" ContentType=\"application/json\",\n",
" Accept=\"application/json\"\n",
" )[\"Body\"].read()\n",
" individual_predictions = json.loads(response, encoding=\"utf-8\")\n",
" total = 0\n",
" detected = 0\n",
" for prediction in individual_predictions:\n",
" total += 1 \n",
" if int(prediction) == int(directory):\n",
" detected += 1\n",
" print(\"\"\"Images with {} circles:\n",
" Total: {}\n",
" Detected: {}\n",
" Accuracy: {:0.2f}\n",
" \"\"\".format(directory, str(total), str(detected), detected/total))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment