Skip to content

Instantly share code, notes, and snippets.

@mani3
Created January 15, 2020 09:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mani3/1ec02066cb11df85cfc694cab9230bc3 to your computer and use it in GitHub Desktop.
Save mani3/1ec02066cb11df85cfc694cab9230bc3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
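{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create a head pose estimation dataset\n",
"\n",
"Crops each `the300w_lp` image around its 2D landmarks, resizes and pads the crop to 128x128, and saves 20,000 images together with their first three pose parameters (scaled by 180/π, i.e. radians to degrees) as `.npy` files."
]
},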
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import multiprocessing\n",
"import absl\n",
"import numpy as np\n",
"\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow.python.ops import control_flow_ops\n",
"import tensorflow_datasets as tfds\n",
"\n",
"logger = absl.logging"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def pad_square(image, target_h, target_w):\n",
" h, w = image.shape[0:2]\n",
" y = np.abs(target_h - h) // 2\n",
" x = np.abs(target_w - w) // 2\n",
" new_image = np.pad(image, ((y, target_h - h - y), (x, target_w - w - x), (0, 0)))\n",
" return new_image\n",
"\n",
"def resize_and_pad_image(image, size):\n",
" h, w = image.shape[0:2]\n",
" max_side = max(h, w)\n",
" new_h = int(size / max_side * h)\n",
" new_w = int(size / max_side * w)\n",
" im = Image.fromarray(image)\n",
" im = im.resize((new_w, new_h), Image.LANCZOS)\n",
" im = pad_square(np.array(im), size, size)\n",
" return im\n",
"\n",
"def resize_func(image, bbox, pose):\n",
" image = Image.fromarray(image.numpy()).convert('RGB')\n",
" bbox = bbox.numpy()\n",
" pose = pose.numpy()\n",
"\n",
" # Crop\n",
" w, h = image.size\n",
" ymin, xmin, ymax, xmax = bbox\n",
" box_w = abs(xmax - xmin)\n",
" box_h = abs(ymax - ymin)\n",
"\n",
" # 0.1 ~ 0.5\n",
" random_scales = np.array([0.5])\n",
" xmin = max(0, xmin - box_w * np.random.choice(random_scales))\n",
" xmax = min(w, xmax + box_w * np.random.choice(random_scales))\n",
" ymin = max(0, ymin - box_h * np.random.choice(random_scales))\n",
" ymax = min(h, ymax + box_h * np.random.choice(random_scales))\n",
" image = image.crop([int(xmin), int(ymin), int(xmax), int(ymax)])\n",
" \n",
" image = np.array(image, dtype=np.uint8)\n",
" image = resize_and_pad_image(image, 128)\n",
" return image, bbox, pose\n",
" \n",
"def read_example(example):\n",
" image = example['image']\n",
" landmarks_2d = example['landmarks_2d']\n",
" pose = example['pose_params']\n",
"\n",
" x = tf.expand_dims(landmarks_2d[:, 0], 0)\n",
" y = tf.expand_dims(landmarks_2d[:, 1], 0)\n",
" xmin, xmax = tf.math.reduce_min(x), tf.math.reduce_max(x)\n",
" ymin, ymax = tf.math.reduce_min(y), tf.math.reduce_max(y)\n",
" bbox = tf.stack([ymin, xmin, ymax, xmax]) * 450.0\n",
"\n",
" image, bbox, pose = tf.py_function(\n",
" resize_func, [image, bbox, pose], (tf.uint8, tf.int32, tf.float32))\n",
" pose = pose[:3] * 180 / np.pi\n",
" return image, pose"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"dataset = tfds.load('the300w_lp', split='train')\n",
"dataset = dataset.map(read_example)\n",
"dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"data, label = [], []\n",
"for e in dataset.take(20000):\n",
" data.append(e[0].numpy())\n",
" label.append(e[1].numpy())\n",
"np.save('data_20000.npy', np.array(data))\n",
"np.save('label_20000.npy', np.array(label))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(20000, 128, 128, 3)\n",
"(20000, 3)\n"
]
}
],
"source": [
"with open('data_20000.npy', 'rb') as data, open('label_20000.npy', 'rb') as label:\n",
" data = np.load(data)\n",
" print(data.shape)\n",
" label = np.load(label)\n",
" print(label.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment