mani3/generate-dataset-from-the300w-lp-public.ipynb

## generate-dataset-from-the300w-lp-public.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create head pose estimation dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import multiprocessing\n",
    "import absl\n",
    "import numpy as np\n",
    "\n",
    "from pathlib import Path\n",
    "from PIL import Image\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow.python.ops import control_flow_ops\n",
    "import tensorflow_datasets as tfds\n",
    "\n",
    "logger = absl.logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pad_square(image, target_h, target_w):\n",
    "  h, w = image.shape[0:2]\n",
    "  y = np.abs(target_h - h) // 2\n",
    "  x = np.abs(target_w - w) // 2\n",
    "  new_image = np.pad(image, ((y, target_h - h - y), (x, target_w - w - x), (0, 0)))\n",
    "  return new_image\n",
    "\n",
    "def resize_and_pad_image(image, size):\n",
    "  h, w = image.shape[0:2]\n",
    "  max_side = max(h, w)\n",
    "  new_h = int(size / max_side * h)\n",
    "  new_w = int(size / max_side * w)\n",
    "  im = Image.fromarray(image)\n",
    "  im = im.resize((new_w, new_h), Image.LANCZOS)\n",
    "  im = pad_square(np.array(im), size, size)\n",
    "  return im\n",
    "\n",
    "def resize_func(image, bbox, pose):\n",
    "  image = Image.fromarray(image.numpy()).convert('RGB')\n",
    "  bbox = bbox.numpy()\n",
    "  pose = pose.numpy()\n",
    "\n",
    "  # Crop\n",
    "  w, h = image.size\n",
    "  ymin, xmin, ymax, xmax = bbox\n",
    "  box_w = abs(xmax - xmin)\n",
    "  box_h = abs(ymax - ymin)\n",
    "\n",
    "  # 0.1 ~ 0.5\n",
    "  random_scales = np.array([0.5])\n",
    "  xmin = max(0, xmin - box_w * np.random.choice(random_scales))\n",
    "  xmax = min(w, xmax + box_w * np.random.choice(random_scales))\n",
    "  ymin = max(0, ymin - box_h * np.random.choice(random_scales))\n",
    "  ymax = min(h, ymax + box_h * np.random.choice(random_scales))\n",
    "  image = image.crop([int(xmin), int(ymin), int(xmax), int(ymax)])\n",
    "  \n",
    "  image = np.array(image, dtype=np.uint8)\n",
    "  image = resize_and_pad_image(image, 128)\n",
    "  return image, bbox, pose\n",
    "  \n",
    "def read_example(example):\n",
    "  image = example['image']\n",
    "  landmarks_2d = example['landmarks_2d']\n",
    "  pose = example['pose_params']\n",
    "\n",
    "  x = tf.expand_dims(landmarks_2d[:, 0], 0)\n",
    "  y = tf.expand_dims(landmarks_2d[:, 1], 0)\n",
    "  xmin, xmax = tf.math.reduce_min(x), tf.math.reduce_max(x)\n",
    "  ymin, ymax = tf.math.reduce_min(y), tf.math.reduce_max(y)\n",
    "  bbox = tf.stack([ymin, xmin, ymax, xmax]) * 450.0\n",
    "\n",
    "  image, bbox, pose = tf.py_function(\n",
    "    resize_func, [image, bbox, pose], (tf.uint8, tf.int32, tf.float32))\n",
    "  pose = pose[:3] * 180 / np.pi\n",
    "  return image, pose"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = tfds.load('the300w_lp', split='train')\n",
    "dataset = dataset.map(read_example)\n",
    "dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data, label = [], []\n",
    "for e in dataset.take(20000):\n",
    "  data.append(e[0].numpy())\n",
    "  label.append(e[1].numpy())\n",
    "np.save('data_20000.npy', np.array(data))\n",
    "np.save('label_20000.npy', np.array(label))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20000, 128, 128, 3)\n",
      "(20000, 3)\n"
     ]
    }
   ],
   "source": [
    "with open('data_20000.npy', 'rb') as data, open('label_20000.npy', 'rb') as label:\n",
    "  data = np.load(data)\n",
    "  print(data.shape)\n",
    "  label = np.load(label)\n",
    "  print(label.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Create head pose estimation dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import multiprocessing\n",
	"import absl\n",
	"import numpy as np\n",
	"\n",
	"from pathlib import Path\n",
	"from PIL import Image\n",
	"\n",
	"import tensorflow as tf\n",
	"from tensorflow.python.ops import control_flow_ops\n",
	"import tensorflow_datasets as tfds\n",
	"\n",
	"logger = absl.logging"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"def pad_square(image, target_h, target_w):\n",
	" h, w = image.shape[0:2]\n",
	" y = np.abs(target_h - h) // 2\n",
	" x = np.abs(target_w - w) // 2\n",
	" new_image = np.pad(image, ((y, target_h - h - y), (x, target_w - w - x), (0, 0)))\n",
	" return new_image\n",
	"\n",
	"def resize_and_pad_image(image, size):\n",
	" h, w = image.shape[0:2]\n",
	" max_side = max(h, w)\n",
	" new_h = int(size / max_side * h)\n",
	" new_w = int(size / max_side * w)\n",
	" im = Image.fromarray(image)\n",
	" im = im.resize((new_w, new_h), Image.LANCZOS)\n",
	" im = pad_square(np.array(im), size, size)\n",
	" return im\n",
	"\n",
	"def resize_func(image, bbox, pose):\n",
	" image = Image.fromarray(image.numpy()).convert('RGB')\n",
	" bbox = bbox.numpy()\n",
	" pose = pose.numpy()\n",
	"\n",
	" # Crop\n",
	" w, h = image.size\n",
	" ymin, xmin, ymax, xmax = bbox\n",
	" box_w = abs(xmax - xmin)\n",
	" box_h = abs(ymax - ymin)\n",
	"\n",
	" # 0.1 ~ 0.5\n",
	" random_scales = np.array([0.5])\n",
	" xmin = max(0, xmin - box_w * np.random.choice(random_scales))\n",
	" xmax = min(w, xmax + box_w * np.random.choice(random_scales))\n",
	" ymin = max(0, ymin - box_h * np.random.choice(random_scales))\n",
	" ymax = min(h, ymax + box_h * np.random.choice(random_scales))\n",
	" image = image.crop([int(xmin), int(ymin), int(xmax), int(ymax)])\n",
	" \n",
	" image = np.array(image, dtype=np.uint8)\n",
	" image = resize_and_pad_image(image, 128)\n",
	" return image, bbox, pose\n",
	" \n",
	"def read_example(example):\n",
	" image = example['image']\n",
	" landmarks_2d = example['landmarks_2d']\n",
	" pose = example['pose_params']\n",
	"\n",
	" x = tf.expand_dims(landmarks_2d[:, 0], 0)\n",
	" y = tf.expand_dims(landmarks_2d[:, 1], 0)\n",
	" xmin, xmax = tf.math.reduce_min(x), tf.math.reduce_max(x)\n",
	" ymin, ymax = tf.math.reduce_min(y), tf.math.reduce_max(y)\n",
	" bbox = tf.stack([ymin, xmin, ymax, xmax]) * 450.0\n",
	"\n",
	" image, bbox, pose = tf.py_function(\n",
	" resize_func, [image, bbox, pose], (tf.uint8, tf.int32, tf.float32))\n",
	" pose = pose[:3] * 180 / np.pi\n",
	" return image, pose"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"dataset = tfds.load('the300w_lp', split='train')\n",
	"dataset = dataset.map(read_example)\n",
	"dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"data, label = [], []\n",
	"for e in dataset.take(20000):\n",
	" data.append(e[0].numpy())\n",
	" label.append(e[1].numpy())\n",
	"np.save('data_20000.npy', np.array(data))\n",
	"np.save('label_20000.npy', np.array(label))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(20000, 128, 128, 3)\n",
	"(20000, 3)\n"
	]
	}
	],
	"source": [
	"with open('data_20000.npy', 'rb') as data, open('label_20000.npy', 'rb') as label:\n",
	" data = np.load(data)\n",
	" print(data.shape)\n",
	" label = np.load(label)\n",
	" print(label.shape)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}