patricksnape/LabelsFromMeanBasel.ipynb

## LabelsFromMeanBasel.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "import matplotlib.pyplot as plt\n",
    "import menpo3d.io as m3dio\n",
    "import menpo.io as mio\n",
    "import numpy as np\n",
    "import mayavi.mlab as mlab\n",
    "import cv2\n",
    "from skimage.color import label2rgb\n",
    "from random import shuffle\n",
    "\n",
    "from menpo.image import Image\n",
    "from menpo3d.unwrap import optimal_cylindrical_unwrap\n",
    "from menpo.shape import PointCloud, TriMesh, TexturedTriMesh\n",
    "from menpo.transform import UniformScale, Translation, Homogeneous, scale_about_centre, Rotation\n",
    "from menpo3d.rasterize import GLRasterizer\n",
    "from menpo3d.visualize import LandmarkViewer3d\n",
    "from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
    "\n",
    "np.set_printoptions(precision=3, suppress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2D homogeneous transform that swaps the first two axes (x and y).\n",
    "_swap_first_two_axes = np.array([[0, 1, 0],\n",
    "                                 [1, 0, 0],\n",
    "                                 [0, 0, 1]])\n",
    "flip_xy_yx = Homogeneous(_swap_first_two_axes)\n",
    "# The 4x4 identity with its last row removed (a 3x4 matrix).\n",
    "drop_h = Homogeneous(np.eye(4)[:3])\n",
    "\n",
    "\n",
    "# Negate the y and z axes. This equals a 180 degree rotation about y followed\n",
    "# by a 180 degree rotation about z, and matches the camera described by\n",
    "#   gluLookAt(0, 0, 0, 0, 0, 1, 0, -1, 0)\n",
    "axes_flip_matrix = np.diag([1.0, -1.0, -1.0, 1.0])\n",
    "axes_flip_t = Homogeneous(axes_flip_matrix)\n",
    "\n",
    "\n",
    "def retrieve_camera_matrix(image, mesh, group=None):\n",
    "    \"\"\"Fit a camera that maps the 3D landmarks of ``mesh`` onto those of ``image``.\n",
    "\n",
    "    The intrinsics are fixed (focal length equal to the larger image\n",
    "    dimension, principal point at the image centre, zero distortion) and the\n",
    "    rotation and translation are estimated with ``cv2.solvePnPRansac``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    image : object exposing ``shape`` and ``landmarks``\n",
    "        Supplies the 2D points via ``image.landmarks[group].points``.\n",
    "    mesh : object exposing ``landmarks``\n",
    "        Supplies the 3D points via ``mesh.landmarks[group].points``.\n",
    "    group : str, optional\n",
    "        Landmark group looked up on both ``image`` and ``mesh``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    view_t\n",
    "        The rotation composed before the translation.\n",
    "    c\n",
    "        ``Homogeneous`` holding the intrinsics in the top-left 3x3 block of a\n",
    "        4x4 identity.\n",
    "    proj_t\n",
    "        ``c`` composed before ``drop_h`` and then ``flip_xy_yx``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``cv2.solvePnPRansac`` reports that it could not estimate a pose.\n",
    "    \"\"\"\n",
    "    rows, cols = image.shape[0], image.shape[1]\n",
    "    max_d = max(rows, cols)\n",
    "    camera_matrix = np.array([[max_d, 0,     cols / 2.0],\n",
    "                              [0,     max_d, rows / 2.0],\n",
    "                              [0,     0,     1.0]])\n",
    "    distortion_coeffs = np.zeros(4)\n",
    "\n",
    "    # The image landmarks are passed with their two columns reversed. That\n",
    "    # slice is a strided view, so contiguous float copies are handed to OpenCV.\n",
    "    object_points = np.ascontiguousarray(mesh.landmarks[group].points, dtype=float)\n",
    "    image_points = np.ascontiguousarray(image.landmarks[group].points[:, ::-1], dtype=float)\n",
    "\n",
    "    converged, r_vec, t_vec, _ = cv2.solvePnPRansac(object_points,\n",
    "                                                    image_points,\n",
    "                                                    camera_matrix,\n",
    "                                                    distortion_coeffs)\n",
    "    if not converged:\n",
    "        # Without this check a failed fit would silently produce a garbage pose.\n",
    "        raise ValueError('solvePnPRansac failed to estimate a camera pose '\n",
    "                         'for landmark group {!r}'.format(group))\n",
    "\n",
    "    rotation_matrix = cv2.Rodrigues(r_vec)[0]\n",
    "\n",
    "    h_camera_matrix = np.eye(4)\n",
    "    h_camera_matrix[:3, :3] = camera_matrix\n",
    "\n",
    "    c = Homogeneous(h_camera_matrix)\n",
    "    t = Translation(t_vec.ravel())\n",
    "    r = Rotation(rotation_matrix)\n",
    "\n",
    "    view_t = r.compose_before(t)\n",
    "    proj_t = c.compose_before(drop_h).compose_before(flip_xy_yx)\n",
    "    return view_t, c, proj_t\n",
    "\n",
    "\n",
    "def weak_projection_matrix(width, height, mesh_camera_space):\n",
    "\n",
    "    # Identify how far and near the mesh is in camera space.\n",
    "    # we want to ensure that the near and far planes are\n",
    "    # set so that all the mesh is displayed.\n",
    "    near_bounds, far_bounds = mesh_camera_space.bounds()\n",
    "\n",
    "    # Rather than just use the bounds, we add 10% in each direction\n",
    "    # just to avoid any numerical errors.\n",
    "    average_plane = (near_bounds[-1] + far_bounds[-1]) * 0.5\n",
    "    padded_range = mesh_camera_space.range()[-1] * 1.1\n",
    "    near_plane = average_plane - padded_range\n",
    "    far_plane = average_plane + padded_range\n",
    "\n",
    "    plane_sum = far_plane + near_plane\n",
    "    plane_prod = far_plane * near_plane\n",
    "    denom = far_plane - near_plane\n",
    "    max_d = max(width, height)\n",
    "\n",
    "    return np.array([[2.0 * max_d / width, 0,                    0,                    0],\n",
    "                     [0,                   2.0 * max_d / height, 0,                    0],\n",
    "                     [0,                   0,                    (-plane_sum) / denom, (-2.0 * plane_prod) / denom],\n",
    "                     [0,                   0,                    -1,                   0]])\n",
    "\n",
    "\n",
    "def duplicate_vertices(mesh):\n",
    "    \"\"\"Rebuild ``mesh`` so that no vertex is shared between triangles.\n",
    "\n",
    "    Every triangle corner becomes its own vertex, so the result has three\n",
    "    vertices per triangle, numbered in trilist order.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (TexturedTriMesh, ndarray)\n",
    "        The new mesh (sharing the original texture) and, for each new vertex,\n",
    "        the index of the original vertex it was copied from.\n",
    "    \"\"\"\n",
    "    source_index = mesh.trilist.ravel()\n",
    "    n_new_vertices = source_index.shape[0]\n",
    "    unshared = TexturedTriMesh(mesh.points[source_index],\n",
    "                               trilist=np.arange(n_new_vertices).reshape([-1, 3]),\n",
    "                               tcoords=mesh.tcoords.points[source_index],\n",
    "                               texture=mesh.texture)\n",
    "    return unshared, source_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Number of bins along each unwrapped axis: 10 implies ~100 labels\n",
    "LABEL_WINDOWS_SIZE = 10\n",
    "# NOTE: machine-specific absolute path; point it at a local LFPW training image.\n",
    "IMAGE_PATH = '/Users/pts08/data/lfpw/trainset/image_0001.png'\n",
    "\n",
    "# Load the builtin template mesh and move its centre to the origin\n",
    "template = m3dio.import_builtin_asset.james_obj()\n",
    "template = Translation(-template.centre()).apply(template)\n",
    "# template = scale_about_centre(template, 1./1000.).apply(template)\n",
    "\n",
    "image = mio.import_image(IMAGE_PATH)\n",
    "\n",
    "# Expose both landmark sets under the common group name 'ibug68'\n",
    "for shape, imported_group in ((template, 'LJSON'), (image, 'PTS')):\n",
    "    shape.landmarks['ibug68'] = shape.landmarks[imported_group]\n",
    "    del shape.landmarks[imported_group]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Unwrap the template and keep only its first two unwrapped coordinates\n",
    "unwrapped_template = optimal_cylindrical_unwrap(template).apply(template)\n",
    "unwrapped_template.landmarks.clear()\n",
    "unwrapped_template.points = unwrapped_template.points[:, :2]\n",
    "# Shift the unwrapped points so that their minimum sits at the origin\n",
    "minimum = unwrapped_template.bounds(boundary=0)[0]\n",
    "unwrapped_template = Translation(-minimum).apply(unwrapped_template)\n",
    "\n",
    "window_shape = np.ceil(unwrapped_template.range() / LABEL_WINDOWS_SIZE)\n",
    "# Approximate binning of the space. The builtin int is used because the\n",
    "# np.int alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.\n",
    "bin_indices = np.floor(unwrapped_template.points / window_shape).astype(int)\n",
    "# A vertex lying exactly on the upper bound can land in bin LABEL_WINDOWS_SIZE,\n",
    "# which would collide with the first bin of the next row once the two indices\n",
    "# are combined below. Clamp it into the last valid bin instead.\n",
    "bin_indices = np.minimum(bin_indices, LABEL_WINDOWS_SIZE - 1)\n",
    "# Exact binning of the space\n",
    "# y_labels = np.digitize(unwrapped_template.points[:, 0], np.linspace(0, unwrapped_range[0], LABEL_WINDOWS_SIZE))\n",
    "# x_labels = np.digitize(unwrapped_template.points[:, 1], np.linspace(0, unwrapped_range[1], LABEL_WINDOWS_SIZE))\n",
    "per_vertex_labels = LABEL_WINDOWS_SIZE * bin_indices[:, 0] + bin_indices[:, 1]\n",
    "N_LABELS = np.unique(per_vertex_labels).shape[0]\n",
    "print('Generated {} labels'.format(N_LABELS))\n",
    "\n",
    "# Every triangle takes the label of its first vertex\n",
    "v1 = unwrapped_template.trilist[:, 0]\n",
    "id_per_tri = per_vertex_labels[v1]\n",
    "\n",
    "# One vertex per triangle corner, so each triangle can carry a single label\n",
    "template_uniq_verts = duplicate_vertices(template)[0]\n",
    "id_per_v_per_t = np.repeat(id_per_tri, 3)\n",
    "# The label is repeated in three columns (presumably because the rasterizer\n",
    "# interpolant expects three values per vertex -- TODO confirm)\n",
    "gt_label_f3v = np.tile(id_per_v_per_t, (3, 1)).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Fit a camera from the landmarks that the image and the template share\n",
    "view_t, c_t, proj_t = retrieve_camera_matrix(image, template, group='ibug68')\n",
    "# Full mapping from the template to the image: view first, then projection\n",
    "mesh_to_image = view_t.compose_before(proj_t)\n",
    "# Keep the reprojected template landmarks on the image so the fit can be inspected\n",
    "template_landmarks = template.landmarks['ibug68']\n",
    "image.landmarks['projected'] = mesh_to_image.apply(template_landmarks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# Overlay the reprojected template landmarks on the photograph\n",
    "image.view_landmarks(group='projected')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# The fitted view transform is followed by the axes flip (y and z negated)\n",
    "# before it is handed to the rasterizer.\n",
    "gl_view_matrix = view_t.compose_before(axes_flip_t).h_matrix\n",
    "gl_projection_matrix = weak_projection_matrix(image.width, image.height,\n",
    "                                              view_t.apply(template))\n",
    "rasterizer = GLRasterizer(height=image.height, width=image.width,\n",
    "                          view_matrix=gl_view_matrix,\n",
    "                          projection_matrix=gl_projection_matrix)\n",
    "\n",
    "rgb_image, label_image = rasterizer.rasterize_mesh_with_f3v_interpolant(\n",
    "    template_uniq_verts, per_vertex_f3v=gt_label_f3v)\n",
    "\n",
    "# Create 1-based labels so we can save as png; pixels outside the rendered\n",
    "# mask are set to 0. Storing as uint8 limits the label values to 255.\n",
    "one_based_labels = label_image.pixels + 1\n",
    "one_based_labels[:, ~label_image.mask.mask] = 0\n",
    "label_image.pixels = np.round(one_based_labels).astype(np.uint8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# Rendered texture first, then the label map in a figure of its own\n",
    "rgb_image.view()\n",
    "label_image.view(new_figure=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Print the matrices that the rasterizer is actually holding\n",
    "for title, matrix in (('Projection:', rasterizer.projection_matrix),\n",
    "                      ('View:', rasterizer.view_matrix)):\n",
    "    print(title)\n",
    "    print(matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# Show the photograph with the mask of the rendered mesh applied to it\n",
    "test = image.as_masked()\n",
    "test.mask = rgb_image.mask\n",
    "test.view()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "colors = sample_colours_from_colourmap(N_LABELS, 'jet')\n",
    "# Shuffle so that consecutive labels do not get neighbouring colourmap entries.\n",
    "# A fixed seed keeps the colouring identical from one run to the next.\n",
    "np.random.RandomState(0).shuffle(colors)\n",
    "\n",
    "# NOTE(review): pixels outside the rendered mask carry label 0; with\n",
    "# bg_label=-1 they are presumably tinted like any other label -- confirm\n",
    "# that this is intended.\n",
    "image_label_coloring = label2rgb(label_image.pixels[0],\n",
    "                                 image=image.pixels_with_channels_at_back(out_dtype=np.uint8),\n",
    "                                 colors=colors,\n",
    "                                 bg_label=-1)\n",
    "\n",
    "Image.init_from_channels_at_back(image_label_coloring).view(figure_size=(15, 15))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "gist_id": "639a16dfd02adc92cefd",
  "kernelspec": {
   "display_name": "Python 3.7 (menpo)",
   "language": "python",
   "name": "menpo"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "widgets": {
   "state": {},
   "version": "1.1.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"from __future__ import division\n",
	"import matplotlib.pyplot as plt\n",
	"import menpo3d.io as m3dio\n",
	"import menpo.io as mio\n",
	"import numpy as np\n",
	"import mayavi.mlab as mlab\n",
	"import cv2\n",
	"from skimage.color import label2rgb\n",
	"from random import shuffle\n",
	"\n",
	"from menpo.image import Image\n",
	"from menpo3d.unwrap import optimal_cylindrical_unwrap\n",
	"from menpo.shape import PointCloud, TriMesh, TexturedTriMesh\n",
	"from menpo.transform import UniformScale, Translation, Homogeneous, scale_about_centre, Rotation\n",
	"from menpo3d.rasterize import GLRasterizer\n",
	"from menpo3d.visualize import LandmarkViewer3d\n",
	"from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
	"\n",
	"np.set_printoptions(precision=3, suppress=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# 2D homogeneous transform that swaps the first two axes (x and y).\n",
	"_swap_first_two_axes = np.array([[0, 1, 0],\n",
	"                                 [1, 0, 0],\n",
	"                                 [0, 0, 1]])\n",
	"flip_xy_yx = Homogeneous(_swap_first_two_axes)\n",
	"# The 4x4 identity with its last row removed (a 3x4 matrix).\n",
	"drop_h = Homogeneous(np.eye(4)[:3])\n",
	"\n",
	"\n",
	"# Negate the y and z axes. This equals a 180 degree rotation about y followed\n",
	"# by a 180 degree rotation about z, and matches the camera described by\n",
	"#   gluLookAt(0, 0, 0, 0, 0, 1, 0, -1, 0)\n",
	"axes_flip_matrix = np.diag([1.0, -1.0, -1.0, 1.0])\n",
	"axes_flip_t = Homogeneous(axes_flip_matrix)\n",
	"\n",
	"\n",
	"def retrieve_camera_matrix(image, mesh, group=None):\n",
	"    \"\"\"Fit a camera that maps the 3D landmarks of ``mesh`` onto those of ``image``.\n",
	"\n",
	"    The intrinsics are fixed (focal length equal to the larger image\n",
	"    dimension, principal point at the image centre, zero distortion) and the\n",
	"    rotation and translation are estimated with ``cv2.solvePnPRansac``.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    image : object exposing ``shape`` and ``landmarks``\n",
	"        Supplies the 2D points via ``image.landmarks[group].points``.\n",
	"    mesh : object exposing ``landmarks``\n",
	"        Supplies the 3D points via ``mesh.landmarks[group].points``.\n",
	"    group : str, optional\n",
	"        Landmark group looked up on both ``image`` and ``mesh``.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    view_t\n",
	"        The rotation composed before the translation.\n",
	"    c\n",
	"        ``Homogeneous`` holding the intrinsics in the top-left 3x3 block of a\n",
	"        4x4 identity.\n",
	"    proj_t\n",
	"        ``c`` composed before ``drop_h`` and then ``flip_xy_yx``.\n",
	"\n",
	"    Raises\n",
	"    ------\n",
	"    ValueError\n",
	"        If ``cv2.solvePnPRansac`` reports that it could not estimate a pose.\n",
	"    \"\"\"\n",
	"    rows, cols = image.shape[0], image.shape[1]\n",
	"    max_d = max(rows, cols)\n",
	"    camera_matrix = np.array([[max_d, 0,     cols / 2.0],\n",
	"                              [0,     max_d, rows / 2.0],\n",
	"                              [0,     0,     1.0]])\n",
	"    distortion_coeffs = np.zeros(4)\n",
	"\n",
	"    # The image landmarks are passed with their two columns reversed. That\n",
	"    # slice is a strided view, so contiguous float copies are handed to OpenCV.\n",
	"    object_points = np.ascontiguousarray(mesh.landmarks[group].points, dtype=float)\n",
	"    image_points = np.ascontiguousarray(image.landmarks[group].points[:, ::-1], dtype=float)\n",
	"\n",
	"    converged, r_vec, t_vec, _ = cv2.solvePnPRansac(object_points,\n",
	"                                                    image_points,\n",
	"                                                    camera_matrix,\n",
	"                                                    distortion_coeffs)\n",
	"    if not converged:\n",
	"        # Without this check a failed fit would silently produce a garbage pose.\n",
	"        raise ValueError('solvePnPRansac failed to estimate a camera pose '\n",
	"                         'for landmark group {!r}'.format(group))\n",
	"\n",
	"    rotation_matrix = cv2.Rodrigues(r_vec)[0]\n",
	"\n",
	"    h_camera_matrix = np.eye(4)\n",
	"    h_camera_matrix[:3, :3] = camera_matrix\n",
	"\n",
	"    c = Homogeneous(h_camera_matrix)\n",
	"    t = Translation(t_vec.ravel())\n",
	"    r = Rotation(rotation_matrix)\n",
	"\n",
	"    view_t = r.compose_before(t)\n",
	"    proj_t = c.compose_before(drop_h).compose_before(flip_xy_yx)\n",
	"    return view_t, c, proj_t\n",
	"\n",
	"\n",
	"def weak_projection_matrix(width, height, mesh_camera_space):\n",
	"\n",
	" # Identify how far and near the mesh is in camera space.\n",
	" # we want to ensure that the near and far planes are\n",
	" # set so that all the mesh is displayed.\n",
	" near_bounds, far_bounds = mesh_camera_space.bounds()\n",
	"\n",
	" # Rather than just use the bounds, we add 10% in each direction\n",
	" # just to avoid any numerical errors.\n",
	" average_plane = (near_bounds[-1] + far_bounds[-1]) * 0.5\n",
	" padded_range = mesh_camera_space.range()[-1] * 1.1\n",
	" near_plane = average_plane - padded_range\n",
	" far_plane = average_plane + padded_range\n",
	"\n",
	" plane_sum = far_plane + near_plane\n",
	" plane_prod = far_plane * near_plane\n",
	" denom = far_plane - near_plane\n",
	" max_d = max(width, height)\n",
	"\n",
	" return np.array([[2.0 * max_d / width, 0, 0, 0],\n",
	" [0, 2.0 * max_d / height, 0, 0],\n",
	" [0, 0, (-plane_sum) / denom, (-2.0 * plane_prod) / denom],\n",
	" [0, 0, -1, 0]])\n",
	"\n",
	"\n",
	"def duplicate_vertices(mesh):\n",
	"    \"\"\"Rebuild ``mesh`` so that no vertex is shared between triangles.\n",
	"\n",
	"    Every triangle corner becomes its own vertex, so the result has three\n",
	"    vertices per triangle, numbered in trilist order.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    (TexturedTriMesh, ndarray)\n",
	"        The new mesh (sharing the original texture) and, for each new vertex,\n",
	"        the index of the original vertex it was copied from.\n",
	"    \"\"\"\n",
	"    source_index = mesh.trilist.ravel()\n",
	"    n_new_vertices = source_index.shape[0]\n",
	"    unshared = TexturedTriMesh(mesh.points[source_index],\n",
	"                               trilist=np.arange(n_new_vertices).reshape([-1, 3]),\n",
	"                               tcoords=mesh.tcoords.points[source_index],\n",
	"                               texture=mesh.texture)\n",
	"    return unshared, source_index"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"# Number of bins along each unwrapped axis: 10 implies ~100 labels\n",
	"LABEL_WINDOWS_SIZE = 10\n",
	"# NOTE: machine-specific absolute path; point it at a local LFPW training image.\n",
	"IMAGE_PATH = '/Users/pts08/data/lfpw/trainset/image_0001.png'\n",
	"\n",
	"# Load the builtin template mesh and move its centre to the origin\n",
	"template = m3dio.import_builtin_asset.james_obj()\n",
	"template = Translation(-template.centre()).apply(template)\n",
	"# template = scale_about_centre(template, 1./1000.).apply(template)\n",
	"\n",
	"image = mio.import_image(IMAGE_PATH)\n",
	"\n",
	"# Expose both landmark sets under the common group name 'ibug68'\n",
	"for shape, imported_group in ((template, 'LJSON'), (image, 'PTS')):\n",
	"    shape.landmarks['ibug68'] = shape.landmarks[imported_group]\n",
	"    del shape.landmarks[imported_group]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"# Unwrap the template and keep only its first two unwrapped coordinates\n",
	"unwrapped_template = optimal_cylindrical_unwrap(template).apply(template)\n",
	"unwrapped_template.landmarks.clear()\n",
	"unwrapped_template.points = unwrapped_template.points[:, :2]\n",
	"# Shift the unwrapped points so that their minimum sits at the origin\n",
	"minimum = unwrapped_template.bounds(boundary=0)[0]\n",
	"unwrapped_template = Translation(-minimum).apply(unwrapped_template)\n",
	"\n",
	"window_shape = np.ceil(unwrapped_template.range() / LABEL_WINDOWS_SIZE)\n",
	"# Approximate binning of the space. The builtin int is used because the\n",
	"# np.int alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.\n",
	"bin_indices = np.floor(unwrapped_template.points / window_shape).astype(int)\n",
	"# A vertex lying exactly on the upper bound can land in bin LABEL_WINDOWS_SIZE,\n",
	"# which would collide with the first bin of the next row once the two indices\n",
	"# are combined below. Clamp it into the last valid bin instead.\n",
	"bin_indices = np.minimum(bin_indices, LABEL_WINDOWS_SIZE - 1)\n",
	"# Exact binning of the space\n",
	"# y_labels = np.digitize(unwrapped_template.points[:, 0], np.linspace(0, unwrapped_range[0], LABEL_WINDOWS_SIZE))\n",
	"# x_labels = np.digitize(unwrapped_template.points[:, 1], np.linspace(0, unwrapped_range[1], LABEL_WINDOWS_SIZE))\n",
	"per_vertex_labels = LABEL_WINDOWS_SIZE * bin_indices[:, 0] + bin_indices[:, 1]\n",
	"N_LABELS = np.unique(per_vertex_labels).shape[0]\n",
	"print('Generated {} labels'.format(N_LABELS))\n",
	"\n",
	"# Every triangle takes the label of its first vertex\n",
	"v1 = unwrapped_template.trilist[:, 0]\n",
	"id_per_tri = per_vertex_labels[v1]\n",
	"\n",
	"# One vertex per triangle corner, so each triangle can carry a single label\n",
	"template_uniq_verts = duplicate_vertices(template)[0]\n",
	"id_per_v_per_t = np.repeat(id_per_tri, 3)\n",
	"# The label is repeated in three columns (presumably because the rasterizer\n",
	"# interpolant expects three values per vertex -- TODO confirm)\n",
	"gt_label_f3v = np.tile(id_per_v_per_t, (3, 1)).T"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"# Fit a camera from the landmarks that the image and the template share\n",
	"view_t, c_t, proj_t = retrieve_camera_matrix(image, template, group='ibug68')\n",
	"# Full mapping from the template to the image: view first, then projection\n",
	"mesh_to_image = view_t.compose_before(proj_t)\n",
	"# Keep the reprojected template landmarks on the image so the fit can be inspected\n",
	"template_landmarks = template.landmarks['ibug68']\n",
	"image.landmarks['projected'] = mesh_to_image.apply(template_landmarks)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"\n",
	"# Overlay the reprojected template landmarks on the photograph\n",
	"image.view_landmarks(group='projected')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"# The fitted view transform is followed by the axes flip (y and z negated)\n",
	"# before it is handed to the rasterizer.\n",
	"gl_view_matrix = view_t.compose_before(axes_flip_t).h_matrix\n",
	"gl_projection_matrix = weak_projection_matrix(image.width, image.height,\n",
	"                                              view_t.apply(template))\n",
	"rasterizer = GLRasterizer(height=image.height, width=image.width,\n",
	"                          view_matrix=gl_view_matrix,\n",
	"                          projection_matrix=gl_projection_matrix)\n",
	"\n",
	"rgb_image, label_image = rasterizer.rasterize_mesh_with_f3v_interpolant(\n",
	"    template_uniq_verts, per_vertex_f3v=gt_label_f3v)\n",
	"\n",
	"# Create 1-based labels so we can save as png; pixels outside the rendered\n",
	"# mask are set to 0. Storing as uint8 limits the label values to 255.\n",
	"one_based_labels = label_image.pixels + 1\n",
	"one_based_labels[:, ~label_image.mask.mask] = 0\n",
	"label_image.pixels = np.round(one_based_labels).astype(np.uint8)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"\n",
	"# Rendered texture first, then the label map in a figure of its own\n",
	"rgb_image.view()\n",
	"label_image.view(new_figure=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"# Print the matrices that the rasterizer is actually holding\n",
	"for title, matrix in (('Projection:', rasterizer.projection_matrix),\n",
	"                      ('View:', rasterizer.view_matrix)):\n",
	"    print(title)\n",
	"    print(matrix)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"\n",
	"# Show the photograph with the mask of the rendered mesh applied to it\n",
	"test = image.as_masked()\n",
	"test.mask = rgb_image.mask\n",
	"test.view()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": false
	}
	},
	"outputs": [],
	"source": [
	"colors = sample_colours_from_colourmap(N_LABELS, 'jet')\n",
	"# Shuffle so that consecutive labels do not get neighbouring colourmap entries.\n",
	"# A fixed seed keeps the colouring identical from one run to the next.\n",
	"np.random.RandomState(0).shuffle(colors)\n",
	"\n",
	"# NOTE(review): pixels outside the rendered mask carry label 0; with\n",
	"# bg_label=-1 they are presumably tinted like any other label -- confirm\n",
	"# that this is intended.\n",
	"image_label_coloring = label2rgb(label_image.pixels[0],\n",
	"                                 image=image.pixels_with_channels_at_back(out_dtype=np.uint8),\n",
	"                                 colors=colors,\n",
	"                                 bg_label=-1)\n",
	"\n",
	"Image.init_from_channels_at_back(image_label_coloring).view(figure_size=(15, 15))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"jupyter": {
	"outputs_hidden": true
	}
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"gist_id": "639a16dfd02adc92cefd",
	"kernelspec": {
	"display_name": "Python 3.7 (menpo)",
	"language": "python",
	"name": "menpo"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	},
	"widgets": {
	"state": {},
	"version": "1.1.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}