nulledge/MPI-INF-3DHP.ipynb

## MPI-INF-3DHP.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import copy\n",
    "import cv2 as cv\n",
    "import math\n",
    "import numpy as np\n",
    "import random\n",
    "import imageio\n",
    "import scipy.io\n",
    "import skimage.transform\n",
    "from tqdm import tqdm\n",
    "from itertools import product\n",
    "from vectormath import Vector2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "root = 'D:/data/MPI-INF-3DHP'\n",
    "\n",
    "available_subject = [1, 2, 3, 4, 5, 6, 7, 8, ]\n",
    "available_sequence = [1, 2, ]\n",
    "available_camera = [camera for camera in range(14)]\n",
    "\n",
    "# available_segment[subject][sequence]\n",
    "# segmented sequence = np.squeeze(np.where(available_segment[subject])) + 1\n",
    "available_segment = [\n",
    "    [False, True], # subject 1\n",
    "    [False, True], # subject 2\n",
    "    [False, True],\n",
    "    [False, True],\n",
    "    [False, True],\n",
    "    [False, True],\n",
    "    [True, False],\n",
    "    [True, False], # subject 8\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class SequentialDictionary:\n",
    "    '''The custom dictionary class\n",
    "    \n",
    "    You can use a dictionary with multiple indices, i.e. x['1st dim']['2nd dim'] = 2.\n",
    "    '''\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.data = dict()\n",
    "    \n",
    "    def __getitem__(self, index):\n",
    "        if index not in self.data.keys():\n",
    "            self.data[index] = SequentialDictionary()\n",
    "        return self.data[index]\n",
    "    \n",
    "    def __setitem__(self, index, value):\n",
    "        self.data[index] = value\n",
    "    \n",
    "    def __len__(self):\n",
    "        length = 0\n",
    "        for key, value in self.data.items():\n",
    "            if type(value) is SequentialDictionary:\n",
    "                length = length + len(value)\n",
    "            else:\n",
    "                length = length + 1\n",
    "        return length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 672/672 [00:34<00:00, 20.23it/s]\n"
     ]
    }
   ],
   "source": [
    "# Video\n",
    "VIDEO_RGB = 'imageSequence'\n",
    "VIDEO_MASK_HUMAN_AND_CHAIR = 'FGmasks'\n",
    "VIDEO_MASK_CHAIR = 'ChairMasks'\n",
    "\n",
    "available_format = [\n",
    "    VIDEO_RGB,\n",
    "    VIDEO_MASK_HUMAN_AND_CHAIR,\n",
    "    VIDEO_MASK_CHAIR,\n",
    "]\n",
    "\n",
    "video_path = '{root}/{subject}/{sequence}/{format}/video_{camera}.avi'\n",
    "video = SequentialDictionary()\n",
    "\n",
    "available_video = product(*[\n",
    "    available_subject, \n",
    "    available_sequence, \n",
    "    available_format, \n",
    "    available_camera,\n",
    "])\n",
    "total = len(available_subject) * len(available_sequence) * len(available_format) * len(available_camera)\n",
    "\n",
    "for subject, sequence, format, camera in tqdm(available_video, total=total):\n",
    "    video[subject][sequence][format][camera] = cv.VideoCapture(video_path.format(\n",
    "        root=root,\n",
    "        subject='S%d' % subject,\n",
    "        sequence='Seq%d' % sequence,\n",
    "        format=format,\n",
    "        camera=camera,\n",
    "    ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:17<00:00,  1.14it/s]\n"
     ]
    }
   ],
   "source": [
    "# annotation\n",
    "ANNOT_CAMERA_2D = 'annot2'\n",
    "ANNOT_CAMERA_3D = 'annot3'\n",
    "ANNOT_WORLD_3D = 'univ_annot3'\n",
    "ANNOT_CAMERA_CALI = 'cameras'\n",
    "\n",
    "annot_path = '{root}/{subject}/{sequence}/annot.mat'\n",
    "annot = SequentialDictionary()\n",
    "\n",
    "available_annot = product(*[\n",
    "    available_subject, \n",
    "    available_sequence,\n",
    "])\n",
    "total = len(available_subject) * len(available_sequence)\n",
    "\n",
    "for subject, sequence, in tqdm(available_annot, total=total):\n",
    "    annot[subject][sequence] = scipy.io.loadmat(annot_path.format(\n",
    "        root=root,\n",
    "        subject='S%d' % subject,\n",
    "        sequence='Seq%d' % sequence,\n",
    "    ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 144.42it/s]\n"
     ]
    }
   ],
   "source": [
    "# camera parameters\n",
    "CAMERA_INTRINSIC = 'intrinsic'\n",
    "CAMERA_EXTRINSIC = 'extrinsic'\n",
    "\n",
    "camera_path = '{root}/{subject}/{sequence}/camera.calibration'\n",
    "camera_parameter = SequentialDictionary()\n",
    "\n",
    "available_camera_parameter = product(*[\n",
    "    available_subject, \n",
    "    available_sequence,\n",
    "])\n",
    "total = len(available_subject) * len(available_sequence)\n",
    "\n",
    "for subject, sequence, in tqdm(available_camera_parameter, total=total):\n",
    "    camera_index = -1\n",
    "    with open(camera_path.format(\n",
    "        root=root,\n",
    "        subject='S%d' % subject,\n",
    "        sequence='Seq%d' % sequence,\n",
    "    ), 'r') as file:\n",
    "        for line in file:\n",
    "            word = line.strip().split() # remove whilespace\n",
    "\n",
    "            if word[0] == 'name':\n",
    "                camera_index = int(word[-1])\n",
    "\n",
    "            elif word[0] == CAMERA_INTRINSIC:\n",
    "                mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n",
    "                mat = mat[0:3, 0:3]\n",
    "                camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] = mat\n",
    "            elif word[0] == CAMERA_EXTRINSIC:\n",
    "                mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n",
    "                mat = mat[0:3, 0:4]\n",
    "                camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] = mat\n",
    "            else:\n",
    "                continue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def rotate_bound(image, angle):\n",
    "    '''Rotate an image about its centre, keeping the original size.\n",
    "\n",
    "    Args:\n",
    "        image: array of shape (height, width) or (height, width, channels).\n",
    "        angle: rotation in degrees. It is negated before being passed to\n",
    "            cv.getRotationMatrix2D, for which positive means\n",
    "            counter-clockwise, so a positive value here turns clockwise.\n",
    "\n",
    "    Returns:\n",
    "        The rotated image, with the same width and height as the input.\n",
    "    '''\n",
    "    # Only the first two axes are needed, so single-channel images work too.\n",
    "    height, width = image.shape[:2]\n",
    "\n",
    "    mat = cv.getRotationMatrix2D((width / 2, height / 2), -angle, 1)\n",
    "    return cv.warpAffine(image, mat, (width, height))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def crop_image(image, center, scale, rotate, resolution):\n",
    "    '''Cut a square box out of an image, rotate it and resize it.\n",
    "\n",
    "    Args:\n",
    "        image: array of shape (height, width, channel).\n",
    "        center: (x, y) centre of the box in pixels.\n",
    "        scale: side length of the box divided by 200.\n",
    "        rotate: rotation in degrees passed to rotate_bound; 0 skips it.\n",
    "        resolution: side length in pixels of the returned square image.\n",
    "\n",
    "    Returns:\n",
    "        uint8 image of resolution x resolution pixels. Parts of the box\n",
    "        that lie outside `image` stay black.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the pre-scaling step leaves less than 2 pixels.\n",
    "    '''\n",
    "    center = Vector2(center)  # assign a new array\n",
    "    height, width, channel = image.shape\n",
    "    crop_ratio = 200 * scale / resolution\n",
    "\n",
    "    if crop_ratio >= 2:\n",
    "        # The box is at least twice the output size: shrink the image first.\n",
    "        height = math.floor(height / crop_ratio)\n",
    "        width = math.floor(width / crop_ratio)\n",
    "\n",
    "        if max([height, width]) < 2:\n",
    "            # Zoomed out so much that the image is now a single pixel or less\n",
    "            raise ValueError('Width or height is invalid!')\n",
    "\n",
    "        # cv.resize takes dsize as (width, height), not (rows, columns).\n",
    "        image = cv.resize(image, (width, height))\n",
    "        center /= crop_ratio\n",
    "        scale /= crop_ratio\n",
    "\n",
    "    ul = (center - 200 * scale / 2).astype(int)\n",
    "    br = (center + 200 * scale / 2).astype(int)\n",
    "\n",
    "    if crop_ratio >= 2:\n",
    "        # Force the box to exactly resolution x resolution pixels.\n",
    "        br -= (br - ul - resolution)\n",
    "\n",
    "    # Margin added on every side before rotating and removed afterwards.\n",
    "    # NOTE(review): this is diagonal - width / 2, which looks larger than\n",
    "    # the rotation needs; kept as is because it only costs memory.\n",
    "    pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n",
    "\n",
    "    if rotate != 0:\n",
    "        ul -= pad_length\n",
    "        br += pad_length\n",
    "\n",
    "    # src: the box clipped to the image; dst: where that part lands in the output.\n",
    "    src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n",
    "    dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n",
    "\n",
    "    new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.uint8)\n",
    "    new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n",
    "\n",
    "    if rotate != 0:\n",
    "        new_image = rotate_bound(new_image, rotate)\n",
    "        new_height, new_width, _ = new_image.shape\n",
    "        # Remove the margin that was added for the rotation.\n",
    "        new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n",
    "\n",
    "    if crop_ratio < 2:\n",
    "        new_image = cv.resize(new_image, (resolution, resolution))\n",
    "\n",
    "    return new_image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one augmented frame, crop it and overlay the projected 3D keypoints.\n",
    "subject = 3\n",
    "# Rows of available_segment are 0-based; sequences are numbered from 1.\n",
    "sequence = available_segment[subject - 1].index(True) + 1\n",
    "camera = 8\n",
    "frame = 4748\n",
    "\n",
    "image = SequentialDictionary()\n",
    "for video_format in available_format:\n",
    "    video[subject][sequence][video_format][camera].set(cv.CAP_PROP_POS_FRAMES, frame)\n",
    "    success, image[video_format] = video[subject][sequence][video_format][camera].read()\n",
    "    assert success, 'cannot read frame %d of %s' % (frame, video_format)\n",
    "\n",
    "# np.int was removed in NumPy 1.24; int() gives the same values.\n",
    "rgb_capture = video[subject][sequence][VIDEO_RGB][camera]\n",
    "height = int(rgb_capture.get(cv.CAP_PROP_FRAME_HEIGHT))\n",
    "width = int(rgb_capture.get(cv.CAP_PROP_FRAME_WIDTH))\n",
    "channel = 3\n",
    "\n",
    "for image_name in ['checker', 'room', 'flower']:\n",
    "    texture = cv.imread('{image_name}.jpg'.format(image_name=image_name))\n",
    "    # cv.imread returns None instead of raising when the file cannot be read.\n",
    "    assert texture is not None, 'cannot read %s.jpg' % image_name\n",
    "    # cv.resize takes dsize as (width, height).\n",
    "    image[image_name] = cv.resize(texture, (width, height))\n",
    "\n",
    "# Random brightness factor in [0.4, 1.2) for the pants region.\n",
    "gitter = 0.4 + 0.8 * random.random()\n",
    "\n",
    "rgb = image[VIDEO_RGB]\n",
    "mask = image[VIDEO_MASK_HUMAN_AND_CHAIR]\n",
    "\n",
    "background = mask[:, :, 2] < 200\n",
    "chair = image[VIDEO_MASK_CHAIR][:, :, 2] < 200\n",
    "pants = mask[:, :, 0] < 200\n",
    "shirts = mask[:, :, 1] < 200\n",
    "\n",
    "rgb[shirts] = image['checker'][shirts]\n",
    "# Clip before storing: a factor above 1 would otherwise wrap around in uint8.\n",
    "rgb[pants] = np.clip(rgb[pants] * gitter, 0, 255).astype(np.uint8)\n",
    "rgb[background] = image['room'][background]\n",
    "rgb[chair] = image['flower'][chair]\n",
    "\n",
    "in_3D = np.reshape(annot[subject][sequence][ANNOT_CAMERA_3D][camera, 0][frame], (-1, 3))\n",
    "\n",
    "num_keypoints = len(in_3D)\n",
    "\n",
    "# Homogeneous coordinates, one keypoint per column.\n",
    "in_3D = np.concatenate((in_3D, np.ones(shape=(num_keypoints, 1))), axis=1).transpose(1, 0)\n",
    "# Identity pose [I | 0]: the translation column must be zeros; ones would\n",
    "# shift every point by one unit along each axis before projection.\n",
    "# (ANNOT_CAMERA_3D is taken to be in camera coordinates, as its name says.)\n",
    "identity_transform = np.concatenate((np.eye(3), np.zeros(shape=(3, 1))), axis=1)\n",
    "\n",
    "projected = np.matmul(identity_transform, in_3D)\n",
    "projected = np.matmul(camera_parameter[subject][sequence][camera][CAMERA_INTRINSIC], projected)\n",
    "projected = projected / projected[-1, :]  # perspective divide\n",
    "projected = projected.transpose(1, 0)\n",
    "\n",
    "# Bounding box of the keypoints, enlarged by 50 px on every side.\n",
    "pad = np.asarray([50, 50], dtype=int)\n",
    "ul = np.asarray([np.min(projected[:, 0]), np.min(projected[:, 1])], dtype=int) - pad\n",
    "br = np.asarray([np.max(projected[:, 0]), np.max(projected[:, 1])], dtype=int) + pad\n",
    "\n",
    "center = ((ul + br) * 0.5).astype(int)\n",
    "scale = np.max(br - ul) / 200  # crop_image measures the box side as 200 * scale\n",
    "\n",
    "cropped = crop_image(rgb, center, scale, 30, 256)\n",
    "success = cv.imwrite('crop.jpg', cropped)\n",
    "assert success\n",
    "\n",
    "# Paint the bounding box into the mask image. Clamp the corners so that a\n",
    "# negative coordinate is not read as an index counted from the end.\n",
    "mask_height, mask_width = mask.shape[:2]\n",
    "x0, x1 = np.clip([ul[0], br[0]], 0, mask_width)\n",
    "y0, y1 = np.clip([ul[1], br[1]], 0, mask_height)\n",
    "mask[y0:y1, x0:x1, :] = [255, 255, 255]\n",
    "\n",
    "# Mark every keypoint with a 20 x 20 square of [0, 0, 255], clipped to the frame.\n",
    "frame_height, frame_width = rgb.shape[:2]\n",
    "for x, y, _ in projected:\n",
    "    x0, x1 = np.clip([math.floor(x) - 10, math.floor(x) + 10], 0, frame_width)\n",
    "    y0, y1 = np.clip([math.floor(y) - 10, math.floor(y) + 10], 0, frame_height)\n",
    "    rgb[y0:y1, x0:x1, :] = [0, 0, 255]\n",
    "\n",
    "for video_format in available_format:\n",
    "    success = cv.imwrite('{format}.jpg'.format(format=video_format), image[video_format])\n",
    "    assert success"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Release every capture. The combinations are rebuilt here instead of reusing\n",
    "# available_video: a one-shot product() iterator is empty once it has been\n",
    "# consumed, and the loop would then release nothing.\n",
    "for subject, sequence, video_format, camera in product(\n",
    "    available_subject,\n",
    "    available_sequence,\n",
    "    available_format,\n",
    "    available_camera,\n",
    "):\n",
    "    video[subject][sequence][video_format][camera].release()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# print('Video:', video_path)\n",
    "# print('Open:', video.isOpened())\n",
    "# print('Resolution:', '%dx%d' % (video.get(cv.CAP_PROP_FRAME_WIDTH), video.get(cv.CAP_PROP_FRAME_HEIGHT)))\n",
    "# print('Total frames:', video.get(cv.CAP_PROP_FRAME_COUNT))\n",
    "# print('Frame-rate:', video.get(cv.CAP_PROP_FPS))\n",
    "# print('OpenCV:', cv.__version__)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import copy\n",
	"import cv2 as cv\n",
	"import math\n",
	"import numpy as np\n",
	"import random\n",
	"import imageio\n",
	"import scipy.io\n",
	"import skimage.transform\n",
	"from tqdm import tqdm\n",
	"from itertools import product\n",
	"from vectormath import Vector2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"root = 'D:/data/MPI-INF-3DHP'\n",
	"\n",
	"available_subject = [1, 2, 3, 4, 5, 6, 7, 8, ]\n",
	"available_sequence = [1, 2, ]\n",
	"available_camera = [camera for camera in range(14)]\n",
	"\n",
	"# available_segment[subject][sequence]\n",
	"# segmented sequence = np.squeeze(np.where(available_segment[subject])) + 1\n",
	"available_segment = [\n",
	" [False, True], # subject 1\n",
	" [False, True], # subject 2\n",
	" [False, True],\n",
	" [False, True],\n",
	" [False, True],\n",
	" [False, True],\n",
	" [True, False],\n",
	" [True, False], # subject 8\n",
	"]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"class SequentialDictionary:\n",
	" '''The custom dictionary class\n",
	" \n",
	" You can use a dictionary with multiple indices, i.e. x['1st dim']['2nd dim'] = 2.\n",
	" '''\n",
	" \n",
	" def __init__(self):\n",
	" self.data = dict()\n",
	" \n",
	" def __getitem__(self, index):\n",
	" if index not in self.data.keys():\n",
	" self.data[index] = SequentialDictionary()\n",
	" return self.data[index]\n",
	" \n",
	" def __setitem__(self, index, value):\n",
	" self.data[index] = value\n",
	" \n",
	" def __len__(self):\n",
	" length = 0\n",
	" for key, value in self.data.items():\n",
	" if type(value) is SequentialDictionary:\n",
	" length = length + len(value)\n",
	" else:\n",
	" length = length + 1\n",
	" return length"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%\|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████\| 672/672 [00:34<00:00, 20.23it/s]\n"
	]
	}
	],
	"source": [
	"# Video\n",
	"VIDEO_RGB = 'imageSequence'\n",
	"VIDEO_MASK_HUMAN_AND_CHAIR = 'FGmasks'\n",
	"VIDEO_MASK_CHAIR = 'ChairMasks'\n",
	"\n",
	"available_format = [\n",
	" VIDEO_RGB,\n",
	" VIDEO_MASK_HUMAN_AND_CHAIR,\n",
	" VIDEO_MASK_CHAIR,\n",
	"]\n",
	"\n",
	"video_path = '{root}/{subject}/{sequence}/{format}/video_{camera}.avi'\n",
	"video = SequentialDictionary()\n",
	"\n",
	"available_video = product(*[\n",
	" available_subject, \n",
	" available_sequence, \n",
	" available_format, \n",
	" available_camera,\n",
	"])\n",
	"total = len(available_subject) * len(available_sequence) * len(available_format) * len(available_camera)\n",
	"\n",
	"for subject, sequence, format, camera in tqdm(available_video, total=total):\n",
	" video[subject][sequence][format][camera] = cv.VideoCapture(video_path.format(\n",
	" root=root,\n",
	" subject='S%d' % subject,\n",
	" sequence='Seq%d' % sequence,\n",
	" format=format,\n",
	" camera=camera,\n",
	" ))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%\|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████\| 16/16 [00:17<00:00, 1.14it/s]\n"
	]
	}
	],
	"source": [
	"# annotation\n",
	"ANNOT_CAMERA_2D = 'annot2'\n",
	"ANNOT_CAMERA_3D = 'annot3'\n",
	"ANNOT_WORLD_3D = 'univ_annot3'\n",
	"ANNOT_CAMERA_CALI = 'cameras'\n",
	"\n",
	"annot_path = '{root}/{subject}/{sequence}/annot.mat'\n",
	"annot = SequentialDictionary()\n",
	"\n",
	"available_annot = product(*[\n",
	" available_subject, \n",
	" available_sequence,\n",
	"])\n",
	"total = len(available_subject) * len(available_sequence)\n",
	"\n",
	"for subject, sequence, in tqdm(available_annot, total=total):\n",
	" annot[subject][sequence] = scipy.io.loadmat(annot_path.format(\n",
	" root=root,\n",
	" subject='S%d' % subject,\n",
	" sequence='Seq%d' % sequence,\n",
	" ))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%\|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████\| 16/16 [00:00<00:00, 144.42it/s]\n"
	]
	}
	],
	"source": [
	"# camera parameters\n",
	"CAMERA_INTRINSIC = 'intrinsic'\n",
	"CAMERA_EXTRINSIC = 'extrinsic'\n",
	"\n",
	"camera_path = '{root}/{subject}/{sequence}/camera.calibration'\n",
	"camera_parameter = SequentialDictionary()\n",
	"\n",
	"available_camera_parameter = product(*[\n",
	" available_subject, \n",
	" available_sequence,\n",
	"])\n",
	"total = len(available_subject) * len(available_sequence)\n",
	"\n",
	"for subject, sequence, in tqdm(available_camera_parameter, total=total):\n",
	" camera_index = -1\n",
	" with open(camera_path.format(\n",
	" root=root,\n",
	" subject='S%d' % subject,\n",
	" sequence='Seq%d' % sequence,\n",
	" ), 'r') as file:\n",
	" for line in file:\n",
	" word = line.strip().split() # remove whilespace\n",
	"\n",
	" if word[0] == 'name':\n",
	" camera_index = int(word[-1])\n",
	"\n",
	" elif word[0] == CAMERA_INTRINSIC:\n",
	" mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n",
	" mat = mat[0:3, 0:3]\n",
	" camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] = mat\n",
	" elif word[0] == CAMERA_EXTRINSIC:\n",
	" mat = np.reshape(np.asarray(word[1:], dtype=np.float), newshape=(4, 4))\n",
	" mat = mat[0:3, 0:4]\n",
	" camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] = mat\n",
	" else:\n",
	" continue"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 45,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def rotate_bound(image, angle):\n",
	"    '''Rotate an image about its centre, keeping the original size.\n",
	"\n",
	"    Args:\n",
	"        image: array of shape (height, width) or (height, width, channels).\n",
	"        angle: rotation in degrees. It is negated before being passed to\n",
	"            cv.getRotationMatrix2D, for which positive means\n",
	"            counter-clockwise, so a positive value here turns clockwise.\n",
	"\n",
	"    Returns:\n",
	"        The rotated image, with the same width and height as the input.\n",
	"    '''\n",
	"    # Only the first two axes are needed, so single-channel images work too.\n",
	"    height, width = image.shape[:2]\n",
	"\n",
	"    mat = cv.getRotationMatrix2D((width / 2, height / 2), -angle, 1)\n",
	"    return cv.warpAffine(image, mat, (width, height))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 46,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def crop_image(image, center, scale, rotate, resolution):\n",
	"    '''Cut a square box out of an image, rotate it and resize it.\n",
	"\n",
	"    Args:\n",
	"        image: array of shape (height, width, channel).\n",
	"        center: (x, y) centre of the box in pixels.\n",
	"        scale: side length of the box divided by 200.\n",
	"        rotate: rotation in degrees passed to rotate_bound; 0 skips it.\n",
	"        resolution: side length in pixels of the returned square image.\n",
	"\n",
	"    Returns:\n",
	"        uint8 image of resolution x resolution pixels. Parts of the box\n",
	"        that lie outside `image` stay black.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if the pre-scaling step leaves less than 2 pixels.\n",
	"    '''\n",
	"    center = Vector2(center)  # assign a new array\n",
	"    height, width, channel = image.shape\n",
	"    crop_ratio = 200 * scale / resolution\n",
	"\n",
	"    if crop_ratio >= 2:\n",
	"        # The box is at least twice the output size: shrink the image first.\n",
	"        height = math.floor(height / crop_ratio)\n",
	"        width = math.floor(width / crop_ratio)\n",
	"\n",
	"        if max([height, width]) < 2:\n",
	"            # Zoomed out so much that the image is now a single pixel or less\n",
	"            raise ValueError('Width or height is invalid!')\n",
	"\n",
	"        # cv.resize takes dsize as (width, height), not (rows, columns).\n",
	"        image = cv.resize(image, (width, height))\n",
	"        center /= crop_ratio\n",
	"        scale /= crop_ratio\n",
	"\n",
	"    ul = (center - 200 * scale / 2).astype(int)\n",
	"    br = (center + 200 * scale / 2).astype(int)\n",
	"\n",
	"    if crop_ratio >= 2:\n",
	"        # Force the box to exactly resolution x resolution pixels.\n",
	"        br -= (br - ul - resolution)\n",
	"\n",
	"    # Margin added on every side before rotating and removed afterwards.\n",
	"    # NOTE(review): this is diagonal - width / 2, which looks larger than\n",
	"    # the rotation needs; kept as is because it only costs memory.\n",
	"    pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n",
	"\n",
	"    if rotate != 0:\n",
	"        ul -= pad_length\n",
	"        br += pad_length\n",
	"\n",
	"    # src: the box clipped to the image; dst: where that part lands in the output.\n",
	"    src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n",
	"    dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n",
	"\n",
	"    new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.uint8)\n",
	"    new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n",
	"\n",
	"    if rotate != 0:\n",
	"        new_image = rotate_bound(new_image, rotate)\n",
	"        new_height, new_width, _ = new_image.shape\n",
	"        # Remove the margin that was added for the rotation.\n",
	"        new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n",
	"\n",
	"    if crop_ratio < 2:\n",
	"        new_image = cv.resize(new_image, (resolution, resolution))\n",
	"\n",
	"    return new_image"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 50,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Build one augmented frame, crop it and overlay the projected 3D keypoints.\n",
	"subject = 3\n",
	"# Rows of available_segment are 0-based; sequences are numbered from 1.\n",
	"sequence = available_segment[subject - 1].index(True) + 1\n",
	"camera = 8\n",
	"frame = 4748\n",
	"\n",
	"image = SequentialDictionary()\n",
	"for video_format in available_format:\n",
	"    video[subject][sequence][video_format][camera].set(cv.CAP_PROP_POS_FRAMES, frame)\n",
	"    success, image[video_format] = video[subject][sequence][video_format][camera].read()\n",
	"    assert success, 'cannot read frame %d of %s' % (frame, video_format)\n",
	"\n",
	"# np.int was removed in NumPy 1.24; int() gives the same values.\n",
	"rgb_capture = video[subject][sequence][VIDEO_RGB][camera]\n",
	"height = int(rgb_capture.get(cv.CAP_PROP_FRAME_HEIGHT))\n",
	"width = int(rgb_capture.get(cv.CAP_PROP_FRAME_WIDTH))\n",
	"channel = 3\n",
	"\n",
	"for image_name in ['checker', 'room', 'flower']:\n",
	"    texture = cv.imread('{image_name}.jpg'.format(image_name=image_name))\n",
	"    # cv.imread returns None instead of raising when the file cannot be read.\n",
	"    assert texture is not None, 'cannot read %s.jpg' % image_name\n",
	"    # cv.resize takes dsize as (width, height).\n",
	"    image[image_name] = cv.resize(texture, (width, height))\n",
	"\n",
	"# Random brightness factor in [0.4, 1.2) for the pants region.\n",
	"gitter = 0.4 + 0.8 * random.random()\n",
	"\n",
	"rgb = image[VIDEO_RGB]\n",
	"mask = image[VIDEO_MASK_HUMAN_AND_CHAIR]\n",
	"\n",
	"background = mask[:, :, 2] < 200\n",
	"chair = image[VIDEO_MASK_CHAIR][:, :, 2] < 200\n",
	"pants = mask[:, :, 0] < 200\n",
	"shirts = mask[:, :, 1] < 200\n",
	"\n",
	"rgb[shirts] = image['checker'][shirts]\n",
	"# Clip before storing: a factor above 1 would otherwise wrap around in uint8.\n",
	"rgb[pants] = np.clip(rgb[pants] * gitter, 0, 255).astype(np.uint8)\n",
	"rgb[background] = image['room'][background]\n",
	"rgb[chair] = image['flower'][chair]\n",
	"\n",
	"in_3D = np.reshape(annot[subject][sequence][ANNOT_CAMERA_3D][camera, 0][frame], (-1, 3))\n",
	"\n",
	"num_keypoints = len(in_3D)\n",
	"\n",
	"# Homogeneous coordinates, one keypoint per column.\n",
	"in_3D = np.concatenate((in_3D, np.ones(shape=(num_keypoints, 1))), axis=1).transpose(1, 0)\n",
	"# Identity pose [I | 0]: the translation column must be zeros; ones would\n",
	"# shift every point by one unit along each axis before projection.\n",
	"# (ANNOT_CAMERA_3D is taken to be in camera coordinates, as its name says.)\n",
	"identity_transform = np.concatenate((np.eye(3), np.zeros(shape=(3, 1))), axis=1)\n",
	"\n",
	"projected = np.matmul(identity_transform, in_3D)\n",
	"projected = np.matmul(camera_parameter[subject][sequence][camera][CAMERA_INTRINSIC], projected)\n",
	"projected = projected / projected[-1, :]  # perspective divide\n",
	"projected = projected.transpose(1, 0)\n",
	"\n",
	"# Bounding box of the keypoints, enlarged by 50 px on every side.\n",
	"pad = np.asarray([50, 50], dtype=int)\n",
	"ul = np.asarray([np.min(projected[:, 0]), np.min(projected[:, 1])], dtype=int) - pad\n",
	"br = np.asarray([np.max(projected[:, 0]), np.max(projected[:, 1])], dtype=int) + pad\n",
	"\n",
	"center = ((ul + br) * 0.5).astype(int)\n",
	"scale = np.max(br - ul) / 200  # crop_image measures the box side as 200 * scale\n",
	"\n",
	"cropped = crop_image(rgb, center, scale, 30, 256)\n",
	"success = cv.imwrite('crop.jpg', cropped)\n",
	"assert success\n",
	"\n",
	"# Paint the bounding box into the mask image. Clamp the corners so that a\n",
	"# negative coordinate is not read as an index counted from the end.\n",
	"mask_height, mask_width = mask.shape[:2]\n",
	"x0, x1 = np.clip([ul[0], br[0]], 0, mask_width)\n",
	"y0, y1 = np.clip([ul[1], br[1]], 0, mask_height)\n",
	"mask[y0:y1, x0:x1, :] = [255, 255, 255]\n",
	"\n",
	"# Mark every keypoint with a 20 x 20 square of [0, 0, 255], clipped to the frame.\n",
	"frame_height, frame_width = rgb.shape[:2]\n",
	"for x, y, _ in projected:\n",
	"    x0, x1 = np.clip([math.floor(x) - 10, math.floor(x) + 10], 0, frame_width)\n",
	"    y0, y1 = np.clip([math.floor(y) - 10, math.floor(y) + 10], 0, frame_height)\n",
	"    rgb[y0:y1, x0:x1, :] = [0, 0, 255]\n",
	"\n",
	"for video_format in available_format:\n",
	"    success = cv.imwrite('{format}.jpg'.format(format=video_format), image[video_format])\n",
	"    assert success"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Release every capture. The combinations are rebuilt here instead of reusing\n",
	"# available_video: a one-shot product() iterator is empty once it has been\n",
	"# consumed, and the loop would then release nothing.\n",
	"for subject, sequence, video_format, camera in product(\n",
	"    available_subject,\n",
	"    available_sequence,\n",
	"    available_format,\n",
	"    available_camera,\n",
	"):\n",
	"    video[subject][sequence][video_format][camera].release()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# print('Video:', video_path)\n",
	"# print('Open:', video.isOpened())\n",
	"# print('Resolution:', '%dx%d' % (video.get(cv.CAP_PROP_FRAME_WIDTH), video.get(cv.CAP_PROP_FRAME_HEIGHT)))\n",
	"# print('Total frames:', video.get(cv.CAP_PROP_FRAME_COUNT))\n",
	"# print('Frame-rate:', video.get(cv.CAP_PROP_FPS))\n",
	"# print('OpenCV:', cv.__version__)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}