@nulledge
Created October 1, 2018 02:11
Convert Human3.6M from MATLAB to Python
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"import cv2\n",
"import h5py\n",
"import imageio\n",
"import math\n",
"import matlab.engine\n",
"import numpy as np\n",
"import os\n",
"import pickle\n",
"import skimage\n",
"import skimage.io\n",
"import skimage.transform\n",
"from functools import lru_cache\n",
"from tqdm import tqdm as tqdm\n",
"from vectormath import Vector2, Vector3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"root = 'D:/data/Human3.6M/Release-v1.1/'\n",
"script_paths = [subdir for subdir, _, _ in os.walk(root) if '.git' not in subdir]\n",
"additional_script_paths = [\n",
" # empty\n",
"]\n",
"subjects = [\n",
" 1, 5, 6, 7, 8, # training\n",
" 9, 11, # validation\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"core = matlab.engine.start_matlab()\n",
"for script_path in script_paths + additional_script_paths:\n",
" core.addpath(script_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"core.workspace['DB'] = core.H36MDataBase.instance()\n",
"core.workspace['feature_RGB'] = core.H36MRGBVideoFeature()\n",
"core.workspace['feature_BB'] = core.H36MMyBBMask()\n",
"core.workspace['feature_BG'] = core.H36MMyBGMask()\n",
"core.workspace['features'] = [\n",
" core.H36MPose2DPositionsFeature(),\n",
" core.H36MPose3DPositionsFeature('Monocular', True),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def valid_sequence(subject, action, sub_action, camera):\n",
" return subject in [1, 5, 6, 7, 8, 9, 11] and\\\n",
" 1 <= action <= 16 and\\\n",
" 1 <= sub_action <= 2 and\\\n",
" 1 <= camera <= 4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_max_frame(subject, action, sub_action):\n",
" return int(core.getNumFrames(core.workspace['DB'], subject, action, sub_action))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_sequence(subject, action, sub_action, camera):\n",
" core.workspace['sequence'] = core.H36MSequence(subject, action, sub_action, camera, -1)\n",
" return core.workspace['sequence']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_intrinsics(subject, action, sub_action, camera):\n",
" if not valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" sequence = get_sequence(subject, action, sub_action, camera)\n",
" core.workspace['camera'] = core.getCamera(sequence)\n",
" \n",
" f, c, k, p = [core.eval('camera.%s' % attrib)[0] for attrib in ['f', 'c', 'k', 'p']]\n",
" \n",
" return f, c, k, p"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_RGB(subject, action, sub_action, camera, frame):\n",
" if not valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = get_sequence(subject, action, sub_action, camera)\n",
" core.workspace['metadata'] = core.serializer(core.workspace['feature_RGB'], sequence)\n",
" \n",
" image = core.getFrame(core.workspace['metadata'], core.double(frame))\n",
" image = np.reshape(np.asarray(image._data, dtype=np.float), newshape=(image._size[2], image._size[1], image._size[0])).transpose(2, 1, 0)\n",
" \n",
" video_name = core.eval('metadata.Reader.VideoName')\n",
" \n",
" return image, video_name"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_video_name(subject, action, sub_action, camera):\n",
" if not valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" sequence = get_sequence(subject, action, sub_action, camera)\n",
" core.workspace['metadata'] = core.serializer(core.workspace['feature_RGB'], sequence)\n",
" \n",
" video_name = core.eval('metadata.Reader.VideoName')\n",
" \n",
" return video_name"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_pose(subject, action, sub_action, camera, frame):\n",
" if not valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = get_sequence(subject, action, sub_action, camera)\n",
" core.eval('sequence.IdxFrames = %d;' % frame, nargout=0)\n",
" \n",
" pose = core.H36MComputeFeatures(sequence, core.workspace['features'])\n",
" \n",
" return np.reshape(np.asarray(pose[0]), newshape=(32, 2)),\\\n",
" np.reshape(np.asarray(pose[1]), newshape=(32, 3))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_center_scale(subject, action, sub_action, camera, frame):\n",
" if not valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = get_sequence(subject, action, sub_action, camera)\n",
" core.workspace['metadata'] = core.serializer(core.workspace['feature_BB'], sequence)\n",
" \n",
" mask = core.getFrame(core.workspace['metadata'], core.double(frame))\n",
" mask = np.reshape(np.asarray(mask._data, dtype=np.float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
" \n",
" flatten = mask.flatten()\n",
" flatten = np.nonzero(flatten)[0]\n",
" ul, br = [flatten[where] for where in [0, -1]]\n",
" ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
" br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
"\n",
" center = (ul + br) / 2\n",
" height = (br - ul).y\n",
" width = (br - ul).x\n",
" scale = max(height, width) / 200\n",
" \n",
" return center, scale"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_center_scale_directly(video_name, frame):\n",
" \n",
" sub = video_name.split('/')[-3].split('\\\\')[0]\n",
" act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
" #act = act.replace(' ', '_')\n",
" \n",
" data_root = 'D:/data/Human3.6M/downloaded/'\n",
" bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
" with h5py.File(bb_path, 'r') as file:\n",
" mask = np.asarray(file[file['Masks'][frame][0]]).transpose(1, 0)\n",
"\n",
" flatten = mask.flatten()\n",
" flatten = np.nonzero(flatten)[0]\n",
" ul, br = [flatten[where] for where in [0, -1]]\n",
" ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
" br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
"\n",
" center = (ul + br) / 2\n",
" height = (br - ul).y\n",
" width = (br - ul).x\n",
" scale = max(height, width) / 200\n",
" \n",
" return center, scale"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def project(keypoints, f, c, k, p):\n",
" X = keypoints.transpose(1, 0) # Already in 3D pose\n",
" XX = np.divide(X[0:2, :], X[2, :])\n",
" r2 = np.power(XX[0, :], 2) + np.power(XX[1, :], 2)\n",
" radial = np.dot(k, np.asarray([r2, np.power(r2, 2), np.power(r2, 3)])) + 1\n",
" tan = p[0] * XX[1, :] + p[1] * XX[0, :]\n",
" temp = radial + tan\n",
" first = XX * np.stack([temp, temp])\n",
" second = np.expand_dims(np.asarray([p[1], p[0]]), axis=1) * np.expand_dims(r2, axis=0)\n",
" XXX = first + second\n",
" XXX = XXX.transpose(1, 0)\n",
" proj = f * XXX + c\n",
" \n",
" return proj"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def crop_image(image, center, scale, rotate, resolution):\n",
" center = Vector2(center) # assign new array\n",
" height, width, channel = image.shape\n",
" crop_ratio = 200 * scale / resolution\n",
" if crop_ratio >= 2: # if box size is greater than two time of resolution px\n",
" # scale down image\n",
" height = math.floor(height / crop_ratio)\n",
" width = math.floor(width / crop_ratio)\n",
"\n",
" if max([height, width]) < 2:\n",
" # Zoomed out so much that the image is now a single pixel or less\n",
" raise ValueError(\"Width or height is invalid!\")\n",
"\n",
" image = skimage.transform.resize(image, (height, width), mode='constant')\n",
"# image = image.resize(image, (height, width), mode='constant')\n",
" center /= crop_ratio\n",
" scale /= crop_ratio\n",
"\n",
" ul = (center - 200 * scale / 2).astype(int)\n",
" br = (center + 200 * scale / 2).astype(int) # Vector2\n",
"\n",
" if crop_ratio >= 2: # force image size 256 x 256\n",
" br -= (br - ul - resolution)\n",
"\n",
" pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n",
"\n",
" if rotate != 0:\n",
" ul -= pad_length\n",
" br += pad_length\n",
"\n",
" src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n",
" dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n",
"\n",
" new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.float32)\n",
" new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n",
"\n",
" if rotate != 0:\n",
" new_image = skimage.transform.rotate(new_image, rotate)\n",
" new_height, new_width, _ = new_image.shape\n",
" new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n",
"\n",
" if crop_ratio < 2:\n",
" new_image = skimage.transform.resize(new_image, (resolution, resolution), mode='constant')\n",
"# new_image = Image.resize(new_image, (resolution, resolution), mode='constant')\n",
"\n",
" return new_image\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pelvis = [1]\n",
"left_leg = [7, 8, 9]\n",
"right_leg = [2, 3, 4]\n",
"spine = [13, 14, 15, 16]\n",
"left_arm = [18, 19, 20]\n",
"right_arm = [26, 27, 28]\n",
"keypoints = pelvis + left_leg + right_leg + spine + left_arm + right_arm\n",
"\n",
"converted = dict()\n",
"converted ['S'] = list()\n",
"converted ['part'] = list()\n",
"converted ['center'] = list()\n",
"converted ['scale'] = list()\n",
"converted ['image'] = list()\n",
"\n",
"total = 0\n",
"\n",
"for subject in [1, 5, 6, 7, 8, ]:\n",
" for action in range(2, 16 + 1):\n",
" for sub_action in [1, 2]:\n",
" for camera in [1, 2, 3, 4]:\n",
"\n",
" # Data corrupted.\n",
" if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
" continue\n",
" \n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
" total = total + max_frame//5\n",
" \n",
" \n",
"with tqdm(total=total) as progress:\n",
"\n",
" for subject in [1, 5, 6, 7, 8, ]:\n",
" for action in range(2, 16 + 1):\n",
" for sub_action in [1, 2]:\n",
" for camera in [1, 2, 3, 4]:\n",
"\n",
" progress.set_description('subject(%d) action(%d-%d) camera(%d)' % (subject, action, sub_action, camera))\n",
"\n",
" # Data corrupted.\n",
" if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
" continue\n",
"\n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
"\n",
" video_name = get_video_name(subject, action, sub_action, camera)\n",
" sub = video_name.split('/')[-3].split('\\\\')[0]\n",
" act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
" \n",
" data_root = 'D:/data/Human3.6M/downloaded/'\n",
" bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
" \n",
" act = act.replace(' ', '_')\n",
" video_name = '%s_%s.%s' % (sub, act, cam)\n",
" \n",
" with h5py.File(bb_path, 'r') as file:\n",
"\n",
" for frame in range(1, max_frame+1, 5):\n",
" mask = np.asarray(file[file['Masks'][frame-1][0]]).transpose(1, 0)\n",
"\n",
" flatten = mask.flatten()\n",
" flatten = np.nonzero(flatten)[0]\n",
" ul, br = [flatten[where] for where in [0, -1]]\n",
" ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
" br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
"\n",
" center = (ul + br) / 2 # center\n",
" height = (br - ul).y\n",
" width = (br - ul).x\n",
" scale = max(height, width) / 200 # scale\n",
" \n",
" # center, scale = get_center_scale(subject, action, sub_action, camera, frame) # center, scale\n",
" in_image_space, in_camera_space = get_pose(subject, action, sub_action, camera, frame) # part, S\n",
"\n",
" converted ['S'].append(np.reshape([in_camera_space[idx-1] for idx in keypoints], (-1, 3)))\n",
" converted ['part'].append(np.reshape([in_image_space[idx-1] for idx in keypoints], (-1, 2)))\n",
" converted ['center'].append(center)\n",
" converted ['scale'].append(scale)\n",
" converted ['image'].append('%s_%06d.jpg' % (video_name, frame))\n",
"\n",
" progress.update(1)\n",
"\n",
"pickle.dump(converted, open('train.bin', 'wb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pelvis = [1]\n",
"left_leg = [7, 8, 9]\n",
"right_leg = [2, 3, 4]\n",
"spine = [13, 14, 15, 16]\n",
"left_arm = [18, 19, 20]\n",
"right_arm = [26, 27, 28]\n",
"keypoints = pelvis + left_leg + right_leg + spine + left_arm + right_arm\n",
"\n",
"converted = dict()\n",
"converted ['S'] = list()\n",
"converted ['part'] = list()\n",
"converted ['center'] = list()\n",
"converted ['scale'] = list()\n",
"converted ['image'] = list()\n",
"\n",
"total = 0\n",
"\n",
"for subject in [9. 11, ]:\n",
" for action in range(2, 16 + 1):\n",
" for sub_action in [1, 2]:\n",
" for camera in [1, 2, 3, 4]:\n",
"\n",
" # Data corrupted.\n",
" if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
" continue\n",
" \n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
" total = total + max_frame//5\n",
" \n",
" \n",
"with tqdm(total=total) as progress:\n",
"\n",
" for subject in [9, 11, ]:\n",
" for action in range(2, 16 + 1):\n",
" for sub_action in [1, 2]:\n",
" for camera in [1, 2, 3, 4]:\n",
"\n",
" progress.set_description('subject(%d) action(%d-%d) camera(%d)' % (subject, action, sub_action, camera))\n",
"\n",
" # Data corrupted.\n",
" if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
" continue\n",
"\n",
" max_frame = get_max_frame(subject, action, sub_action)\n",
"\n",
" video_name = get_video_name(subject, action, sub_action, camera)\n",
" sub = video_name.split('/')[-3].split('\\\\')[0]\n",
" act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
" \n",
" data_root = 'D:/data/Human3.6M/downloaded/'\n",
" bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
" \n",
" act = act.replace(' ', '_')\n",
" video_name = '%s_%s.%s' % (sub, act, cam)\n",
" \n",
" with h5py.File(bb_path, 'r') as file:\n",
"\n",
" for frame in range(1, max_frame+1, 5):\n",
" mask = np.asarray(file[file['Masks'][frame-1][0]]).transpose(1, 0)\n",
"\n",
" flatten = mask.flatten()\n",
" flatten = np.nonzero(flatten)[0]\n",
" ul, br = [flatten[where] for where in [0, -1]]\n",
" ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
" br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
"\n",
" center = (ul + br) / 2 # center\n",
" height = (br - ul).y\n",
" width = (br - ul).x\n",
" scale = max(height, width) / 200 # scale\n",
" \n",
" # center, scale = get_center_scale(subject, action, sub_action, camera, frame) # center, scale\n",
" in_image_space, in_camera_space = get_pose(subject, action, sub_action, camera, frame) # part, S\n",
"\n",
" converted ['S'].append(np.reshape([in_camera_space[idx-1] for idx in keypoints], (-1, 3)))\n",
" converted ['part'].append(np.reshape([in_image_space[idx-1] for idx in keypoints], (-1, 2)))\n",
" converted ['center'].append(center)\n",
" converted ['scale'].append(scale)\n",
" converted ['image'].append('%s_%06d.jpg' % (video_name, frame))\n",
"\n",
" progress.update(1)\n",
"\n",
"pickle.dump(converted, open('valid.bin', 'wb'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@nulledge (Author):

Notes: the Human3.6M dataset is not entirely accurate.

  1. The 'Monocular' mode of the H36MCamera class returns the average intrinsics and extrinsics of the 4 cameras, so data captured from different cameras are treated as if they came from the same camera (see the sketch below).
  2. According to the README, the TOF data use the camera parameters of the RGB camera. You can calculate the parameters yourself, but they may not be accurate.
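
For reference, a minimal sketch (not part of the notebook, and assuming the MATLAB engine and the helpers above, get_intrinsics, get_pose and project, are already loaded) that reprojects the 3D camera-space pose with the per-camera intrinsics and compares it against the 2D ground truth:

```python
# Hypothetical spot check: reproject the 3D pose with the per-camera intrinsics
# and measure the per-joint distance to the 2D ground truth, in pixels.
subject, action, sub_action, camera, frame = 1, 2, 1, 1, 1  # arbitrary example indices

f, c, k, p = get_intrinsics(subject, action, sub_action, camera)
pose_2d, pose_3d = get_pose(subject, action, sub_action, camera, frame)

reprojected = project(pose_3d, f, c, k, p)             # (32, 2) pixel coordinates
error = np.linalg.norm(reprojected - pose_2d, axis=1)  # per-joint error in pixels
print('mean reprojection error: %.2f px' % error.mean())
```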
