Skip to content

Instantly share code, notes, and snippets.

@nulledge
Last active September 8, 2021 03:39
Show Gist options
  • Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Description for Human3.6M dataset.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisite\n",
"\n",
"- Install *MATLAB engine API*\n",
"- Update *VideoUtils* under *${Human3.6M code}/external_utils/VideoUtils_v1_2/*\n",
"- Change all MATLAB classes to inherit *handle* class"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from functools import lru_cache\n",
"import imageio\n",
"import matlab.engine\n",
"import numpy as np\n",
"from os import path, walk\n",
"from os.path import abspath, curdir"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class H36M(object):\n",
" '''Parser for Human3.6M.\n",
" \n",
" With MATLAB engine, this class runs Human3.6M MATLAB scripts and gets the data.\n",
" \n",
" Attrib:\n",
" root: The path to Human3.6M MATLAB scripts directory.\n",
" script_paths: The sub-directory paths excluding git directory.\n",
" additional_script_paths: The paths to the customized MATLAB scripts.\n",
" \n",
" _core: The MATLAB engine object.\n",
" '''\n",
" root = abspath(path.join('D:', 'data', 'Human3.6M', 'Release-v1.1'))\n",
" script_paths = [subdir for subdir, _, _ in walk(root) if '.git' not in subdir]\n",
" additional_script_paths = [\n",
" ]\n",
" subjects = [1, 5, 6, 7, 8, 9, 11]\n",
" \n",
" def __init__(self):\n",
" '''Initialize the MATLAB engine and preload common instances.\n",
" '''\n",
" \n",
" # Initialize the MATLAB engine object.\n",
" self._core = matlab.engine.start_matlab()\n",
" for script_path in H36M.script_paths + H36M.additional_script_paths:\n",
" self._core.addpath(script_path)\n",
"\n",
" # Preload H36MDataBase and necessary metadata of annotations.\n",
" self._core.workspace['DB'] = self._core.H36MDataBase.instance()\n",
" self._core.workspace['feature_RGB'] = self._core.H36MRGBVideoFeature()\n",
" self._core.workspace['feature_BB'] = self._core.H36MMyBBMask()\n",
" self._core.workspace['feature_BG'] = self._core.H36MMyBGMask()\n",
" self._core.workspace['features'] = [\n",
" self._core.H36MPose3DPositionsFeature(),\n",
" # self._core.H36MPose2DPositionsFeature(), # Use Camera.project()\n",
" # self._core.H36MPose3DPositionsFeature('Monocular', True), # Use TransformJointsPosition()\n",
" ]\n",
" \n",
" def RGB_image(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single RGB image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" RGB image as numpy array in the height-width-channel shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" image = self._core.getFrame(RGB, self._core.double(frame))\n",
"        image = np.reshape(np.asarray(image._data, dtype=float), newshape=(image._size[2], image._size[1], image._size[0])).transpose(2, 1, 0)\n",
"\n",
" return image\n",
" \n",
" def BB_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single bounding-box mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The bounding-box mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BB = self._BB_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BB, self._core.double(frame))\n",
"        mask = np.reshape(np.asarray(mask._data, dtype=float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
"\n",
" return mask\n",
" \n",
" def BG_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single background mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BG = self._BG_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BG, self._core.double(frame))\n",
"        mask = np.reshape(np.asarray(mask._data, dtype=float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
"\n",
" return mask\n",
" \n",
" def ToF(self, subject, action, sub_action, frame):\n",
" '''Get a single time-of-flight image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera = 2):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" ToF = self._ToF_handle(subject, action, sub_action)\n",
" ToF = self._core.getFrame(ToF, self._core.double(frame))\n",
"        ToF = np.reshape(np.asarray(ToF._data, dtype=float), newshape=(ToF._size[1], ToF._size[0])).transpose(1, 0)\n",
"\n",
" return ToF\n",
" \n",
" def RGB_video_name(self, subject, action, sub_action, camera):\n",
" '''Get the RGB video file name.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The file name of RGB video.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" \n",
" return self._core.eval('%s.Reader.VideoName;' % var_name)\n",
" \n",
" def pose_3D(self, subject, action, sub_action, camera, frame):\n",
" '''Get the keypoint positions.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" 32 Keypoint 3D positions.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.eval('%s.IdxFrames = %d;' % (var_name, frame), nargout = 0)\n",
" \n",
" pose_3D = self._core.H36MComputeFeatures(sequence, self._core.workspace['features'])[0]\n",
" return np.reshape(np.asarray(pose_3D), newshape=(32, 3))\n",
" \n",
" def valid_sequence(self, subject, action, sub_action, camera):\n",
" '''Check if the sequence is valid.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" True if the valid sequence.\n",
" Otherwise False.\n",
" '''\n",
" return subject in H36M.subjects and\\\n",
" 1 <= action <= 16 and\\\n",
" 1 <= sub_action <= 2 and\\\n",
" 1 <= camera <= 4\n",
" \n",
" def max_frame(self, subject, action, sub_action):\n",
" '''Get the maximum frame.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The maximum frame.\n",
" '''\n",
" return self._core.getNumFrames(self._core.workspace['DB'], subject, action, sub_action)\n",
" \n",
" def clear_workspace(self):\n",
" '''Clear the MATLAB workspace.\n",
" '''\n",
" self._core.eval('clear;', nargout = 0)\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _sequence(self, subject, action, sub_action, camera):\n",
" '''Covert to sequence_object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" H36MSequence object.\n",
" '''\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.H36MSequence(subject, action, sub_action, camera, -1)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _RGB_handle(self, subject, action, sub_action, camera):\n",
" '''Get RGB video handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_RGB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BB_handle(self, subject, action, sub_action, camera):\n",
" '''Get the bounding-box handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BG_handle(self, subject, action, sub_action, camera):\n",
" '''Get the background handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BG_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BG'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 256)\n",
" def _ToF_handle(self, subject, action, sub_action):\n",
" '''Get the time-of-flight handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The ToF cdf file wrapper.\n",
" '''\n",
"        var_name = 'ToF_%02d_%02d_%d' % (subject, action, sub_action)\n",
" self._core.workspace[var_name] = self._core.H36MTOFDataAccess(subject, action, sub_action)\n",
" \n",
" return self._core.workspace[var_name]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"query = H36M()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"subject = 5\n",
"action = 14\n",
"sub_action = 2\n",
"camera = 2\n",
"frame = query.max_frame(subject, action, sub_action)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Video at: D:\\data\\Human3.6M\\downloaded/S5\\/Videos/Walking.55011271.mp4\n"
]
}
],
"source": [
"name = query.RGB_video_name(subject, action, sub_action, camera)\n",
"print('Video at: %s' % name)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"image = query.RGB_image(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('test.png', image)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(32, 3)\n"
]
}
],
"source": [
"pose = query.pose_3D(subject, action, sub_action, camera, frame)\n",
"print(pose.shape)\n",
"for keypoint in pose:\n",
" for ty in range(-5, 5):\n",
" for tx in range(-5, 5):\n",
"        if not 0 <= keypoint[0] + ty < image.shape[0] or not 0 <= keypoint[1] + tx < image.shape[1]:\n",
" continue\n",
"        image[int(keypoint[0] + ty), int(keypoint[1] + tx), :] = [1, 0, 0]\n",
"imageio.imwrite('pose.png', image)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"bb = query.BB_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bb.png', bb)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"bg = query.BG_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bg.png', bg)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"tof = query.ToF(subject, action, sub_action, frame)\n",
"tof /= np.max(tof)\n",
"imageio.imwrite('tof.png', tof)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Features'] = [\n",
" engine.H36MPose3DPositionsFeature(),\n",
" engine.H36MPose2DPositionsFeature(),\n",
" engine.H36MPose3DPositionsFeature('Monocular', True),\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Sequence'] = engine.H36MSequence(query.subject, query.action, query.sub_action, query.camera, query.frame)\n",
"engine.workspace['F'] = engine.H36MComputeFeatures(engine.workspace['Sequence'], engine.workspace['Features'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['c'] = engine.getCamera(engine.workspace['Sequence'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['projected'] = engine.project(engine.workspace['c'], engine.workspace['F'][2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['camera0'] = engine.H36MCamera(engine.workspace['DB'], 0, 1)\n",
"engine.workspace['projected'] = engine.project(engine.workspace['camera0'], engine.workspace['F'][2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Reference

@article{ionescu2014human3,
  title={Human3.6M: Large scale datasets and predictive methods for 3d human sensing in natural environments},
  author={Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
  journal={IEEE transactions on pattern analysis and machine intelligence},
  volume={36},
  number={7},
  pages={1325--1339},
  year={2014},
  publisher={IEEE}
}

Setup

Documentation

H36M/

H36MCamera.m

  • R, T: Camera transform parameters
    • R: Euler angle
    • T: World position
  • f, c, k p: Camera calibration parameters
    • f: Focal length
    • c: Principal point
    • k: Radial distortion
    • p: Tangential distortion
H36MCamera(db, s, c)
  • db: H36MDataBase instance
  • s: Subject number (camera is monocular if s == 0)
  • c: Camera number (1 <= c <= 4)
project(obj, X)
  • obj: Camera instance
  • X: 3D position list

H36MDataAccess.m

An abstract class for H36MFeatureDataAccess, H36MPoseDataAccess, H36MTOFDataAccess and H36MVideoDataAccess classes. In H36MBL/H36MComputeFeatures.m, the Exists property is used to check if the precomputed data exists and the Permanent property is used to save the precomputed data if necessary.

  • Exists: If the precomputed data exists
  • Permanent: Save the precomputed data into the file
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number

H36MDataBase.m

The preloaded data from metadata.xml file. This includes camera parameters, joint relations and etc.


H36MFeature.m

An abstract class for H36MPose2DPositionsFeature, H36MPose3DAnglesFeature, H36MPose3DPositionFeature and etc.


H36MFeatureDataAccess.m

Called by H36MPose(2DPosition|3DPosition|3DAngles)Feature.serializer(Sequence), it reads whole precomputed feature data specified by the feature and the sequence.

  • Buffer: Precomputed data
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number

H36MMyBBMask.m

Similar to the factory design pattern, generates the bounding-box mask DataAccess instance.

serializer(obj, Sequence)

Generates the bounding-box mask DataAccess instance.

  • obj: H36MMyBBMask instance
  • Sequence: Sequence instance
%% Sample code
sequence = H36MSequence(4, 3, 2, 1, -1)
bb_factory = H36MMyBBMask();
bb_handle = bb_factory.serializer(sequence);
bb_mask = bb_handle.getFrame(100);

H36MMyBGMask.m

Similar to the factory design pattern, generates the background mask DataAccess instance.

serializer(obj, Sequence)

Similar to H36MMyBBMask.serializer(obj, Sequence).


Sample codes

Visualize ToF

addpaths;
clear;
close all;

tof = H36MTOFDataAccess(6, 13, 2).getFrame(800); % subject, action, sub-action
figure(1); imshow(tof/10);

Issue

Regex error during the matlab version check

Delete the following code.

  • ${code archive}/external_utils/xml_io_tools/xml_read.m
%% Check Matlab Version
v = ver('MATLAB');
version = str2double(regexp(v.Version, '\d.\d','match','once'));
if (version<7.1)
  error('Your MATLAB version is too old. You need version 7.1 or newer.');
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment