Skip to content

Instantly share code, notes, and snippets.

Last active September 8, 2021 03:39
Show Gist options
  • Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Description for Human3.6M dataset.
Display the source blob
Display the rendered blob
"cells": [
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisite\n",
"- Install *MATLAB engine API*\n",
"- Update *VideoUtils* under *${Human3.6M code}/external_utils/VideoUtils_v1_2/*\n",
"- Change all MATLAB classes to inherit *handle* class"
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
"outputs": [],
"source": [
"from functools import lru_cache\n",
"import imageio\n",
"import matlab.engine\n",
"import numpy as np\n",
"from os import path, walk\n",
"from os.path import abspath, curdir"
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class H36M(object):\n",
" '''Parser for Human3.6M.\n",
" \n",
" With MATLAB engine, this class runs Human3.6M MATLAB scripts and gets the data.\n",
" \n",
" Attrib:\n",
" root: The path to Human3.6M MATLAB scripts directory.\n",
" script_paths: The sub-directory paths excluding git directory.\n",
" additional_script_paths: The paths to the customized MATLAB scripts.\n",
" \n",
" _core: The MATLAB engine object.\n",
" '''\n",
" root = abspath(path.join('D:', 'data', 'Human3.6M', 'Release-v1.1'))\n",
" script_paths = [subdir for subdir, _, _ in walk(root) if '.git' not in subdir]\n",
" additional_script_paths = [\n",
" ]\n",
" subjects = [1, 5, 6, 7, 8, 9, 11]\n",
" \n",
" def __init__(self):\n",
" '''Initialize the MATLAB engine and preload common instances.\n",
" '''\n",
" \n",
" # Initialize the MATLAB engine object.\n",
" self._core = matlab.engine.start_matlab()\n",
" for script_path in H36M.script_paths + H36M.additional_script_paths:\n",
" self._core.addpath(script_path)\n",
" # Preload H36MDataBase and necessary metadata of annotations.\n",
" self._core.workspace['DB'] = self._core.H36MDataBase.instance()\n",
" self._core.workspace['feature_RGB'] = self._core.H36MRGBVideoFeature()\n",
" self._core.workspace['feature_BB'] = self._core.H36MMyBBMask()\n",
" self._core.workspace['feature_BG'] = self._core.H36MMyBGMask()\n",
" self._core.workspace['features'] = [\n",
" self._core.H36MPose3DPositionsFeature(),\n",
" # self._core.H36MPose2DPositionsFeature(), # Use Camera.project()\n",
" # self._core.H36MPose3DPositionsFeature('Monocular', True), # Use TransformJointsPosition()\n",
" ]\n",
" \n",
" def RGB_image(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single RGB image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" RGB image as numpy array in the height-width-channel shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" image = self._core.getFrame(RGB, self._core.double(frame))\n",
" image = np.reshape(np.asarray(image._data, dtype=np.float), newshape=(image._size[2], image._size[1], image._size[0])).transpose(2, 1, 0)\n",
" return image\n",
" \n",
" def BB_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single bounding-box mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The bounding-box mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BB = self._BB_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BB, self._core.double(frame))\n",
" mask = np.reshape(np.asarray(mask._data, dtype=np.float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
" return mask\n",
" \n",
" def BG_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single background mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BG = self._BG_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BG, self._core.double(frame))\n",
" mask = np.reshape(np.asarray(mask._data, dtype=np.float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
" return mask\n",
" \n",
" def ToF(self, subject, action, sub_action, frame):\n",
" '''Get a single time-of-flight image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera = 2):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" ToF = self._ToF_handle(subject, action, sub_action)\n",
" ToF = self._core.getFrame(ToF, self._core.double(frame))\n",
" ToF = np.reshape(np.asarray(ToF._data, dtype=np.float), newshape=(ToF._size[1], ToF._size[0])).transpose(1, 0)\n",
" return ToF\n",
" \n",
" def RGB_video_name(self, subject, action, sub_action, camera):\n",
" '''Get the RGB video file name.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The file name of RGB video.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" \n",
" return self._core.eval('%s.Reader.VideoName;' % var_name)\n",
" \n",
" def pose_3D(self, subject, action, sub_action, camera, frame):\n",
" '''Get the keypoint positions.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" 32 Keypoint 3D positions.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.eval('%s.IdxFrames = %d;' % (var_name, frame), nargout = 0)\n",
" \n",
" pose_3D = self._core.H36MComputeFeatures(sequence, self._core.workspace['features'])[0]\n",
" return np.reshape(np.asarray(pose_3D), newshape=(32, 3))\n",
" \n",
" def valid_sequence(self, subject, action, sub_action, camera):\n",
" '''Check if the sequence is valid.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" True if the valid sequence.\n",
" Otherwise False.\n",
" '''\n",
" return subject in H36M.subjects and\\\n",
" 1 <= action <= 16 and\\\n",
" 1 <= sub_action <= 2 and\\\n",
" 1 <= camera <= 4\n",
" \n",
" def max_frame(self, subject, action, sub_action):\n",
" '''Get the maximum frame.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The maximum frame.\n",
" '''\n",
" return self._core.getNumFrames(self._core.workspace['DB'], subject, action, sub_action)\n",
" \n",
" def clear_workspace(self):\n",
" '''Clear the MATLAB workspace.\n",
" '''\n",
" self._core.eval('clear;', nargout = 0)\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _sequence(self, subject, action, sub_action, camera):\n",
" '''Covert to sequence_object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" H36MSequence object.\n",
" '''\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.H36MSequence(subject, action, sub_action, camera, -1)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _RGB_handle(self, subject, action, sub_action, camera):\n",
" '''Get RGB video handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_RGB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BB_handle(self, subject, action, sub_action, camera):\n",
" '''Get the bounding-box handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BG_handle(self, subject, action, sub_action, camera):\n",
" '''Get the background handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BG_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BG'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 256)\n",
" def _ToF_handle(self, subject, action, sub_action):\n",
" '''Get the time-of-flight handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The ToF cdf file wrapper.\n",
" '''\n",
" var_name = 'ToF_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.H36MTOFDataAccess(subject, action, sub_action)\n",
" \n",
" return self._core.workspace[var_name]"
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"query = H36M()"
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"subject = 5\n",
"action = 14\n",
"sub_action = 2\n",
"camera = 2\n",
"frame = query.max_frame(subject, action, sub_action)"
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
"name": "stdout",
"output_type": "stream",
"text": [
"Video at: D:\\data\\Human3.6M\\downloaded/S5\\/Videos/Walking.55011271.mp4\n"
"source": [
"name = query.RGB_video_name(subject, action, sub_action, camera)\n",
"print('Video at: %s' % name)"
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
"source": [
"image = query.RGB_image(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('test.png', image)"
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
"name": "stdout",
"output_type": "stream",
"text": [
"(32, 3)\n"
"source": [
"pose = query.pose_3D(subject, action, sub_action, camera, frame)\n",
"for keypoint in pose:\n",
" for ty in range(-5, 5):\n",
" for tx in range(-5, 5):\n",
" if not 0 <= keypoint[0] + ty < pose.shape[0] or not 0 <= keypoint[1] + tx < pose.shape[1]:\n",
" continue\n",
" image[keypoint[0] + ty, keypoint[1] + tx, :] = [1, 0, 0]\n",
"imageio.imwrite('pose.png', image)"
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
"source": [
"bb = query.BB_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bb.png', bb)"
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
"source": [
"bg = query.BG_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bg.png', bg)"
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
"source": [
"tof = query.ToF(subject, action, sub_action, frame)\n",
"tof /= np.max(tof)\n",
"imageio.imwrite('tof.png', tof)"
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Features'] = [\n",
" engine.H36MPose3DPositionsFeature(),\n",
" engine.H36MPose2DPositionsFeature(),\n",
" engine.H36MPose3DPositionsFeature('Monocular', True),\n",
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Sequence'] = engine.H36MSequence(query.subject, query.action, query.sub_action,, query.frame)\n",
"engine.workspace['F'] = engine.H36MComputeFeatures(engine.workspace['Sequence'], engine.workspace['Features'])"
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['c'] = engine.getCamera(engine.workspace['Sequence'])"
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['projected'] = engine.project(engine.workspace['c'], engine.workspace['F'][2])"
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['camera0'] = engine.H36MCamera(engine.workspace['DB'], 0, 1)\n",
"engine.workspace['projected'] = engine.project(engine.workspace['camera0'], engine.workspace['F'][2])"
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
"nbformat": 4,
"nbformat_minor": 2


  title={Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments},
  author={Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
  journal={IEEE transactions on pattern analysis and machine intelligence},





  • R, T: Camera transform parameters
    • R: Euler angle
    • T: World position
  • f, c, k, p: Camera calibration parameters
    • f: Focal length
    • c: Principal point
    • k: Radial distortion
    • p: Tangential distortion
H36MCamera(db, s, c)
  • db: H36MDataBase instance
  • s: Subject number (camera is monocular if s == 0)
  • c: Camera number (1 <= c <= 4)
project(obj, X)
  • obj: Camera instance
  • X: 3D position list


An abstract class for H36MFeatureDataAccess, H36MPoseDataAccess, H36MTOFDataAccess and H36MVideoDataAccess classes. In H36MBL/H36MComputeFeatures.m, the Exists property is used to check if the precomputed data exists and the Permanent property is used to save the precomputed data if necessary.

  • Exists: If the precomputed data exists
  • Permanent: Save the precomputed data into the file
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number


The data preloaded from the metadata.xml file. This includes camera parameters, joint relations, etc.


An abstract class for H36MPose2DPositionsFeature, H36MPose3DAnglesFeature, H36MPose3DPositionFeature, etc.


Called by H36MPose(2DPosition|3DPosition|3DAngles)Feature.serializer(Sequence), it reads the whole precomputed feature data specified by the feature and the sequence.

  • Buffer: Precomputed data
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number


Similar to the factory design pattern, generates the bounding-box mask DataAccess instance.

serializer(obj, Sequence)

Generates the bounding-box mask DataAccess instance.

  • obj: H36MMyBBMask instance
  • Sequence: Sequence instance
%% Sample code
sequence = H36MSequence(4, 3, 2, 1, -1)
bb_factory = H36MMyBBMask();
bb_handle = bb_factory.serializer(sequence);
bb_mask = bb_handle.getFrame(100);


Similar to the factory design pattern, generates the background mask DataAccess instance.

serializer(obj, Sequence)

Similar to H36MMyBBMask.serializer(obj, Sequence).

Sample code

Visualize ToF

close all;

tof = H36MTOFDataAccess(6, 13, 2).getFrame(800); % subject, action, sub-action
figure(1); imshow(tof/10);


Regex error during the matlab version check

Delete the following code.

  • ${code archive}/external_utils/xml_io_tools/xml_read.m
%% Check Matlab Version
v = ver('MATLAB');
version = str2double(regexp(v.Version, '\d.\d','match','once'));
if (version<7.1)
  error('Your MATLAB version is too old. You need version 7.1 or newer.');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment