Skip to content

Instantly share code, notes, and snippets.

@nulledge
Last active September 8, 2021 03:39
Show Gist options
  • Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Save nulledge/9b087ece756de53dfc71a01e52b25125 to your computer and use it in GitHub Desktop.
Description for Human3.6M dataset.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisite\n",
"\n",
"- Install *MATLAB engine API*\n",
"- Update *VideoUtils* under *${Human3.6M code}/external_utils/VideoUtils_v1_2/*\n",
"- Change all MATLAB classes to inherit *handle* class"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from functools import lru_cache\n",
"import imageio\n",
"import matlab.engine\n",
"import numpy as np\n",
"from os import path, walk\n",
"from os.path import abspath, curdir"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class H36M(object):\n",
" '''Parser for Human3.6M.\n",
" \n",
" With MATLAB engine, this class runs Human3.6M MATLAB scripts and gets the data.\n",
" \n",
" Attrib:\n",
" root: The path to Human3.6M MATLAB scripts directory.\n",
" script_paths: The sub-directory paths excluding git directory.\n",
" additional_script_paths: The paths to the customized MATLAB scripts.\n",
" \n",
" _core: The MATLAB engine object.\n",
" '''\n",
" root = abspath(path.join('D:', 'data', 'Human3.6M', 'Release-v1.1'))\n",
" script_paths = [subdir for subdir, _, _ in walk(root) if '.git' not in subdir]\n",
" additional_script_paths = [\n",
" ]\n",
" subjects = [1, 5, 6, 7, 8, 9, 11]\n",
" \n",
" def __init__(self):\n",
" '''Initialize the MATLAB engine and preload common instances.\n",
" '''\n",
" \n",
" # Initialize the MATLAB engine object.\n",
" self._core = matlab.engine.start_matlab()\n",
" for script_path in H36M.script_paths + H36M.additional_script_paths:\n",
" self._core.addpath(script_path)\n",
"\n",
" # Preload H36MDataBase and necessary metadata of annotations.\n",
" self._core.workspace['DB'] = self._core.H36MDataBase.instance()\n",
" self._core.workspace['feature_RGB'] = self._core.H36MRGBVideoFeature()\n",
" self._core.workspace['feature_BB'] = self._core.H36MMyBBMask()\n",
" self._core.workspace['feature_BG'] = self._core.H36MMyBGMask()\n",
" self._core.workspace['features'] = [\n",
" self._core.H36MPose3DPositionsFeature(),\n",
" # self._core.H36MPose2DPositionsFeature(), # Use Camera.project()\n",
" # self._core.H36MPose3DPositionsFeature('Monocular', True), # Use TransformJointsPosition()\n",
" ]\n",
" \n",
" def RGB_image(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single RGB image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" RGB image as numpy array in the height-width-channel shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" image = self._core.getFrame(RGB, self._core.double(frame))\n",
"        image = np.reshape(np.asarray(image._data, dtype=float), newshape=(image._size[2], image._size[1], image._size[0])).transpose(2, 1, 0)\n",
"\n",
" return image\n",
" \n",
" def BB_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single bounding-box mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The bounding-box mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BB = self._BB_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BB, self._core.double(frame))\n",
"        mask = np.reshape(np.asarray(mask._data, dtype=float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
"\n",
" return mask\n",
" \n",
" def BG_mask(self, subject, action, sub_action, camera, frame):\n",
" '''Get a single background mask.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" BG = self._BG_handle(subject, action, sub_action, camera)\n",
" mask = self._core.getFrame(BG, self._core.double(frame))\n",
"        mask = np.reshape(np.asarray(mask._data, dtype=float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
"\n",
" return mask\n",
" \n",
" def ToF(self, subject, action, sub_action, frame):\n",
" '''Get a single time-of-flight image.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" The background mask as numpy array in the height-width shape.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera = 2):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" ToF = self._ToF_handle(subject, action, sub_action)\n",
" ToF = self._core.getFrame(ToF, self._core.double(frame))\n",
"        ToF = np.reshape(np.asarray(ToF._data, dtype=float), newshape=(ToF._size[1], ToF._size[0])).transpose(1, 0)\n",
"\n",
" return ToF\n",
" \n",
" def RGB_video_name(self, subject, action, sub_action, camera):\n",
" '''Get the RGB video file name.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The file name of RGB video.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" RGB = self._RGB_handle(subject, action, sub_action, camera)\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" \n",
" return self._core.eval('%s.Reader.VideoName;' % var_name)\n",
" \n",
" def pose_3D(self, subject, action, sub_action, camera, frame):\n",
" '''Get the keypoint positions.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" 32 Keypoint 3D positions.\n",
" '''\n",
" if not self.valid_sequence(subject, action, sub_action, camera):\n",
" raise IndexError()\n",
" \n",
" max_frame = self.max_frame(subject, action, sub_action)\n",
" if not (1 <= frame <= max_frame):\n",
" raise IndexError()\n",
" \n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.eval('%s.IdxFrames = %d;' % (var_name, frame), nargout = 0)\n",
" \n",
" pose_3D = self._core.H36MComputeFeatures(sequence, self._core.workspace['features'])[0]\n",
" return np.reshape(np.asarray(pose_3D), newshape=(32, 3))\n",
" \n",
" def valid_sequence(self, subject, action, sub_action, camera):\n",
" '''Check if the sequence is valid.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" frame: The frame number.\n",
" \n",
" Return:\n",
" True if the valid sequence.\n",
" Otherwise False.\n",
" '''\n",
" return subject in H36M.subjects and\\\n",
" 1 <= action <= 16 and\\\n",
" 1 <= sub_action <= 2 and\\\n",
" 1 <= camera <= 4\n",
" \n",
" def max_frame(self, subject, action, sub_action):\n",
" '''Get the maximum frame.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The maximum frame.\n",
" '''\n",
" return self._core.getNumFrames(self._core.workspace['DB'], subject, action, sub_action)\n",
" \n",
" def clear_workspace(self):\n",
" '''Clear the MATLAB workspace.\n",
" '''\n",
" self._core.eval('clear;', nargout = 0)\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _sequence(self, subject, action, sub_action, camera):\n",
" '''Covert to sequence_object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" H36MSequence object.\n",
" '''\n",
" var_name = 'sequence_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.H36MSequence(subject, action, sub_action, camera, -1)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _RGB_handle(self, subject, action, sub_action, camera):\n",
" '''Get RGB video handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'RGB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_RGB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BB_handle(self, subject, action, sub_action, camera):\n",
" '''Get the bounding-box handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BB_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BB'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 1024)\n",
" def _BG_handle(self, subject, action, sub_action, camera):\n",
" '''Get the background handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" camera: The camera number in the range between 1 and 4.\n",
" \n",
" Return:\n",
" The MATLAB VideoPlayer.\n",
" '''\n",
" var_name = 'BG_%02d_%02d_%d_%d' % (subject, action, sub_action, camera)\n",
" sequence = self._sequence(subject, action, sub_action, camera)\n",
" self._core.workspace[var_name] = self._core.serializer(self._core.workspace['feature_BG'], sequence)\n",
" \n",
" return self._core.workspace[var_name]\n",
" \n",
" @lru_cache(maxsize = 256)\n",
" def _ToF_handle(self, subject, action, sub_action):\n",
" '''Get the time-of-flight handle object.\n",
" \n",
" Params:\n",
" subject: The subject number in 1, 5, 6, 7, 8, 9 and 11.\n",
" action: The action number in the range between 1 and 16.\n",
" sub_action: The sub-action number 1 or 2.\n",
" \n",
" Return:\n",
" The ToF cdf file wrapper.\n",
" '''\n",
"        var_name = 'ToF_%02d_%02d_%d' % (subject, action, sub_action)\n",
" self._core.workspace[var_name] = self._core.H36MTOFDataAccess(subject, action, sub_action)\n",
" \n",
" return self._core.workspace[var_name]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"query = H36M()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"subject = 5\n",
"action = 14\n",
"sub_action = 2\n",
"camera = 2\n",
"frame = query.max_frame(subject, action, sub_action)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Video at: D:\\data\\Human3.6M\\downloaded/S5\\/Videos/Walking.55011271.mp4\n"
]
}
],
"source": [
"name = query.RGB_video_name(subject, action, sub_action, camera)\n",
"print('Video at: %s' % name)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"image = query.RGB_image(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('test.png', image)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(32, 3)\n"
]
}
],
"source": [
"pose = query.pose_3D(subject, action, sub_action, camera, frame)\n",
"print(pose.shape)\n",
"for keypoint in pose:\n",
" for ty in range(-5, 5):\n",
" for tx in range(-5, 5):\n",
"        if not 0 <= keypoint[0] + ty < image.shape[0] or not 0 <= keypoint[1] + tx < image.shape[1]:\n",
" continue\n",
"        image[int(keypoint[0] + ty), int(keypoint[1] + tx), :] = [1, 0, 0]\n",
"imageio.imwrite('pose.png', image)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"bb = query.BB_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bb.png', bb)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"bg = query.BG_mask(subject, action, sub_action, camera, frame)\n",
"imageio.imwrite('bg.png', bg)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\n"
]
}
],
"source": [
"tof = query.ToF(subject, action, sub_action, frame)\n",
"tof /= np.max(tof)\n",
"imageio.imwrite('tof.png', tof)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Features'] = [\n",
" engine.H36MPose3DPositionsFeature(),\n",
" engine.H36MPose2DPositionsFeature(),\n",
" engine.H36MPose3DPositionsFeature('Monocular', True),\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['Sequence'] = engine.H36MSequence(query.subject, query.action, query.sub_action, query.camera, query.frame)\n",
"engine.workspace['F'] = engine.H36MComputeFeatures(engine.workspace['Sequence'], engine.workspace['Features'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['c'] = engine.getCamera(engine.workspace['Sequence'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['projected'] = engine.project(engine.workspace['c'], engine.workspace['F'][2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"engine.workspace['camera0'] = engine.H36MCamera(engine.workspace['DB'], 0, 1)\n",
"engine.workspace['projected'] = engine.project(engine.workspace['camera0'], engine.workspace['F'][2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Reference

@article{ionescu2014human3,
  title={Human3.6M: Large scale datasets and predictive methods for 3d human sensing in natural environments},
  author={Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
  journal={IEEE transactions on pattern analysis and machine intelligence},
  volume={36},
  number={7},
  pages={1325--1339},
  year={2014},
  publisher={IEEE}
}

Setup

Documentation

H36M/

H36MCamera.m

  • R, T: Camera transform parameters
    • R: Euler angle
    • T: World position
  • f, c, k p: Camera calibration parameters
    • f: Focal length
    • c: Principal point
    • k: Radial distortion
    • p: Tangential distortion
H36MCamera(db, s, c)
  • db: H36MDataBase instance
  • s: Subject number (camera is monocular if s == 0)
  • c: Camera number (1 <= c <= 4)
project(obj, X)
  • obj: Camera instance
  • X: 3D position list

H36MDataAccess.m

An abstract class for H36MFeatureDataAccess, H36MPoseDataAccess, H36MTOFDataAccess and H36MVideoDataAccess classes. In H36MBL/H36MComputeFeatures.m, the Exists property is used to check if the precomputed data exists and the Permanent property is used to save the precomputed data if necessary.

  • Exists: If the precomputed data exists
  • Permanent: Save the precomputed data into the file
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number

H36MDataBase.m

The preloaded data from metadata.xml file. This includes camera parameters, joint relations and etc.


H36MFeature.m

An abstract class for H36MPose2DPositionsFeature, H36MPose3DAnglesFeature, H36MPose3DPositionFeature and etc.


H36MFeatureDataAccess.m

Called by H36MPose(2DPosition|3DPosition|3DAngles)Feature.serializer(Sequence), it reads whole precomputed feature data specified by the feature and the sequence.

  • Buffer: Precomputed data
getFrame(obj, fno)
  • obj: DataAccess instance
  • fno: Frame number

H36MMyBBMask.m

Similar to the factory design pattern, generates the bounding-box mask DataAccess instance.

serializer(obj, Sequence)

Generates the bounding-box mask DataAccess instance.

  • obj: H36MMyBBMask instance
  • Sequence: Sequence instance
%% Sample code
sequence = H36MSequence(4, 3, 2, 1, -1)
bb_factory = H36MMyBBMask();
bb_handle = bb_factory.serializer(sequence);
bb_mask = bb_handle.getFrame(100);

H36MMyBGMask.m

Similar to the factory design pattern, generates the background mask DataAccess instance.

serializer(obj, Sequence)

Similar to H36MMyBBMask.serializer(obj, Sequence).


Sample codes

Visualize ToF

addpaths;
clear;
close all;

tof = H36MTOFDataAccess(6, 13, 2).getFrame(800); % subject, action, sub-action
figure(1); imshow(tof/10);

Issue

Regex error during the matlab version check

Delete the following code.

  • ${code archive}/external_utils/xml_io_tools/xml_read.m
%% Check Matlab Version
v = ver('MATLAB');
version = str2double(regexp(v.Version, '\d.\d','match','once'));
if (version<7.1)
  error('Your MATLAB version is too old. You need version 7.1 or newer.');
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment