@hobbeshunter
Created October 21, 2020 14:31
Compute depth maps from stereo dataset
#!/usr/bin/env python3
import argparse
import numpy as np
import cv2 as cv
import glob
import os
import sys
import sophus as sp
from multiprocessing import Pool
# see https://vision.in.tum.de/data/datasets/rgbd-dataset/file_formats#intrinsic_camera_calibration_of_the_kinect
# metric z coordinate is found through depth_image[v,u] / depth_factor
depth_factor = 5000 # 5000 for 16bit
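# e.g. (assuming the same convention as the 16 bit TUM RGB-D depth images):
# a stored pixel value of 10000 corresponds to 10000 / 5000 = 2.0 m of depth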
# intrinsics (Kannala-Brandt k1,...,k4 = OpenCV fisheye) and extrinsics of the TUM-VI 512 dataset cameras
# see https://vision.in.tum.de/data/datasets/visual-inertial-dataset
# cam0 = left
# cam1 = right
# replace with your calibration
left_K = np.array([[190.97847715128717, 0, 254.93170605935475],
                   [0, 190.9733070521226, 256.8974428996504],
                   [0, 0, 1]])
left_dist = np.array([0.0034823894022493434, 0.0007150348452162257, -0.0020532361418706202,
                      0.00020293673591811182])
right_K = np.array([[190.44236969414825, 0, 252.59949716835982],
                    [0, 190.4344384721956, 254.91723064636983],
                    [0, 0, 1]])
right_dist = np.array([0.0034003170790442797, 0.001766278153469831, -0.00266312569781606,
                       0.0003299517423931039])
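# note on conventions (assumed from the T_a_b naming, check against your calibration):
# T_imu_cam0 maps a point expressed in the cam0 frame into the imu frame,
# i.e. X_imu = T_imu_cam0 * X_cam0, and likewise for cam1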
T_imu_cam0 = sp.SE3([[-0.9995250378696743, 0.029615343885863205,
                      -0.008522328211654736, 0.04727988224914392],
                     [0.0075019185074052044, -0.03439736061393144,
                      -0.9993800792498829, -0.047443232143367084],
                     [-0.02989013031643309, -0.998969345370175,
                      0.03415885127385616, -0.0681999605066297],
                     [0.0, 0.0, 0.0, 1.0]])
T_imu_cam1 = sp.SE3([[-0.9995110484978581, 0.030299116376600627,
                      -0.0077218830287333565, -0.053697434688869734],
                     [0.008104079263822521, 0.012511643720192351,
                      -0.9998888851620987, -0.046131737923635924],
                     [-0.030199136245891378, -0.9994625667418545,
                      -0.012751072573940885, -0.07149261284195751],
                     [0.0, 0.0, 0.0, 1.0]])
# image size
size = (512, 512)
# size of downsampled images for stereo matching
smaller_size = (int(size[0] / 2), int(size[1] / 2))
# will be run in parallel by several workers; the worker relies on the globals
# (args, rectification maps, matchers, output paths) set up in the __main__ block,
# which works with fork-based multiprocessing (the Linux default)
def rectification_disparity_depth(left_image_path):
    """Read and rectify both stereo images. Calculate the disparity image and the depth map. Save everything to disk.

    Keyword arguments:
    left_image_path [string] -- path to the left stereo image that should be processed.
    The base name is extracted and used to find the right stereo image.
    """
    image_name = os.path.basename(left_image_path)
    right_image_path = os.path.join(args.path_right, image_name)
    if not os.path.isfile(right_image_path):
        print(f"Right image for {image_name} does not exist!")
        return
    # read the images from disk
    left_image = cv.imread(left_image_path, cv.IMREAD_GRAYSCALE)
    right_image = cv.imread(right_image_path, cv.IMREAD_GRAYSCALE)
    # rectify both images with the precomputed lookup tables
    left_image_rect = cv.remap(
        left_image, left_map_x, left_map_y, interpolation=cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
    right_image_rect = cv.remap(
        right_image, right_map_x, right_map_y, interpolation=cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
    # save rectified images
    cv.imwrite(os.path.join(left_output_path, image_name), left_image_rect)
    cv.imwrite(os.path.join(right_output_path, image_name), right_image_rect)
    # downscale to smooth the depth map
    left_image_rect_downscaled = cv.resize(
        left_image_rect, smaller_size, interpolation=cv.INTER_AREA)
    right_image_rect_downscaled = cv.resize(
        right_image_rect, smaller_size, interpolation=cv.INTER_AREA)
    # compute disparities in both directions
    left_disp = stereo_left.compute(
        left_image_rect_downscaled, right_image_rect_downscaled)
    right_disp = stereo_right.compute(
        right_image_rect_downscaled, left_image_rect_downscaled)
    # smooth the disparity map with the WLS filter
    disp = wls_filter.filter(disparity_map_left=left_disp,
                             left_view=left_image_rect, disparity_map_right=right_disp)
    # upscale back to the original resolution
    disp = cv.resize(disp, size, interpolation=cv.INTER_AREA)
    # save disparity image
    cv.imwrite(os.path.join(disparity_output_path, image_name), disp)
    # calculate depth map
    # see https://docs.opencv.org/4.5.0/d9/d0c/group__calib3d.html#ga1bc1152bd57d63bc524204f21fde6e02
    # SGBM returns fixed-point disparities with 4 fractional bits, so convert to
    # float32 and divide by 16 before reprojecting
    depth = cv.reprojectImageTo3D(disp.astype(
        np.float32) / 16, disparity_to_depth, handleMissingValues=True)
    depth = depth[:, :, 2]
    # reprojectImageTo3D marks missing values with a large z value (10000)
    cv_outlier_depth = 10000
    depth[depth == cv_outlier_depth] = 0
    depth = depth * depth_factor
    # save depth map as 16bit grayscale image
    cv.imwrite(os.path.join(depth_output_path, image_name), depth.astype('uint16'))
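
# a minimal sketch for reading a stored depth map back into metres
# (the file name is only an example):
#   raw = cv.imread("depth/1520530308199447626.png", cv.IMREAD_UNCHANGED)
#   depth_in_m = raw.astype(np.float32) / depth_factor
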
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='''
    Reads in a stereo dataset and computes 16bit depth maps for the left images by semi-global matching.
    ''')
    parser.add_argument(
        "--path_left", help="Path to a directory containing the left png images.")
    parser.add_argument(
        "--path_right", help="Path to a directory containing the right png images.")
    parser.add_argument(
        "--output_path", help="Path to a directory where the rectified images, disparity images and depth maps will be saved.")
    args = parser.parse_args()
    if not os.path.isdir(args.output_path):
        print("Output path is not a directory")
        sys.exit(1)
    # create output folders
    left_output_path = os.path.join(args.output_path, "left")
    right_output_path = os.path.join(args.output_path, "right")
    disparity_output_path = os.path.join(args.output_path, "disparity")
    depth_output_path = os.path.join(args.output_path, "depth")
    for directory in (left_output_path, right_output_path,
                      disparity_output_path, depth_output_path):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            print(f"Could not create directory {directory}")
            sys.exit(1)
    # transform from cam0 (left) to cam1 (right)
    T_cam1_cam0 = T_imu_cam1.inverse() * T_imu_cam0
    R = T_cam1_cam0.rotationMatrix()
    t = T_cam1_cam0.translation()
    # use the opencv fisheye camera model aka Kannala-Brandt
    # see https://docs.opencv.org/4.5.0/db/d58/group__calib3d__fisheye.html#gac1af58774006689056b0f2ef1db55ecc
    # calculates new camera matrices (projections) and rectification rotations
    # if some image regions have an invalid remapping, decrease fov_scale
    left_rectification, right_rectification, left_projection, right_projection, disparity_to_depth = cv.fisheye.stereoRectify(
        left_K, left_dist, right_K, right_dist, size, R, t, flags=cv.CALIB_ZERO_DISPARITY, balance=0.5, fov_scale=0.6)
    # calculate lookup tables for stereo rectification
    left_map_x, left_map_y = cv.fisheye.initUndistortRectifyMap(
        left_K, left_dist, left_rectification, left_projection, size, cv.CV_32FC1)
    right_map_x, right_map_y = cv.fisheye.initUndistortRectifyMap(
        right_K, right_dist, right_rectification, right_projection, size, cv.CV_32FC1)
    # find image coordinates with invalid remapping
    # for u in range(512):
    #     for v in range(512):
    #         if left_map_x[u, v] < 0 or left_map_x[u, v] > 511 or left_map_y[u, v] < 0 or left_map_y[u, v] > 511:
    #             print(u, v, left_map_x[u, v], left_map_y[u, v])
    # poses of the rectified (virtual) cameras in the imu frame
    T_cam0rect_cam0 = sp.SE3(left_rectification, np.zeros(3))
    T_cam1rect_cam1 = sp.SE3(right_rectification, np.zeros(3))
    T_imu_cam0rect = T_imu_cam0 * T_cam0rect_cam0.inverse()
    T_imu_cam1rect = T_imu_cam1 * T_cam1rect_cam1.inverse()
    print("T_imu_cam0rect", T_imu_cam0rect)
    print("T_imu_cam1rect", T_imu_cam1rect)
    print("new left K", left_projection)
    print("new right K", right_projection)
    # see https://docs.opencv.org/4.5.0/d2/d85/classcv_1_1StereoSGBM.html
    # and https://answers.opencv.org/question/182049/pythonstereo-disparity-quality-problems/?answer=183650#post-id-183650
    block_size = 1
    min_disp = 0
    num_disp = 16
    stereo_left = cv.StereoSGBM_create(minDisparity=min_disp,
                                       numDisparities=num_disp,
                                       blockSize=block_size,
                                       P1=4*block_size**2,
                                       P2=32*block_size**2,
                                       disp12MaxDiff=1,
                                       preFilterCap=63,
                                       uniquenessRatio=10,
                                       speckleWindowSize=100,
                                       speckleRange=32,
                                       mode=cv.STEREO_SGBM_MODE_HH
                                       )
    # see https://docs.opencv.org/4.5.0/d9/dba/classcv_1_1StereoBM.html
    # stereo_left = cv.StereoBM_create(numDisparities=16, blockSize=15)
    # use weighted least squares filtering to post process the disparity maps
    # see https://docs.opencv.org/4.5.0/d9/d51/classcv_1_1ximgproc_1_1DisparityWLSFilter.html
    # and https://docs.opencv.org/4.5.0/d3/d14/tutorial_ximgproc_disparity_filtering.html
    stereo_right = cv.ximgproc.createRightMatcher(stereo_left)
    wls_filter = cv.ximgproc.createDisparityWLSFilter(stereo_left)
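    # the WLS filter parameters are left at their defaults here; they could be
    # set explicitly, e.g. with the values used in the linked ximgproc tutorial:
    # wls_filter.setLambda(8000.0)
    # wls_filter.setSigmaColor(1.5)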
    # parallelize using python multiprocessing
    num_cores = 6
    with Pool(num_cores) as p:
        left_image_paths = glob.glob(
            os.path.join(args.path_left, "*.png"))
        done_counter = 0
        for x in p.imap(rectification_disparity_depth, left_image_paths):
            done_counter += 1
            percent = done_counter / len(left_image_paths) * 100
            print(
                f"progress: {done_counter}/{len(left_image_paths)} = {percent:.1f}%")

@hobbeshunter (Author) commented:
Input left: [image 1520530308199447626]

Input right: [image 1520530308199447626]

Input images taken from the https://vision.in.tum.de/data/datasets/visual-inertial-dataset room1 sequence.

Output left: [image 1520530308199447626]

Output right: [image 1520530308199447626]

Output disparity: [image 1520530308199447626]

Output depth: [image 1520530308299450626]
