"""minimal script to show the difference between retrieving a pointcloud from the zed camera and manually creating one from the depth image and rgb image."""
import pyzed.sl as sl
import numpy as np
# import rerun
# rerun.init("test",spawn=True)
resolution = sl.RESOLUTION.HD720
fps = 30
depth_mode = sl.DEPTH_MODE.NEURAL
camera = sl.Camera()
camera_params = sl.InitParameters()
camera_params.camera_resolution = resolution
camera_params.camera_fps = fps
# https://www.stereolabs.com/docs/depth-sensing/depth-settings/
camera_params.depth_mode = depth_mode
camera_params.coordinate_units = sl.UNIT.METER
# objects closer than this will have artifacts, so they are filtered out (querying them returns -Infinity)
camera_params.depth_minimum_distance = 0.3
camera_params.depth_maximum_distance = 10.0  # filter out far-away objects
status = camera.open(camera_params)
if status != sl.ERROR_CODE.SUCCESS:
    raise RuntimeError(f"could not open camera, error = {status}")
# TODO: create a configuration class for the runtime parameters
runtime_params = sl.RuntimeParameters()
runtime_params.sensing_mode = sl.SENSING_MODE.STANDARD  # STANDARD favors accuracy, FILL interpolates holes. See docs.
runtime_params.texture_confidence_threshold = 100
runtime_params.confidence_threshold = 100
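# a threshold of 100 disables confidence filtering: all points are kept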
depth_enabled = True
image_matrix = sl.Mat()
depth_matrix = sl.Mat()
pointcloud_matrix = sl.Mat()
#############
## GRAB FRAME
#############
error_code = camera.grab(runtime_params)
if error_code != sl.ERROR_CODE.SUCCESS:
    raise RuntimeError("Could not grab new camera frame")
#############
## DEPTH IMAGE
#############
camera.retrieve_measure(depth_matrix, sl.MEASURE.DEPTH)
depth_map = depth_matrix.get_data()
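# depth_map is a (height, width) float32 array in meters; invalid pixels hold nan or ±inf (see the min/max distance settings above)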
#############
## RGB IMAGE
#############
camera.retrieve_image(image_matrix, sl.VIEW.LEFT)
rgb = image_matrix.get_data()
rgb = rgb[..., :3]  # drop the alpha channel
# converting from uint8 to a float image can take ~ms for larger images (which can impact FPS),
# so it is not done here
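# if a float image in the 0-1 range is needed, a minimal sketch: rgb_float01 = rgb.astype(np.float32) / 255.0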
#############
## POINTCLOUD
#############
camera.retrieve_measure(pointcloud_matrix, sl.MEASURE.XYZRGBA)
# shape (height, width, 4), with the 4th dim being x, y, z, (rgba packed into a single float32)
# a pixel can be nan, nan, nan, nan (no point in the pointcloud for this pixel)
# or x, y, z, nan (a point without color information)
# or x, y, z, value (a point with color information)
# filter out all pixels that have nan in any position of the 3rd dim
# and reshape to (n_points, 4)
point_cloud = pointcloud_matrix.get_data()
point_cloud = point_cloud[~np.isnan(point_cloud).any(axis=2), :]
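# note (assumption): out-of-range depth can also be encoded as ±inf rather than nan;
# np.isfinite would catch both: point_cloud[np.isfinite(point_cloud).all(axis=2), :]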
# unpack the colors, drop alpha channel and convert to 0-1 range
points = point_cloud[:, :3]
colors = point_cloud[:, 3]
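# reinterpret each packed float32 color as its 4 raw bytes: r, g, b, a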
rgba = np.ravel(colors).view(np.uint8).reshape(-1, 4)
pc_rgb = rgba[:, :3]
rgb_float = pc_rgb.astype(np.float32) / 255.0 # convert to 0-1 range
colored_pointcloud = np.concatenate((points, rgb_float), axis=1)
#############
## INTRINSICS
#############
left_cam = camera.get_camera_information().camera_configuration.calibration_parameters.left_cam
fx = left_cam.fx
fy = left_cam.fy
cx = left_cam.cx
cy = left_cam.cy
intrinsics = np.zeros((3, 3))
intrinsics[0, 0] = fx
intrinsics[1, 1] = fy
intrinsics[2, 2] = 1
intrinsics[0, 2] = cx
intrinsics[1, 2] = cy
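# pinhole model: a camera-frame point (X, Y, Z) projects to pixel
# (u, v) = (fx * X / Z + cx, fy * Y / Z + cy); the loop below inverts this by
# lifting each pixel through K^-1 and scaling the resulting ray with the measured depth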
####################
## MANUAL POINTCLOUD
####################
diy_pointcloud_points = []
diy_pointcloud_colors = []
inverse_intrinsics = np.linalg.inv(intrinsics)
# for every pixel in the image, get the corresponding 3D point
# note: unlike the SDK pointcloud above, invalid depth values (nan/inf) are not filtered here
for i in range(rgb.shape[0]):
    for j in range(rgb.shape[1]):
        # lift the pixel to a ray in the camera frame
        image_ray = np.array([j, i, 1])
        camera_ray = inverse_intrinsics @ image_ray
        # scale the ray so its z-coordinate equals the measured depth
        t = depth_map[i, j] / camera_ray[2]
        point = t * camera_ray
        diy_pointcloud_points.append(point)
        diy_pointcloud_colors.append(rgb[i, j, :])
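# vectorized version of the same backprojection (a sketch; the *_vec names are
# not part of the original script) -- avoids the slow Python double loop
u, v = np.meshgrid(np.arange(rgb.shape[1]), np.arange(rgb.shape[0]))
pixel_grid = np.stack([u, v, np.ones_like(u)], axis=-1).reshape(-1, 3)
camera_rays = pixel_grid @ inverse_intrinsics.T  # each row is K^-1 @ [u, v, 1]
diy_points_vec = camera_rays * depth_map.reshape(-1, 1)  # z of each ray is 1, so scale by depth
diy_colors_vec = rgb.reshape(-1, 3)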
print(colored_pointcloud.shape)
# rerun.log_points("world/camera1/pointcloud",positions=colored_pointcloud[:,:3],colors=colored_pointcloud[:,3:])
# rerun.log_points("world/camera1/diy_pointcloud",positions=diy_pointcloud_points,colors=diy_pointcloud_colors)