# Source: GitHub Gist by @mauigna06 (gist id 45c6defbe513d3579f1daa1e9a67ba0b),
# forked from d3rezz/2d_rgb_to_3d.py. Created November 12, 2023 22:31.
"""Estimating 3d coordinates from an RGB image.

Companion code for the "Estimating 3d coordinates from an RGB image"
blog post at d3rezz.github.io.
"""
import cv2
import numpy as np
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def _estimate_depth_m(diagonal_cm, focal_length_pixels, diagonal_pixels):
    """Return the camera-to-board distance in metres (pinhole model).

    Uses ``Z = real_size * focal_length / size_in_pixels``.

    Args:
        diagonal_cm: Real length of the chessboard diagonal, in centimetres.
        focal_length_pixels: Focal length, in pixels, of the image in which
            ``diagonal_pixels`` was measured.
        diagonal_pixels: Measured length of the same diagonal, in pixels.
            Must be non-zero.
    """
    # 0.01 converts centimetres to metres.
    return diagonal_cm * 0.01 * focal_length_pixels / diagonal_pixels


def _backproject_to_world(uv, Z, R, t, camera_matrix_inv):
    """Back-project pixel ``uv`` at known depth ``Z`` to world coordinates.

    Starting from ``s * [u, v, 1]^T = K [R | t] [X, Y, Z, 1]^T`` and folding
    the known ``Z`` into the third column gives
    ``s * [u, v, 1]^T = K [r1, r2, r3 * Z + t] [X, Y, 1]^T``, which is
    inverted here to recover ``X`` and ``Y``.

    Args:
        uv: Pixel coordinates ``(u, v)`` in the full-resolution image.
        Z: Depth of the point.
        R: 3x3 rotation matrix of the extrinsics.
        t: 3x1 translation vector of the extrinsics.
        camera_matrix_inv: Inverse of the 3x3 camera matrix.

    Returns:
        A ``(3, 1)`` array ``[[X], [Y], [Z]]``.
    """
    uv1 = np.append(uv, 1).reshape((3, 1))
    plane_mat = R.copy()
    plane_mat[:, 2] = plane_mat[:, 2] * Z + np.squeeze(t)
    sXY1 = np.linalg.inv(plane_mat) @ camera_matrix_inv @ uv1
    XY = sXY1[:2, :] / sXY1[2, 0]  # homogeneous -> cartesian
    return np.vstack((XY, [[Z]]))


def main():
    """Track a chessboard with the webcam and plot its estimated 3D position.

    Each frame from video device 0 is undistorted, downscaled and searched
    for a chessboard. When one is found, its depth is estimated from the
    apparent length of the board diagonal, its centre is back-projected to
    world coordinates, and the point is printed and shown in a live 3D plot.
    The loop stops when ESC is pressed in the preview window or when a frame
    cannot be read. The webcam is always released on exit.
    """
    # For scaling down images and camera matrix for speeding up detection
    scale = 0.5

    # Intrinsics, obtained with MATLAB cameraCalibrator
    camera_matrix = np.array([[1046.0, 0, 657.9], [0, 1043.8, 362.0], [0, 0, 1]])
    dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0])
    focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1]) / 2
    # Loop-invariant, so computed once instead of on every frame.
    camera_matrix_inv = np.linalg.inv(camera_matrix)
    print("Intrinsics:")
    print("Camera Matrix:\n", camera_matrix)
    print("Distortion Coefficients:\n", dist_coeffs)

    # Extrinsics: camera is in the origin of world frame
    R = np.eye(3, dtype=np.float64)
    t = np.array([[0], [0], [0]], dtype=np.float64)
    print("Extrinsics:")
    print("R:\n", R)
    print("t:\n", t)

    # Chessboard
    square_size = 1.5  # cm
    pattern_size = (9, 6)
    # Distance between the first and last inner corner of the pattern.
    diagonal_cms = np.hypot(
        (pattern_size[0] - 1) * square_size,
        (pattern_size[1] - 1) * square_size,
    )

    # Init position plot. World Z (depth) is drawn on matplotlib's y axis and
    # world Y on matplotlib's z axis, hence the swapped labels and limits.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    sc, = ax.plot([], [], [], "o", markersize=5)
    ax.set_xlabel('x')
    ax.set_zlabel('y')
    ax.set_ylabel('z')
    ax.set_xlim(-0.6, 0.6)
    ax.set_zlim(-0.6, 0.6)
    ax.set_ylim(0, 2)
    ax.invert_zaxis()
    fig.show()

    # Get webcam feed
    capture = cv2.VideoCapture(0)
    fps = 10  # max frames per second
    try:
        while True:
            start_time = time.time()

            # Get frame from webcam
            ret, frame = capture.read()
            if not ret or frame is None:
                # Without this check a failed grab reaches cv2.undistort as
                # None and raises an opaque OpenCV error.
                print("Failed to read a frame from the webcam; stopping.")
                break

            # Undistort, then scale down to speed up detection
            frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs)
            frame_small = cv2.resize(frame_undistorted, (0, 0), fx=scale, fy=scale)

            # Find chessboard pattern
            found, corners = cv2.findChessboardCorners(
                frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK
            )
            if found:
                cv2.drawChessboardCorners(frame_small, pattern_size, corners, found)

                # Diagonal between first and last detected corner, measured
                # in the downscaled image.
                measured_diagonal_pixels = np.hypot(
                    corners[0, 0, 0] - corners[-1, 0, 0],
                    corners[0, 0, 1] - corners[-1, 0, 1],
                )

                # A zero-length diagonal would divide by zero below.
                if measured_diagonal_pixels > 0:
                    # Centre of the corners, mapped back to full-resolution
                    # pixel coordinates (u, v) to match camera_matrix.
                    chessboard_center = np.mean(corners, axis=(0, 1)) / scale
                    chessboard_center = np.rint(chessboard_center).astype(int)

                    # Compute how far chessboard is (Z), in meters.
                    # https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image
                    # The focal length is multiplied by `scale` because the
                    # diagonal was measured in the downscaled image.
                    Z = _estimate_depth_m(
                        diagonal_cms,
                        focal_length_pixels * scale,
                        measured_diagonal_pixels,
                    )

                    # Compute XY world coordinates
                    XYZ = _backproject_to_world(
                        chessboard_center, Z, R, t, camera_matrix_inv
                    )
                    print("Computed coordinates in world frame:\n", XYZ)

                    # Plot 3D location in world coordinates. Camera is
                    # placed in origin.
                    sc.set_data(XYZ[0], XYZ[2])
                    sc.set_3d_properties(XYZ[1])
                    plt.pause(0.001)

            cv2.imshow('Input', frame_small)
            # Mask to the low byte: some platforms set high bits in the
            # value returned by waitKey.
            if cv2.waitKey(1) & 0xFF == 27:  # ESC key
                break

            # Constant fps
            elapsed_time = time.time() - start_time
            time.sleep(max(0, 1 / fps - elapsed_time))
    finally:
        # Release the camera even if an exception interrupted the loop.
        capture.release()
# Script entry point: run the tracking loop, then close the preview window.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Close the OpenCV window even if main() raised.
        cv2.destroyAllWindows()
# (GitHub page footer: sign in on GitHub to comment on this gist.)