"""Estimating 3d coordinates from an RGB image | |
Support for the "Estimating 3d coordinates from an RGB image" | |
blog post in d3rezz.github.io | |
""" | |
import cv2 | |
import numpy as np | |
import time | |
import matplotlib.pyplot as plt | |
from mpl_toolkits.mplot3d import Axes3D | |
def _estimate_board_position(corners, scale, focal_length_pixels, diagonal_cms,
                             camera_matrix_inv, R, t):
    """Estimate the chessboard centre in world coordinates.

    Args:
        corners: Corner array returned by ``cv2.findChessboardCorners`` for
            the scaled-down frame, indexed as ``corners[i, 0, (u, v)]``.
        scale: Factor by which the frame was resized before detection.
        focal_length_pixels: Focal length of the full-resolution camera.
        diagonal_cms: Real length of the board diagonal, in centimetres.
        camera_matrix_inv: Inverse of the full-resolution camera matrix.
        R: 3x3 rotation of the extrinsics.
        t: 3x1 translation of the extrinsics.

    Returns:
        A (3, 1) array ``[[X], [Y], [Z]]`` with Z in metres, or ``None`` when
        the first and last detected corners coincide (no usable diagonal).
    """
    # Centre of all corners, mapped back to full-resolution pixel coordinates
    chessboard_center = np.mean(corners, axis=(0, 1)) / scale  # u, v
    chessboard_center = np.rint(chessboard_center).astype(int)

    # Diagonal of the detected pattern, measured in the scaled-down image
    du = corners[0, 0, 0] - corners[-1, 0, 0]
    dv = corners[0, 0, 1] - corners[-1, 0, 1]
    measured_diagonal_pixels = np.sqrt(du**2 + dv**2)
    if measured_diagonal_pixels <= 0:
        # Degenerate detection: the depth formula would divide by zero
        return None

    # Compute how far the chessboard is (Z), in metres:
    # https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image
    # The focal length is multiplied by `scale` because the diagonal was
    # measured on the resized frame.
    Z = diagonal_cms * 0.01 * focal_length_pixels * scale / measured_diagonal_pixels

    # Compute XY world coordinates.  With Z known,
    #   s * [u, v, 1]^T = K * [r1, r2, r3 * Z + t] * [X, Y, 1]^T
    # so inverting both matrices recovers [X, Y, 1] up to the scale s.
    uv1 = np.append(chessboard_center, 1).reshape((3, 1))
    new_mat = R.copy()
    new_mat[:, 2] = new_mat[:, 2] * Z + np.squeeze(t)
    sXY1 = np.dot(np.dot(np.linalg.inv(new_mat), camera_matrix_inv), uv1)
    XY = sXY1[:2, :] / sXY1[2, 0]  # convert to cartesian
    return np.vstack((XY, [[Z]]))


def main():
    """Track a chessboard with the webcam and plot its 3D position live.

    Each frame from capture device 0 is undistorted, scaled down, and searched
    for a 9x6 chessboard.  When the board is found, its position is printed
    and shown in a matplotlib 3D plot.  The loop runs until ESC is pressed in
    the OpenCV window or a frame can no longer be read.

    Raises:
        RuntimeError: If the webcam cannot be opened.
    """
    # For scaling down images and camera matrix for speeding up detection
    scale = 0.5

    # Intrinsics, obtained with MATLAB cameraCalibrator
    camera_matrix = np.array([[1046.0, 0, 657.9], [0, 1043.8, 362.0], [0, 0, 1]])
    dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0])
    focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1]) / 2
    # Constant for every frame, so invert once instead of inside the loop
    camera_matrix_inv = np.linalg.inv(camera_matrix)
    print("Intrinsics:")
    print("Camera Matrix:\n", camera_matrix)
    print("Distortion Coefficients:\n", dist_coeffs)

    # Extrinsics: camera is in the origin of world frame
    R = np.eye(3, dtype=np.float64)
    t = np.array([[0], [0], [0]], dtype=np.float64)
    print("Extrinsics:")
    print("R:\n", R)
    print("t:\n", t)

    # Chessboard
    square_size = 1.5  # cm
    pattern_size = (9, 6)
    diagonal_cms = np.sqrt(((pattern_size[0] - 1) * square_size)**2
                           + ((pattern_size[1] - 1) * square_size)**2)

    # Get webcam feed
    capture = cv2.VideoCapture(0)
    try:
        if not capture.isOpened():
            raise RuntimeError("Could not open webcam (device 0)")
        fps = 10  # max frames per second

        # Init position plot.  Camera y is drawn on the plot's (inverted)
        # vertical axis and camera depth z on the plot's y axis.
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        sc, = ax.plot([], [], [], "o", markersize=5)
        ax.set_xlabel('x')
        ax.set_zlabel('y')
        ax.set_ylabel('z')
        ax.set_xlim(-0.6, 0.6)
        ax.set_zlim(-0.6, 0.6)
        ax.set_ylim(0, 2)
        ax.invert_zaxis()
        fig.show()

        while True:
            start_time = time.time()

            # Get frame from webcam; stop cleanly when no frame is delivered
            # instead of passing None on to cv2.undistort.
            ret, frame = capture.read()
            if not ret or frame is None:
                print("Failed to read frame from webcam, stopping.")
                break

            # Undistort
            frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs)

            # Scale down to speed up
            frame_small = cv2.resize(frame_undistorted, (0, 0), fx=scale, fy=scale)

            # Find chessboard pattern
            found, corners = cv2.findChessboardCorners(
                frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK)

            if found:
                cv2.drawChessboardCorners(frame_small, pattern_size, corners, found)

                XYZ = _estimate_board_position(
                    corners, scale, focal_length_pixels, diagonal_cms,
                    camera_matrix_inv, R, t)
                if XYZ is not None:
                    print("Computed coordinates in world frame:\n", XYZ)

                    # Plot 3D location in world coordinates. Camera is placed in origin
                    sc.set_data(XYZ[0], XYZ[2])
                    sc.set_3d_properties(XYZ[1])
                    plt.pause(0.001)

            cv2.imshow('Input', frame_small)
            if cv2.waitKey(1) == 27:  # ESC key
                break

            # Constant fps: sleep for whatever is left of this frame's time slot
            elapsed_time = time.time() - start_time
            time.sleep(max(0, 1 / fps - elapsed_time))
    finally:
        # Release the camera even when the loop exits through an exception
        capture.release()
# Script entry point: run the tracker only when executed directly, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()
    # Close the OpenCV window(s) opened by main() once the loop has exited
    cv2.destroyAllWindows()
# NOTE: GitHub page footer captured with the script (not part of the program):
# "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment"