Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
"""Estimating 3d coordinates from an RGB image
Companion code for the "Estimating 3d coordinates from an RGB image"
blog post at d3rezz.github.io.
"""
import cv2
import numpy as np
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def _estimate_depth(corners, diagonal_cms, focal_length_pixels, scale):
    """Return the camera-to-chessboard distance (Z) in meters.

    Applies the pinhole relation Z = f * real_size / size_in_pixels, using
    the chessboard diagonal (first to last detected corner) as the reference.

    Args:
        corners: Corners returned by cv2.findChessboardCorners, indexed as
            corners[i, 0, (u, v)], detected in the scaled-down image.
        diagonal_cms: Real length of that diagonal, in centimeters.
        focal_length_pixels: Focal length of the full-resolution camera.
        scale: Factor by which the image was resized before detection.
    """
    du, dv = corners[0, 0] - corners[-1, 0]
    measured_diagonal_pixels = np.sqrt(du**2 + dv**2)
    # The focal length shrinks with the image, hence the multiplication by
    # scale; 0.01 converts centimeters to meters.
    return diagonal_cms * 0.01 * focal_length_pixels * scale / measured_diagonal_pixels


def _pixel_to_world(pixel_uv, Z, camera_matrix_inv, R, t):
    """Back-project a pixel lying on the plane at depth Z to world XYZ.

    For a point with known Z the projection s*[u, v, 1]^T = K [R|t] [X, Y, Z, 1]^T
    collapses to a 3x3 homography K [r1, r2, r3*Z + t] acting on [X, Y, 1]^T,
    which is inverted here.

    Args:
        pixel_uv: (u, v) pixel coordinates in the full-resolution image.
        Z: Depth of the point.
        camera_matrix_inv: Inverse of the 3x3 intrinsic matrix.
        R: 3x3 rotation part of the extrinsics.
        t: 3x1 translation part of the extrinsics.

    Returns:
        A (3, 1) array holding X, Y and Z.
    """
    uv1 = np.append(pixel_uv, 1).reshape((3, 1))
    plane_mat = R.copy()
    plane_mat[:, 2] = plane_mat[:, 2] * Z + np.squeeze(t)
    sXY1 = np.dot(np.dot(np.linalg.inv(plane_mat), camera_matrix_inv), uv1)
    XY = sXY1[:2, :] / sXY1[2, 0]  # convert homogeneous -> cartesian
    return np.vstack((XY, [[Z]]))


def main():
    """Track a chessboard with the webcam and plot its 3D position live.

    Each frame is undistorted, scaled down and searched for the chessboard.
    When found, its depth is estimated from the apparent diagonal length and
    its X/Y from back-projecting the pattern center. The loop runs until ESC
    is pressed or a frame can no longer be read.

    Raises:
        RuntimeError: If the webcam (device 0) cannot be opened.
    """
    # For scaling down images and camera matrix for speeding up detection
    scale = 0.5

    # Intrinsics, obtained with MATLAB cameraCalibrator
    camera_matrix = np.array([[1046.0, 0, 657.9], [0, 1043.8, 362.0], [0, 0, 1]])
    dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0])
    focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1]) / 2
    # Loop-invariant, so invert once instead of on every frame.
    camera_matrix_inv = np.linalg.inv(camera_matrix)
    print("Intrinsics:")
    print("Camera Matrix:\n", camera_matrix)
    print("Distortion Coefficients:\n", dist_coeffs)

    # Extrinsics: camera is in the origin of world frame
    R = np.eye(3, dtype=np.float64)
    t = np.array([[0], [0], [0]], dtype=np.float64)
    print("Extrinsics:")
    print("R:\n", R)
    print("t:\n", t)

    # Chessboard
    square_size = 1.5  # cm
    pattern_size = (9, 6)
    # Diagonal between the first and last corner of the pattern_size grid.
    diagonal_cms = np.sqrt(((pattern_size[0] - 1) * square_size) ** 2
                           + ((pattern_size[1] - 1) * square_size) ** 2)

    # Get webcam feed
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        capture.release()
        raise RuntimeError("Could not open webcam (device 0)")
    fps = 10  # max frames per second

    try:
        # Init position plot. Depth (Z) is drawn on the plot's y-axis and the
        # camera's Y on the (inverted) z-axis, hence the swapped labels.
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        sc, = ax.plot([], [], [], "o", markersize=5)
        ax.set_xlabel('x')
        ax.set_zlabel('y')
        ax.set_ylabel('z')
        ax.set_xlim(-0.6, 0.6)
        ax.set_zlim(-0.6, 0.6)
        ax.set_ylim(0, 2)
        ax.invert_zaxis()
        fig.show()

        while True:
            start_time = time.time()

            # Get frame from webcam; stop cleanly if the grab fails instead of
            # passing frame=None on to cv2.undistort.
            ret, frame = capture.read()
            if not ret:
                print("Could not read frame from webcam, stopping.")
                break

            # Undistort
            frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs)

            # Scale down to speed up
            frame_small = cv2.resize(frame_undistorted, (0, 0), fx=scale, fy=scale)

            # Find chessboard pattern
            found, corners = cv2.findChessboardCorners(
                frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK)

            if found:
                cv2.drawChessboardCorners(frame_small, pattern_size, corners, found)

                # Center of the corners, mapped back to full-resolution pixels
                chessboard_center = np.mean(corners, axis=(0, 1)) / scale  # u, v
                chessboard_center = np.rint(chessboard_center).astype(int)

                # Compute how far chessboard is (Z), in meters
                # https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image
                Z = _estimate_depth(corners, diagonal_cms, focal_length_pixels, scale)

                # Compute XY world coordinates
                XYZ = _pixel_to_world(chessboard_center, Z, camera_matrix_inv, R, t)
                print("Computed coordinates in world frame:\n", XYZ)

                # Plot 3D location in world coordinates. Camera is placed in origin
                sc.set_data(XYZ[0], XYZ[2])
                sc.set_3d_properties(XYZ[1])
                plt.pause(0.001)

            cv2.imshow('Input', frame_small)
            if cv2.waitKey(1) == 27:  # ESC key
                break

            # Constant fps: sleep for whatever is left of this frame's time slot
            elapsed_time = time.time() - start_time
            time.sleep(max(0, 1 / fps - elapsed_time))
    finally:
        # Always free the camera, even if processing a frame raised.
        capture.release()
# Script entry point: run the tracker, then tear down the OpenCV windows.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Close the preview window even if main() raised or was interrupted.
        cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.