Skip to content

Instantly share code, notes, and snippets.

# d3rezz/2d_rgb_to_3d.py

Created August 10, 2019 08:38
Show Gist options
• Save d3rezz/b956501d5b1f73b8092b2531981c86c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 """Estimating 3d coordinates from an RGB image Support for the "Estimating 3d coordinates from an RGB image" blog post in d3rezz.github.io """ import cv2 import numpy as np import time import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D def main(): # For scaling down images and camera matrix for speeding up detection scale = 0.5 # intrinsics, obtained with MATLAB cameraCalibrator camera_matrix = np.array([[1046.0, 0, 657.9],[ 0, 1043.8, 362.0],[0, 0, 1]]) dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0]) focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1])/2 print("Intrinsics:") print("Camera Matrix:\n", camera_matrix) print("Distortion Coefficients:\n", dist_coeffs) #extrinsics: camera is in the origin of world frame R = np.eye(3, dtype=np.float64) t = np.array([[0], [0], [0]], dtype=np.float64) print("Extrinsics:") print("R:\n", R) print("t:\n", t) #chessboard square_size = 1.5 #cm pattern_size = (9, 6) diagonal_cms = np.sqrt(((pattern_size[0]-1)*square_size)**2+((pattern_size[1]-1)*square_size)**2) # Get webcam feed capture = cv2.VideoCapture(0) fps = 10 # max frames per second # Init position plot fig = plt.figure() ax = fig.add_subplot(111, projection='3d') sc, = ax.plot([], [], [], "o", markersize=5) ax.set_xlabel('x') ax.set_zlabel('y') ax.set_ylabel('z') ax.set_xlim(-0.6,0.6) ax.set_zlim(-0.6,0.6) ax.set_ylim(0,2) ax.invert_zaxis() fig.show() while True: start_time = time.time() # Get frame from webcam ret, frame = capture.read() # Undistort frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs) # Scale down to speed up frame_small = cv2.resize(frame_undistorted, (0,0), fx=scale, fy=scale) #find chessboard pattern found, corners = cv2.findChessboardCorners(frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK) if found: cv2.drawChessboardCorners(frame_small, pattern_size, corners, found) # Compute center of corners (54, 1, 2) chessboard_center = np.mean(corners, axis=(0, 1))/scale # u, v chessboard_center = np.rint(chessboard_center).astype(int) measured_diagonal_pixels = np.sqrt((corners[0, 0, 0]-corners[-1, 0, 0])**2+(corners[0, 0, 1]-corners[-1, 0, 1])**2) #compute how far chessboard is (Z) https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image Z = diagonal_cms * 0.01 * focal_length_pixels*scale/ (measured_diagonal_pixels) #Get distance to camera in meters # Compute XY world coordinates uv1 = np.append(chessboard_center, 1).reshape((3, 1)) new_mat = R.copy() new_mat[:, 2] = new_mat[:, 2]*Z+ np.squeeze(t) sXY1 = np.dot(np.dot(np.linalg.inv(new_mat), np.linalg.inv(camera_matrix)), uv1) XY = sXY1[:2,:]/sXY1[2,0] #convert to cartesian XYZ = np.vstack((XY, [[Z]])) print("Computed coordinates in world frame:\n", XYZ) # Plot 3D location in world coordinates. Camera is placed in origin sc.set_data(XYZ[0], XYZ[2]) sc.set_3d_properties(XYZ[1]) plt.pause(0.001) cv2.imshow('Input', frame_small) if cv2.waitKey(1) == 27: #ESC key break # constant fps elapsed_time = time.time() - start_time time.sleep(max(0, 1/fps-elapsed_time)) capture.release() if __name__ == '__main__': main() cv2.destroyAllWindows()
to join this conversation on GitHub. Already have an account? Sign in to comment