# d3rezz/2d_rgb_to_3d.py

## 2d_rgb_to_3d.py
"""Estimating 3d coordinates from an RGB image
Support for the  "Estimating 3d coordinates from an RGB image"
blog post in d3rezz.github.io
"""


import cv2
import numpy as np
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def main():
    """Track a chessboard seen by the webcam and plot its estimated 3D position.

    Frames are read from capture device 0, undistorted with the hard-coded
    intrinsics and downscaled before searching for a (9, 6) chessboard
    pattern. For every detection the distance Z is estimated from the
    apparent length of the board diagonal, X and Y are recovered by
    back-projecting the board center at that depth, and the resulting point
    is printed and shown in a live 3D plot.

    The loop ends when ESC is pressed in the preview window or when the
    camera stops delivering frames. The capture device is always released
    on the way out.

    Raises:
        RuntimeError: If capture device 0 cannot be opened.
    """
    # For scaling down images and camera matrix for speeding up detection
    scale = 0.5

    # Intrinsics, obtained with MATLAB cameraCalibrator
    camera_matrix = np.array([[1046.0, 0, 657.9], [0, 1043.8, 362.0], [0, 0, 1]])
    dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0])
    focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1]) / 2
    # The intrinsics are constant, so invert the camera matrix once up front
    camera_matrix_inv = np.linalg.inv(camera_matrix)

    print("Intrinsics:")
    print("Camera Matrix:\n", camera_matrix)
    print("Distortion Coefficients:\n", dist_coeffs)

    # Extrinsics: camera is in the origin of world frame
    R = np.eye(3, dtype=np.float64)
    t = np.array([[0], [0], [0]], dtype=np.float64)

    print("Extrinsics:")
    print("R:\n", R)
    print("t:\n", t)

    # Chessboard: physical distance between the first and last corner
    square_size = 1.5   # cm
    pattern_size = (9, 6)
    board_width = (pattern_size[0] - 1) * square_size
    board_height = (pattern_size[1] - 1) * square_size
    diagonal_cms = np.sqrt(board_width ** 2 + board_height ** 2)

    # Get webcam feed
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        capture.release()
        raise RuntimeError("Could not open webcam (capture device 0)")
    fps = 10    # max frames per second

    try:
        # Init position plot. World z is drawn on the plot's y axis and
        # world y on the plot's (inverted) z axis.
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        sc, = ax.plot([], [], [], "o", markersize=5)
        ax.set_xlabel('x')
        ax.set_zlabel('y')
        ax.set_ylabel('z')
        ax.set_xlim(-0.6, 0.6)
        ax.set_zlim(-0.6, 0.6)
        ax.set_ylim(0, 2)
        ax.invert_zaxis()
        fig.show()

        while True:
            start_time = time.time()

            # Get frame from webcam; stop cleanly when no frame is delivered
            # instead of passing None on to cv2.undistort
            ret, frame = capture.read()
            if not ret or frame is None:
                print("Could not read a frame from the webcam, stopping.")
                break

            # Undistort
            frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs)

            # Scale down to speed up
            frame_small = cv2.resize(frame_undistorted, (0, 0), fx=scale, fy=scale)

            # Find chessboard pattern
            found, corners = cv2.findChessboardCorners(
                frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK)

            if found:
                cv2.drawChessboardCorners(frame_small, pattern_size, corners, found)

                # Center of the corners (54, 1, 2), divided by scale to get
                # back to full-resolution pixel coordinates (u, v)
                chessboard_center = np.mean(corners, axis=(0, 1)) / scale
                chessboard_center = np.rint(chessboard_center).astype(int)

                # Diagonal between first and last corner, in downscaled pixels
                du = corners[0, 0, 0] - corners[-1, 0, 0]
                dv = corners[0, 0, 1] - corners[-1, 0, 1]
                measured_diagonal_pixels = np.sqrt(du ** 2 + dv ** 2)

                # Compute how far chessboard is (Z) https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image
                # 0.01 converts cm to meters; the focal length is multiplied
                # by scale because the diagonal was measured in the
                # downscaled image.
                Z = diagonal_cms * 0.01 * focal_length_pixels * scale / measured_diagonal_pixels

                # Compute XY world coordinates
                uv1 = np.append(chessboard_center, 1).reshape((3, 1))

                new_mat = R.copy()
                new_mat[:, 2] = new_mat[:, 2] * Z + np.squeeze(t)

                sXY1 = np.dot(np.dot(np.linalg.inv(new_mat), camera_matrix_inv), uv1)
                XY = sXY1[:2, :] / sXY1[2, 0]     # convert to cartesian
                XYZ = np.vstack((XY, [[Z]]))
                print("Computed coordinates in world frame:\n", XYZ)

                # Plot 3D location in world coordinates. Camera is placed in origin
                sc.set_data(XYZ[0], XYZ[2])
                sc.set_3d_properties(XYZ[1])

                plt.pause(0.001)

            cv2.imshow('Input', frame_small)

            if cv2.waitKey(1) == 27:    # ESC key
                break

            # Constant fps: sleep away whatever is left of this frame's slot
            elapsed_time = time.time() - start_time
            time.sleep(max(0, 1 / fps - elapsed_time))
    finally:
        # Release the camera even if detection or plotting raised
        capture.release()

if __name__ == '__main__':
    try:
        main()
    finally:
        # Close the OpenCV preview window even if main() raised
        cv2.destroyAllWindows()
	"""Estimating 3d coordinates from an RGB image
	Support for the "Estimating 3d coordinates from an RGB image"
	blog post in d3rezz.github.io
	"""


	import cv2
	import numpy as np
	import time
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D

	def main():
		"""Track a chessboard seen by the webcam and plot its estimated 3D position.

		Frames are read from capture device 0, undistorted with the hard-coded
		intrinsics and downscaled before searching for a (9, 6) chessboard
		pattern. For every detection the distance Z is estimated from the
		apparent length of the board diagonal, X and Y are recovered by
		back-projecting the board center at that depth, and the resulting point
		is printed and shown in a live 3D plot.

		The loop ends when ESC is pressed in the preview window or when the
		camera stops delivering frames. The capture device is always released
		on the way out.

		Raises:
			RuntimeError: If capture device 0 cannot be opened.
		"""
		# For scaling down images and camera matrix for speeding up detection
		scale = 0.5

		# Intrinsics, obtained with MATLAB cameraCalibrator
		camera_matrix = np.array([[1046.0, 0, 657.9], [0, 1043.8, 362.0], [0, 0, 1]])
		dist_coeffs = np.array([0.111412276912893, -0.270066987227198, 0, 0])
		focal_length_pixels = (camera_matrix[0, 0] + camera_matrix[1, 1]) / 2
		# The intrinsics are constant, so invert the camera matrix once up front
		camera_matrix_inv = np.linalg.inv(camera_matrix)

		print("Intrinsics:")
		print("Camera Matrix:\n", camera_matrix)
		print("Distortion Coefficients:\n", dist_coeffs)

		# Extrinsics: camera is in the origin of world frame
		R = np.eye(3, dtype=np.float64)
		t = np.array([[0], [0], [0]], dtype=np.float64)

		print("Extrinsics:")
		print("R:\n", R)
		print("t:\n", t)

		# Chessboard: physical distance between the first and last corner
		square_size = 1.5   # cm
		pattern_size = (9, 6)
		board_width = (pattern_size[0] - 1) * square_size
		board_height = (pattern_size[1] - 1) * square_size
		diagonal_cms = np.sqrt(board_width ** 2 + board_height ** 2)

		# Get webcam feed
		capture = cv2.VideoCapture(0)
		if not capture.isOpened():
			capture.release()
			raise RuntimeError("Could not open webcam (capture device 0)")
		fps = 10    # max frames per second

		try:
			# Init position plot. World z is drawn on the plot's y axis and
			# world y on the plot's (inverted) z axis.
			fig = plt.figure()
			ax = fig.add_subplot(111, projection='3d')
			sc, = ax.plot([], [], [], "o", markersize=5)
			ax.set_xlabel('x')
			ax.set_zlabel('y')
			ax.set_ylabel('z')
			ax.set_xlim(-0.6, 0.6)
			ax.set_zlim(-0.6, 0.6)
			ax.set_ylim(0, 2)
			ax.invert_zaxis()
			fig.show()

			while True:
				start_time = time.time()

				# Get frame from webcam; stop cleanly when no frame is delivered
				# instead of passing None on to cv2.undistort
				ret, frame = capture.read()
				if not ret or frame is None:
					print("Could not read a frame from the webcam, stopping.")
					break

				# Undistort
				frame_undistorted = cv2.undistort(frame, camera_matrix, dist_coeffs)

				# Scale down to speed up
				frame_small = cv2.resize(frame_undistorted, (0, 0), fx=scale, fy=scale)

				# Find chessboard pattern
				found, corners = cv2.findChessboardCorners(
					frame_small, pattern_size, flags=cv2.CALIB_CB_FAST_CHECK)

				if found:
					cv2.drawChessboardCorners(frame_small, pattern_size, corners, found)

					# Center of the corners (54, 1, 2), divided by scale to get
					# back to full-resolution pixel coordinates (u, v)
					chessboard_center = np.mean(corners, axis=(0, 1)) / scale
					chessboard_center = np.rint(chessboard_center).astype(int)

					# Diagonal between first and last corner, in downscaled pixels
					du = corners[0, 0, 0] - corners[-1, 0, 0]
					dv = corners[0, 0, 1] - corners[-1, 0, 1]
					measured_diagonal_pixels = np.sqrt(du ** 2 + dv ** 2)

					# Compute how far chessboard is (Z) https://stackoverflow.com/questions/14038002/opencv-how-to-calculate-distance-between-camera-and-object-using-image
					# 0.01 converts cm to meters; the focal length is multiplied
					# by scale because the diagonal was measured in the
					# downscaled image.
					Z = diagonal_cms * 0.01 * focal_length_pixels * scale / measured_diagonal_pixels

					# Compute XY world coordinates
					uv1 = np.append(chessboard_center, 1).reshape((3, 1))

					new_mat = R.copy()
					new_mat[:, 2] = new_mat[:, 2] * Z + np.squeeze(t)

					sXY1 = np.dot(np.dot(np.linalg.inv(new_mat), camera_matrix_inv), uv1)
					XY = sXY1[:2, :] / sXY1[2, 0]     # convert to cartesian
					XYZ = np.vstack((XY, [[Z]]))
					print("Computed coordinates in world frame:\n", XYZ)

					# Plot 3D location in world coordinates. Camera is placed in origin
					sc.set_data(XYZ[0], XYZ[2])
					sc.set_3d_properties(XYZ[1])

					plt.pause(0.001)

				cv2.imshow('Input', frame_small)

				if cv2.waitKey(1) == 27:    # ESC key
					break

				# Constant fps: sleep away whatever is left of this frame's slot
				elapsed_time = time.time() - start_time
				time.sleep(max(0, 1 / fps - elapsed_time))
		finally:
			# Release the camera even if detection or plotting raised
			capture.release()

	if __name__ == '__main__':
		try:
			main()
		finally:
			# Close the OpenCV preview window even if main() raised
			cv2.destroyAllWindows()