Last active
August 1, 2019 09:04
-
-
Save etienne87/b0ec1ea3c4303683185dc7af39ece47e to your computer and use it in GitHub Desktop.
world_to_camera_and_back
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function | |
""" | |
* World To Camera & Back (projective equations of pinhole cameras) * | |
* -------------------- * | |
* The OpenCV reference frame: * | |
* (or sometimes called "Camera Reference Frame"): * | |
* Z points towards the direction of forward motion of the camera * | |
* X points right wrt Y * | |
* Y points down * | |
* * | |
* * | |
* * | |
* ^ (Z) * | |
* | * | |
* | * | |
* | * | |
* | * | |
* (Y) v---------->(X) | |
* | |
* -------------------- | |
* * | |
""" | |
import numpy as np | |
import math | |
import cv2 | |
# define plane parallel to the ground | |
def make_xz_plane(npts_x=5, npts_z=10, x_min=-10, x_max=10, z_min=2, z_max=50, height=0): | |
assert z_min > 0 | |
n1 = npts_z | |
n2 = npts_x | |
z_step = float(z_max - z_min) / n1 | |
x_step = float(x_max - x_min) / n2 | |
n = n1 * n2 | |
objp = np.zeros((n, 3), np.float32) | |
mg = np.mgrid[z_min:z_max:z_step, x_min:x_max:x_step].T.reshape(-1, 2) | |
objp[:, 0] = mg[:, 1] # side | |
objp[:, 1] = height # height | |
objp[:, 2] = mg[:, 0] # depth | |
return objp | |
def make_plane(npoints, axis1, axis2, axisn, a, b, c, d, e): | |
objp = np.zeros((npoints * npoints, 3), np.float32) | |
a, b, c, d, e = float(a), float(b), float(c), float(d), float(e) | |
mg = np.mgrid[a:b:(b - a) / npoints, c:d:(d - c) / npoints].T.reshape(-1, 2) | |
objp[:, axisn] = e | |
objp[:, axis1] = mg[:, 0] | |
objp[:, axis2] = mg[:, 1] | |
return objp | |
def make_cube_mesh(n=50, x_min=-1, x_max=1, y_min=-1, y_max=1, z_min=0, z_max=5): | |
z_step = float(z_max - z_min) / n | |
y_step = float(y_max - y_min) / n | |
x_step = float(x_max - x_min) / n | |
objp = np.mgrid[x_min:x_max:x_step, y_min:y_max:y_step, z_min:z_max:z_step].T.reshape(-1, 3) | |
return objp | |
def make_cube_8points(x_min=-1, x_max=1, y_min=-1, y_max=1, z_min=0, z_max=5): | |
objp = np.array([ | |
[x_min, y_min, z_min], | |
[x_min, y_min, z_max], | |
[x_min, y_max, z_max], | |
[x_min, y_max, z_min], | |
[x_max, y_min, z_min], | |
[x_max, y_min, z_max], | |
[x_max, y_max, z_max], | |
[x_max, y_max, z_min]], dtype=np.float32) | |
return objp | |
# This is cv2.undistortPoints, I keep it for pedagogic purpose | |
def dist2undist(pts, K_inv, dist_coeffs, niter=10): | |
ones = np.ones((pts.shape[0], 1), dtype=pts.dtype) | |
pts = np.concatenate((pts, ones), axis=1) | |
pts_norm = pts.dot(K_inv.T) | |
k = dist_coeffs | |
x0 = pts_norm[:, 0] | |
y0 = pts_norm[:, 1] | |
x = x0.copy() | |
y = y0.copy() | |
# iterate | |
for i in range(niter): | |
xy = x * y | |
xx = x**2 | |
yy = y**2 | |
r2 = xx + yy | |
icdist = 1.0 / (1.0 + ((k[4] * r2 + k[1]) * r2 + k[0]) * r2) | |
deltaX = 2 * k[2] * xy + k[3] * (r2 + 2 * xx) | |
deltaY = k[2] * (r2 + 2 * yy) + 2 * k[3] * xy | |
x = (x0 - deltaX) * icdist | |
y = (y0 - deltaY) * icdist | |
pts_undist = pts_norm.copy() | |
pts_undist[:, 0] = x | |
pts_undist[:, 1] = y | |
return pts_undist | |
def dist2undist_cv(pts, K, dist_coeffs): | |
im_grid_cv = np.expand_dims(pts, axis=0) | |
rect_grid_cv = cv2.undistortPoints(im_grid_cv, K, dist_coeffs)[0] | |
ones = np.ones((rect_grid_cv.shape[0], 1), dtype=rect_grid_cv.dtype) | |
rect_grid_cv = np.concatenate((rect_grid_cv, ones), axis=1) | |
return rect_grid_cv | |
def filter_pts(pts, height, width): | |
ids = np.where((pts[:, 0] >= 0) & (pts[:, 0] < width) & (pts[:, 1] >= 0) & (pts[:, 1] < height)) | |
return ids | |
# for all p such that (p-p0).dot(normal) = 0 (1) (i.e plane) | |
# for all p such that p=d*l+l0 (def. line) (2) (i.e line) | |
# subst: d = (p0-l0).dot(normal)/(l.dot(normal)) in (2) | |
def plane_intersect(rays, tvec, p0=np.array([0, 0, 0]), normal=np.array([0, 1, 0])): | |
n = normal.T | |
l = rays | |
l0 = tvec | |
a = (p0 - l0).dot(n) | |
b = l.dot(n) + 1e-20 | |
d = a / b | |
d = np.expand_dims(d, axis=1) | |
p = d * l + l0 | |
return p | |
# For pedagogic purpose | |
def projectPoints(objp, R, T, K, dist_coeffs): | |
xyz = R.dot(objp.T).T + T #left-multiplication | |
z = xyz[:, 2].reshape(-1, 1) | |
xyp = xyz[:, :2] / z | |
k1, k2, p1, p2, k3 = dist_coeffs.tolist() | |
rp2 = xyp[:, 0]**2 + xyp[:, 1]**2 | |
xp = xyp[:, 0] | |
yp = xyp[:, 1] | |
xy = np.zeros_like(xyp) | |
xy[:, 0] = xp * (1 + k1 * rp2 + k2 * rp2 * rp2 + k3 * rp2 * rp2 * rp2) + \ | |
2 * p1 * xp * yp + p2 * (rp2 + 2 * xp * xp) | |
xy[:, 1] = yp * (1 + k1 * rp2 + k2 * rp2 * rp2 + k3 * rp2 * rp2 * rp2) + \ | |
2 * p2 * xp * yp + p1 * (rp2 + 2 * yp * yp) | |
xy[:, 0] = xy[:, 0] * K[0, 0] + K[0, 2] | |
xy[:, 1] = xy[:, 1] * K[1, 1] + K[1, 2] | |
return xy | |
def angles_to_R(theta): | |
R_x = np.array([[1, 0, 0], | |
[0, math.cos(theta[0]), -math.sin(theta[0])], | |
[0, math.sin(theta[0]), math.cos(theta[0])] | |
]) | |
R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])], | |
[0, 1, 0], | |
[-math.sin(theta[1]), 0, math.cos(theta[1])] | |
]) | |
R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0], | |
[math.sin(theta[2]), math.cos(theta[2]), 0], | |
[0, 0, 1] | |
]) | |
R = R_z.dot(R_y).dot(R_x) | |
# alternatively | |
#R = cv2.Rodrigues(theta)[0] | |
return R | |
def compose_RT(rvec, tvec): | |
RT = np.zeros((4, 4), dtype=np.float64) | |
RT[:3, :3] = angles_to_R(rvec) | |
RT[:3, 3] = tvec | |
RT[3, 3] = 1 | |
return RT | |
def world_to_img(objp, rvec, tvec, K, dist_coeffs, height, width): | |
R = angles_to_R(rvec) | |
T = tvec | |
#img_grid = cv2.projectPoints(objp, R, T, K, dist_coeffs )[0].squeeze().reshape(-1,2) | |
img_grid = projectPoints(objp, R, T, K, dist_coeffs) | |
ids = filter_pts(img_grid, height, width) | |
objp = objp[ids] | |
img_grid = img_grid[ids] | |
return img_grid, objp | |
def img_to_world(img_grid, rvec, tvec, K, dist_coeffs, offset=0, axis=1): | |
R = angles_to_R(rvec).T | |
T = -R.dot(tvec) | |
K_inv = np.linalg.inv(K) | |
undist_grid = dist2undist(img_grid, K_inv, dist_coeffs, 5) | |
rays = undist_grid.dot(R.T) | |
n = np.array([0, 0, 0]) | |
n[axis] = 1 | |
o = np.array([0, 0, 0]) | |
o[axis] = offset | |
objp2 = plane_intersect(rays, T, o, n) | |
return objp2 | |
def world_to_img_from_RT(objp, RT, K, dist_coeffs, height, width): | |
Ri = RT[:3,:3] | |
Ti = RT[:3, 3] | |
img_grid = projectPoints(objp, Ri, Ti, K, dist_coeffs) | |
ids = filter_pts(img_grid, height, width) | |
objp = objp[ids] | |
img_grid = img_grid[ids] | |
return img_grid, objp | |
def img_to_world_from_RT(img_grid, RT, K, dist_coeffs, offset=0, axis=1): | |
R = RT[:3, :3].T | |
tvec = RT[:3, 3] | |
T = -R.dot(tvec) | |
K_inv = np.linalg.inv(K) | |
undist_grid = dist2undist(img_grid, K_inv, dist_coeffs, 5) | |
rays = undist_grid.dot(R.T) | |
n = np.array([0, 0, 0]) | |
n[axis] = 1 | |
o = np.array([0, 0, 0]) | |
o[axis] = offset | |
objp2 = plane_intersect(rays, T, o, n) | |
return objp2 | |
def utest_project_back(objp, rvec, tvec, K, dist_coeffs, height, width, offset=0, axis=1, use_RT=True): | |
if use_RT: | |
RT = compose_RT(rvec, tvec) | |
img_grid, objp = world_to_img_from_RT(objp, RT, K, dist_coeffs, height, width) | |
objpback = img_to_world_from_RT(img_grid, RT, K, dist_coeffs, offset, axis) | |
else: | |
img_grid, objp = world_to_img(objp, rvec, tvec, K, dist_coeffs, height, width) | |
objpback = img_to_world(img_grid, rvec, tvec, K, dist_coeffs, offset, axis) | |
# print(objpback, objp) | |
d = np.where(np.isclose(objp, objpback, rtol=1e-5, atol=1e-5) == False) | |
assert(d[0].size == 0) | |
def show_plane_animation(objp, rvec, tvec, K, dist, height=480, width=640, use_RT=True): | |
img = np.zeros((height, width, 3), dtype=np.uint8) | |
for axis in range(3): | |
rvec2 = np.zeros_like(rvec) | |
for angle in np.arange(-np.pi / 16, np.pi / 16, np.pi / 200): | |
img[...] = 128 | |
rvec2[axis] = angle | |
if use_RT: | |
RT = compose_RT(rvec2, tvec) | |
grid, _ = world_to_img_from_RT(objp, RT, K, dist, height, width) | |
else: | |
grid, _ = world_to_img(objp, rvec2, tvec, K, dist, height, width) | |
for i in range(grid.shape[0]): | |
pt = (int(grid[i, 0]), int(grid[i, 1])) | |
cv2.circle(img, pt, 0, (0, 0, 255), 3) | |
cv2.putText(img, "{:.2f}".format(angle), (10, height - 20), | |
cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (255, 0, 0), 2, 8) | |
cv2.imshow('img', img) | |
cv2.waitKey(0) | |
def main(): | |
dtype = np.float64 | |
fx, fy = 552.523, 552.251 | |
cx, cy = 321.671, 245.602 | |
K = np.array([[fx, 0, cx], | |
[0, fy, cy], | |
[0, 0, 1]], dtype=np.float32) | |
dist_coeffs = np.array([-0.0842571, 0.155771, 0, 0, 0], dtype=dtype).reshape(5, ) | |
#Rvec & Rvec are expressed FROM the camera to the origin of the world | |
# (that is why we put a positive height, although y points downward) | |
rvec = np.array([0.0, 0.0, 0.0], dtype=dtype) | |
tvec = np.array([0.0, 1.4, 0], dtype=dtype) # X, Y, Z (m) (origin from camera frame -> corresponds to Z, X, -Y) | |
objp = make_xz_plane(npts_x=50, npts_z=100, x_min=-10, x_max=10, z_min=1, z_max=100, height=0) | |
# objp = make_plane(50, 0, 1, 2, -10, 10, -10, 10, 40) #plane parallel to camera | |
show_plane_animation(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, use_RT=True) | |
try: | |
#utest_project_back(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, offset=40, axis=2) | |
utest_project_back(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, offset=0, axis=1, use_RT=True) | |
print('passed the 3D-2D-3D test') | |
except Exception as inst: | |
print(type(inst)) | |
print(inst.args) | |
def super_test_2d_3d(): | |
r""" | |
This test | |
:param file_2d: | |
:param file_3d: | |
:param calib: | |
:return: | |
""" | |
import csv | |
root = '/home/eperot/workspace/data/test_2d_3d/' | |
file_3d = root + 'CPNC_5kph_1m_60kph_aeb.csv' | |
RT = np.loadtxt(root + 'world_to_cam.txt').astype(np.float64) | |
K = np.loadtxt(root + 'cam.txt') | |
dist_coeffs = np.loadtxt(root + 'dist.txt') | |
#read 2d boxes from svnet | |
boxes = [] | |
with open(file_3d, 'r') as csvfile: | |
reader = csv.DictReader(csvfile, delimiter=' ', quotechar='|') | |
for row in reader: | |
x1, y1, width, height = float(row['xcam']), float(row['ycam']), float(row['wcam']), float(row['hcam']) | |
bottom = (x1 + width/2, y1 + height) | |
row['bottom'] = bottom | |
boxes.append(row) | |
#convert bottom to points | |
pts = np.zeros((len(boxes), 2), dtype=np.float64) | |
for i, box in enumerate(boxes): | |
pts[i][0] = box['bottom'][0] | |
pts[i][1] = box['bottom'][1] | |
pts3d = img_to_world_from_RT(pts, np.linalg.inv(RT), K, dist_coeffs, offset=0, axis=1) | |
def img_to_world_sanity_check(img_grid, RT, K): | |
"""R is identity""" | |
f = K[0, 0] | |
cam_height = -RT[2, 3] | |
cam_depth = RT[3, 3] | |
cy = K[1,2] | |
depth = f * cam_height / (cy - img_grid[:, 1]) - cam_depth | |
return depth | |
depths = img_to_world_sanity_check(pts, RT, K) | |
#small debug :-) | |
height, width = 720, 1280 | |
step = 10 | |
img = np.zeros((720, 1280, 3), dtype=np.uint8) | |
for box, pt3d, depth in zip(boxes[::step], pts3d[::step], depths[::step]): | |
img[...] = 0 | |
#Project the original 3d Point given by Carla and see if it falls | |
xw, yw, zw = float(box['xctr']), float(box['yctr']), 0 | |
xcam, ycam, zcam = -yw, 0, xw - float(box['radius']) | |
pt2d, _ = world_to_img_from_RT(np.array([[xcam, ycam, zcam]]), RT, K, dist_coeffs, height, width) | |
pt2di = (int(pt2d[0,0]), int(pt2d[0, 1])) | |
cv2.circle(img, pt2di, 7, (0,0,255), 2, 1) | |
x, y, w, h = float(box['xcam']), float(box['ycam']), float(box['wcam']), float(box['hcam']) | |
x2, y2 = x+w, y+h | |
pt1 = (int(x), int(y)) | |
pt2 = (int(x2), int(y2)) | |
pt3 = (int(box['bottom'][0]), int(box['bottom'][1])) | |
pt4 = (int(x+w/2), int(y+h/2)) | |
cv2.rectangle(img, pt1, pt2, (0,0,255), 2) | |
cv2.circle(img, pt3, 2, (0,255,0), 2, 1) | |
gt_dist = float(box['xctr']) - float(box['radius']) | |
gt_dist_hat = pt3d[2] | |
diff = gt_dist - gt_dist_hat | |
print('difference3d: ', diff) | |
diff2d = max(x+w/2 - pt2d[0,0], y+h - pt2d[0,1]) | |
print('difference2d: ', diff2d) | |
cv2.putText(img, '{0:.2f}'.format(gt_dist), pt4, cv2.FONT_HERSHEY_SIMPLEX, 1, (200, 0, 0), 3, cv2.LINE_AA) | |
cv2.putText(img, '{0:.2f}'.format(gt_dist_hat), (pt4[0], pt4[1]+30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 200), 3, cv2.LINE_AA) | |
cv2.putText(img, '{0:.2f}'.format(gt_dist_hat), (pt4[0], pt4[1] + 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 0), | |
3, cv2.LINE_AA) | |
cv2.imshow('img', img) | |
cv2.waitKey(0) | |
def show_box_on_plane_animation(): | |
r""" | |
Just Show a cuboid going toward the camera while doing the 3d-2d-3d Test | |
:param rvec: | |
:param tvec: | |
:param K: | |
:param dist: | |
:param height: | |
:param width: | |
:param use_RT: | |
:return: | |
""" | |
def draw_cube_faces(img, cube, cube3d, colormap=cv2.COLORMAP_AUTUMN): | |
planes = [[0, 1, 2, 3], | |
[4, 5, 6, 7], | |
[0, 4, 7, 3], | |
[1, 5, 6, 2], | |
[0, 1, 5, 4], | |
[3, 2, 6, 7], | |
] | |
zbuffer = 1.0 / (1e+7 + cube3d[:, 2]) | |
cmap = cv2.applyColorMap(np.arange(0, 255, 255 / 8, dtype=np.uint8), colormap).tolist() | |
planes2 = [] | |
for i, plane in enumerate(planes): | |
zavg = zbuffer[plane].min() | |
clr = cmap[i][0] + [1.0-zavg/3.0] | |
planes2.append((zavg, cube[plane].astype(np.int32), clr)) | |
planes2 = sorted(planes2, key=lambda x: x[0], reverse=False) | |
for zm, plane, clr in planes2: | |
cv2.fillConvexPoly(img, plane, clr) | |
def draw_ground(img, grid, grid3d, colormap=cv2.COLORMAP_JET): | |
cmap = cv2.applyColorMap(np.arange(0, 255, 1, dtype=np.uint8), colormap).tolist() | |
for i in range(grid.shape[0]): | |
pt = (int(grid[i, 0]), int(grid[i, 1])) | |
z = int(np.log(grid3d[i, 2]/100.0) * 255) | |
clr = cmap[z%255][0] | |
cv2.circle(img, pt, 0, clr, 3) | |
#objects | |
ground = make_xz_plane(npts_x=20, npts_z=100, x_min=-10, x_max=10, z_min=1, z_max=100, height=0) | |
car = make_cube_8points(x_min=-0.7, x_max=0.7, y_min=-1.0, y_max=0, z_min=50, z_max=46) | |
#rotate car onto itself | |
# cog = car.mean(0) | |
# angle = np.array([0,0.2,0]) | |
# R = cv2.Rodrigues(angle)[0].T | |
# car = (car-cog).dot(R) + cog | |
car[:,0] -= 2 | |
#camera | |
height, width = 480, 640 | |
dtype = np.float64 | |
fx, fy = 552.523, 552.251 | |
cx, cy = 321.671, 245.602 | |
K = np.array([[fx, 0, cx], | |
[0, fy, cy], | |
[0, 0, 1]], dtype=np.float32) | |
dist_coeffs = np.array([-0.0842571, 0.155771, 0, 0, 0], dtype=dtype).reshape(5, ) | |
rvec = np.array([0.0, 0.0, 0.0], dtype=dtype) # PITCH, YAW, ROLL | |
tvec = np.array([0.0, -1.4, 0], dtype=dtype) # X, Y, Z (m) (camera -> corresponds to Z, X, -Y) | |
grid_ground, fltr_ground = world_to_img(ground, rvec, tvec, K, dist_coeffs, height, width) | |
img = np.zeros((height, width, 3), dtype=np.uint8) | |
for i in range(50): | |
img[...] = 127 | |
draw_ground(img, grid_ground, fltr_ground) | |
car[:, 2] -= 1 | |
car[:, 0] -=0.05 | |
grid_cube, fltr_cube_3d = world_to_img(car, rvec, tvec, K, dist_coeffs, height, width) | |
# Project Back Test | |
xmin, ymin, xmax, ymax = grid_cube[:,0].min(), grid_cube[:,1].min(), grid_cube[:,0].max(), grid_cube[:,1].max() | |
pt1i = (int(xmin), int(ymin)) | |
pt2i = (int(xmax), int(ymax)) | |
cv2.rectangle(img, pt1i, pt2i, (0,0,255), 4) | |
if len(grid_cube) == 8: | |
draw_cube_faces(img, grid_cube, fltr_cube_3d, cv2.COLORMAP_AUTUMN) | |
# Project Bottom of the box | |
bottom = np.array([[(xmin+xmax)/2, ymax]], dtype=np.float32).reshape(1, 2) | |
bti = (int(bottom[0, 0]), | |
int(bottom[0, 1])) | |
cv2.circle(img, bti, 3, (255,0,0), 10) | |
bottom3d = img_to_world(bottom, rvec, tvec, K, dist_coeffs) | |
print(bottom3d[:, 2], fltr_cube_3d[:, 2].min(0)) | |
# Sanity check 1: take indices of ground points from the cube & project them back to ground. | |
# ground_ids = np.where(fltr_cube_3d[:,1] == 0) | |
# ground_cube3d = fltr_cube_3d[ground_ids] | |
# ground_cube = grid_cube[ground_ids] | |
# projback = img_to_world(ground_cube, rvec, tvec, K, dist_coeffs) | |
cv2.imshow('img', img) | |
cv2.waitKey(0) | |
if __name__ == '__main__': | |
main() | |
#super_test_2d_3d() | |
#show_box_on_plane_animation() |
now taking RT as input (TODO: remove redundant fun maybe)
added a cube animation
clearer convention. rvec & tvec are expressed "from the camera". It means world_to_image is left_multiply R.dot(xyz (3 x N) ) + tvec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
removed confusing names like image_to_world_z. now this is "image_to_world", + axis & offset given by user