@etienne87
Last active August 1, 2019 09:04
world_to_camera_and_back
#!/usr/bin/python
"""
World To Camera & Back (projective equations of pinhole cameras)
----------------------------------------------------------------
The OpenCV reference frame (sometimes called the "Camera Reference Frame"):
  * Z points along the direction of forward motion of the camera
  * X points to the right
  * Y points down

              ^ (Z)
              |
              |
              |
              |
    (Y) v----------> (X)
"""
from __future__ import print_function
import numpy as np
import math
import cv2
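# --- Illustrative sketch (hypothetical helper, for exposition only) ----------
# A minimal, distortion-free example of the pinhole equations described in the
# header: a point (X, Y, Z) expressed in the camera frame projects to
#   u = fx * X / Z + cx,   v = fy * Y / Z + cy
# All values below are assumed, not taken from the gist.
def _pinhole_projection_sketch():
    fx, fy, cx, cy = 500.0, 500.0, 320.0, 240.0  # assumed intrinsics
    X, Y, Z = 1.0, 0.5, 10.0  # 10 m ahead, 1 m to the right, 0.5 m below the optical axis
    u = fx * X / Z + cx  # column coordinate (X points right)
    v = fy * Y / Z + cy  # row coordinate (Y points down)
    return u, v  # -> (370.0, 265.0)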
# define plane parallel to the ground
def make_xz_plane(npts_x=5, npts_z=10, x_min=-10, x_max=10, z_min=2, z_max=50, height=0):
    assert z_min > 0
    n1 = npts_z
    n2 = npts_x
    z_step = float(z_max - z_min) / n1
    x_step = float(x_max - x_min) / n2
    n = n1 * n2
    objp = np.zeros((n, 3), np.float32)
    mg = np.mgrid[z_min:z_max:z_step, x_min:x_max:x_step].T.reshape(-1, 2)
    objp[:, 0] = mg[:, 1]  # side
    objp[:, 1] = height  # height
    objp[:, 2] = mg[:, 0]  # depth
    return objp
def make_plane(npoints, axis1, axis2, axisn, a, b, c, d, e):
    objp = np.zeros((npoints * npoints, 3), np.float32)
    a, b, c, d, e = float(a), float(b), float(c), float(d), float(e)
    mg = np.mgrid[a:b:(b - a) / npoints, c:d:(d - c) / npoints].T.reshape(-1, 2)
    objp[:, axisn] = e
    objp[:, axis1] = mg[:, 0]
    objp[:, axis2] = mg[:, 1]
    return objp
def make_cube_mesh(n=50, x_min=-1, x_max=1, y_min=-1, y_max=1, z_min=0, z_max=5):
    z_step = float(z_max - z_min) / n
    y_step = float(y_max - y_min) / n
    x_step = float(x_max - x_min) / n
    objp = np.mgrid[x_min:x_max:x_step, y_min:y_max:y_step, z_min:z_max:z_step].T.reshape(-1, 3)
    return objp
def make_cube_8points(x_min=-1, x_max=1, y_min=-1, y_max=1, z_min=0, z_max=5):
    objp = np.array([
        [x_min, y_min, z_min],
        [x_min, y_min, z_max],
        [x_min, y_max, z_max],
        [x_min, y_max, z_min],
        [x_max, y_min, z_min],
        [x_max, y_min, z_max],
        [x_max, y_max, z_max],
        [x_max, y_max, z_min]], dtype=np.float32)
    return objp
# This reimplements cv2.undistortPoints; kept for pedagogical purposes
def dist2undist(pts, K_inv, dist_coeffs, niter=10):
    ones = np.ones((pts.shape[0], 1), dtype=pts.dtype)
    pts = np.concatenate((pts, ones), axis=1)
    pts_norm = pts.dot(K_inv.T)
    k = dist_coeffs
    x0 = pts_norm[:, 0]
    y0 = pts_norm[:, 1]
    x = x0.copy()
    y = y0.copy()
    # fixed-point iteration inverting the distortion model
    for i in range(niter):
        xy = x * y
        xx = x**2
        yy = y**2
        r2 = xx + yy
        icdist = 1.0 / (1.0 + ((k[4] * r2 + k[1]) * r2 + k[0]) * r2)
        deltaX = 2 * k[2] * xy + k[3] * (r2 + 2 * xx)
        deltaY = k[2] * (r2 + 2 * yy) + 2 * k[3] * xy
        x = (x0 - deltaX) * icdist
        y = (y0 - deltaY) * icdist
    pts_undist = pts_norm.copy()
    pts_undist[:, 0] = x
    pts_undist[:, 1] = y
    return pts_undist
def dist2undist_cv(pts, K, dist_coeffs):
    im_grid_cv = np.expand_dims(pts, axis=0)
    rect_grid_cv = cv2.undistortPoints(im_grid_cv, K, dist_coeffs)[0]
    ones = np.ones((rect_grid_cv.shape[0], 1), dtype=rect_grid_cv.dtype)
    rect_grid_cv = np.concatenate((rect_grid_cv, ones), axis=1)
    return rect_grid_cv
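# --- Illustrative sketch (hypothetical helper, for exposition only) ----------
# Quick consistency check between the pedagogical dist2undist above and OpenCV's
# cv2.undistortPoints (wrapped by dist2undist_cv). The intrinsics, distortion
# coefficients and pixel coordinates below are arbitrary example values.
def _undistort_consistency_sketch():
    K = np.array([[550.0, 0.0, 320.0],
                  [0.0, 550.0, 240.0],
                  [0.0, 0.0, 1.0]])
    dist = np.array([-0.08, 0.15, 0.0, 0.0, 0.0])
    pts = np.random.uniform([0, 0], [640, 480], size=(20, 2)).astype(np.float64)
    mine = dist2undist(pts, np.linalg.inv(K), dist, niter=10)
    ref = dist2undist_cv(pts, K, dist)
    return np.abs(mine - ref).max()  # expected to be ~0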
def filter_pts(pts, height, width):
    ids = np.where((pts[:, 0] >= 0) & (pts[:, 0] < width) & (pts[:, 1] >= 0) & (pts[:, 1] < height))
    return ids
# plane: all p such that (p - p0).dot(normal) = 0          (1)
# line:  all p such that p = d*l + l0                      (2)
# solving (1) with p from (2) gives d = (p0 - l0).dot(normal) / (l.dot(normal)),
# which is then plugged back into (2)
def plane_intersect(rays, tvec, p0=np.array([0, 0, 0]), normal=np.array([0, 1, 0])):
    n = normal.T
    l = rays
    l0 = tvec
    a = (p0 - l0).dot(n)
    b = l.dot(n) + 1e-20
    d = a / b
    d = np.expand_dims(d, axis=1)
    p = d * l + l0
    return p
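# --- Illustrative sketch (hypothetical helper, for exposition only) ----------
# Worked example of the ray/plane intersection above: a camera 1.5 m above the
# ground (Y points down, so the camera center has y = -1.5) casts a ray going
# one unit down for every ten units forward; it must hit the ground plane y = 0
# fifteen metres ahead. All numbers here are made up for illustration.
def _plane_intersect_sketch():
    rays = np.array([[0.0, 0.1, 1.0]])  # ray direction (slightly down, mostly forward)
    cam_center = np.array([0.0, -1.5, 0.0])  # camera center in world coordinates
    p = plane_intersect(rays, cam_center)  # default plane: p0 = origin, normal = (0, 1, 0)
    return p  # -> [[0., 0., 15.]]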
# Reimplements cv2.projectPoints, kept for pedagogical purposes
def projectPoints(objp, R, T, K, dist_coeffs):
    xyz = R.dot(objp.T).T + T  # left-multiplication
    z = xyz[:, 2].reshape(-1, 1)
    xyp = xyz[:, :2] / z
    k1, k2, p1, p2, k3 = dist_coeffs.tolist()
    rp2 = xyp[:, 0]**2 + xyp[:, 1]**2
    xp = xyp[:, 0]
    yp = xyp[:, 1]
    xy = np.zeros_like(xyp)
    xy[:, 0] = xp * (1 + k1 * rp2 + k2 * rp2 * rp2 + k3 * rp2 * rp2 * rp2) + \
        2 * p1 * xp * yp + p2 * (rp2 + 2 * xp * xp)
    xy[:, 1] = yp * (1 + k1 * rp2 + k2 * rp2 * rp2 + k3 * rp2 * rp2 * rp2) + \
        2 * p2 * xp * yp + p1 * (rp2 + 2 * yp * yp)
    xy[:, 0] = xy[:, 0] * K[0, 0] + K[0, 2]
    xy[:, 1] = xy[:, 1] * K[1, 1] + K[1, 2]
    return xy
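# --- Illustrative sketch (hypothetical helper, for exposition only) ----------
# The function above mirrors cv2.projectPoints; this sketch compares the two on
# a small cube (intrinsics, distortion and pose below are arbitrary examples).
def _project_points_consistency_sketch():
    K = np.array([[550.0, 0.0, 320.0],
                  [0.0, 550.0, 240.0],
                  [0.0, 0.0, 1.0]])
    dist = np.array([-0.08, 0.15, 0.001, -0.002, 0.0])
    rvec_cv = np.array([[0.02], [-0.01], [0.03]])  # axis-angle (Rodrigues) vector
    R = cv2.Rodrigues(rvec_cv)[0]
    T = np.array([0.1, 1.4, 0.0])
    objp = make_cube_8points(z_min=5, z_max=10).astype(np.float64)
    mine = projectPoints(objp, R, T, K, dist)
    ref = cv2.projectPoints(objp, rvec_cv, T.reshape(3, 1), K, dist)[0].reshape(-1, 2)
    return np.abs(mine - ref).max()  # expected to be ~0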
def angles_to_R(theta):
    R_x = np.array([[1, 0, 0],
                    [0, math.cos(theta[0]), -math.sin(theta[0])],
                    [0, math.sin(theta[0]), math.cos(theta[0])]
                    ])
    R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
                    [0, 1, 0],
                    [-math.sin(theta[1]), 0, math.cos(theta[1])]
                    ])
    R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
                    [math.sin(theta[2]), math.cos(theta[2]), 0],
                    [0, 0, 1]
                    ])
    R = R_z.dot(R_y).dot(R_x)
    # alternatively, if theta were an axis-angle (Rodrigues) vector instead of Euler angles:
    # R = cv2.Rodrigues(theta)[0]
    return R
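# --- Illustrative sketch (hypothetical helper, for exposition only) ----------
# Caveat on the commented-out alternative above: cv2.Rodrigues expects an
# axis-angle vector, so it only matches the Euler composition R_z.R_y.R_x when
# a single angle is non-zero.
def _euler_vs_rodrigues_sketch():
    single = np.array([0.3, 0.0, 0.0])
    mixed = np.array([0.3, 0.2, 0.1])
    same = np.allclose(angles_to_R(single), cv2.Rodrigues(single.reshape(3, 1))[0])
    different = np.allclose(angles_to_R(mixed), cv2.Rodrigues(mixed.reshape(3, 1))[0])
    return same, different  # -> (True, False)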
def compose_RT(rvec, tvec):
    RT = np.zeros((4, 4), dtype=np.float64)
    RT[:3, :3] = angles_to_R(rvec)
    RT[:3, 3] = tvec
    RT[3, 3] = 1
    return RT
def world_to_img(objp, rvec, tvec, K, dist_coeffs, height, width):
    R = angles_to_R(rvec)
    T = tvec
    # img_grid = cv2.projectPoints(objp, R, T, K, dist_coeffs)[0].squeeze().reshape(-1, 2)
    img_grid = projectPoints(objp, R, T, K, dist_coeffs)
    ids = filter_pts(img_grid, height, width)
    objp = objp[ids]
    img_grid = img_grid[ids]
    return img_grid, objp
def img_to_world(img_grid, rvec, tvec, K, dist_coeffs, offset=0, axis=1):
    R = angles_to_R(rvec).T
    T = -R.dot(tvec)
    K_inv = np.linalg.inv(K)
    undist_grid = dist2undist(img_grid, K_inv, dist_coeffs, 5)
    rays = undist_grid.dot(R.T)
    n = np.zeros(3)
    n[axis] = 1
    o = np.zeros(3)  # float array so a non-integer offset is not truncated
    o[axis] = offset
    objp2 = plane_intersect(rays, T, o, n)
    return objp2
def world_to_img_from_RT(objp, RT, K, dist_coeffs, height, width):
    Ri = RT[:3, :3]
    Ti = RT[:3, 3]
    img_grid = projectPoints(objp, Ri, Ti, K, dist_coeffs)
    ids = filter_pts(img_grid, height, width)
    objp = objp[ids]
    img_grid = img_grid[ids]
    return img_grid, objp
def img_to_world_from_RT(img_grid, RT, K, dist_coeffs, offset=0, axis=1):
    R = RT[:3, :3].T
    tvec = RT[:3, 3]
    T = -R.dot(tvec)
    K_inv = np.linalg.inv(K)
    undist_grid = dist2undist(img_grid, K_inv, dist_coeffs, 5)
    rays = undist_grid.dot(R.T)
    n = np.zeros(3)
    n[axis] = 1
    o = np.zeros(3)  # float array so a non-integer offset is not truncated
    o[axis] = offset
    objp2 = plane_intersect(rays, T, o, n)
    return objp2
def utest_project_back(objp, rvec, tvec, K, dist_coeffs, height, width, offset=0, axis=1, use_RT=True):
    if use_RT:
        RT = compose_RT(rvec, tvec)
        img_grid, objp = world_to_img_from_RT(objp, RT, K, dist_coeffs, height, width)
        objpback = img_to_world_from_RT(img_grid, RT, K, dist_coeffs, offset, axis)
    else:
        img_grid, objp = world_to_img(objp, rvec, tvec, K, dist_coeffs, height, width)
        objpback = img_to_world(img_grid, rvec, tvec, K, dist_coeffs, offset, axis)
    # print(objpback, objp)
    d = np.where(~np.isclose(objp, objpback, rtol=1e-5, atol=1e-5))
    assert d[0].size == 0
def show_plane_animation(objp, rvec, tvec, K, dist, height=480, width=640, use_RT=True):
    img = np.zeros((height, width, 3), dtype=np.uint8)
    for axis in range(3):
        rvec2 = np.zeros_like(rvec)
        for angle in np.arange(-np.pi / 16, np.pi / 16, np.pi / 200):
            img[...] = 128
            rvec2[axis] = angle
            if use_RT:
                RT = compose_RT(rvec2, tvec)
                grid, _ = world_to_img_from_RT(objp, RT, K, dist, height, width)
            else:
                grid, _ = world_to_img(objp, rvec2, tvec, K, dist, height, width)
            for i in range(grid.shape[0]):
                pt = (int(grid[i, 0]), int(grid[i, 1]))
                cv2.circle(img, pt, 0, (0, 0, 255), 3)
            cv2.putText(img, "{:.2f}".format(angle), (10, height - 20),
                        cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (255, 0, 0), 2, 8)
            cv2.imshow('img', img)
            cv2.waitKey(0)
def main():
    dtype = np.float64
    fx, fy = 552.523, 552.251
    cx, cy = 321.671, 245.602
    K = np.array([[fx, 0, cx],
                  [0, fy, cy],
                  [0, 0, 1]], dtype=np.float32)
    dist_coeffs = np.array([-0.0842571, 0.155771, 0, 0, 0], dtype=dtype).reshape(5, )
    # rvec & tvec are expressed FROM the camera to the origin of the world
    # (that is why we use a positive height, although Y points downward)
    rvec = np.array([0.0, 0.0, 0.0], dtype=dtype)
    tvec = np.array([0.0, 1.4, 0], dtype=dtype)  # X, Y, Z (m) (origin from camera frame -> corresponds to Z, X, -Y)
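    # With rvec = 0 the rotation is the identity, so the camera center in world
    # coordinates is C = -R.T.dot(tvec) = (0, -1.4, 0): since Y points down,
    # the camera sits 1.4 m above the ground plane y = 0 used below.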
    objp = make_xz_plane(npts_x=50, npts_z=100, x_min=-10, x_max=10, z_min=1, z_max=100, height=0)
    # objp = make_plane(50, 0, 1, 2, -10, 10, -10, 10, 40)  # plane parallel to camera
    show_plane_animation(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, use_RT=True)
    try:
        # utest_project_back(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, offset=40, axis=2)
        utest_project_back(objp, rvec, tvec, K, dist_coeffs, height=480, width=640, offset=0, axis=1, use_RT=True)
        print('passed the 3D-2D-3D test')
    except Exception as inst:
        print(type(inst))
        print(inst.args)
def super_test_2d_3d():
    r"""
    Reads 2D boxes from a CSV file, back-projects the bottom-center of each box
    onto the ground plane with img_to_world_from_RT, and compares the recovered
    depth against a closed-form sanity check and the ground-truth 3D centers.
    Paths and calibration files are hard-coded.
    """
    import csv
    root = '/home/eperot/workspace/data/test_2d_3d/'
    file_3d = root + 'CPNC_5kph_1m_60kph_aeb.csv'
    RT = np.loadtxt(root + 'world_to_cam.txt').astype(np.float64)
    K = np.loadtxt(root + 'cam.txt')
    dist_coeffs = np.loadtxt(root + 'dist.txt')
    # read 2d boxes from svnet
    boxes = []
    with open(file_3d, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=' ', quotechar='|')
        for row in reader:
            x1, y1, width, height = float(row['xcam']), float(row['ycam']), float(row['wcam']), float(row['hcam'])
            bottom = (x1 + width / 2, y1 + height)
            row['bottom'] = bottom
            boxes.append(row)
    # convert bottom to points
    pts = np.zeros((len(boxes), 2), dtype=np.float64)
    for i, box in enumerate(boxes):
        pts[i][0] = box['bottom'][0]
        pts[i][1] = box['bottom'][1]
    pts3d = img_to_world_from_RT(pts, np.linalg.inv(RT), K, dist_coeffs, offset=0, axis=1)

    def img_to_world_sanity_check(img_grid, RT, K):
        """R is identity"""
        f = K[0, 0]
        cam_height = -RT[2, 3]
        cam_depth = RT[3, 3]
        cy = K[1, 2]
        depth = f * cam_height / (cy - img_grid[:, 1]) - cam_depth
        return depth
    depths = img_to_world_sanity_check(pts, RT, K)
    # small debug :-)
    height, width = 720, 1280
    step = 10
    img = np.zeros((720, 1280, 3), dtype=np.uint8)
    for box, pt3d, depth in zip(boxes[::step], pts3d[::step], depths[::step]):
        img[...] = 0
        # Project the original 3D point given by Carla and see if it falls inside the detected box
        xw, yw, zw = float(box['xctr']), float(box['yctr']), 0
        xcam, ycam, zcam = -yw, 0, xw - float(box['radius'])
        pt2d, _ = world_to_img_from_RT(np.array([[xcam, ycam, zcam]]), RT, K, dist_coeffs, height, width)
        pt2di = (int(pt2d[0, 0]), int(pt2d[0, 1]))
        cv2.circle(img, pt2di, 7, (0, 0, 255), 2, 1)
        x, y, w, h = float(box['xcam']), float(box['ycam']), float(box['wcam']), float(box['hcam'])
        x2, y2 = x + w, y + h
        pt1 = (int(x), int(y))
        pt2 = (int(x2), int(y2))
        pt3 = (int(box['bottom'][0]), int(box['bottom'][1]))
        pt4 = (int(x + w / 2), int(y + h / 2))
        cv2.rectangle(img, pt1, pt2, (0, 0, 255), 2)
        cv2.circle(img, pt3, 2, (0, 255, 0), 2, 1)
        gt_dist = float(box['xctr']) - float(box['radius'])
        gt_dist_hat = pt3d[2]
        diff = gt_dist - gt_dist_hat
        print('difference3d: ', diff)
        diff2d = max(x + w / 2 - pt2d[0, 0], y + h - pt2d[0, 1])
        print('difference2d: ', diff2d)
        cv2.putText(img, '{0:.2f}'.format(gt_dist), pt4, cv2.FONT_HERSHEY_SIMPLEX, 1, (200, 0, 0), 3, cv2.LINE_AA)
        cv2.putText(img, '{0:.2f}'.format(gt_dist_hat), (pt4[0], pt4[1] + 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 200), 3, cv2.LINE_AA)
        cv2.putText(img, '{0:.2f}'.format(gt_dist_hat), (pt4[0], pt4[1] + 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 0),
                    3, cv2.LINE_AA)
        cv2.imshow('img', img)
        cv2.waitKey(0)
def show_box_on_plane_animation():
    r"""
    Show a cuboid moving toward the camera above a ground-plane grid, while
    doing the same 3D-2D-3D test on the bottom of its 2D bounding box.
    """
    def draw_cube_faces(img, cube, cube3d, colormap=cv2.COLORMAP_AUTUMN):
        planes = [[0, 1, 2, 3],
                  [4, 5, 6, 7],
                  [0, 4, 7, 3],
                  [1, 5, 6, 2],
                  [0, 1, 5, 4],
                  [3, 2, 6, 7],
                  ]
        zbuffer = 1.0 / (1e-7 + cube3d[:, 2])  # inverse depth; small epsilon guards against division by zero
        cmap = cv2.applyColorMap(np.arange(0, 255, 255 / 8, dtype=np.uint8), colormap).tolist()
        planes2 = []
        for i, plane in enumerate(planes):
            zavg = zbuffer[plane].min()
            clr = cmap[i][0] + [1.0 - zavg / 3.0]
            planes2.append((zavg, cube[plane].astype(np.int32), clr))
        planes2 = sorted(planes2, key=lambda x: x[0], reverse=False)
        for zm, plane, clr in planes2:
            cv2.fillConvexPoly(img, plane, clr)
    def draw_ground(img, grid, grid3d, colormap=cv2.COLORMAP_JET):
        cmap = cv2.applyColorMap(np.arange(0, 255, 1, dtype=np.uint8), colormap).tolist()
        for i in range(grid.shape[0]):
            pt = (int(grid[i, 0]), int(grid[i, 1]))
            z = int(np.log(grid3d[i, 2] / 100.0) * 255)
            clr = cmap[z % 255][0]
            cv2.circle(img, pt, 0, clr, 3)
    # objects
    ground = make_xz_plane(npts_x=20, npts_z=100, x_min=-10, x_max=10, z_min=1, z_max=100, height=0)
    car = make_cube_8points(x_min=-0.7, x_max=0.7, y_min=-1.0, y_max=0, z_min=50, z_max=46)
    # rotate car onto itself
    # cog = car.mean(0)
    # angle = np.array([0,0.2,0])
    # R = cv2.Rodrigues(angle)[0].T
    # car = (car-cog).dot(R) + cog
    car[:, 0] -= 2
    # camera
    height, width = 480, 640
    dtype = np.float64
    fx, fy = 552.523, 552.251
    cx, cy = 321.671, 245.602
    K = np.array([[fx, 0, cx],
                  [0, fy, cy],
                  [0, 0, 1]], dtype=np.float32)
    dist_coeffs = np.array([-0.0842571, 0.155771, 0, 0, 0], dtype=dtype).reshape(5, )
    rvec = np.array([0.0, 0.0, 0.0], dtype=dtype)  # PITCH, YAW, ROLL
    tvec = np.array([0.0, -1.4, 0], dtype=dtype)  # X, Y, Z (m) (camera -> corresponds to Z, X, -Y)
    grid_ground, fltr_ground = world_to_img(ground, rvec, tvec, K, dist_coeffs, height, width)
    img = np.zeros((height, width, 3), dtype=np.uint8)
    for i in range(50):
        img[...] = 127
        draw_ground(img, grid_ground, fltr_ground)
        car[:, 2] -= 1
        car[:, 0] -= 0.05
        grid_cube, fltr_cube_3d = world_to_img(car, rvec, tvec, K, dist_coeffs, height, width)
        # Project Back Test
        xmin, ymin, xmax, ymax = grid_cube[:, 0].min(), grid_cube[:, 1].min(), grid_cube[:, 0].max(), grid_cube[:, 1].max()
        pt1i = (int(xmin), int(ymin))
        pt2i = (int(xmax), int(ymax))
        cv2.rectangle(img, pt1i, pt2i, (0, 0, 255), 4)
        if len(grid_cube) == 8:
            draw_cube_faces(img, grid_cube, fltr_cube_3d, cv2.COLORMAP_AUTUMN)
        # Project Bottom of the box
        bottom = np.array([[(xmin + xmax) / 2, ymax]], dtype=np.float32).reshape(1, 2)
        bti = (int(bottom[0, 0]),
               int(bottom[0, 1]))
        cv2.circle(img, bti, 3, (255, 0, 0), 10)
        bottom3d = img_to_world(bottom, rvec, tvec, K, dist_coeffs)
        print(bottom3d[:, 2], fltr_cube_3d[:, 2].min(0))
        # Sanity check 1: take indices of ground points from the cube & project them back to ground.
        # ground_ids = np.where(fltr_cube_3d[:,1] == 0)
        # ground_cube3d = fltr_cube_3d[ground_ids]
        # ground_cube = grid_cube[ground_ids]
        # projback = img_to_world(ground_cube, rvec, tvec, K, dist_coeffs)
        cv2.imshow('img', img)
        cv2.waitKey(0)
if __name__ == '__main__':
    main()
    # super_test_2d_3d()
    # show_box_on_plane_animation()
@etienne87 (Author):
Removed confusing names like image_to_world_z; this is now "image_to_world", with the axis and offset given by the user.
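For example (a hedged sketch, assuming img_pts is an (N, 2) array of pixel coordinates and the calibration variables from main()):

    # back-project pixels onto a vertical plane z = 40 (axis=2, offset=40)
    pts_on_z40 = img_to_world(img_pts, rvec, tvec, K, dist_coeffs, offset=40, axis=2)
    # back-project pixels onto the ground plane y = 0 (the defaults: axis=1, offset=0)
    pts_on_ground = img_to_world(img_pts, rvec, tvec, K, dist_coeffs)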

@etienne87 (Author):
Now taking RT as input (TODO: maybe remove the redundant functions).

@etienne87 (Author):
added a cube animation

@etienne87 (Author, Aug 1, 2019):
Clearer convention: rvec & tvec are expressed "from the camera" (world-to-camera), which means world_to_img is the left-multiplication R.dot(xyz) + tvec, with xyz of shape (3 x N).
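A minimal sketch of that convention (variable names are only illustrative):

    R = angles_to_R(rvec)                  # world -> camera rotation
    xyz_cam = R.dot(xyz_world.T).T + tvec  # left-multiplication, as in projectPoints()
    cam_center_world = -R.T.dot(tvec)      # camera position back in world coordinates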
