Last active
November 27, 2023 16:50
-
-
Save hushell/40a9b2266d07248ee231bcd1cb6ac8d9 to your computer and use it in GitHub Desktop.
COLMAP known poses preparation from DTU data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import numpy as np | |
import trimesh | |
import os | |
import glob | |
from PIL import Image | |
from colmap_utils import qvec2rotmat, rotmat2qvec, read_images_binary, read_cameras_text, read_images_text | |
def load_K_Rt_from_P(P): | |
# Ref: | |
# P10: https://www.uio.no/studier/emner/matnat/its/nedlagte-emner/UNIK4690/v17/forelesninger/lecture_5_2_pose_from_known_3d_points.pdf | |
out = cv2.decomposeProjectionMatrix(P) # decomposes a projection matrix into a rotation matrix and a camera matrix | |
K = out[0] # 3x3 intrinsic matrix | |
R = out[1] # extrinsic[:3, :3] of shape 3x3 | |
c = out[2] # camera center of shape 4x1, related to pose[:3, 3] | |
c= (c[:3] / c[3])[:,0] # pose[:3, 3] | |
K = K/K[2,2] # normalize K to make sure K[2,2] == 1 | |
intrinsics = np.eye(4) | |
intrinsics[:3, :3] = K | |
# https://ksimek.github.io/2012/08/22/extrinsic/ | |
extrinsics = np.eye(4, dtype=np.float32) | |
extrinsics[:3, :3] = R | |
extrinsics[:3, 3] = -R @ c | |
return intrinsics, extrinsics | |
def visualize(poses, poses2, size=0.05): | |
axes = trimesh.creation.axis(axis_length=4) | |
box = trimesh.primitives.Box(extents=[2]*3).as_outline() | |
box.colors = np.array([[128, 128, 128]] * len(box.entities)) | |
objects = [axes, box] | |
# draw camera poses | |
def _vis_poses(_poses, color=[0, 0, 0]): | |
for pose in _poses: | |
# a camera is visualized with 8 line segments. | |
pos = pose[:3, 3] | |
a = pos + size * pose[:3, 0] + size * pose[:3, 1] - size * pose[:3, 2] | |
b = pos - size * pose[:3, 0] + size * pose[:3, 1] - size * pose[:3, 2] | |
c = pos - size * pose[:3, 0] - size * pose[:3, 1] - size * pose[:3, 2] | |
d = pos + size * pose[:3, 0] - size * pose[:3, 1] - size * pose[:3, 2] | |
dir = (a + b + c + d) / 4 - pos | |
dir = dir / (np.linalg.norm(dir) + 1e-8) | |
o = pos + dir * 0.2 | |
segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a], [pos, o]]) | |
segs = trimesh.load_path(segs) | |
segs.colors = np.array([color] * len(segs.entities)) | |
objects.append(segs) | |
_vis_poses(poses, [0, 128, 0]) | |
_vis_poses(poses2, [0, 0, 128]) | |
# draw scene | |
scene = trimesh.Scene(objects) | |
scene.set_camera(distance=1, center=[0, 0, 0]) | |
scene.show() | |
def rot_to_quat(R): | |
R = R[np.newaxis, :, :] | |
q = np.ones(4) | |
R00 = R[:, 0, 0] | |
R01 = R[:, 0, 1] | |
R02 = R[:, 0, 2] | |
R10 = R[:, 1, 0] | |
R11 = R[:, 1, 1] | |
R12 = R[:, 1, 2] | |
R20 = R[:, 2, 0] | |
R21 = R[:, 2, 1] | |
R22 = R[:, 2, 2] | |
q[0]=np.sqrt(1.0+R00+R11+R22)/2 | |
q[1]=(R21-R12)/(4*q[0]) | |
q[2] = (R02 - R20) / (4 * q[0]) | |
q[3] = (R10 - R01) / (4 * q[0]) | |
return q | |
def from_colmap(sfm_path): | |
cameras=read_cameras_text(os.path.join(sfm_path, "cameras.txt")) | |
images=read_images_text(os.path.join(sfm_path, "images.txt")) | |
K = np.eye(3) | |
K[0, 0] = cameras[1].params[0] | |
K[1, 1] = cameras[1].params[1] | |
K[0, 2] = cameras[1].params[2] | |
K[1, 2] = cameras[1].params[3] | |
cameras_npz_format = {} | |
extrinsics_all = [] | |
for ii in range(len(images)): | |
cur_image=images[ii + 1] | |
M=np.zeros((3,4)) | |
M[:,3]=cur_image.tvec | |
M[:3,:3]=qvec2rotmat(cur_image.qvec) | |
P=np.eye(4) | |
P[:3, :] = K @ M | |
cameras_npz_format['world_mat_%d' % ii] = P | |
cameras_npz_format['M_%d' % ii] = M | |
extrinsics_all.append(M) | |
#np.savez("cameras_before_normalization.npz", | |
# **cameras_npz_format) | |
return cameras_npz_format, extrinsics_all | |
def to_colmap(cameras_npz_format, sfm_path): | |
scene_path = '.' | |
image_path = os.path.join(scene_path, 'images', '*.png') | |
images_path = sorted(glob.glob(image_path)) | |
os.makedirs(sfm_path, exist_ok=True) | |
n_images = len(images_path) | |
h_all, w_all = [], [] | |
for i, path in enumerate(images_path): | |
img = Image.open(path) | |
w_all.append(img.size[0]) | |
h_all.append(img.size[1]) | |
camera_dict = cameras_npz_format | |
scale_mats = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(n_images)] | |
world_mats = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(n_images)] | |
intrinsics_all = [] | |
extrinsics_all = [] | |
quarternion_all = [] | |
translation_all = [] | |
fx_all = [] | |
fy_all = [] | |
cx_all = [] | |
cy_all = [] | |
for scale_mat, world_mat in zip(scale_mats, world_mats): | |
#for world_mat in world_mats: | |
P = world_mat @ scale_mat | |
#P = world_mat | |
P = P[:3, :4] | |
intrinsics, extrinsics = load_K_Rt_from_P(P) | |
intrinsics_all.append(intrinsics) | |
extrinsics_all.append(extrinsics) | |
q = rot_to_quat(extrinsics[:3, :3]) | |
quarternion_all.append(q) | |
translation_all.append(extrinsics[:3, 3]) | |
fx_all.append(intrinsics[0,0]) | |
fy_all.append(intrinsics[1,1]) | |
cx_all.append(intrinsics[0,2]) | |
cy_all.append(intrinsics[1,2]) | |
# cameras.txt | |
# camera_id, model, width, height, fx, fy, cx, cy | |
with open(os.path.join(sfm_path, 'cameras.txt'), 'w') as f: | |
for i in range(n_images): | |
f.write(f'{i+1} PINHOLE {w_all[i]} {h_all[i]} {fx_all[i]} {fy_all[i]} {cx_all[i]} {cy_all[i]}\n') | |
# images.txt | |
# images_id, qw, qx, qy, qz, tx, ty, tz, camera_id, name | |
with open(os.path.join(sfm_path, 'images.txt'), 'w') as f: | |
for i in range(n_images): | |
name = os.path.basename(images_path[i]) | |
quarternion = quarternion_all[i] | |
translation = translation_all[i] | |
f.write(f'{i+1} {quarternion[0]} {quarternion[1]} {quarternion[2]} {quarternion[3]} {translation[0]} {translation[1]} {translation[2]} {i+1} {name}\n') | |
f.write('\n') | |
# points3D.txt | |
with open(os.path.join(sfm_path, 'points3D.txt'), 'w') as f: | |
pass | |
return extrinsics_all | |
if __name__ == "__main__": | |
scene_path = '.' | |
cam_file = os.path.join(scene_path, 'cameras.npz') | |
camera_dict = np.load(cam_file) | |
# convert to colmap format | |
e_all = to_colmap(camera_dict, os.path.join(scene_path, 'pose')) | |
# read camera info from colmap format | |
camera_dict2, e_all2 = from_colmap(os.path.join(scene_path, 'pose')) | |
print('world matrix comparison:') | |
for k in camera_dict2.keys(): | |
if 'world' in k: | |
print('-------------------') | |
print(camera_dict[k]) | |
print(camera_dict2[k]) | |
# camera_dict2, e_all2 = from_colmap(os.path.join(scene_path, 'pose2')) | |
# e_all = to_colmap(camera_dict2, os.path.join(scene_path, 'pose3')) | |
print('extrinsics comparison:') | |
for i in range(len(e_all)): | |
print('-------------------') | |
print(e_all[i]) | |
print(e_all2[i]) | |
################################################################################# | |
# compare pose from colmap automatic reconstruction | |
# # read image data (imkeys, img_paths) | |
# imdata = read_images_binary(os.path.join(scene_path, 'sparse', '0', 'images.bin')) | |
# imkeys = np.array(sorted(imdata.keys())) | |
# # read extrinsics & poses | |
# extrinsics = [] | |
# for k in imkeys: | |
# P = np.eye(4, dtype=np.float64) | |
# P[:3, :3] = imdata[k].qvec2rotmat() | |
# P[:3, 3] = imdata[k].tvec | |
# extrinsics.append(P) | |
# _poses = np.linalg.inv(extrinsics) # [K, 4, 4] | |
# poses = np.linalg.inv(e_all) # [K, 4, 4] | |
# visualize(poses, _poses) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment