Skip to content

Instantly share code, notes, and snippets.

Forked from Sazoji/
Created December 18, 2023 05:08
Show Gist options
  • Save adhiraj2001/248f0d2e6b6241f4acc69611b9285da3 to your computer and use it in GitHub Desktop.
Save adhiraj2001/248f0d2e6b6241f4acc69611b9285da3 to your computer and use it in GitHub Desktop.
colmap2poses is simple! Make sure you have OpenCV installed (and rembg if you wish to mask). The only required argument is the path to the dataset folder, which must contain an "images" subfolder. Avoid unusual filenames (e.g. names containing spaces).
# most of the code is refactored colmap parsing from LLFF, a GPLv3 project.
# this is meant to be comparable to instant-ngp's, and uses the
# same arguments, with additional functions for masking and LLFF format poses.
# LLFF format .npy files will always be made, remove if you ONLY want NeRF format
#~~~provide a dataset folder path that contains an "images" subfolder with the pics~~~
# ~~~colmap can be installed to PATH or linked as an argument (--colmap_path)~~~
# usage: (for LLFF format, nvdiffrec)
# --mask "/path/to/dataset/"
# a "mask" folder is required, manual or passed with --mask as shown
# if you wish for NeRF format, pass --json
# masks must be applied to the image for NeRF format, (--alpha)
# nvdiffrec loads the .npy first, but for NeRF format datasets, the image
# extensions must be removed to prevent errors
# video and aabb_scale options are from instant-ngp, follow their guide to use
import argparse
import os
import sys
import numpy as np
import json
import math
import cv2
import subprocess
import struct
import collections
import shutil
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes read elsewhere in this file:
        ``json``, ``mask``, ``alpha``, ``video_in``, ``video_fps``,
        ``time_slice``, ``match_type``, ``database_db``, ``aabb_scale``,
        ``out_json``, ``images``, ``colmap_path`` and ``scenedir``.
    """
    parser = argparse.ArgumentParser(description="convert colmap to NeRF or LLFF(NeRD) format, automask objects, and process video")

    # A store_true flag needs a falsy default; the string 'false' is truthy.
    parser.add_argument('--json', action="store_true",
                        default=False, help='output json/instant-ngp format')
    # NOTE: --mask / --alpha keep "" as their default on purpose: the entry
    # point tests them with `!= ""`, so switching to False would enable them.
    parser.add_argument("--mask", action="store_true",
                        default="", help="use rembg to automatically mask the entire image folder. reccomended to use manual masks for better results in nvdiffrec.")
    parser.add_argument("--alpha", action="store_true",
                        default="", help="only works when --mask is enabled, applys a mask to the alpha channel of the image (instead of a separate folder). enables masking in instant-ngp.")
    parser.add_argument("--video_in", type=str,
                        default="", help="run ffmpeg first to convert a provided video file into a set of images. uses the video_fps parameter also. needs ffmpeg in path or installed.")
    parser.add_argument("--video_fps",
                        default=2, help="frames per second to extract from --video_in")
    parser.add_argument("--time_slice", type=str,
                        default="", help="time (in seconds) in the format t1,t2 within which the images should be generated from the video. eg: \"--time_slice '10,300'\" will generate images only from 10th second to 300th second of the video")
    parser.add_argument('--match_type', type=str,
                        default='exhaustive_matcher', help='type of matcher used. Valid options: exhaustive_matcher sequential_matcher. Other matchers not supported at this time')
    parser.add_argument("--database_db", type=str,
                        default="database.db", help="colmap database filename")
    # Consumers call int(args.aabb_scale), so the int default and the string
    # choices are both acceptable.
    parser.add_argument("--aabb_scale",
                        default=2, choices=["1","2","4","8","16"], help="large scene scale factor. 1=scene fits in unit cube; power of 2 up to 16")
    parser.add_argument("--out_json", type=str,
                        default="transforms_test.json", help="output json filename")
    parser.add_argument("--images", type=str,
                        default="images", help="name of the directory containing the images")
    parser.add_argument('--colmap_path', type=str,
                        default='', help='path to colmap batch file, if not in PATH')
    parser.add_argument('scenedir', type=str,
                        help='input scene directory, where the dataset will be stored, required to run')
    return parser.parse_args()
# Parsed once at import time; several functions below read this module-level
# namespace directly (e.g. args.database_db, args.images).
args = parse_args()
if args.match_type not in ('exhaustive_matcher', 'sequential_matcher'):
    print('ERROR: matcher type ' + args.match_type + ' is not valid. Aborting')
    # The message promises an abort; without this COLMAP would later be
    # invoked with an unsupported sub-command.
    sys.exit(1)
# Lightweight record types mirroring the entities stored in a COLMAP model.
# Field names are given as whitespace-separated strings.
CameraModel = collections.namedtuple(
    "CameraModel", "model_id model_name num_params")
Camera = collections.namedtuple(
    "Camera", "id model width height params")
# The tuple type is deliberately named "Image"; the Image subclass adds
# behaviour on top of it.
BaseImage = collections.namedtuple(
    "Image", "id qvec tvec camera_id name xys point3D_ids")
Point3D = collections.namedtuple(
    "Point3D", "id xyz rgb error image_ids point2D_idxs")
class Image(BaseImage):
    """Image record that can convert its own quaternion to a rotation matrix."""

    def qvec2rotmat(self):
        """Return the rotation matrix for ``self.qvec`` via the module-level helper."""
        rotation = qvec2rotmat(self.qvec)
        return rotation
# Camera models known to this script: numeric id, name, and number of
# intrinsic parameters. The assignment and the set delimiters were missing,
# which left CAMERA_MODELS undefined when CAMERA_MODEL_IDS was built.
CAMERA_MODELS = {
    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
    CameraModel(model_id=7, model_name="FOV", num_params=5),
    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12),
}
# Lookup by numeric model id, used by read_cameras_binary.
CAMERA_MODEL_IDS = {camera_model.model_id: camera_model
                    for camera_model in CAMERA_MODELS}
def run_colmap(basedir, match_type, colmap_path="Colmap"):
    """Run COLMAP feature extraction, matching and sparse mapping on a scene.

    The database filename and the image sub-folder are taken from the
    module-level ``args`` (``args.database_db`` / ``args.images``).

    Args:
        basedir: scene directory; the database, ``sparse/`` output and
            ``colmap_output.txt`` log are placed inside it.
        match_type: COLMAP matcher sub-command (e.g. ``exhaustive_matcher``).
        colmap_path: COLMAP executable / batch file to invoke.

    Raises:
        subprocess.CalledProcessError: if any COLMAP step exits non-zero.
    """
    logfile_name = os.path.join(basedir, 'colmap_output.txt')
    database_path = os.path.join(basedir, args.database_db)
    image_path = os.path.join(basedir, args.images)
    sparse_dir = os.path.join(basedir, 'sparse')

    feature_extractor_args = [
        colmap_path, 'feature_extractor',
        '--database_path', database_path,
        '--image_path', image_path,
        '--ImageReader.single_camera', '1',
        # '--SiftExtraction.use_gpu', '0',
    ]
    exhaustive_matcher_args = [
        colmap_path, match_type,
        '--database_path', database_path,
    ]
    mapper_args = [
        colmap_path, 'mapper',
        '--database_path', database_path,
        '--image_path', image_path,
        '--output_path', sparse_dir,  # --export_path changed to --output_path in colmap 3.6
        '--Mapper.num_threads', '16',
        '--Mapper.init_min_tri_angle', '4',
        '--Mapper.multiple_models', '0',
        '--Mapper.extract_colors', '0',
    ]

    # The context manager closes the log even when a COLMAP step fails, and
    # each step's output is actually written to it.
    with open(logfile_name, 'w') as logfile:
        feat_output = subprocess.check_output(feature_extractor_args, universal_newlines=True)
        logfile.write(feat_output)
        print('Features extracted')

        match_output = subprocess.check_output(exhaustive_matcher_args, universal_newlines=True)
        logfile.write(match_output)
        print('Features matched')

        # The mapper needs its output directory to exist beforehand.
        os.makedirs(sparse_dir, exist_ok=True)

        map_output = subprocess.check_output(mapper_args, universal_newlines=True)
        logfile.write(map_output)
        print('Sparse map created')

    print('Finished running COLMAP, see {} for logs'.format(logfile_name))
def run_ffmpeg(args):
    """Extract frames from ``args.video_in`` into the scene's image folder.

    Args:
        args: parsed command-line namespace; reads ``images``, ``video_in``,
            ``video_fps``, ``time_slice`` and ``scenedir``.

    Frames are written as ``%04d.jpg`` into ``<scenedir>/<images>``. The user
    is asked for confirmation first because that folder is replaced.
    """
    images = args.images
    video = args.video_in
    fps = float(args.video_fps) or 1.0
    images_dir = os.path.join(args.scenedir, args.images)
    print(f"running ffmpeg with input video file={video}, output image folder={images}, fps={fps}.")
    if (input(f"warning! folder '{images}' will be deleted/replaced. continue? (Y/n)").lower().strip()+"y")[:1] != "y":
        # The user declined: do not touch the folder.
        sys.exit(1)

    # NOTE(review): the prompt announces that the folder is deleted/replaced,
    # so it is cleared and recreated here - confirm against the intended
    # behaviour before relying on it.
    shutil.rmtree(images_dir, ignore_errors=True)
    os.makedirs(images_dir, exist_ok=True)

    time_slice_value = ""
    time_slice = args.time_slice
    if time_slice:
        start, end = time_slice.split(",")
        # "\\," yields a literal backslash-comma for ffmpeg's filter syntax
        # without relying on Python's invalid "\," escape.
        time_slice_value = f",select='between(t\\,{start}\\,{end})'"

    # Join the frame pattern as a path component so the output lands inside
    # the image folder on every platform (a literal "\" only works on Windows).
    out_pattern = os.path.join(images_dir, "%04d.jpg")
    do_system(f"ffmpeg -i \"{video}\" -qscale:v 1 -qmin 1 -vf \"fps={fps}{time_slice_value}, scale=-2:512\" \"{out_pattern}\"")
def gen_poses(basedir, match_type, colmap_path, factors=None):
    """Run COLMAP if needed and write the LLFF pose file for a scene.

    Args:
        basedir: scene directory.
        match_type: COLMAP matcher sub-command passed to ``run_colmap``.
        colmap_path: COLMAP executable passed to ``run_colmap``.
        factors: optional list of downsample factors passed to ``minify``.

    Returns:
        True once processing has finished.
    """
    files_needed = ['{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']]
    sparse_model_dir = os.path.join(basedir, 'sparse/0')
    if os.path.exists(sparse_model_dir):
        files_had = os.listdir(sparse_model_dir)
    else:
        files_had = []

    if not all(f in files_had for f in files_needed):
        print('Need to run COLMAP')
        run_colmap(basedir, match_type, colmap_path)
    else:
        print('Don\'t need to run COLMAP')

    print('Post-colmap')

    # poses, pts3d, perm = load_colmap_data(basedir)
    # save_poses(basedir, poses, pts3d, perm)
    # NOTE(review): load_save_pose is the combined replacement for the two
    # calls above and is the only remaining writer of poses_bounds.npy -
    # confirm this call matches the intended flow.
    load_save_pose(basedir)

    if factors is not None:
        print('Factors:', factors)
        minify(basedir, factors)

    print('Done with imgs2poses')

    return True
def load_colmap_data(realdir):
    """Load the binary COLMAP model in ``realdir/sparse/0`` as LLFF-style poses.

    Args:
        realdir: scene directory containing ``sparse/0/*.bin``.

    Returns:
        (poses, pts3d, perm):
        ``poses`` is a (3, 5, N) array - per image a 3x4 camera-to-world
        matrix (with reordered/flipped columns) plus a [h, w, f] column;
        ``pts3d`` is the dict returned by ``read_points3d_binary``;
        ``perm`` is the argsort of the image names.
    """
    camerasfile = os.path.join(realdir, 'sparse/0/cameras.bin')
    camdata = read_cameras_binary(camerasfile)

    # Only the first camera is used for the intrinsics.
    list_of_keys = list(camdata.keys())
    cam = camdata[list_of_keys[0]]
    # Report the number of cameras (len(cam) would be the tuple's field count).
    print('Cameras', len(camdata))

    h, w, f = cam.height, cam.width, cam.params[0]
    # w, h, f = factor * w, factor * h, factor * f
    hwf = np.array([h, w, f]).reshape([3, 1])

    imagesfile = os.path.join(realdir, 'sparse/0/images.bin')
    imdata = read_images_binary(imagesfile)

    w2c_mats = []
    bottom = np.array([0, 0, 0, 1.]).reshape([1, 4])

    names = [imdata[k].name for k in imdata]
    print('Images #', len(names))
    perm = np.argsort(names)
    for k in imdata:
        im = imdata[k]
        R = im.qvec2rotmat()
        t = im.tvec.reshape([3, 1])
        m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
        # Collect every 4x4 world-to-camera matrix; np.stack below needs them.
        w2c_mats.append(m)

    w2c_mats = np.stack(w2c_mats, 0)
    c2w_mats = np.linalg.inv(w2c_mats)

    poses = c2w_mats[:, :3, :4].transpose([1, 2, 0])
    poses = np.concatenate([poses, np.tile(hwf[..., np.newaxis], [1, 1, poses.shape[-1]])], 1)

    points3dfile = os.path.join(realdir, 'sparse/0/points3D.bin')
    pts3d = read_points3d_binary(points3dfile)

    # must switch to [-u, r, -t] from [r, -u, t], NOT [r, u, -t]
    poses = np.concatenate([poses[:, 1:2, :], poses[:, 0:1, :], -poses[:, 2:3, :], poses[:, 3:4, :], poses[:, 4:5, :]], 1)

    return poses, pts3d, perm
def save_poses(basedir, poses, pts3d, perm):
    """Compute per-image depth bounds and write ``poses_bounds.npy``.

    Args:
        basedir: directory that receives ``poses_bounds.npy``.
        poses: (3, 5, N) pose array as produced by ``load_colmap_data``.
        pts3d: mapping of point id -> record with ``xyz`` and ``image_ids``.
            Image ids are treated as 1-based indices into the N poses.
        perm: order in which the images are written.

    Each saved row is the flattened 3x5 pose followed by the 0.1 and 99.9
    percentile depths of the points visible in that image. If a point
    references an image id outside 1..N, an error is printed and nothing is
    saved.
    """
    pts_arr = []
    vis_arr = []
    num_cams = poses.shape[-1]
    for k in pts3d:
        pts_arr.append(pts3d[k].xyz)
        cams = [0] * num_cams
        for ind in pts3d[k].image_ids:
            # cams[ind-1] is only valid for 1 <= ind <= num_cams; the previous
            # `len(cams) < ind - 1` test let ind == num_cams + 1 through.
            if not 1 <= ind <= num_cams:
                print('ERROR: the correct camera poses for current points cannot be accessed')
                return
            cams[ind-1] = 1
        vis_arr.append(cams)

    pts_arr = np.array(pts_arr)
    vis_arr = np.array(vis_arr)
    print('Points', pts_arr.shape, 'Visibility', vis_arr.shape)

    # Depth of every point along each camera's viewing axis -> (points, images).
    zvals = np.sum(-(pts_arr[:, np.newaxis, :].transpose([2,0,1]) - poses[:3, 3:4, :]) * poses[:3, 2:3, :], 0)
    valid_z = zvals[vis_arr==1]
    print('Depth stats', valid_z.min(), valid_z.max(), valid_z.mean())

    save_arr = []
    for i in perm:
        vis = vis_arr[:, i]
        zs = zvals[:, i]
        zs = zs[vis==1]
        close_depth, inf_depth = np.percentile(zs, .1), np.percentile(zs, 99.9)
        # print( i, close_depth, inf_depth )

        save_arr.append(np.concatenate([poses[..., i].ravel(), np.array([close_depth, inf_depth])], 0))
    save_arr = np.array(save_arr)

    np.save(os.path.join(basedir, 'poses_bounds.npy'), save_arr)
def save_views(realdir,names):
    """Write the image names to ``realdir/view_imgs.txt``, one per line.

    Args:
        realdir: directory that receives ``view_imgs.txt``.
        names: iterable of image file names.
    """
    # NOTE(review): the body of the `with` statement was missing; one name per
    # line is assumed from the parameter and file names - confirm the format
    # expected by whatever reads this file.
    with open(os.path.join(realdir,'view_imgs.txt'), mode='w') as f:
        f.write('\n'.join(names))
def load_save_pose(realdir):
    """Load the COLMAP model of a scene and write ``poses_bounds.npy``.

    Combines ``load_colmap_data`` and ``save_poses``, but maps image ids to
    pose indices through the ids of the registered images instead of assuming
    they are consecutive and 1-based.

    Args:
        realdir: scene directory containing ``sparse/0/*.bin``; receives
            ``poses_bounds.npy``.

    If a 3D point references an image id that is not part of the model, an
    error is printed and nothing is saved.
    """
    # load colmap data
    model_path = os.path.join(realdir, 'sparse/0/')
    modeldata = read_model(model_path, ".bin")

    camdata = modeldata[0]
    # Only the first camera is used for the intrinsics.
    list_of_keys = list(camdata.keys())
    cam = camdata[list_of_keys[0]]
    print('Cameras', cam)

    h, w, f = cam.height, cam.width, cam.params[0]
    hwf = np.array([h, w, f]).reshape([3, 1])

    imdata = modeldata[1]
    # Position of every registered image id in iteration order, i.e. its
    # index along the last axis of `poses`. A dict avoids an O(n) list search
    # per observation.
    index_of_id = {image_id: idx for idx, image_id in enumerate(imdata)}

    w2c_mats = []
    bottom = np.array([0, 0, 0, 1.]).reshape([1, 4])

    names = [imdata[k].name for k in imdata]
    print('Images #', len(names))

    # if (len(names)< 32):
    #     raise ValueError(f'{realdir} only {len(names)} images register, need Re-run colmap or reset the threshold')

    perm = np.argsort(names)
    sort_names = [names[i] for i in perm]
    for k in imdata:
        im = imdata[k]
        R = im.qvec2rotmat()
        t = im.tvec.reshape([3, 1])
        m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
        # Collect every 4x4 world-to-camera matrix; np.stack below needs them.
        w2c_mats.append(m)

    w2c_mats = np.stack(w2c_mats, 0)
    c2w_mats = np.linalg.inv(w2c_mats)

    poses = c2w_mats[:, :3, :4].transpose([1, 2, 0])
    poses = np.concatenate([poses, np.tile(hwf[..., np.newaxis], [1, 1, poses.shape[-1]])], 1)

    pts3d = modeldata[2]

    # must switch to [-u, r, -t] from [r, -u, t], NOT [r, u, -t]
    poses = np.concatenate([poses[:, 1:2, :], poses[:, 0:1, :], -poses[:, 2:3, :], poses[:, 3:4, :], poses[:, 4:5, :]], 1)

    # save pose
    pts_arr = []
    vis_arr = []
    num_cams = poses.shape[-1]
    for k in pts3d:
        pts_arr.append(pts3d[k].xyz)
        cams = [0] * num_cams
        for ind in pts3d[k].image_ids:
            cam_index = index_of_id.get(ind)
            if cam_index is None:
                print('ERROR: the correct camera poses for current points cannot be accessed')
                return
            cams[cam_index] = 1
        vis_arr.append(cams)

    pts_arr = np.array(pts_arr)
    vis_arr = np.array(vis_arr)
    print('Points', pts_arr.shape, 'Visibility', vis_arr.shape)

    # Depth of every point along each camera's viewing axis -> (points, images).
    zvals = np.sum(-(pts_arr[:, np.newaxis, :].transpose([2,0,1]) - poses[:3, 3:4, :]) * poses[:3, 2:3, :], 0)
    valid_z = zvals[vis_arr==1]
    print('Depth stats', valid_z.min(), valid_z.max(), valid_z.mean())

    save_arr = []
    for i in perm:
        vis = vis_arr[:, i]
        zs = zvals[:, i]
        zs = zs[vis==1]
        close_depth, inf_depth = np.percentile(zs, .1), np.percentile(zs, 99.9)
        save_arr.append(np.concatenate([poses[..., i].ravel(), np.array([close_depth, inf_depth])], 0))
    save_arr = np.array(save_arr)

    np.save(os.path.join(realdir, 'poses_bounds.npy'), save_arr)
    # NOTE(review): sort_names was computed but unused; recording the saved
    # image order through save_views is presumed to be its purpose - confirm.
    save_views(realdir, sort_names)
def convert_to_json(args):
    """Convert the scene's COLMAP model to an instant-ngp style transforms JSON.

    Args:
        args: parsed command-line namespace; reads ``aabb_scale``,
            ``scenedir``, ``out_json`` and ``images``.

    Steps: export the sparse model as text into ``<scenedir>/text``, read the
    camera intrinsics from ``cameras.txt``, read one pose per image from
    ``images.txt``, re-orient the poses so the average up vector becomes +z,
    re-centre them on the point the cameras look at, scale the average camera
    distance to 4.0, and write ``<scenedir>/<out_json>``.
    """
    AABB_SCALE = int(args.aabb_scale)
    text = os.path.normpath(args.scenedir + '/text')
    OUT_PATH = os.path.normpath(args.scenedir + '/' + args.out_json)
    sparce = os.path.normpath(args.scenedir + '/sparse')
    # os.makedirs tolerates an existing folder, unlike a shell `mkdir`.
    os.makedirs(text, exist_ok=True)
    do_system(f"colmap model_converter --input_path {sparce}/0 --output_path {text} --output_type TXT")
    print(f"outputting to {OUT_PATH}...")

    camera_seen = False
    with open(os.path.join(text, "cameras.txt"), "r") as f:
        angle_x = math.pi / 2
        for line in f:
            # 1 SIMPLE_RADIAL 2048 1536 1580.46 1024 768 0.0045691
            # 1 OPENCV 3840 2160 3178.27 3182.09 1920 1080 0.159668 -0.231286 -0.00123982 0.00272224
            # 1 RADIAL 1920 1080 1665.1 960 540 0.0672856 -0.0761443
            if not line.strip() or line[0] == "#":
                continue
            els = line.split()
            camera_seen = True
            w = float(els[2])
            h = float(els[3])
            fl_x = float(els[4])
            fl_y = float(els[4])
            k1 = 0
            k2 = 0
            p1 = 0
            p2 = 0
            cx = w / 2
            cy = h / 2
            if els[1] == "SIMPLE_PINHOLE":
                cx = float(els[5])
                cy = float(els[6])
            elif els[1] == "PINHOLE":
                fl_y = float(els[5])
                cx = float(els[6])
                cy = float(els[7])
            elif els[1] == "SIMPLE_RADIAL":
                cx = float(els[5])
                cy = float(els[6])
                k1 = float(els[7])
            elif els[1] == "RADIAL":
                cx = float(els[5])
                cy = float(els[6])
                k1 = float(els[7])
                k2 = float(els[8])
            elif els[1] == "OPENCV":
                fl_y = float(els[5])
                cx = float(els[6])
                cy = float(els[7])
                k1 = float(els[8])
                k2 = float(els[9])
                p1 = float(els[10])
                p2 = float(els[11])
            else:
                print("unknown camera model ", els[1])
            # fl = 0.5 * w / tan(0.5 * angle_x);
            angle_x = math.atan(w / (fl_x * 2)) * 2
            angle_y = math.atan(h / (fl_y * 2)) * 2
            fovx = angle_x * 180 / math.pi
            fovy = angle_y * 180 / math.pi

    if not camera_seen:
        sys.exit("no camera found in cameras.txt")

    print(f"camera:\n\tres={w,h}\n\tcenter={cx,cy}\n\tfocal={fl_x,fl_y}\n\tfov={fovx,fovy}\n\tk={k1,k2} p={p1,p2} ")

    with open(os.path.join(text, "images.txt"), "r") as f:
        i = 0
        bottom = np.array([0.0, 0.0, 0.0, 1.0]).reshape([1, 4])
        out = {
            "camera_angle_x": angle_x,
            "camera_angle_y": angle_y,
            "fl_x": fl_x,
            "fl_y": fl_y,
            "k1": k1,
            "k2": k2,
            "p1": p1,
            "p2": p2,
            "cx": cx,
            "cy": cy,
            "w": w,
            "h": h,
            "aabb_scale": AABB_SCALE,
            "frames": [],
        }

        up = np.zeros(3)
        for line in f:
            line = line.strip()
            # startswith is safe on the empty line of an image without 2D
            # points; that line must still be counted to keep the odd/even
            # pairing of pose line and point line intact.
            if line.startswith("#"):
                continue
            i = i + 1
            if i % 2 == 1:
                elems = line.split(" ")  # 1-4 is quat, 5-7 is trans, 9ff is filename
                filename = " ".join(elems[9:])
                name = str(f"./{args.images}/{filename}")
                # Measure sharpness on the file inside the scene directory so
                # the script does not depend on the current working directory.
                b = sharpness(os.path.join(args.scenedir, args.images, filename))
                print(name, "sharpness=", b)
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                R = qvec2rotmat(-qvec)
                t = tvec.reshape([3, 1])
                m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
                c2w = np.linalg.inv(m)
                c2w[0:3, 2] *= -1  # flip the y and z axis
                c2w[0:3, 1] *= -1
                c2w = c2w[[1, 0, 2, 3], :]  # swap y and z
                c2w[2, :] *= -1  # flip whole world upside down

                up += c2w[0:3, 1]

                frame = {"file_path": name, "sharpness": b, "transform_matrix": c2w}
                out["frames"].append(frame)

    nframes = len(out["frames"])
    if nframes == 0:
        sys.exit("no registered images found in images.txt")

    up = up / np.linalg.norm(up)
    print("up vector was", up)
    R = rotmat(up, [0, 0, 1])  # rotate up vector to [0,0,1]
    R = np.pad(R, [0, 1])
    R[-1, -1] = 1

    for frame in out["frames"]:
        frame["transform_matrix"] = np.matmul(R, frame["transform_matrix"])  # rotate up to be the z axis

    # find a central point they are all looking at
    print("computing center of attention...")
    totw = 0.0
    totp = np.array([0.0, 0.0, 0.0])
    for frame in out["frames"]:
        mf = frame["transform_matrix"][0:3, :]
        for other in out["frames"]:
            mg = other["transform_matrix"][0:3, :]
            p, weight = closest_point_2_lines(mf[:, 3], mf[:, 2], mg[:, 3], mg[:, 2])
            if weight > 0.01:
                totp += p * weight
                totw += weight
    # With a single frame or all-parallel views no pair contributes; keep the
    # origin instead of dividing by zero.
    if totw > 0.0:
        totp /= totw
    print(totp)  # the cameras are looking at totp
    for frame in out["frames"]:
        frame["transform_matrix"][0:3, 3] -= totp

    avglen = 0.
    for frame in out["frames"]:
        avglen += np.linalg.norm(frame["transform_matrix"][0:3, 3])
    avglen /= nframes
    print("avg camera distance from origin", avglen)
    if avglen > 0.0:
        for frame in out["frames"]:
            frame["transform_matrix"][0:3, 3] *= 4.0 / avglen  # scale to "nerf sized"

    # numpy arrays are not JSON serialisable.
    for frame in out["frames"]:
        frame["transform_matrix"] = frame["transform_matrix"].tolist()
    print(f"writing {OUT_PATH}")
    with open(OUT_PATH, "w") as outfile:
        json.dump(out, outfile, indent=2)
def do_system(arg):
    """Run a shell command, echoing it first, and abort the script on failure.

    Args:
        arg: command line passed to ``os.system``.

    Raises:
        SystemExit: with the command's status when it is non-zero.
    """
    print(f"==== running: {arg}")
    err = os.system(arg)
    if err:
        print("FATAL: command failed")
        # "FATAL" must stop the pipeline; later steps depend on this output.
        sys.exit(err)
def minify(basedir, factors=[], resolutions=[]):
# Checks whether the downsized image folders for the requested factors and
# resolutions already exist below `basedir`.
#   factors:     each entry r maps to the folder 'images_{r}'
#   resolutions: each entry r maps to the folder 'images_{r[1]}x{r[0]}'
# `needtoload` becomes True as soon as one of those folders is missing.
# NOTE(review): the body of the final `if not needtoload:` and anything after
# it are not visible here, so what happens once a folder is found missing
# cannot be documented from this view.
needtoload = False
for r in factors:
imgdir = os.path.join(basedir, 'images_{}'.format(r))
if not os.path.exists(imgdir):
needtoload = True
for r in resolutions:
imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0]))
if not os.path.exists(imgdir):
needtoload = True
if not needtoload:
def qvec2rotmat(qvec):
    """Convert a 4-element quaternion into a 3x3 rotation matrix.

    Element 0 is treated as the scalar part and elements 1-3 as the vector
    part. The quaternion is used as given (no normalisation).
    """
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
    row0 = [1 - 2 * y**2 - 2 * z**2,
            2 * x * y - 2 * w * z,
            2 * z * x + 2 * w * y]
    row1 = [2 * x * y + 2 * w * z,
            1 - 2 * x**2 - 2 * z**2,
            2 * y * z - 2 * w * x]
    row2 = [2 * z * x - 2 * w * y,
            2 * y * z + 2 * w * x,
            1 - 2 * x**2 - 2 * y**2]
    return np.array([row0, row1, row2])
def rotmat(a, b):
    """Return the 3x3 rotation matrix that rotates direction ``a`` onto ``b``.

    Args:
        a: source direction (3 values, any non-zero length).
        b: target direction (3 values, any non-zero length).

    Uses Rodrigues' formula built from the cross product of the normalised
    inputs. Exactly opposite inputs have no unique rotation axis; a 180 degree
    turn about an arbitrary perpendicular axis is returned in that case.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    a, b = a / np.linalg.norm(a), b / np.linalg.norm(b)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    if s < 1e-8 and c < 0:
        # Antiparallel: the cross product vanishes and the general formula
        # would collapse to the identity. Rotate by pi about an axis
        # perpendicular to `a` instead (R = 2*u*u^T - I maps a to -a).
        helper = np.zeros(3)
        helper[np.argmin(np.abs(a))] = 1.0
        u = np.cross(a, helper)
        u = u / np.linalg.norm(u)
        return 2.0 * np.outer(u, u) - np.eye(3)
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    # The 1e-10 keeps the division finite when a and b are (nearly) parallel.
    return np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2 + 1e-10))
def variance_of_laplacian(image):
    """Return the variance of the image's Laplacian (computed as CV_64F)."""
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()
def sharpness(imagePath):
    """Return a sharpness score for the image stored at ``imagePath``.

    The score is the variance of the Laplacian of the grayscale image.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file (``cv2.imread``
            returns None for missing or unreadable images).
    """
    image = cv2.imread(imagePath)
    if image is None:
        # Fail with the offending path instead of a cryptic cvtColor assertion.
        raise FileNotFoundError(f"could not read image: {imagePath}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    fm = variance_of_laplacian(gray)
    return fm
def closest_point_2_lines(oa, da, ob, db):
    """Return the point closest to two rays and a parallelism weight.

    Each ray has the form ``o + t*d``. The second return value is the squared
    norm of the cross product of the normalised directions, which goes to 0 as
    the rays become parallel. Positive ray parameters are clamped to 0.
    """
    dir_a = da / np.linalg.norm(da)
    dir_b = db / np.linalg.norm(db)
    normal = np.cross(dir_a, dir_b)
    denom = np.linalg.norm(normal)**2
    offset = ob - oa
    scale = denom + 1e-10
    ta = np.linalg.det([offset, dir_b, normal]) / scale
    tb = np.linalg.det([offset, dir_a, normal]) / scale
    ta = 0 if ta > 0 else ta
    tb = 0 if tb > 0 else tb
    midpoint = (oa+ta*dir_a+ob+tb*dir_b) * 0.5
    return midpoint, denom
def read_points3d_binary(path_to_model_file):
    """Read a COLMAP ``points3D.bin`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)

    :param path_to_model_file: path of the binary points file.
    :return: dict mapping point id to a Point3D record.
    """
    points3D = {}
    with open(path_to_model_file, "rb") as fid:
        num_points = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_points):
            # id (Q), xyz (ddd), rgb (BBB), error (d) = 43 bytes
            binary_point_line_properties = read_next_bytes(
                fid, num_bytes=43, format_char_sequence="QdddBBBd")
            point3D_id = binary_point_line_properties[0]
            xyz = np.array(binary_point_line_properties[1:4])
            rgb = np.array(binary_point_line_properties[4:7])
            error = np.array(binary_point_line_properties[7])
            track_length = read_next_bytes(
                fid, num_bytes=8, format_char_sequence="Q")[0]
            # Each track element is an (image_id, point2D_idx) pair of int32.
            track_elems = read_next_bytes(
                fid, num_bytes=8*track_length,
                format_char_sequence="ii"*track_length)
            image_ids = np.array(tuple(map(int, track_elems[0::2])))
            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id, xyz=xyz, rgb=rgb,
                error=error, image_ids=image_ids,
                point2D_idxs=point2D_idxs)
    return points3D
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read and unpack the next bytes from a binary file.
    :param fid: file object opened in binary mode.
    :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: Any of {@, =, <, >, !}
    :return: Tuple of read and unpacked values.
    :raises struct.error: if fewer than the expected bytes could be read.
    """
    data = fid.read(num_bytes)
    return struct.unpack(endian_character + format_char_sequence, data)
def read_model(path, ext):
    """Read the cameras, images and points of a COLMAP model folder.

    Args:
        path: folder containing ``cameras``, ``images`` and ``points3D`` files.
        ext: ``".txt"`` selects the text readers; any other value selects the
            binary readers.

    Returns:
        Tuple ``(cameras, images, points3D)`` of dicts keyed by id.
    """
    if ext == ".txt":
        cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
        images = read_images_text(os.path.join(path, "images" + ext))
        points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
    else:
        # Without this branch the binary readers would also run on text files.
        cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
        images = read_images_binary(os.path.join(path, "images" + ext))
        points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
    return cameras, images, points3D
def read_points3D_text(path):
    """Read a COLMAP ``points3D.txt`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)

    :param path: path of the text points file.
    :return: dict mapping point id to a Point3D record.
    """
    points3D = {}
    with open(path, "r") as fid:
        # Iterating the file ends at EOF, replacing the readline loop whose
        # terminating `break` was missing.
        for line in fid:
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                point3D_id = int(elems[0])
                xyz = np.array(tuple(map(float, elems[1:4])))
                rgb = np.array(tuple(map(int, elems[4:7])))
                error = float(elems[7])
                # The track follows as alternating image id / 2D point index.
                image_ids = np.array(tuple(map(int, elems[8::2])))
                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
                points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
                                               error=error, image_ids=image_ids,
                                               point2D_idxs=point2D_idxs)
    return points3D
def read_cameras_text(path):
    """Read a COLMAP ``cameras.txt`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)

    :param path: path of the text cameras file.
    :return: dict mapping camera id to a Camera record.
    """
    cameras = {}
    with open(path, "r") as fid:
        # Iterating the file ends at EOF, replacing the readline loop whose
        # terminating `break` was missing.
        for line in fid:
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                camera_id = int(elems[0])
                model = elems[1]
                width = int(elems[2])
                height = int(elems[3])
                # Everything after the size is the model-specific parameter list.
                params = np.array(tuple(map(float, elems[4:])))
                cameras[camera_id] = Camera(id=camera_id, model=model,
                                            width=width, height=height,
                                            params=params)
    return cameras
def read_cameras_binary(path_to_model_file):
    """Read a COLMAP ``cameras.bin`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)

    :param path_to_model_file: path of the binary cameras file.
    :return: dict mapping camera id to a Camera record.
    """
    cameras = {}
    with open(path_to_model_file, "rb") as fid:
        num_cameras = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_cameras):
            # camera id (i), model id (i), width (Q), height (Q) = 24 bytes
            camera_properties = read_next_bytes(
                fid, num_bytes=24, format_char_sequence="iiQQ")
            camera_id = camera_properties[0]
            model_id = camera_properties[1]
            camera_model = CAMERA_MODEL_IDS[model_id]
            model_name = camera_model.model_name
            width = camera_properties[2]
            height = camera_properties[3]
            # The parameter count depends on the camera model; each is a double.
            num_params = camera_model.num_params
            params = read_next_bytes(fid, num_bytes=8*num_params,
                                     format_char_sequence="d"*num_params)
            cameras[camera_id] = Camera(id=camera_id,
                                        model=model_name,
                                        width=width,
                                        height=height,
                                        params=np.array(params))
        assert len(cameras) == num_cameras
    return cameras
def read_images_text(path):
    """Read a COLMAP ``images.txt`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)

    :param path: path of the text images file.
    :return: dict mapping image id to an Image record.
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                # End of file.
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                # First line of a pair: id, quaternion, translation, camera, name.
                elems = line.split()
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                # Second line of the pair: (x, y, point3D_id) triplets.
                elems = fid.readline().split()
                xys = np.column_stack([tuple(map(float, elems[0::3])),
                                       tuple(map(float, elems[1::3]))])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id, qvec=qvec, tvec=tvec,
                    camera_id=camera_id, name=image_name,
                    xys=xys, point3D_ids=point3D_ids)
    return images
def read_images_binary(path_to_model_file):
    """Read a COLMAP ``images.bin`` file.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)

    :param path_to_model_file: path of the binary images file.
    :return: dict mapping image id to an Image record.
    """
    images = {}
    with open(path_to_model_file, "rb") as fid:
        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_reg_images):
            # image id (i), qvec (dddd), tvec (ddd), camera id (i) = 64 bytes
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence="idddddddi")
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            # The name is NUL-terminated. Collect the raw bytes and decode
            # once, so multi-byte UTF-8 characters are not decoded byte by byte.
            name_bytes = bytearray()
            current_char = read_next_bytes(fid, 1, "c")[0]
            while current_char != b"\x00":   # look for the ASCII 0 entry
                name_bytes += current_char
                current_char = read_next_bytes(fid, 1, "c")[0]
            image_name = name_bytes.decode("utf-8")
            num_points2D = read_next_bytes(fid, num_bytes=8,
                                           format_char_sequence="Q")[0]
            # Each 2D point is x (d), y (d), point3D id (q) = 24 bytes.
            x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
                                       format_char_sequence="ddq"*num_points2D)
            xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
                                   tuple(map(float, x_y_id_s[1::3]))])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images
if __name__=='__main__':
    # Pipeline: optional video -> frames, optional masking with rembg,
    # COLMAP + LLFF poses, optional NeRF/instant-ngp JSON export.
    if args.video_in != "":
        run_ffmpeg(args)

    if args.mask != "":
        # Imported lazily so rembg is only required when masking is requested.
        from rembg import remove
        if args.alpha != "":
            print("applying alpha masks, images will be saved as _alpha.png (this is VERY slow)")
            os.makedirs(os.path.normpath(f"{args.scenedir}/images_alpha/"), exist_ok=True)
            for img in os.listdir(os.path.normpath(f"{args.scenedir}/{args.images}/")):
                with open(os.path.normpath(f"{args.scenedir}/{args.images}/{img}"), "rb") as i:
                    im = i.read()
                try:
                    output = remove(im, alpha_matting=True)
                except Exception:
                    # Fall back to the plain mask when alpha matting fails.
                    print(f"matting failed for {img}, applying mask")
                    output = remove(im)
                with open(os.path.normpath(f"{args.scenedir}/images_alpha/{img}_alpha.png"), 'wb') as o:
                    o.write(output)
                print(f"{img}_alpha.png done")
            # NOTE(review): the statements guarded by this prompt were partly
            # missing; switching to images_alpha only when the user confirms
            # is assumed - verify against the intended behaviour.
            if (input(f"please filter images with bad alpha in images_alpha. continue? (Y/n)").lower().strip()+"n")[:1] != "n":
                print('images_alpha is the new default image folder, please do not rename it or the files inside after colmapping')
                args.images = "images_alpha"
        else:
            print("producing mask images")
            os.makedirs(os.path.normpath(f"{args.scenedir}/masks/"), exist_ok=True)
            for img in os.listdir(os.path.normpath(f"{args.scenedir}/{args.images}/")):
                im = cv2.imread(os.path.normpath(f"{args.scenedir}/{args.images}/{img}"))
                output = remove(im, only_mask=True)
                cv2.imwrite(os.path.normpath(f"{args.scenedir}/masks/{img}"), output)
                print(f"{img} done")

    # Resolve the COLMAP executable.
    if args.colmap_path != '':
        if os.name == 'nt' and "COLMAP.bat" not in args.colmap_path:
            sys.exit("colmap path fail")
        elif os.path.exists(args.colmap_path):
            print ('valid custom colmap path!')
            colmap_path = os.path.normpath(args.colmap_path)
            print("using colmap path:", colmap_path)
        else: sys.exit("colmap path fail")
    elif os.name == 'nt':
        colmap_path = "colmap.bat"
    else:
        # Without this branch colmap_path is undefined on non-Windows systems.
        colmap_path = "colmap"

    gen_poses(args.scenedir, args.match_type, colmap_path)

    # `is True` rather than truthiness: the flag's default may be the
    # non-empty string 'false'.
    if args.json is True:
        convert_to_json(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment