@Ednaordinary
Created June 18, 2023 22:06
Immediate NeRF from webcams
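What follows is a lightly modified copy of instant-ngp's scripts/run.py. The added capturecam() function grabs stills from a set of V4L2 webcams via ffmpeg, and the training loop reloads the scene's training data every time a capture pass finishes, so the NeRF keeps retraining on fresh camera frames.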
#!/usr/bin/env python3
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
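
# Example invocation (a sketch added for illustration, not part of the original
# gist). It assumes this file sits in instant-ngp's scripts/ directory so that
# `common`, `scenes`, and the compiled `pyngp` module are importable, that the
# camera indices in capturecam() below have been filled in, and that the scene
# directory contains a transforms.json whose frames reference the captured
# images (written by startcam() as <num>.png in `directory`):
#
#   python3 scripts/run_webcam.py --scene /path/to/webcam_scene --gui --train
#
# The filename run_webcam.py and the scene path are placeholders.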
import argparse
import os
import commentjson as json
import numpy as np
import shutil
import time
import subprocess
import threading
from common import *
from scenes import *
from tqdm import tqdm
import pyngp as ngp # noqa
def parse_args():
    parser = argparse.ArgumentParser(description="Run instant neural graphics primitives with additional configuration & output options")

    parser.add_argument("files", nargs="*", help="Files to be loaded. Can be a scene, network config, snapshot, camera path, or a combination of those.")

    parser.add_argument("--scene", "--training_data", default="", help="The scene to load. Can be the scene's name or a full path to the training data. Can be NeRF dataset, a *.obj/*.stl mesh for training a SDF, an image, or a *.nvdb volume.")
    parser.add_argument("--mode", default="", type=str, help=argparse.SUPPRESS) # deprecated
    parser.add_argument("--network", default="", help="Path to the network config. Uses the scene's default if unspecified.")

    parser.add_argument("--load_snapshot", "--snapshot", default="", help="Load this snapshot before training. recommended extension: .ingp/.msgpack")
    parser.add_argument("--save_snapshot", default="", help="Save this snapshot after training. recommended extension: .ingp/.msgpack")

    parser.add_argument("--nerf_compatibility", action="store_true", help="Matches parameters with original NeRF. Can cause slowness and worse results on some scenes, but helps with high PSNR on synthetic scenes.")
    parser.add_argument("--test_transforms", default="", help="Path to a nerf style transforms json from which we will compute PSNR.")
    parser.add_argument("--near_distance", default=-1, type=float, help="Set the distance from the camera at which training rays start for nerf. <0 means use ngp default")
    parser.add_argument("--exposure", default=0.0, type=float, help="Controls the brightness of the image. Positive numbers increase brightness, negative numbers decrease it.")

    parser.add_argument("--screenshot_transforms", default="", help="Path to a nerf style transforms.json from which to save screenshots.")
    parser.add_argument("--screenshot_frames", nargs="*", help="Which frame(s) to take screenshots of.")
    parser.add_argument("--screenshot_dir", default="", help="Which directory to output screenshots to.")
    parser.add_argument("--screenshot_spp", type=int, default=16, help="Number of samples per pixel in screenshots.")

    parser.add_argument("--video_camera_path", default="", help="The camera path to render, e.g., base_cam.json.")
    parser.add_argument("--video_camera_smoothing", action="store_true", help="Applies additional smoothing to the camera trajectory with the caveat that the endpoint of the camera path may not be reached.")
    parser.add_argument("--video_fps", type=int, default=60, help="Number of frames per second.")
    parser.add_argument("--video_n_seconds", type=int, default=1, help="Number of seconds the rendered video should be long.")
    parser.add_argument("--video_render_range", type=int, nargs=2, default=(-1, -1), metavar=("START_FRAME", "END_FRAME"), help="Limit output to frames between START_FRAME and END_FRAME (inclusive)")
    parser.add_argument("--video_spp", type=int, default=8, help="Number of samples per pixel. A larger number means less noise, but slower rendering.")
    parser.add_argument("--video_output", type=str, default="video.mp4", help="Filename of the output video (video.mp4) or video frames (video_%%04d.png).")

    parser.add_argument("--save_mesh", default="", help="Output a marching-cubes based mesh from the NeRF or SDF model. Supports OBJ and PLY format.")
    parser.add_argument("--marching_cubes_res", default=256, type=int, help="Sets the resolution for the marching cubes grid.")

    parser.add_argument("--width", "--screenshot_w", type=int, default=0, help="Resolution width of GUI and screenshots.")
    parser.add_argument("--height", "--screenshot_h", type=int, default=0, help="Resolution height of GUI and screenshots.")

    parser.add_argument("--gui", action="store_true", help="Run the testbed GUI interactively.")
    parser.add_argument("--train", action="store_true", help="If the GUI is enabled, controls whether training starts immediately.")
    parser.add_argument("--n_steps", type=int, default=-1, help="Number of steps to train for before quitting.")
    parser.add_argument("--second_window", action="store_true", help="Open a second window containing a copy of the main output.")
    parser.add_argument("--vr", action="store_true", help="Render to a VR headset.")

    parser.add_argument("--sharpen", default=0, help="Set amount of sharpening applied to NeRF training images. Range 0.0 to 1.0.")

    return parser.parse_args()

def get_scene(scene):
    for scenes in [scenes_sdf, scenes_nerf, scenes_image, scenes_volume]:
        if scene in scenes:
            return scenes[scene]
    return None

def capturecam():
    # /dev/video indices of the webcams to capture from; fill in by hand
    # (use `v4l2-ctl --list-devices` to find them).
    cams = []

    # Split the camera list into four roughly equal groups so that four
    # capture threads can grab frames in parallel.
    cams_1 = cams[:len(cams)//2]
    cams1_1 = cams_1[:len(cams_1)//2]
    cams1_2 = cams_1[len(cams_1)//2:]
    cams_2 = cams[len(cams)//2:]
    cams2_1 = cams_2[:len(cams_2)//2]
    cams2_2 = cams_2[len(cams_2)//2:]

    def startcam(cam, num):
        # Grab a still frame from /dev/video<cam> with ffmpeg and write it to <num>.png in `directory`.
        directory = "/"
        subprocess.call("ffmpeg -y -f v4l2 -input_format mjpeg -framerate 30 -video_size 2592x1944 -i /dev/video" + str(cam) + " -c:v copy -update 1 -vframes 2 " + directory + str(num) + ".png", shell=True)

    def camloop(cams, num):
        # Capture each camera in the group sequentially, numbering the output
        # images consecutively starting at `num`.
        for i in cams:
            startcam(i, num)
            num += 1

    # One thread per camera group; each group's starting image number is offset
    # so the output filenames do not collide.
    threads = []
    threads.append(threading.Thread(target=camloop, args=[cams1_1, 0]))
    threads.append(threading.Thread(target=camloop, args=[cams2_1, len(cams1_1) + len(cams1_2)]))
    threads.append(threading.Thread(target=camloop, args=[cams1_2, len(cams1_1)]))
    threads.append(threading.Thread(target=camloop, args=[cams2_2, len(cams1_1) + len(cams1_2) + len(cams2_1)]))
    for x in threads:
        x.start()
    for x in threads:
        x.join()

if __name__ == "__main__":
    args = parse_args()

    if args.vr: # VR implies having the GUI running at the moment
        args.gui = True

    if args.mode:
        print("Warning: the '--mode' argument is no longer in use. It has no effect. The mode is automatically chosen based on the scene.")

    testbed = ngp.Testbed()
    testbed.root_dir = ROOT_DIR

    for file in args.files:
        scene_info = get_scene(file)
        if scene_info:
            file = os.path.join(scene_info["data_dir"], scene_info["dataset"])
        testbed.load_file(file)

    if args.scene:
        scene_info = get_scene(args.scene)
        if scene_info is not None:
            args.scene = os.path.join(scene_info["data_dir"], scene_info["dataset"])
            if not args.network and "network" in scene_info:
                args.network = scene_info["network"]
        testbed.load_training_data(args.scene)

    if args.gui:
        # Pick a sensible GUI resolution depending on arguments.
        sw = args.width or 1920
        sh = args.height or 1080
        while sw * sh > 1920 * 1080 * 4:
            sw = int(sw / 2)
            sh = int(sh / 2)
        testbed.init_window(sw, sh, second_window=args.second_window)
        if args.vr:
            testbed.init_vr()

    if args.load_snapshot:
        scene_info = get_scene(args.load_snapshot)
        if scene_info is not None:
            args.load_snapshot = default_snapshot_filename(scene_info)
        testbed.load_snapshot(args.load_snapshot)
    elif args.network:
        testbed.reload_network_from_file(args.network)

    ref_transforms = {}
    if args.screenshot_transforms: # try to load the given file straight away
        print("Screenshot transforms from ", args.screenshot_transforms)
        with open(args.screenshot_transforms) as f:
            ref_transforms = json.load(f)

    if testbed.mode == ngp.TestbedMode.Sdf:
        testbed.tonemap_curve = ngp.TonemapCurve.ACES

    testbed.nerf.sharpen = float(args.sharpen)
    testbed.exposure = args.exposure
    testbed.shall_train = args.train if args.gui else True
    testbed.nerf.render_with_lens_distortion = True

    network_stem = os.path.splitext(os.path.basename(args.network))[0] if args.network else "base"
    if testbed.mode == ngp.TestbedMode.Sdf:
        setup_colored_sdf(testbed, args.scene)

    if args.near_distance >= 0.0:
        print("NeRF training ray near_distance ", args.near_distance)
        testbed.nerf.training.near_distance = args.near_distance

    if args.nerf_compatibility:
        print(f"NeRF compatibility mode enabled")

        # Prior nerf papers accumulate/blend in the sRGB
        # color space. This messes not only with background
        # alpha, but also with DOF effects and the likes.
        # We support this behavior, but we only enable it
        # for the case of synthetic nerf data where we need
        # to compare PSNR numbers to results of prior work.
        testbed.color_space = ngp.ColorSpace.SRGB

        # No exponential cone tracing. Slightly increases
        # quality at the cost of speed. This is done by
        # default on scenes with AABB 1 (like the synthetic
        # ones), but not on larger scenes. So force the
        # setting here.
        testbed.nerf.cone_angle_constant = 0

        # Match nerf paper behaviour and train on a fixed bg.
        testbed.nerf.training.random_bg_color = False

    old_training_step = 0
    n_steps = args.n_steps

    # If we loaded a snapshot, didn't specify a number of steps, _and_ didn't open a GUI,
    # don't train by default and instead assume that the goal is to render screenshots,
    # compute PSNR, or render a video.
    if n_steps < 0 and (not args.load_snapshot or args.gui):
        n_steps = 35000

    tqdm_last_update = 0
    # Run one full webcam capture pass and wait for it to finish before training
    # starts, so that the first batch of training images exists on disk.
    camprocess = threading.Thread(target=capturecam)
    camprocess.start()
    while camprocess.is_alive():
        time.sleep(0.01)

    if n_steps > 0:
        with tqdm(desc="Training", total=n_steps, unit="steps") as t:
            while testbed.frame():
                if testbed.want_repl():
                    repl(testbed)

                # What will happen when training is done?
                if testbed.training_step >= n_steps:
                    if args.gui:
                        testbed.shall_train = False
                    else:
                        break

                # Update progress bar
                if testbed.training_step < old_training_step or old_training_step == 0:
                    old_training_step = 0
                    t.reset()

                # Once the previous capture pass has finished, reload the training
                # data (picking up the freshly written webcam frames) and start the
                # next capture pass in the background.
                #if not testbed.training_step % 25:
                if not camprocess.is_alive():
                    if args.scene:
                        scene_info = get_scene(args.scene)
                        if scene_info is not None:
                            args.scene = os.path.join(scene_info["data_dir"], scene_info["dataset"])
                            if not args.network and "network" in scene_info:
                                args.network = scene_info["network"]
                        testbed.load_training_data(args.scene)
                    camprocess = threading.Thread(target=capturecam)
                    camprocess.start()

                now = time.monotonic()
                if now - tqdm_last_update > 0.1:
                    t.update(testbed.training_step - old_training_step)
                    t.set_postfix(loss=testbed.loss)
                    old_training_step = testbed.training_step
                    tqdm_last_update = now

    if args.save_snapshot:
        testbed.save_snapshot(args.save_snapshot, False)

    if args.test_transforms:
        print("Evaluating test transforms from ", args.test_transforms)
        with open(args.test_transforms) as f:
            test_transforms = json.load(f)
        data_dir = os.path.dirname(args.test_transforms)
        totmse = 0
        totpsnr = 0
        totssim = 0
        totcount = 0
        minpsnr = 1000
        maxpsnr = 0

        # Evaluate metrics on black background
        testbed.background_color = [0.0, 0.0, 0.0, 1.0]

        # Prior nerf papers don't typically do multi-sample anti aliasing.
        # So snap all pixels to the pixel centers.
        testbed.snap_to_pixel_centers = True
        spp = 8

        testbed.nerf.render_min_transmittance = 1e-4

        testbed.shall_train = False
        testbed.load_training_data(args.test_transforms)

        with tqdm(range(testbed.nerf.training.dataset.n_images), unit="images", desc=f"Rendering test frame") as t:
            for i in t:
                resolution = testbed.nerf.training.dataset.metadata[i].resolution
                testbed.render_ground_truth = True
                testbed.set_camera_to_training_view(i)
                ref_image = testbed.render(resolution[0], resolution[1], 1, True)
                testbed.render_ground_truth = False
                image = testbed.render(resolution[0], resolution[1], spp, True)

                if i == 0:
                    write_image(f"ref.png", ref_image)
                    write_image(f"out.png", image)

                    diffimg = np.absolute(image - ref_image)
                    diffimg[...,3:4] = 1.0
                    write_image("diff.png", diffimg)

                A = np.clip(linear_to_srgb(image[...,:3]), 0.0, 1.0)
                R = np.clip(linear_to_srgb(ref_image[...,:3]), 0.0, 1.0)
                mse = float(compute_error("MSE", A, R))
                ssim = float(compute_error("SSIM", A, R))
                totssim += ssim
                totmse += mse
                psnr = mse2psnr(mse)
                totpsnr += psnr
                minpsnr = psnr if psnr < minpsnr else minpsnr
                maxpsnr = psnr if psnr > maxpsnr else maxpsnr
                totcount = totcount + 1
                t.set_postfix(psnr=totpsnr/(totcount or 1))

        psnr_avgmse = mse2psnr(totmse/(totcount or 1))
        psnr = totpsnr/(totcount or 1)
        ssim = totssim/(totcount or 1)
        print(f"PSNR={psnr} [min={minpsnr} max={maxpsnr}] SSIM={ssim}")

    if args.save_mesh:
        res = args.marching_cubes_res or 256
        print(f"Generating mesh via marching cubes and saving to {args.save_mesh}. Resolution=[{res},{res},{res}]")
        testbed.compute_and_save_marching_cubes_mesh(args.save_mesh, [res, res, res])

    if ref_transforms:
        testbed.fov_axis = 0
        testbed.fov = ref_transforms["camera_angle_x"] * 180 / np.pi
        if not args.screenshot_frames:
            args.screenshot_frames = range(len(ref_transforms["frames"]))
        print(args.screenshot_frames)
        for idx in args.screenshot_frames:
            f = ref_transforms["frames"][int(idx)]
            cam_matrix = f["transform_matrix"]
            testbed.set_nerf_camera_matrix(np.matrix(cam_matrix)[:-1,:])
            outname = os.path.join(args.screenshot_dir, os.path.basename(f["file_path"]))

            # Some NeRF datasets lack the .png suffix in the dataset metadata
            if not os.path.splitext(outname)[1]:
                outname = outname + ".png"

            print(f"rendering {outname}")
            image = testbed.render(args.width or int(ref_transforms["w"]), args.height or int(ref_transforms["h"]), args.screenshot_spp, True)
            os.makedirs(os.path.dirname(outname), exist_ok=True)
            write_image(outname, image)
    elif args.screenshot_dir:
        outname = os.path.join(args.screenshot_dir, args.scene + "_" + network_stem)
        print(f"Rendering {outname}.png")
        image = testbed.render(args.width or 1920, args.height or 1080, args.screenshot_spp, True)
        if os.path.dirname(outname) != "":
            os.makedirs(os.path.dirname(outname), exist_ok=True)
        write_image(outname + ".png", image)

    if args.video_camera_path:
        testbed.load_camera_path(args.video_camera_path)

        resolution = [args.width or 1920, args.height or 1080]
        n_frames = args.video_n_seconds * args.video_fps
        save_frames = "%" in args.video_output
        start_frame, end_frame = args.video_render_range

        if "tmp" in os.listdir():
            shutil.rmtree("tmp")
        os.makedirs("tmp")

        for i in tqdm(list(range(min(n_frames, n_frames+1))), unit="frames", desc=f"Rendering video"):
            testbed.camera_smoothing = args.video_camera_smoothing

            if start_frame >= 0 and i < start_frame:
                # For camera smoothing and motion blur to work, we cannot just start rendering
                # from middle of the sequence. Instead we render a very small image and discard it
                # for these initial frames.
                # TODO Replace this with a no-op render method once it's available
                frame = testbed.render(32, 32, 1, True, float(i)/n_frames, float(i + 1)/n_frames, args.video_fps, shutter_fraction=0.5)
                continue
            elif end_frame >= 0 and i > end_frame:
                continue

            frame = testbed.render(resolution[0], resolution[1], args.video_spp, True, float(i)/n_frames, float(i + 1)/n_frames, args.video_fps, shutter_fraction=0.5)
            if save_frames:
                write_image(args.video_output % i, np.clip(frame * 2**args.exposure, 0.0, 1.0), quality=100)
            else:
                write_image(f"tmp/{i:04d}.jpg", np.clip(frame * 2**args.exposure, 0.0, 1.0), quality=100)

        if not save_frames:
            os.system(f"ffmpeg -y -framerate {args.video_fps} -i tmp/%04d.jpg -c:v libx264 -pix_fmt yuv420p {args.video_output}")

        shutil.rmtree("tmp")
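A note on the data flow: each capture pass overwrites the numbered stills (0.png, 1.png, ...) in the directory configured inside startcam(), and the training loop calls testbed.load_training_data(args.scene) whenever a pass completes. Camera poses are presumably prepared once ahead of time (e.g. with instant-ngp's scripts/colmap2nerf.py) in a transforms.json that keeps pointing at those fixed filenames as they are refreshed.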