Created
February 21, 2022 15:14
-
-
Save fkluger/6ef0199d2e06000bfc1188dcfa73c1f4 to your computer and use it in GitHub Desktop.
Modified test_simple.py from monodepth2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright Niantic 2019. Patent Pending. All rights reserved. | |
# | |
# This software is licensed under the terms of the Monodepth2 licence | |
# which allows for non-commercial use only, the full terms of which are made | |
# available in the LICENSE file. | |
from __future__ import absolute_import, division, print_function | |
import os | |
import argparse | |
import numpy as np | |
import utils | |
import torch | |
import networks | |
from layers import disp_to_depth | |
from utils import download_model_if_doesnt_exist | |
from evaluate_depth import STEREO_SCALE_FACTOR | |
from datasets.kitti_dataset import KITTIRAWDataset | |
def parse_args():
    """Parse the command-line options of this script from ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``image_path``, ``image_file``,
        ``out_folder`` (all required strings), ``model_name`` (one of the
        listed pretrained models, or ``None`` when omitted), ``ext``
        (defaults to ``"jpg"``) and the boolean switches ``no_cuda``,
        ``pred_metric_depth`` and ``pred_depth``.
    """
    pretrained_models = [
        "mono_640x192",
        "stereo_640x192",
        "mono+stereo_640x192",
        "mono_no_pt_640x192",
        "stereo_no_pt_640x192",
        "mono+stereo_no_pt_640x192",
        "mono_1024x320",
        "stereo_1024x320",
        "mono+stereo_1024x320",
    ]

    # (flag, help text) pairs for the mandatory string-valued locations.
    required_locations = (
        ("--image_path", "path to a test image or folder of images"),
        ("--image_file", "path to a file with all relative Images to image_path"),
        ("--out_folder", "path to where datas are saved"),
    )

    # (flag, help text) pairs for the on/off switches; all default to False.
    switches = (
        ("--no_cuda", "if set, disables CUDA"),
        ("--pred_metric_depth",
         "if set, predicts metric depth instead of disparity. (This only "
         "makes sense for stereo-trained KITTI models)."),
        ("--pred_depth", "if set, predicts depth instead of disparity"),
    )

    cli = argparse.ArgumentParser(
        description="Simple testing funtion for Monodepthv2 models.")

    # Registration order is kept as-is so the generated --help text is unchanged.
    for flag, description in required_locations:
        cli.add_argument(flag, type=str, help=description, required=True)

    cli.add_argument("--model_name", type=str,
                     help="name of a pretrained model to use",
                     choices=pretrained_models)
    cli.add_argument("--ext", type=str,
                     help="image extension to search for in folder",
                     default="jpg")

    for flag, description in switches:
        cli.add_argument(flag, help=description, action="store_true")

    return cli.parse_args()
def test_simple(args):
    """Run a pretrained model over the listed frames and save the predictions.

    The encoder and depth decoder weights are loaded from
    ``models/<args.model_name>``. Every entry of ``args.image_file`` is read
    through ``KITTIRAWDataset`` rooted at ``args.image_path``. For each frame
    and each of the four decoder scales one ``.npy`` file is written to
    ``<args.out_folder>/<folder>/<frame>_<side>_<scale>_{disp,depth}.npy``,
    where ``folder``, ``frame`` and ``side`` are the first three
    space-separated fields of the corresponding line in ``args.image_file``.

    Args:
        args: namespace as produced by ``parse_args``. The attributes read
            are ``model_name``, ``no_cuda``, ``pred_metric_depth``,
            ``pred_depth``, ``image_path``, ``image_file``, ``out_folder``
            and ``ext``.

    Raises:
        AssertionError: if ``args.model_name`` is ``None``.
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    if args.pred_metric_depth and "stereo" not in args.model_name:
        print("Warning: The --pred_metric_depth flag only makes sense for stereo-trained KITTI "
              "models. For mono-trained models, output depths will not in metric space.")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']

    # The checkpoint holds extra entries (e.g. 'height', 'width'); keep only
    # the keys the encoder itself defines.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder_keys}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    scales = range(4)
    depth_decoder = networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=scales)
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    filenames = utils.readlines(args.image_file)

    # Feed the network at the resolution stored in the checkpoint instead of
    # a fixed 640x192, so the 1024x320 models get correctly sized input, and
    # honour --ext (default "jpg", i.e. ".jpg" as before).
    img_ext = "." + args.ext.lstrip(".")
    dataset = KITTIRAWDataset(data_path=args.image_path, filenames=filenames, frame_idxs=[0],
                              num_scales=1, height=feed_height, width=feed_width,
                              img_ext=img_ext)

    output_directory = args.out_folder
    os.makedirs(output_directory, exist_ok=True)

    with torch.no_grad():
        for idx, data in enumerate(dataset):
            # Line layout: "<folder> <frame> <side>".
            fields = filenames[idx].split(" ")
            frame_directory = os.path.join(output_directory, fields[0])
            # Always create the per-sequence folder under the output root; the
            # previous existence check looked at the path relative to the
            # current working directory and could skip the creation.
            os.makedirs(frame_directory, exist_ok=True)

            # Move to the selected device (an unconditional .cuda() breaks
            # CPU-only runs and ignores --no_cuda).
            input_image = data[("color", 0, 0)].unsqueeze(0).to(device)

            # PREDICTION
            features = encoder(input_image)
            outputs = depth_decoder(features)

            for scale in scales:
                disp = outputs[("disp", scale)]
                output_name = fields[1] + "_" + fields[2] + "_%d" % scale
                # 0.1 and 100 are presumably the min/max depth bounds -- confirm
                # against layers.disp_to_depth.
                scaled_disp, depth = disp_to_depth(disp, 0.1, 100)

                if args.pred_metric_depth:
                    suffix = "depth"
                    prediction = STEREO_SCALE_FACTOR * depth.cpu().numpy()
                elif args.pred_depth:
                    suffix = "depth"
                    prediction = depth.cpu().numpy()
                else:
                    suffix = "disp"
                    prediction = scaled_disp.cpu().numpy()

                name_dest_npy = os.path.join(
                    frame_directory, "{}_{}.npy".format(output_name, suffix))
                np.save(name_dest_npy, prediction)

            print(" Processed {:d} of {:d} images - saved predictions to:".format(
                idx + 1, len(filenames)))
            print(" - {}".format(name_dest_npy))

    print('-> Done!')
# Script entry point: parse the command line and run the predictions.
if __name__ == "__main__":
    test_simple(parse_args())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment