@wangg12
Last active January 2, 2023 21:34
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import math
import numpy as np
from typing import Tuple
import torch
import torch.nn.functional as F
from pytorch3d.transforms import Rotate, Transform3d, Translate
from .utils import TensorProperties, convert_to_tensors_and_broadcast
# Default values for rotation and translation matrices.
r = np.expand_dims(np.eye(3), axis=0) # (1, 3, 3)
t = np.expand_dims(np.zeros(3), axis=0) # (1, 3)
class OpenGLRealPerspectiveCameras(TensorProperties):
"""
A class which stores a batch of parameters to generate a batch of
projection matrices using the OpenGL convention for a perspective camera.
The extrinsics of the camera (R and T matrices) can also be set in the
initializer or passed in to `get_full_projection_transform` to get
the full transformation from world -> screen.
The `transform_points` method calculates the full world -> screen transform
and then applies it to the input points.
The transforms can also be returned separately as Transform3d objects.
"""
def __init__(
self,
focal_length=1.0,
principal_point=((0.0, 0.0),),
R=r,
T=t,
znear=0.01,
zfar=100.0,
x0=0,
y0=0,
w=640,
h=480,
device="cpu",
):
"""
__init__(self, znear, zfar, R, T, device) -> None # noqa
Args:
znear: near clipping plane of the view frustrum.
zfar: far clipping plane of the view frustrum.
R: Rotation matrix of shape (N, 3, 3)
T: Translation matrix of shape (N, 3)
device: torch.device or string
"""
# The initializer formats all inputs to torch tensors and broadcasts
# all the inputs to have the same batch dimension where necessary.
super().__init__(
device=device,
focal_length=focal_length,
principal_point=principal_point,
R=R,
T=T,
znear=znear,
zfar=zfar,
x0=x0,
y0=y0,
h=h,
w=w,
)
def get_projection_transform(self, **kwargs) -> Transform3d:
"""
        Calculate the OpenGL perspective projection matrix with a symmetric
        viewing frustum. Use column major order.
Args:
**kwargs: parameters for the projection can be passed in as keyword
arguments to override the default values set in `__init__`.
        Returns:
            P: a Transform3d object which represents a batch of projection
            matrices of shape (N, 4, 4)
.. code-block:: python
q = -(far + near)/(far - near)
qn = -2*far*near/(far-near)
P.T = [
[2*fx/w, 0, 0, 0],
[0, -2*fy/h, 0, 0],
[(2*px-w)/w, (-2*py+h)/h, -q, 1],
[0, 0, qn, 0],
]
            (depending on conventions, some implementations additionally flip
            signs, e.g. P[2, :] *= -1 or P[1, :] *= -1)
"""
znear = kwargs.get("znear", self.znear) # pyre-ignore[16]
zfar = kwargs.get("zfar", self.zfar) # pyre-ignore[16]
x0 = kwargs.get("x0", self.x0) # pyre-ignore[16]
y0 = kwargs.get("y0", self.y0) # pyre-ignore[16]
w = kwargs.get("w", self.w) # pyre-ignore[16]
h = kwargs.get("h", self.h) # pyre-ignore[16]
principal_point = kwargs.get(
"principal_point", self.principal_point
) # pyre-ignore[16]
focal_length = kwargs.get(
"focal_length", self.focal_length
) # pyre-ignore[16]
if not torch.is_tensor(focal_length):
focal_length = torch.tensor(focal_length, device=self.device)
if len(focal_length.shape) in (0, 1) or focal_length.shape[1] == 1:
fx = fy = focal_length
else:
fx, fy = focal_length.unbind(1)
if not torch.is_tensor(principal_point):
principal_point = torch.tensor(principal_point, device=self.device)
px, py = principal_point.unbind(1)
P = torch.zeros(
(self._N, 4, 4), device=self.device, dtype=torch.float32
)
ones = torch.ones((self._N), dtype=torch.float32, device=self.device)
        # NOTE: In OpenGL the projection matrix changes the handedness of the
        # coordinate frame, i.e. the NDC-space positive z direction is the
        # camera-space negative z direction. This is because the sign of the z
        # in the projection matrix is set to -1.0.
        # In pytorch3d we maintain a right-handed coordinate system throughout,
        # so the z sign is 1.0.
z_sign = 1.0
# define P.T directly
P[:, 0, 0] = 2.0 * fx / w
P[:, 1, 1] = -2.0 * fy / h
P[:, 2, 0] = -(-2 * px + w + 2 * x0) / w
P[:, 2, 1] = -(+2 * py - h + 2 * y0) / h
P[:, 2, 3] = z_sign * ones
# NOTE: This part of the matrix is for z renormalization in OpenGL
# which maps the z to [-1, 1]. This won't work yet as the torch3d
# rasterizer ignores faces which have z < 0.
# P[:, 2, 2] = z_sign * (far + near) / (far - near)
# P[:, 2, 3] = -2.0 * far * near / (far - near)
# P[:, 2, 3] = z_sign * torch.ones((N))
# NOTE: This maps the z coordinate from [0, 1] where z = 0 if the point
# is at the near clipping plane and z = 1 when the point is at the far
# clipping plane. This replaces the OpenGL z normalization to [-1, 1]
# until rasterization is changed to clip at z = -1.
P[:, 2, 2] = z_sign * zfar / (zfar - znear)
P[:, 3, 2] = -(zfar * znear) / (zfar - znear)
        # OpenGL uses column vectors whereas pytorch3d uses row vectors, so P
        # above was constructed directly as the transpose of the OpenGL
        # projection matrix.
transform = Transform3d(device=self.device)
transform._matrix = P
return transform
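    # Depth-mapping sanity check for get_projection_transform (a sketch,
    # following the row-vector convention where a point [x, y, z, 1] is
    # multiplied on the left of P):
    #   out_z = z * zfar / (zfar - znear) - zfar * znear / (zfar - znear)
    #   out_w = z
    # so the normalized depth out_z / out_w is 0 at z = znear and 1 at
    # z = zfar, matching the [0, 1] mapping described in the NOTE above.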
def clone(self):
other = OpenGLRealPerspectiveCameras(device=self.device)
return super().clone(other)
def get_camera_center(self, **kwargs):
"""
Return the 3D location of the camera optical center
in the world coordinates.
Args:
**kwargs: parameters for the camera extrinsics can be passed in
as keyword arguments to override the default values
set in __init__.
Setting T here will update the values set in init as this
value may be needed later on in the rendering pipeline e.g. for
lighting calculations.
Returns:
C: a batch of 3D locations of shape (N, 3) denoting
the locations of the center of each camera in the batch.
"""
w2v_trans = self.get_world_to_view_transform(**kwargs)
P = w2v_trans.inverse().get_matrix()
# the camera center is the translation component (the first 3 elements
# of the last row) of the inverted world-to-view
# transform (4x4 RT matrix)
C = P[:, 3, :3]
return C
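    # Derivation sketch for get_camera_center (assuming the row-vector
    # convention above): the world-to-view transform maps X to X @ R + T, so
    # the camera center C is the point mapped to the origin, i.e.
    # C @ R + T = 0 and C = -T @ R^{-1}; the last row of the inverted 4x4
    # matrix is exactly -T @ R^{-1}.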
def get_world_to_view_transform(self, **kwargs) -> Transform3d:
"""
Return the world-to-view transform.
Args:
**kwargs: parameters for the camera extrinsics can be passed in
as keyword arguments to override the default values
set in __init__.
Setting R and T here will update the values set in init as these
values may be needed later on in the rendering pipeline e.g. for
lighting calculations.
Returns:
            T: a Transform3d object which represents a batch of transforms
            of shape (N, 4, 4)
"""
R = self.R = kwargs.get("R", self.R) # pyre-ignore[16]
T = self.T = kwargs.get("T", self.T) # pyre-ignore[16]
if T.shape[0] != R.shape[0]:
msg = "Expected R, T to have the same batch dimension; got %r, %r"
raise ValueError(msg % (R.shape[0], T.shape[0]))
if T.dim() != 2 or T.shape[1:] != (3,):
msg = "Expected T to have shape (N, 3); got %r"
raise ValueError(msg % repr(T.shape))
if R.dim() != 3 or R.shape[1:] != (3, 3):
msg = "Expected R to have shape (N, 3, 3); got %r"
raise ValueError(msg % R.shape)
# Create a Transform3d object
T = Translate(T, device=T.device)
R = Rotate(R, device=R.device)
world_to_view_transform = R.compose(T)
return world_to_view_transform
def get_full_projection_transform(self, **kwargs) -> Transform3d:
"""
Return the full world-to-screen transform composing the
world-to-view and view-to-screen transforms.
Args:
**kwargs: parameters for the projection transforms can be passed in
as keyword arguments to override the default values
set in __init__.
Setting R and T here will update the values set in init as these
values may be needed later on in the rendering pipeline e.g. for
lighting calculations.
Returns:
            T: a Transform3d object which represents a batch of transforms
            of shape (N, 4, 4)
"""
self.R = kwargs.get("R", self.R) # pyre-ignore[16]
self.T = kwargs.get("T", self.T) # pyre-ignore[16]
world_to_view_transform = self.get_world_to_view_transform(
R=self.R, T=self.T
)
view_to_screen_transform = self.get_projection_transform(**kwargs)
return world_to_view_transform.compose(view_to_screen_transform)
def transform_points(self, points, **kwargs) -> torch.Tensor:
"""
Transform input points from world to screen space.
Args:
points: torch tensor of shape (..., 3).
        Returns:
new_points: transformed points with the same shape as the input.
"""
world_to_screen_transform = self.get_full_projection_transform(**kwargs)
return world_to_screen_transform.transform_points(points)
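# Minimal usage sketch (commented out so the module stays import-safe). It
# assumes a hypothetical 3x3 pixel-space intrinsic matrix K and mirrors how
# the test script below constructs the camera:
#
#   cameras = OpenGLRealPerspectiveCameras(
#       focal_length=((K[0, 0], K[1, 1]),),     # (fx, fy) in pixels, shape (N, 2)
#       principal_point=((K[0, 2], K[1, 2]),),  # (px, py) in pixels, shape (N, 2)
#       x0=0, y0=0, w=640, h=480,
#       znear=0.01, zfar=100.0,
#   )
#   projected = cameras.transform_points(points)  # points: (..., 3) world coords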
# test the pytorch3d renderer
# TODO: make this work
# render multiple objects in a batch, one object per image
import errno
import os
import os.path as osp
import sys
import time
import struct
import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm import tqdm
from transforms3d.axangles import axangle2mat
from transforms3d.euler import euler2quat, mat2euler, quat2euler
from transforms3d.quaternions import axangle2quat, mat2quat, qinverse, qmult
# io utils
# from pytorch3d.io import load_obj, load_ply
# rendering components
from pytorch3d.renderer import (BlendParams, MeshRasterizer, MeshRenderer,
OpenGLPerspectiveCameras, PhongShader,
PointLights, RasterizationSettings,
SilhouetteShader, look_at_rotation,
look_at_view_transform)
# from pytorch3d.renderer.cameras import SfMPerspectiveCameras
from pytorch3d.renderer.cameras_real import OpenGLRealPerspectiveCameras
# datastructures
from pytorch3d.structures import Meshes, Textures, list_to_padded
# 3D transformations functions
from pytorch3d.transforms import Rotate, Translate
cur_dir = osp.dirname(osp.abspath(__file__))
sys.path.append(osp.join(cur_dir, '../'))
data_dir = osp.join(cur_dir, '../datasets/')
output_directory = osp.join(cur_dir, '../output/results')
output_directory_ren = osp.join(output_directory, 'p3d')
os.makedirs(output_directory_ren, exist_ok=True)
ply_model_root = osp.join(data_dir, "BOP_DATASETS/lm/models")
HEIGHT = 480
WIDTH = 640
IMG_SIZE = 640
ZNEAR = 0.01
ZFAR = 10.0
K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
objects = ["ape", "benchvise", "bowl", "camera", "can", "cat",
"cup", "driller", "duck", "eggbox", "glue", "holepuncher", "iron", "lamp", "phone"]
id2obj = {
1: "ape",
2: "benchvise",
3: "bowl",
4: "camera",
5: "can",
6: "cat",
7: "cup",
8: "driller",
9: "duck",
10: "eggbox",
11: "glue",
12: "holepuncher",
13: "iron",
14: "lamp",
15: "phone",
}
obj_num = len(id2obj)
obj2id = {_name: _id for _id, _name in id2obj.items()}
def load_ply(path, vertex_scale=1.0):
# https://github.com/thodan/sixd_toolkit/blob/master/pysixd/inout.py
# bop_toolkit
"""Loads a 3D mesh model from a PLY file.
:param path: Path to a PLY file.
:return: The loaded model given by a dictionary with items:
    - 'pts' (nx3 ndarray),
    - 'normals' (nx3 ndarray), optional,
    - 'colors' (nx3 ndarray), optional,
    - 'faces' (mx3 ndarray), optional,
    - 'texture_uv' (nx2 ndarray), optional,
    - 'texture_uv_face' (mx6 ndarray), optional,
    - 'texture_file' (string), optional.
"""
    f = open(path, "rb")  # binary mode so both ASCII and binary PLY files parse
# Only triangular faces are supported.
face_n_corners = 3
n_pts = 0
n_faces = 0
pt_props = []
face_props = []
is_binary = False
header_vertex_section = False
header_face_section = False
texture_file = None
# Read the header.
while True:
# Strip the newline character(s)
line = f.readline()
if isinstance(line, str):
line = line.rstrip("\n").rstrip("\r")
else:
line = str(line, 'utf-8').rstrip("\n").rstrip("\r")
if line.startswith('comment TextureFile'):
texture_file = line.split()[-1]
elif line.startswith("element vertex"):
n_pts = int(line.split()[-1])
header_vertex_section = True
header_face_section = False
elif line.startswith("element face"):
n_faces = int(line.split()[-1])
header_vertex_section = False
header_face_section = True
elif line.startswith("element"): # Some other element.
header_vertex_section = False
header_face_section = False
elif line.startswith("property") and header_vertex_section:
# (name of the property, data type)
prop_name = line.split()[-1]
if prop_name == "s":
prop_name = "texture_u"
if prop_name == "t":
prop_name = "texture_v"
prop_type = line.split()[-2]
pt_props.append((prop_name, prop_type))
elif line.startswith("property list") and header_face_section:
elems = line.split()
if elems[-1] == "vertex_indices" or elems[-1] == 'vertex_index':
# (name of the property, data type)
face_props.append(("n_corners", elems[2]))
for i in range(face_n_corners):
face_props.append(("ind_" + str(i), elems[3]))
elif elems[-1] == 'texcoord':
# (name of the property, data type)
face_props.append(('texcoord', elems[2]))
for i in range(face_n_corners * 2):
face_props.append(('texcoord_ind_' + str(i), elems[3]))
else:
print("Warning: Not supported face property: " + elems[-1])
elif line.startswith("format"):
if "binary" in line:
is_binary = True
elif line.startswith("end_header"):
break
# Prepare data structures.
model = {}
if texture_file is not None:
model['texture_file'] = texture_file
model["pts"] = np.zeros((n_pts, 3), np.float)
if n_faces > 0:
model["faces"] = np.zeros((n_faces, face_n_corners), np.float)
# print(pt_props)
pt_props_names = [p[0] for p in pt_props]
face_props_names = [p[0] for p in face_props]
# print(pt_props_names)
    is_normal = False
    if {"nx", "ny", "nz"}.issubset(set(pt_props_names)):
        is_normal = True
        model["normals"] = np.zeros((n_pts, 3), np.float64)
    is_color = False
    if {"red", "green", "blue"}.issubset(set(pt_props_names)):
        is_color = True
        model["colors"] = np.zeros((n_pts, 3), np.float64)
    is_texture_pt = False
    if {"texture_u", "texture_v"}.issubset(set(pt_props_names)):
        is_texture_pt = True
        model["texture_uv"] = np.zeros((n_pts, 2), np.float64)
    is_texture_face = False
    if {'texcoord'}.issubset(set(face_props_names)):
        is_texture_face = True
        model['texture_uv_face'] = np.zeros((n_faces, 6), np.float64)
# Formats for the binary case.
formats = {
"float": ("f", 4),
"double": ("d", 8),
"int": ("i", 4),
"uchar": ("B", 1),
}
# Load vertices.
for pt_id in range(n_pts):
prop_vals = {}
load_props = ["x", "y", "z", "nx", "ny", "nz",
"red", "green", "blue", "texture_u", "texture_v"]
if is_binary:
for prop in pt_props:
format = formats[prop[1]]
read_data = f.read(format[1])
val = struct.unpack(format[0], read_data)[0]
if prop[0] in load_props:
prop_vals[prop[0]] = val
else:
            elems = f.readline().decode("utf-8").rstrip("\n").rstrip("\r").split()
for prop_id, prop in enumerate(pt_props):
if prop[0] in load_props:
prop_vals[prop[0]] = elems[prop_id]
model["pts"][pt_id, 0] = float(prop_vals["x"])
model["pts"][pt_id, 1] = float(prop_vals["y"])
model["pts"][pt_id, 2] = float(prop_vals["z"])
if is_normal:
model["normals"][pt_id, 0] = float(prop_vals["nx"])
model["normals"][pt_id, 1] = float(prop_vals["ny"])
model["normals"][pt_id, 2] = float(prop_vals["nz"])
if is_color:
model["colors"][pt_id, 0] = float(prop_vals["red"])
model["colors"][pt_id, 1] = float(prop_vals["green"])
model["colors"][pt_id, 2] = float(prop_vals["blue"])
if is_texture_pt:
model["texture_uv"][pt_id, 0] = float(prop_vals["texture_u"])
model["texture_uv"][pt_id, 1] = float(prop_vals["texture_v"])
# Load faces.
for face_id in range(n_faces):
prop_vals = {}
if is_binary:
for prop in face_props:
format = formats[prop[1]]
val = struct.unpack(format[0], f.read(format[1]))[0]
if prop[0] == "n_corners":
if val != face_n_corners:
raise ValueError("Only triangular faces are supported.")
# print("Number of face corners: " + str(val))
# exit(-1)
elif prop[0] == 'texcoord':
if val != face_n_corners * 2:
raise ValueError('Wrong number of UV face coordinates.')
else:
prop_vals[prop[0]] = val
else:
            elems = f.readline().decode("utf-8").rstrip("\n").rstrip("\r").split()
for prop_id, prop in enumerate(face_props):
if prop[0] == "n_corners":
if int(elems[prop_id]) != face_n_corners:
raise ValueError("Only triangular faces are supported.")
elif prop[0] == 'texcoord':
if int(elems[prop_id]) != face_n_corners * 2:
raise ValueError('Wrong number of UV face coordinates.')
else:
prop_vals[prop[0]] = elems[prop_id]
model["faces"][face_id, 0] = int(prop_vals["ind_0"])
model["faces"][face_id, 1] = int(prop_vals["ind_1"])
model["faces"][face_id, 2] = int(prop_vals["ind_2"])
if is_texture_face:
for i in range(6):
model['texture_uv_face'][face_id, i] = float(
prop_vals['texcoord_ind_{}'.format(i)])
f.close()
model['pts'] *= vertex_scale
return model
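# Minimal usage sketch for load_ply (assuming a hypothetical BOP-style PLY
# model stored in millimeters; vertex_scale=0.001 converts it to meters):
#
#   model = load_ply("obj_000001.ply", vertex_scale=0.001)
#   print(model["pts"].shape)        # (n_pts, 3)
#   if "faces" in model:
#       print(model["faces"].shape)  # (n_faces, 3)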
def grid_show(ims, titles=None, row=1, col=3, dpi=200, save_path=None, title_fontsize=5, show=True):
if row * col < len(ims):
print('_____________row*col < len(ims)___________')
col = int(np.ceil(len(ims) / row))
fig = plt.figure(dpi=dpi, figsize=plt.figaspect(row / float(col)))
k = 0
for i in range(row):
for j in range(col):
plt.subplot(row, col, k + 1)
plt.axis('off')
plt.imshow(ims[k])
if titles is not None:
# plt.title(titles[k], size=title_fontsize)
plt.text(0.5, 1.08, titles[k],
horizontalalignment='center',
fontsize=title_fontsize,
transform=plt.gca().transAxes)
k += 1
if k == len(ims):
break
# plt.tight_layout()
if show:
plt.show()
else:
if save_path is not None:
mkdir_p(osp.dirname(save_path))
plt.savefig(save_path)
return fig
def mkdir_p(dirname):
"""Like "mkdir -p", make a dir recursively, but do nothing if the dir
exists.
Args:
dirname(str):
"""
assert dirname is not None
if dirname == "" or os.path.isdir(dirname):
return
try:
os.makedirs(dirname)
except OSError as e:
if e.errno != errno.EEXIST:
raise e
def print_stat(data, name=""):
print(name, "min", data.min(), "max", data.max(),
"mean", data.mean(), "std", data.std(),
"sum", data.sum(), "shape", data.shape)
###################################################################################################
def load_ply_models(model_paths, device='cuda', dtype=torch.float32, vertex_scale=0.001):
ply_models = [load_ply(ply_path, vertex_scale=vertex_scale) for ply_path in model_paths]
verts = [torch.tensor(m['pts'], device=device, dtype=dtype) for m in ply_models]
faces = [torch.tensor(m['faces'], device=device, dtype=torch.int64) for m in ply_models]
for m in ply_models:
if m['colors'].max() > 1.1:
m['colors'] /= 255.0
verts_rgb_list = [torch.tensor(m['colors'], device=device, dtype=dtype) for m in ply_models] # [V,3]
res_models = []
for i in range(len(ply_models)):
model = {}
model['verts'] = verts[i]
model['faces'] = faces[i]
model['verts_rgb'] = verts_rgb_list[i]
res_models.append(model)
return res_models
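# Minimal usage sketch for load_ply_models (assuming the BOP LM models under
# ply_model_root defined above):
#
#   paths = [osp.join(ply_model_root, "obj_{:06d}.ply".format(obj2id["ape"]))]
#   models = load_ply_models(paths, device="cuda", vertex_scale=0.001)
#   print(models[0]["verts"].shape, models[0]["faces"].shape)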
def main():
# Set the cuda device
device = torch.device("cuda:0")
torch.cuda.set_device(device)
###########################
# load objects
###########################
objs = objects
    # leftover debug value (unused example rotation matrix), commented out:
    # np.array([[-5.87785252e-01, 8.09016994e-01, 0.00000000e+00],
    #           [-4.95380036e-17, -3.59914664e-17, -1.00000000e+00],
    #           [-8.09016994e-01, -5.87785252e-01, 6.12323400e-17]])
# obj_paths = [osp.join(model_root, '{}/textured.obj'.format(cls_name)) for cls_name in objs]
# texture_paths = [osp.join(model_root, '{}/texture_map.png'.format(cls_name)) for cls_name in objs]
ply_paths = [osp.join(ply_model_root, "obj_{:06d}.ply".format(obj2id[cls_name]))
for cls_name in objs]
models = load_ply_models(ply_paths, vertex_scale=0.001)
cameras = OpenGLRealPerspectiveCameras(
focal_length=((K[0,0], K[1,1]),), # Nx2
principal_point=((K[0,2], K[1,2]),), # Nx2
x0=0,
y0=0,
w=WIDTH,
        h=WIDTH,  # NOTE: use the longer side so the render is square (IMG_SIZE x IMG_SIZE); it is cropped back to HEIGHT x WIDTH below
znear=ZNEAR,
zfar=ZFAR,
device=device,
)
# To blend the 100 faces we set a few parameters which control the opacity and the sharpness of
# edges. Refer to blending.py for more details.
blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
# Define the settings for rasterization and shading. Here we set the output image to be of size
# 640x640. To form the blended image we use 100 faces for each pixel. Refer to rasterize_meshes.py
# for an explanation of this parameter.
silhouette_raster_settings = RasterizationSettings(
image_size=IMG_SIZE, # longer side or scaled longer side
blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
faces_per_pixel=100, # the nearest faces_per_pixel points along the z-axis.
bin_size=0
)
# Create a silhouette mesh renderer by composing a rasterizer and a shader.
silhouette_renderer = MeshRenderer(
rasterizer=MeshRasterizer(
cameras=cameras,
raster_settings=silhouette_raster_settings
),
shader=SilhouetteShader(blend_params=blend_params)
)
# We will also create a phong renderer. This is simpler and only needs to render one face per pixel.
phong_raster_settings = RasterizationSettings(
image_size=IMG_SIZE,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=0
)
# We can add a point light in front of the object.
lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
phong_renderer = MeshRenderer(
rasterizer=MeshRasterizer(
cameras=cameras,
raster_settings=phong_raster_settings
),
shader=PhongShader(device=device, lights=lights)
)
# pose =============================================
R1 = axangle2mat((1, 0, 0), angle=0.5 * np.pi)
R2 = axangle2mat((0, 0, 1), angle=-0.7 * np.pi)
R = np.dot(R1, R2)
print("R det", torch.det(torch.tensor(R)))
quat = mat2quat(R)
t = np.array([-0.1, 0.1, 0.7], dtype=np.float32)
t2 = np.array([0.1, 0.1, 0.7], dtype=np.float32)
t3 = np.array([-0.1, -0.1, 0.7], dtype=np.float32)
t4 = np.array([0.1, -0.1, 0.7], dtype=np.float32)
t5 = np.array([0, 0.1, 0.7], dtype=np.float32)
batch_size = 3
Rs = [R, R.copy(), R.copy(), R.copy(), R.copy()][:batch_size]
print("R", R)
quats = [quat, quat.copy(), quat.copy(), quat.copy(), quat.copy()][:batch_size]
ts = [t, t2, t3, t4, t5][:batch_size]
runs = 100
t_render = 0
for i in tqdm(range(runs)):
t_render_start = time.perf_counter()
obj_ids = np.random.randint(0, len(objs), size=len(quats))
# Render the objs providing the values of R and T.
batch_verts_rgb = list_to_padded([models[obj_id]['verts_rgb'] for obj_id in obj_ids]) # B, Vmax, 3
batch_textures = Textures(verts_rgb=batch_verts_rgb.to(device))
batch_mesh = Meshes(
verts=[models[obj_id]['verts'] for obj_id in obj_ids],
faces=[models[obj_id]['faces'] for obj_id in obj_ids],
textures=batch_textures,
)
        # transpose each R: pytorch3d uses a row-vector convention, so the
        # rotation must be R^T relative to the usual column-vector convention
        batch_R = torch.tensor(np.stack(Rs), device=device, dtype=torch.float32).permute(0, 2, 1)  # Bx3x3
batch_T = torch.tensor(np.stack(ts), device=device, dtype=torch.float32) # Bx3
        silhouette = silhouette_renderer(meshes_world=batch_mesh, R=batch_R, T=batch_T)
        image_ref = phong_renderer(meshes_world=batch_mesh, R=batch_R, T=batch_T)
        # crop the square renders back to the original HEIGHT x WIDTH
        silhouette = silhouette[:, :HEIGHT, :WIDTH, :].cpu().numpy()
        image_ref = image_ref[:, :HEIGHT, :WIDTH, :3].cpu().numpy()
t_render += time.perf_counter() - t_render_start
if True:
pred_images = image_ref
for i in range(pred_images.shape[0]):
                pred_mask = silhouette[i, :, :, 3].astype('float32')
print("num rendered images", pred_images.shape[0])
image = pred_images[i]
print('image', image.shape)
print('dr mask area: ', pred_mask.sum())
print_stat(pred_mask, "pred_mask")
show_ims = [image, pred_mask]
show_titles = ['image', 'mask']
grid_show(show_ims, show_titles, row=1, col=2)
print("runs: {}, {:.3f}fps, {:.3f}ms/im".format(runs, runs / t_render, t_render / runs * 1000))
if __name__ == '__main__':
main()
@wangg12 (Author) commented Apr 20, 2020
Does your R also contain a translation? Is it 3x3 or 4x4?

@EphChem commented Apr 20, 2020
My R is 3x3, so no translation.

@ForrestPi commented
Thanks for your code. I have another problem: I need to train on a batch of cropped images, so the FOV and optical center differ per image. How should I set up the perspective camera?
