Last active
January 2, 2023 21:34
-
-
Save wangg12/32250e352671c7f8c9a42ddb437142a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. | |
import math | |
import numpy as np | |
from typing import Tuple | |
import torch | |
import torch.nn.functional as F | |
from pytorch3d.transforms import Rotate, Transform3d, Translate | |
from .utils import TensorProperties, convert_to_tensors_and_broadcast | |
# Default camera extrinsics: a single identity rotation and a single zero
# translation, each carrying a leading batch dimension of 1.
r = np.eye(3)[np.newaxis]  # (1, 3, 3)
t = np.zeros((1, 3))  # (1, 3)
class OpenGLRealPerspectiveCameras(TensorProperties):
    """
    A class which stores a batch of parameters to generate a batch of
    projection matrices for a perspective camera described by "real"
    intrinsics (focal length and principal point) together with an image
    window (x0, y0, w, h) and near/far clipping planes.

    The extrinsics of the camera (R and T matrices) can also be set in the
    initializer or passed in to `get_full_projection_transform` to get
    the full transformation from world -> screen.

    The `transform_points` method calculates the full world -> screen transform
    and then applies it to the input points.

    The transforms can also be returned separately as Transform3d objects.
    """

    def __init__(
        self,
        focal_length=1.0,
        principal_point=((0.0, 0.0),),
        R=r,
        T=t,
        znear=0.01,
        zfar=100.0,
        x0=0,
        y0=0,
        w=640,
        h=480,
        device="cpu",
    ):
        """
        Args:
            focal_length: focal length of the camera; a scalar, or a tensor
                of shape (N,), (N, 1) or (N, 2) holding (fx, fy).
            principal_point: principal point (px, py), of shape (N, 2).
            R: Rotation matrix of shape (N, 3, 3)
            T: Translation matrix of shape (N, 3)
            znear: near clipping plane of the view frustrum.
            zfar: far clipping plane of the view frustrum.
            x0: horizontal offset used in the principal point term of the
                projection matrix (presumably the left edge of the image
                window -- TODO confirm).
            y0: vertical offset used in the principal point term of the
                projection matrix (presumably the top edge of the image
                window -- TODO confirm).
            w: image width by which the x terms are normalized.
            h: image height by which the y terms are normalized.
            device: torch.device or string
        """
        # The initializer formats all inputs to torch tensors and broadcasts
        # all the inputs to have the same batch dimension where necessary.
        super().__init__(
            device=device,
            focal_length=focal_length,
            principal_point=principal_point,
            R=R,
            T=T,
            znear=znear,
            zfar=zfar,
            x0=x0,
            y0=y0,
            h=h,
            w=w,
        )

    def get_projection_transform(self, **kwargs) -> Transform3d:
        """
        Calculate the perspective projection matrix from the camera
        intrinsics and the image window. The matrix is written directly in
        the transposed layout, i.e. for row vectors multiplied from the left.

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values set in `__init__`.

        Return:
            P: a Transform3d object which represents a batch of projection
            matrices of shape (N, 4, 4)

        .. code-block:: python

            q = far / (far - near)
            qn = -far * near / (far - near)
            P.T = [
                [2*fx/w,            0,                  0,  0],
                [0,                 -2*fy/h,            0,  0],
                [(2*px-w-2*x0)/w,   (-2*py+h-2*y0)/h,   q,  1],
                [0,                 0,                  qn, 0],
            ]
        """
        znear = kwargs.get("znear", self.znear)  # pyre-ignore[16]
        zfar = kwargs.get("zfar", self.zfar)  # pyre-ignore[16]
        x0 = kwargs.get("x0", self.x0)  # pyre-ignore[16]
        y0 = kwargs.get("y0", self.y0)  # pyre-ignore[16]
        w = kwargs.get("w", self.w)  # pyre-ignore[16]
        h = kwargs.get("h", self.h)  # pyre-ignore[16]
        principal_point = kwargs.get(
            "principal_point", self.principal_point
        )  # pyre-ignore[16]
        focal_length = kwargs.get(
            "focal_length", self.focal_length
        )  # pyre-ignore[16]

        if not torch.is_tensor(focal_length):
            focal_length = torch.tensor(focal_length, device=self.device)

        if focal_length.dim() < 2:
            # Scalar or (N,): one focal length shared by both axes.
            fx = fy = focal_length
        elif focal_length.shape[1] == 1:
            # (N, 1): drop the trailing axis so the value broadcasts against
            # the per-camera (N,) quantities instead of expanding to (N, N).
            fx = fy = focal_length[:, 0]
        else:
            fx, fy = focal_length.unbind(1)

        if not torch.is_tensor(principal_point):
            principal_point = torch.tensor(principal_point, device=self.device)
        px, py = principal_point.unbind(1)

        # self._N is not defined in this class; it is expected to be the
        # batch size provided by TensorProperties.
        P = torch.zeros(
            (self._N, 4, 4), device=self.device, dtype=torch.float32
        )
        ones = torch.ones((self._N), dtype=torch.float32, device=self.device)

        # NOTE: In OpenGL the projection matrix changes the handedness of the
        # coordinate frame. i.e the NDC space postive z direction is the
        # camera space negative z direction. This is because the sign of the z
        # in the projection matrix is set to -1.0.
        # In pytorch3d we maintain a right handed coordinate system throughout
        # so the z sign is 1.0.
        z_sign = 1.0

        # define P.T directly
        P[:, 0, 0] = 2.0 * fx / w
        P[:, 1, 1] = -2.0 * fy / h
        P[:, 2, 0] = -(-2 * px + w + 2 * x0) / w
        P[:, 2, 1] = -(+2 * py - h + 2 * y0) / h
        P[:, 2, 3] = z_sign * ones

        # NOTE: This part of the matrix is for z renormalization in OpenGL
        # which maps the z to [-1, 1]. This won't work yet as the torch3d
        # rasterizer ignores faces which have z < 0.
        # P[:, 2, 2] = z_sign * (far + near) / (far - near)
        # P[:, 2, 3] = -2.0 * far * near / (far - near)
        # P[:, 2, 3] = z_sign * torch.ones((N))

        # NOTE: This maps the z coordinate from [0, 1] where z = 0 if the point
        # is at the near clipping plane and z = 1 when the point is at the far
        # clipping plane. This replaces the OpenGL z normalization to [-1, 1]
        # until rasterization is changed to clip at z = -1.
        P[:, 2, 2] = z_sign * zfar / (zfar - znear)
        P[:, 3, 2] = -(zfar * znear) / (zfar - znear)

        # P was filled in already transposed (row-vector layout, see
        # "define P.T directly" above), so it is stored without a further
        # transpose.
        transform = Transform3d(device=self.device)
        transform._matrix = P
        return transform

    def clone(self):
        """
        Return a copy of this camera batch, produced by handing a freshly
        constructed camera on the same device to the parent class `clone`.
        """
        other = OpenGLRealPerspectiveCameras(device=self.device)
        return super().clone(other)

    def get_camera_center(self, **kwargs):
        """
        Return the 3D location of the camera optical center
        in the world coordinates.

        Args:
            **kwargs: parameters for the camera extrinsics can be passed in
                as keyword arguments to override the default values
                set in __init__.

        Setting R or T here will update the values set in init as these
        values may be needed later on in the rendering pipeline e.g. for
        lighting calculations.

        Returns:
            C: a batch of 3D locations of shape (N, 3) denoting
            the locations of the center of each camera in the batch.
        """
        w2v_trans = self.get_world_to_view_transform(**kwargs)
        P = w2v_trans.inverse().get_matrix()
        # the camera center is the translation component (the first 3 elements
        # of the last row) of the inverted world-to-view
        # transform (4x4 RT matrix)
        C = P[:, 3, :3]
        return C

    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
        """
        Return the world-to-view transform.

        Args:
            **kwargs: parameters for the camera extrinsics can be passed in
                as keyword arguments to override the default values
                set in __init__.

        Setting R and T here will update the values set in init as these
        values may be needed later on in the rendering pipeline e.g. for
        lighting calculations.

        Returns:
            T: a Transform3d object which represents a batch of transforms
            of shape (N, 4, 4)

        Raises:
            ValueError: if R and T have different batch sizes, if T is not
                of shape (N, 3), or if R is not of shape (N, 3, 3).
        """
        R = self.R = kwargs.get("R", self.R)  # pyre-ignore[16]
        T = self.T = kwargs.get("T", self.T)  # pyre-ignore[16]
        if T.shape[0] != R.shape[0]:
            msg = "Expected R, T to have the same batch dimension; got %r, %r"
            raise ValueError(msg % (R.shape[0], T.shape[0]))
        # A shape is itself a tuple, so it must be wrapped in a 1-tuple before
        # "%" formatting; otherwise its elements are treated as separate
        # arguments and a TypeError hides the intended ValueError.
        if T.dim() != 2 or T.shape[1:] != (3,):
            msg = "Expected T to have shape (N, 3); got %r"
            raise ValueError(msg % (tuple(T.shape),))
        if R.dim() != 3 or R.shape[1:] != (3, 3):
            msg = "Expected R to have shape (N, 3, 3); got %r"
            raise ValueError(msg % (tuple(R.shape),))

        # Create a Transform3d object: rotate first, then translate.
        T = Translate(T, device=T.device)
        R = Rotate(R, device=R.device)
        world_to_view_transform = R.compose(T)
        return world_to_view_transform

    def get_full_projection_transform(self, **kwargs) -> Transform3d:
        """
        Return the full world-to-screen transform composing the
        world-to-view and view-to-screen transforms.

        Args:
            **kwargs: parameters for the projection transforms can be passed in
                as keyword arguments to override the default values
                set in __init__.

        Setting R and T here will update the values set in init as these
        values may be needed later on in the rendering pipeline e.g. for
        lighting calculations.

        Returns:
            T: a Transform3d object which represents a batch of transforms
            of shape (N, 4, 4)
        """
        self.R = kwargs.get("R", self.R)  # pyre-ignore[16]
        self.T = kwargs.get("T", self.T)  # pyre-ignore[16]

        world_to_view_transform = self.get_world_to_view_transform(
            R=self.R, T=self.T
        )
        view_to_screen_transform = self.get_projection_transform(**kwargs)
        return world_to_view_transform.compose(view_to_screen_transform)

    def transform_points(self, points, **kwargs) -> torch.Tensor:
        """
        Transform input points from world to screen space.

        Args:
            points: torch tensor of shape (..., 3).
            **kwargs: overrides forwarded to `get_full_projection_transform`.

        Returns
            new_points: transformed points with the same shape as the input.
        """
        world_to_screen_transform = self.get_full_projection_transform(**kwargs)
        return world_to_screen_transform.transform_points(points)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# test pytorch 3d renderer | |
# TODO: make this work | |
# render multi objects in batch, one in one image | |
import errno | |
import os | |
import os.path as osp | |
import sys | |
import time | |
import struct | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import torch | |
from tqdm import tqdm | |
from transforms3d.axangles import axangle2mat | |
from transforms3d.euler import euler2quat, mat2euler, quat2euler | |
from transforms3d.quaternions import axangle2quat, mat2quat, qinverse, qmult | |
# io utils | |
# from pytorch3d.io import load_obj, load_ply | |
# rendering components | |
from pytorch3d.renderer import (BlendParams, MeshRasterizer, MeshRenderer, | |
OpenGLPerspectiveCameras, PhongShader, | |
PointLights, RasterizationSettings, | |
SilhouetteShader, look_at_rotation, | |
look_at_view_transform) | |
# from pytorch3d.renderer.cameras import SfMPerspectiveCameras | |
from pytorch3d.renderer.cameras_real import OpenGLRealPerspectiveCameras | |
# datastructures | |
from pytorch3d.structures import Meshes, Textures, list_to_padded | |
# 3D transformations functions | |
from pytorch3d.transforms import Rotate, Translate | |
# All paths are resolved relative to the directory containing this script.
cur_dir = osp.dirname(osp.abspath(__file__))
sys.path.append(osp.join(cur_dir, '../'))

data_dir = osp.join(cur_dir, '../datasets/')
output_directory = osp.join(cur_dir, '../output/results')
output_directory_ren = osp.join(output_directory, 'p3d')
os.makedirs(output_directory_ren, exist_ok=True)

ply_model_root = osp.join(data_dir, "BOP_DATASETS/lm/models")

# Image geometry and clipping planes used by the renderer below.
HEIGHT = 480
WIDTH = 640
IMG_SIZE = 640
ZNEAR = 0.01
ZFAR = 10.0

# 3x3 camera intrinsic matrix.
K = np.array(
    [
        [572.4114, 0, 325.2611],
        [0, 573.57043, 242.04899],
        [0, 0, 1],
    ]
)

# Object class names; the 1-based position of a name is its object id.
objects = [
    "ape",
    "benchvise",
    "bowl",
    "camera",
    "can",
    "cat",
    "cup",
    "driller",
    "duck",
    "eggbox",
    "glue",
    "holepuncher",
    "iron",
    "lamp",
    "phone",
]

# id -> name and name -> id lookup tables.
id2obj = dict(enumerate(objects, start=1))
obj_num = len(id2obj)
obj2id = dict(zip(id2obj.values(), id2obj.keys()))
def load_ply(path, vertex_scale=1.0):
    # https://github.com/thodan/sixd_toolkit/blob/master/pysixd/inout.py
    # bop_toolkit
    """Loads a 3D mesh model from a PLY file (ASCII or binary).

    :param path: Path to a PLY file.
    :param vertex_scale: Factor the vertex coordinates are multiplied by
        after loading.
    :return: The loaded model given by a dictionary with items:
    - 'pts' (nx3 ndarray),
    - 'normals' (nx3 ndarray), optional
    - 'colors' (nx3 ndarray), optional
    - 'faces' (mx3 ndarray), optional.
    - 'texture_uv' (nx2 ndarray), optional
    - 'texture_uv_face' (mx6 ndarray), optional
    - 'texture_file' (string), optional
    :raises ValueError: If the header is truncated, a face is not a
        triangle, or a face has the wrong number of UV coordinates.
    """
    # struct format character and byte size for each PLY scalar type name.
    formats = {
        "char": ("b", 1),
        "int8": ("b", 1),
        "uchar": ("B", 1),
        "uint8": ("B", 1),
        "short": ("h", 2),
        "int16": ("h", 2),
        "ushort": ("H", 2),
        "uint16": ("H", 2),
        "int": ("i", 4),
        "int32": ("i", 4),
        "uint": ("I", 4),
        "uint32": ("I", 4),
        "float": ("f", 4),
        "float32": ("f", 4),
        "double": ("d", 8),
        "float64": ("d", 8),
    }
    # Vertex properties that are kept; any other property is read and dropped.
    load_props = {"x", "y", "z", "nx", "ny", "nz",
                  "red", "green", "blue", "texture_u", "texture_v"}

    # Only triangular faces are supported.
    face_n_corners = 3

    n_pts = 0
    n_faces = 0
    pt_props = []
    face_props = []
    is_binary = False
    byte_order = "<"
    header_vertex_section = False
    header_face_section = False
    texture_file = None

    # Binary mode is required for the struct-based reads of binary PLY
    # payloads; text (header and ASCII payload lines) is decoded explicitly.
    with open(path, "rb") as f:
        # Read the header.
        while True:
            raw_line = f.readline()
            if not raw_line:
                # readline() keeps returning an empty value at EOF, which
                # would otherwise spin forever on a truncated header.
                raise ValueError(
                    "Unexpected end of file in PLY header: " + str(path))
            # Strip the newline character(s)
            line = raw_line.decode("utf-8", errors="replace")
            line = line.rstrip("\n").rstrip("\r")

            if line.startswith('comment TextureFile'):
                texture_file = line.split()[-1]
            elif line.startswith("element vertex"):
                n_pts = int(line.split()[-1])
                header_vertex_section = True
                header_face_section = False
            elif line.startswith("element face"):
                n_faces = int(line.split()[-1])
                header_vertex_section = False
                header_face_section = True
            elif line.startswith("element"):  # Some other element.
                header_vertex_section = False
                header_face_section = False
            elif line.startswith("property") and header_vertex_section:
                # (name of the property, data type)
                prop_name = line.split()[-1]
                if prop_name == "s":
                    prop_name = "texture_u"
                if prop_name == "t":
                    prop_name = "texture_v"
                prop_type = line.split()[-2]
                pt_props.append((prop_name, prop_type))
            elif line.startswith("property list") and header_face_section:
                elems = line.split()
                if elems[-1] == "vertex_indices" or elems[-1] == 'vertex_index':
                    # (name of the property, data type)
                    face_props.append(("n_corners", elems[2]))
                    for i in range(face_n_corners):
                        face_props.append(("ind_" + str(i), elems[3]))
                elif elems[-1] == 'texcoord':
                    # (name of the property, data type)
                    face_props.append(('texcoord', elems[2]))
                    for i in range(face_n_corners * 2):
                        face_props.append(('texcoord_ind_' + str(i), elems[3]))
                else:
                    print("Warning: Not supported face property: " + elems[-1])
            elif line.startswith("format"):
                if "binary" in line:
                    is_binary = True
                    # The format line names the byte order explicitly.
                    byte_order = ">" if "big_endian" in line else "<"
            elif line.startswith("end_header"):
                break

        # Prepare data structures.
        model = {}
        if texture_file is not None:
            model['texture_file'] = texture_file
        model["pts"] = np.zeros((n_pts, 3), np.float64)
        if n_faces > 0:
            model["faces"] = np.zeros((n_faces, face_n_corners), np.float64)

        pt_props_names = {p[0] for p in pt_props}
        face_props_names = {p[0] for p in face_props}

        is_normal = {"nx", "ny", "nz"}.issubset(pt_props_names)
        if is_normal:
            model["normals"] = np.zeros((n_pts, 3), np.float64)

        is_color = {"red", "green", "blue"}.issubset(pt_props_names)
        if is_color:
            model["colors"] = np.zeros((n_pts, 3), np.float64)

        is_texture_pt = {"texture_u", "texture_v"}.issubset(pt_props_names)
        if is_texture_pt:
            model["texture_uv"] = np.zeros((n_pts, 2), np.float64)

        is_texture_face = 'texcoord' in face_props_names
        if is_texture_face:
            model['texture_uv_face'] = np.zeros((n_faces, 6), np.float64)

        # Load vertices.
        for pt_id in range(n_pts):
            prop_vals = {}
            if is_binary:
                for prop_name, prop_type in pt_props:
                    fmt_char, n_bytes = formats[prop_type]
                    val = struct.unpack(
                        byte_order + fmt_char, f.read(n_bytes))[0]
                    if prop_name in load_props:
                        prop_vals[prop_name] = val
            else:
                elems = f.readline().decode("utf-8", errors="replace").split()
                for prop_id, (prop_name, _) in enumerate(pt_props):
                    if prop_name in load_props:
                        prop_vals[prop_name] = elems[prop_id]

            model["pts"][pt_id, 0] = float(prop_vals["x"])
            model["pts"][pt_id, 1] = float(prop_vals["y"])
            model["pts"][pt_id, 2] = float(prop_vals["z"])

            if is_normal:
                model["normals"][pt_id, 0] = float(prop_vals["nx"])
                model["normals"][pt_id, 1] = float(prop_vals["ny"])
                model["normals"][pt_id, 2] = float(prop_vals["nz"])

            if is_color:
                model["colors"][pt_id, 0] = float(prop_vals["red"])
                model["colors"][pt_id, 1] = float(prop_vals["green"])
                model["colors"][pt_id, 2] = float(prop_vals["blue"])

            if is_texture_pt:
                model["texture_uv"][pt_id, 0] = float(prop_vals["texture_u"])
                model["texture_uv"][pt_id, 1] = float(prop_vals["texture_v"])

        # Load faces.
        for face_id in range(n_faces):
            prop_vals = {}
            if is_binary:
                for prop_name, prop_type in face_props:
                    fmt_char, n_bytes = formats[prop_type]
                    val = struct.unpack(
                        byte_order + fmt_char, f.read(n_bytes))[0]
                    if prop_name == "n_corners":
                        if val != face_n_corners:
                            raise ValueError(
                                "Only triangular faces are supported.")
                    elif prop_name == 'texcoord':
                        if val != face_n_corners * 2:
                            raise ValueError(
                                'Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop_name] = val
            else:
                elems = f.readline().decode("utf-8", errors="replace").split()
                for prop_id, (prop_name, _) in enumerate(face_props):
                    if prop_name == "n_corners":
                        if int(elems[prop_id]) != face_n_corners:
                            raise ValueError(
                                "Only triangular faces are supported.")
                    elif prop_name == 'texcoord':
                        if int(elems[prop_id]) != face_n_corners * 2:
                            raise ValueError(
                                'Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop_name] = elems[prop_id]

            model["faces"][face_id, 0] = int(prop_vals["ind_0"])
            model["faces"][face_id, 1] = int(prop_vals["ind_1"])
            model["faces"][face_id, 2] = int(prop_vals["ind_2"])

            if is_texture_face:
                for i in range(6):
                    model['texture_uv_face'][face_id, i] = float(
                        prop_vals['texcoord_ind_{}'.format(i)])

    model['pts'] *= vertex_scale
    return model
def grid_show(ims, titles=None, row=1, col=3, dpi=200, save_path=None, title_fontsize=5, show=True):
    """Draw a list of images on a row x col grid of subplots.

    Images are placed in row-major order. If the grid is too small for the
    number of images, the column count is increased to fit them. Grid cells
    beyond the last image are left empty.

    Args:
        ims: sequence of images accepted by `plt.imshow`.
        titles: optional sequence of titles, one per image.
        row: number of grid rows.
        col: number of grid columns.
        dpi: resolution of the created figure.
        save_path: file to save the figure to; only used when `show` is False.
        title_fontsize: font size of the titles.
        show: if True, display the figure instead of saving it.

    Returns:
        The created matplotlib figure.
    """
    if row * col < len(ims):
        print('_____________row*col < len(ims)___________')
        col = int(np.ceil(len(ims) / row))
    fig = plt.figure(dpi=dpi, figsize=plt.figaspect(row / float(col)))

    # Iterate over the images themselves rather than over grid cells, so a
    # grid with more cells than images never indexes past the end of `ims`.
    for k, im in enumerate(ims):
        plt.subplot(row, col, k + 1)
        plt.axis('off')
        plt.imshow(im)
        if titles is not None:
            # Text placed just above the axes is used in place of plt.title.
            plt.text(0.5, 1.08, titles[k],
                     horizontalalignment='center',
                     fontsize=title_fontsize,
                     transform=plt.gca().transAxes)

    if show:
        plt.show()
    elif save_path is not None:
        mkdir_p(osp.dirname(save_path))
        plt.savefig(save_path)
    return fig
def mkdir_p(dirname):
    """Create a directory and any missing parents, like "mkdir -p".

    Nothing happens when ``dirname`` is the empty string or already names a
    directory. An "already exists" error raised while creating it is
    ignored; any other OSError is re-raised.

    Args:
        dirname(str): directory path to create.
    """
    assert dirname is not None
    needs_creation = dirname != "" and not os.path.isdir(dirname)
    if needs_creation:
        try:
            os.makedirs(dirname)
        except OSError as err:
            # Something else may have created the path in the meantime.
            if err.errno == errno.EEXIST:
                return
            raise err
def print_stat(data, name=""):
    """Print min, max, mean, std, sum and shape of ``data`` on one line.

    Args:
        data: object exposing min(), max(), mean(), std(), sum() and shape.
        name: label printed in front of the statistics.
    """
    stats = (
        ("min", data.min()),
        ("max", data.max()),
        ("mean", data.mean()),
        ("std", data.std()),
        ("sum", data.sum()),
        ("shape", data.shape),
    )
    fields = [name]
    for label, value in stats:
        fields.extend((label, value))
    print(*fields)
###################################################################################################
def load_ply_models(model_paths, device='cuda', dtype=torch.float32, vertex_scale=0.001):
    """Load PLY meshes and convert them to torch tensors.

    Args:
        model_paths: iterable of PLY file paths.
        device: device the tensors are created on.
        dtype: dtype of the vertex and color tensors.
        vertex_scale: factor passed to `load_ply` to scale the vertices.

    Returns:
        A list with one dict per model holding 'verts', 'faces' (int64) and
        'verts_rgb'. Colors whose maximum exceeds 1.1 are divided by 255.
    """
    loaded = [load_ply(ply_path, vertex_scale=vertex_scale) for ply_path in model_paths]

    vert_tensors = [torch.tensor(entry['pts'], device=device, dtype=dtype) for entry in loaded]
    face_tensors = [torch.tensor(entry['faces'], device=device, dtype=torch.int64) for entry in loaded]

    # Colors that look like 0..255 values are rescaled in place to 0..1.
    for entry in loaded:
        colors = entry['colors']
        if colors.max() > 1.1:
            colors /= 255.0
    color_tensors = [torch.tensor(entry['colors'], device=device, dtype=dtype) for entry in loaded]  # [V,3]

    return [
        {'verts': v, 'faces': f, 'verts_rgb': c}
        for v, f, c in zip(vert_tensors, face_tensors, color_tensors)
    ]
def main():
    """Render random batches of objects with PyTorch3D and report the speed.

    Loads the PLY models, builds a silhouette renderer and a phong renderer
    that share one camera, then repeatedly renders a batch of randomly chosen
    objects at fixed poses, shows every rendered image next to its mask, and
    finally prints the average rendering throughput.
    """
    # Set the cuda device
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)

    ###########################
    # load objects
    ###########################
    objs = objects
    ply_paths = [osp.join(ply_model_root, "obj_{:06d}.ply".format(obj2id[cls_name]))
                 for cls_name in objs]
    models = load_ply_models(ply_paths, device=device, vertex_scale=0.001)

    cameras = OpenGLRealPerspectiveCameras(
        focal_length=((K[0, 0], K[1, 1]),),  # Nx2
        principal_point=((K[0, 2], K[1, 2]),),  # Nx2
        x0=0,
        y0=0,
        w=WIDTH,
        # The camera height is set to WIDTH (not HEIGHT): the rasterizer
        # output is square (IMG_SIZE) and is cropped to HEIGHT afterwards.
        h=WIDTH,  # HEIGHT,
        znear=ZNEAR,
        zfar=ZFAR,
        device=device,
    )

    # To blend the 100 faces we set a few parameters which control the opacity and the sharpness of
    # edges. Refer to blending.py for more details.
    blend_params = BlendParams(sigma=1e-4, gamma=1e-4)

    # Define the settings for rasterization and shading. Here we set the output image to be of size
    # 640x640. To form the blended image we use 100 faces for each pixel. Refer to rasterize_meshes.py
    # for an explanation of this parameter.
    silhouette_raster_settings = RasterizationSettings(
        image_size=IMG_SIZE,  # longer side or scaled longer side
        blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
        faces_per_pixel=100,  # the nearest faces_per_pixel points along the z-axis.
        bin_size=0
    )

    # Create a silhouette mesh renderer by composing a rasterizer and a shader.
    silhouette_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=silhouette_raster_settings
        ),
        shader=SilhouetteShader(blend_params=blend_params)
    )

    # We will also create a phong renderer. This is simpler and only needs to render one face per pixel.
    phong_raster_settings = RasterizationSettings(
        image_size=IMG_SIZE,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )

    # We can add a point light in front of the object.
    lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
    phong_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=phong_raster_settings
        ),
        shader=PhongShader(device=device, lights=lights)
    )

    # pose =============================================
    R1 = axangle2mat((1, 0, 0), angle=0.5 * np.pi)
    R2 = axangle2mat((0, 0, 1), angle=-0.7 * np.pi)
    R = np.dot(R1, R2)
    print("R det", torch.det(torch.tensor(R)))
    quat = mat2quat(R)
    t = np.array([-0.1, 0.1, 0.7], dtype=np.float32)
    t2 = np.array([0.1, 0.1, 0.7], dtype=np.float32)
    t3 = np.array([-0.1, -0.1, 0.7], dtype=np.float32)
    t4 = np.array([0.1, -0.1, 0.7], dtype=np.float32)
    t5 = np.array([0, 0.1, 0.7], dtype=np.float32)

    # Five candidate poses are defined; only the first batch_size are used.
    batch_size = 3
    Rs = [R, R.copy(), R.copy(), R.copy(), R.copy()][:batch_size]
    print("R", R)
    quats = [quat, quat.copy(), quat.copy(), quat.copy(), quat.copy()][:batch_size]
    ts = [t, t2, t3, t4, t5][:batch_size]

    runs = 100
    t_render = 0
    for _ in tqdm(range(runs)):
        t_render_start = time.perf_counter()
        obj_ids = np.random.randint(0, len(objs), size=len(quats))

        # Render the objs providing the values of R and T.
        batch_verts_rgb = list_to_padded([models[obj_id]['verts_rgb'] for obj_id in obj_ids])  # B, Vmax, 3
        batch_textures = Textures(verts_rgb=batch_verts_rgb.to(device))
        batch_mesh = Meshes(
            verts=[models[obj_id]['verts'] for obj_id in obj_ids],
            faces=[models[obj_id]['faces'] for obj_id in obj_ids],
            textures=batch_textures,
        )
        # The rotations are transposed before being handed to the renderer.
        batch_R = torch.tensor(np.stack(Rs), device=device, dtype=torch.float32).permute(0, 2, 1)  # Bx3x3
        batch_T = torch.tensor(np.stack(ts), device=device, dtype=torch.float32)  # Bx3

        silhouette = silhouette_renderer(meshes_world=batch_mesh, R=batch_R, T=batch_T)
        image_ref = phong_renderer(meshes_world=batch_mesh, R=batch_R, T=batch_T)

        # crop results
        silhouette = silhouette[:, :HEIGHT, :WIDTH, :].cpu().numpy()
        image_ref = image_ref[:, :HEIGHT, :WIDTH, :3].cpu().numpy()
        t_render += time.perf_counter() - t_render_start

        pred_images = image_ref
        # A separate index name keeps this loop from clobbering the outer
        # run counter.
        for img_idx in range(pred_images.shape[0]):
            pred_mask = silhouette[img_idx, :, :, 3].astype('float32')
            print("num rendered images", pred_images.shape[0])
            image = pred_images[img_idx]
            print('image', image.shape)
            print('dr mask area: ', pred_mask.sum())
            print_stat(pred_mask, "pred_mask")
            show_ims = [image, pred_mask]
            show_titles = ['image', 'mask']
            grid_show(show_ims, show_titles, row=1, col=2)

    print("runs: {}, {:.3f}fps, {:.3f}ms/im".format(runs, runs / t_render, t_render / runs * 1000))


if __name__ == '__main__':
    main()
My R is 3x3 so no translation
Thanks for your code. I have another problem: I need to train on a batch of cropped images, so the FOV and optical center differ from image to image. How should I set up the perspective camera in that case?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Does your R contain also translation? Is it 3x3 or 4x4?