Last active
May 19, 2021 05:04
-
-
Save Neo-Zhixing/b632059ab0a0d277be96bbe02b46bed3 to your computer and use it in GitHub Desktop.
Efficient Sparse Voxel Octrees
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 460 | |
#extension GL_EXT_shader_16bit_storage : require | |
#extension GL_EXT_shader_8bit_storage : require | |
struct Node { | |
uint16_t occupancy_freemask; | |
uint16_t _padding2; | |
uint children; | |
uint16_t data[8]; | |
}; | |
struct Ray { | |
vec3 origin; | |
vec3 dir; | |
}; | |
uint MaskLocationNthOne(uint mask, uint location) { | |
return bitCount(mask & ((1 << location) - 1)); | |
} | |
#define GROUP_SIZE_X 8 | |
#define GROUP_SIZE_Y 8 | |
layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in; | |
layout(rgba32f, set = 2, binding = 0) uniform image2D imgOutput; | |
layout(push_constant) uniform constants | |
{ | |
uvec2 extents; | |
float aspectRatio; | |
} PushConstants; | |
layout(set = 0, binding = 0) uniform u_Camera { | |
mat3 rotation; | |
vec3 position; | |
float tanHalfFov; | |
} Camera; | |
layout(set = 1, binding = 0) readonly buffer Chunk { | |
Node Chunk_Nodes[]; | |
}; | |
Ray GenerateRay() { | |
uvec2 pixelRaster = gl_GlobalInvocationID.xy; | |
vec2 pixelNDC = vec2(pixelRaster) / vec2(PushConstants.extents); | |
vec2 pixelCamera = 2 * pixelNDC - 1; | |
pixelCamera.y *= -1; | |
pixelCamera.x *= PushConstants.aspectRatio; | |
pixelCamera *= Camera.tanHalfFov; | |
vec3 pixelCameraWorld = Camera.rotation * vec3(pixelCamera, -1); | |
Ray ray; | |
ray.origin = Camera.position; | |
ray.dir = normalize(pixelCameraWorld); | |
return ray; | |
} | |
struct ChildDescriptor { | |
uint children; | |
uint occupancy_freemask; | |
}; | |
struct StackItem { | |
uint parent; | |
float t_max; | |
}; | |
#define CAST_STACK_DEPTH 23 | |
#define MAX_RAYCAST_ITERATIONS 200 | |
shared StackItem stack[GROUP_SIZE_X * GROUP_SIZE_Y][CAST_STACK_DEPTH]; | |
void main() { | |
Ray ray = GenerateRay(); | |
ray.dir *= 100; | |
const float epsilon = exp2(-CAST_STACK_DEPTH); | |
uint iter = 0; | |
// Get rid of small ray direction components to avoid division by zero | |
// if (abs(ray.dir.x) < epsilon) ray.dir.x = copysign(epsilon, ray.dir.x); | |
// if (fabsf(ray.dir.y) < epsilon) ray.dir.y = copysignf(epsilon, ray.dir.y); | |
// if (fabsf(ray.dir.z) < epsilon) ray.dir.z = copysignf(epsilon, ray.dir.z); | |
vec3 t_coef = 1.0 / -abs(ray.dir); | |
vec3 t_bias = t_coef * ray.origin; | |
// Select octant mask to mirror the coordinate system so | |
// that ray direction is negative along each axis. | |
int octant_mask = 0; // This is different from the original esvo because our coordinate system was flipped. | |
if (ray.dir.x > 0.0f) octant_mask ^= 4, t_bias.x = 3.0f * t_coef.x - t_bias.x; | |
if (ray.dir.y > 0.0f) octant_mask ^= 2, t_bias.y = 3.0f * t_coef.y - t_bias.y; | |
if (ray.dir.z > 0.0f) octant_mask ^= 1, t_bias.z = 3.0f * t_coef.z - t_bias.z; | |
// Initialize the active span of t-values. | |
float t_min = max(max(2.0f * t_coef.x - t_bias.x, 2.0f * t_coef.y - t_bias.y), 2.0f * t_coef.z - t_bias.z); | |
float t_max = min(min(t_coef.x - t_bias.x, t_coef.y - t_bias.y), t_coef.z - t_bias.z); | |
float h = t_max; | |
t_min = max(t_min, 0.0f); | |
t_max = min(t_max, 1.0f); | |
bool hit_originally = false; | |
if (t_min < t_max){ | |
hit_originally = true; | |
} | |
float t_max_original = t_max; | |
// Initialize the current voxel to the first child of the root. | |
uint parent = 0; | |
ChildDescriptor child_descriptor; | |
child_descriptor.children = 0; | |
child_descriptor.occupancy_freemask = 0; // invalid until fetched | |
uint idx = 0; | |
vec3 pos = vec3(1.0f, 1.0f, 1.0f); | |
uint scale = CAST_STACK_DEPTH - 1; | |
float scale_exp2 = 0.5f; // exp2f(scale - s_max) | |
if (1.5f * t_coef.x - t_bias.x > t_min) idx ^= 4, pos.x = 1.5f; | |
if (1.5f * t_coef.y - t_bias.y > t_min) idx ^= 2, pos.y = 1.5f; | |
if (1.5f * t_coef.z - t_bias.z > t_min) idx ^= 1, pos.z = 1.5f; | |
// Traverse voxels along the ray as long as the current voxel | |
// stays within the octree. | |
while(iter < MAX_RAYCAST_ITERATIONS){ | |
iter++; | |
// Fetch child descriptor unless it is already valid. | |
if (child_descriptor.children == 0) { | |
child_descriptor.occupancy_freemask = uint(Chunk_Nodes[parent].occupancy_freemask); | |
child_descriptor.children = Chunk_Nodes[parent].children; | |
} | |
// Determine maximum t-value of the cube by evaluating | |
// tx(), ty(), and tz() at its corner. | |
vec3 t_corner = pos * t_coef - t_bias; | |
float tc_max = min(min(t_corner.x, t_corner.y), t_corner.z); | |
// Process voxel if the corresponding bit in valid mask is set | |
// and the active t-span is non-empty. | |
uint child_shift = idx ^ octant_mask; | |
if ( | |
( | |
((child_descriptor.occupancy_freemask & (1 << child_shift)) != 0) || | |
(((child_descriptor.occupancy_freemask >> 8) & (1 << child_shift )) != 0) // check occupancy | |
) && | |
(t_min <= t_max) | |
) { | |
// Terminate if the voxel is small enough. | |
// TODO | |
// INTERSECT | |
// Intersect active t-span with the cube and evaluate | |
// tx(), ty(), and tz() at the center of the voxel. | |
float tv_max = min(t_max, tc_max); | |
float half_length = scale_exp2 * 0.5f; | |
vec3 t_center = half_length * t_coef + t_corner; // maybe replace t_corner her to reduce register usage? | |
if (t_min <= tv_max) { | |
// Terminate if the corresponding bit in the non-leaf mask is not set. | |
if ((child_descriptor.occupancy_freemask & (1 << child_shift)) == 0) | |
break; | |
// PUSH | |
// Write current parent to the stack. | |
if (tc_max < h) | |
{ | |
// stack.write(scale, parent, t_max); | |
StackItem item; | |
item.parent = parent; | |
item.t_max = t_max; | |
stack[gl_LocalInvocationIndex][scale] = item; | |
} | |
h = tc_max; | |
// Find child descriptor corresponding to the current voxel. | |
parent = child_descriptor.children + MaskLocationNthOne(child_descriptor.occupancy_freemask, child_shift); | |
// Select child voxel that the ray enters first. | |
idx = 0; | |
scale--; | |
scale_exp2 = half_length; | |
if (t_center.x > t_min) idx ^= 4, pos.x += scale_exp2; | |
if (t_center.y > t_min) idx ^= 2, pos.y += scale_exp2; | |
if (t_center.z > t_min) idx ^= 1, pos.z += scale_exp2; | |
// Update active t-span and invalidate cached child descriptor. | |
t_max = tv_max; | |
child_descriptor.children = 0; | |
continue; | |
} | |
} | |
// ADVANCE | |
// Step along the ray | |
uint step_mask = 0; | |
if (t_corner.x <= tc_max) step_mask ^= 4, pos.x -= scale_exp2; | |
if (t_corner.y <= tc_max) step_mask ^= 2, pos.y -= scale_exp2; | |
if (t_corner.z <= tc_max) step_mask ^= 1, pos.z -= scale_exp2; | |
// Update active t-span and flip bits of the child slot index. | |
t_min = tc_max; | |
idx ^= step_mask; | |
// Proceed with pop if the bit flips disagree with the ray direction. | |
if ((idx & step_mask) != 0) { | |
// POP | |
// Find the highest differing bit between the two positions. | |
uint differing_bits = 0; | |
if ((step_mask & 4) != 0) differing_bits |= floatBitsToUint(pos.x) ^ floatBitsToUint(pos.x + scale_exp2); | |
if ((step_mask & 2) != 0) differing_bits |= floatBitsToUint(pos.y) ^ floatBitsToUint(pos.y + scale_exp2); | |
if ((step_mask & 1) != 0) differing_bits |= floatBitsToUint(pos.z) ^ floatBitsToUint(pos.z + scale_exp2); | |
scale = (floatBitsToUint(float(differing_bits)) >> 23) - 127; // position of the highest bit | |
if (scale >= CAST_STACK_DEPTH) { | |
break; | |
} | |
scale_exp2 = uintBitsToFloat((scale - CAST_STACK_DEPTH + 127) << 23); // exp2f(scale - s_max) | |
// Restore parent voxel from the stack. | |
StackItem stackVal = stack[gl_LocalInvocationIndex][scale]; | |
parent = stackVal.parent; | |
t_max = stackVal.t_max; | |
uint shx = floatBitsToUint(pos.x) >> scale; | |
uint shy = floatBitsToUint(pos.y) >> scale; | |
uint shz = floatBitsToUint(pos.z) >> scale; | |
pos.x = uintBitsToFloat(shx << scale); | |
pos.y = uintBitsToFloat(shy << scale); | |
pos.z = uintBitsToFloat(shz << scale); | |
idx = (shz & 1) | ((shy & 1) << 1) | ((shx & 1) << 2); | |
// Prevent same parent from being stored again and invalidate cached child descriptor. | |
h = 0.0f; | |
child_descriptor.children = 0; | |
} | |
} | |
// Indicate miss if we are outside the octree. | |
if (scale >= CAST_STACK_DEPTH || iter > MAX_RAYCAST_ITERATIONS) { | |
t_min = 2.0f; | |
} | |
// Undo mirroring of the coordinate system. | |
if ((octant_mask & 4) == 0) pos.x = 3.0f - scale_exp2 - pos.x; | |
if ((octant_mask & 2) == 0) pos.y = 3.0f - scale_exp2 - pos.y; | |
if ((octant_mask & 1) == 0) pos.z = 3.0f - scale_exp2 - pos.z; | |
// Output results. | |
/* | |
res.t = t_min; | |
res.iter = iter; | |
res.pos.x = fminf(fmaxf(ray.orig.x + t_min * ray.dir.x, pos.x + epsilon), pos.x + scale_exp2 - epsilon); | |
res.pos.y = fminf(fmaxf(ray.orig.y + t_min * ray.dir.y, pos.y + epsilon), pos.y + scale_exp2 - epsilon); | |
res.pos.z = fminf(fmaxf(ray.orig.z + t_min * ray.dir.z, pos.z + epsilon), pos.z + scale_exp2 - epsilon); | |
res.node = parent; | |
res.childIdx = idx ^ octant_mask ^ 7; | |
res.stackPtr = scale; | |
*/ | |
vec4 color = vec4(0.0, 0.0, 0.0 ,1.0); | |
if (t_min > 1.0) { | |
// No hit | |
if(hit_originally) { | |
color.r = 1.0; | |
} | |
} else { | |
float v = float(iter) / 200; | |
color.r = v; | |
color.g = v; | |
color.b = v; | |
} | |
imageStore(imgOutput, ivec2(gl_GlobalInvocationID.xy), color); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment