Skip to content

Instantly share code, notes, and snippets.

@fuchstraumer
Last active July 22, 2025 19:28
Show Gist options
  • Select an option

  • Save fuchstraumer/e36c6273db04cd0248df8f597c9c628e to your computer and use it in GitHub Desktop.

Select an option

Save fuchstraumer/e36c6273db04cd0248df8f597c9c628e to your computer and use it in GitHub Desktop.
// Branch-based sphere/AABB overlap test.
// Note: despite "Inside" in the name, the final comparison accepts any sphere
// whose center is within one radius of the box (touching counts), not only
// spheres that are fully contained.
//   sphere  - xyz = center, w = radius
//   aabbMin - minimum corner of the box
//   aabbMax - maximum corner of the box
// Returns true when the squared distance from the sphere center to the box
// is <= radius squared.
internal bool SphereInsideAABB_Branched(float4 sphere, float3 aabbMin, float3 aabbMax)
{
    const float3 c = sphere.xyz;
    const float r = sphere.w;

    // Per-axis early outs: bail as soon as the center lies more than one
    // radius beyond either face on that axis.
    if (c.x < aabbMin.x - r || c.x > aabbMax.x + r)
    {
        return false;
    }
    if (c.y < aabbMin.y - r || c.y > aabbMax.y + r)
    {
        return false;
    }
    if (c.z < aabbMin.z - r || c.z > aabbMax.z + r)
    {
        return false;
    }

    // Vector from the nearest point of the box (center clamped to the box
    // extents) back to the center; its squared length is compared against
    // the squared radius so no square root is needed.
    const float3 toCenter = c - clamp(c, aabbMin, aabbMax);
    return dot(toCenter, toCenter) <= (r * r);
}
// Scratch compute entry point exercising SphereInsideAABB_Branched.
// Each thread handles the element at index dispatchThreadID.x of every buffer
// and writes 1 to output when the test passes, 0 otherwise.
// No bounds check is performed on the index, so every buffer must have at
// least as many elements as there are dispatched threads.
[shader("compute")][NumThreads(256, 1, 1)]
public void TestSphereAABB_Branched(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform float4* sphere, // xyz = center, w = radius
uniform float4* aabbMin, // AABB minimum corner
uniform float4* aabbMax, // AABB maximum corner
uint* output)
{
    const uint i = dispatchThreadID.x;

    // Only xyz of the AABB corners is used; w is ignored.
    const float4 s = sphere[i];
    const float3 boxLo = aabbMin[i].xyz;
    const float3 boxHi = aabbMax[i].xyz;

    const bool hit = SphereInsideAABB_Branched(s, boxLo, boxHi);
    output[i] = hit ? 1u : 0u;
}
; SPIR-V
; Version: 1.5
; Generator: Khronos Slang Compiler; 0
; Bound: 99
; Schema: 0
; Module preamble for the TestSphereAABB_Branched compute shader.
; Buffers are reached through 64-bit physical storage buffer pointers
; (PhysicalStorageBuffer64 addressing model).
OpCapability PhysicalStorageBufferAddresses
OpCapability Shader
OpExtension "SPV_KHR_physical_storage_buffer"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel PhysicalStorageBuffer64 GLSL450
; The entry point is exported under the name "main" with a 256x1x1 workgroup.
OpEntryPoint GLCompute %TestSphereAABB_Branched "main" %entryPointParams %gl_GlobalInvocationID
OpExecutionMode %TestSphereAABB_Branched LocalSize 256 1 1
; Debug Information
OpSource Slang 1
OpName %threadIndex "threadIndex" ; id %5
OpName %EntryPointParams_std430 "EntryPointParams_std430" ; id %6
OpMemberName %EntryPointParams_std430 0 "sphere"
OpMemberName %EntryPointParams_std430 1 "aabbMin"
OpMemberName %EntryPointParams_std430 2 "aabbMax"
OpMemberName %EntryPointParams_std430 3 "output"
OpName %entryPointParams "entryPointParams" ; id %3
OpName %TestSphereAABB_Branched "TestSphereAABB_Branched" ; id %2
; Annotations
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
; Pointer strides follow the element types: 16 bytes per float4, 4 bytes per uint.
OpDecorate %_ptr_PhysicalStorageBuffer_v4float ArrayStride 16
OpDecorate %_ptr_PhysicalStorageBuffer_uint ArrayStride 4
; The four pointer parameters are packed back to back, 8 bytes apart.
OpDecorate %EntryPointParams_std430 Block
OpMemberDecorate %EntryPointParams_std430 0 Offset 0
OpMemberDecorate %EntryPointParams_std430 1 Offset 8
OpMemberDecorate %EntryPointParams_std430 2 Offset 16
OpMemberDecorate %EntryPointParams_std430 3 Offset 24
; Types, variables and constants
%void = OpTypeVoid
%10 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float ; ArrayStride 16
%_ptr_PhysicalStorageBuffer_uint = OpTypePointer PhysicalStorageBuffer %uint ; ArrayStride 4
; Parameter block: three float4 pointers (sphere, aabbMin, aabbMax) and one uint pointer (output).
%EntryPointParams_std430 = OpTypeStruct %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_uint ; Block
%_ptr_PushConstant_EntryPointParams_std430 = OpTypePointer PushConstant %EntryPointParams_std430
; %int_0..%int_3 are the member indices used to address the parameter block.
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_v4float
%int_1 = OpConstant %int 1
%v3float = OpTypeVector %float 3
%int_2 = OpConstant %int 2
%bool = OpTypeBool
%true = OpConstantTrue %bool
%false = OpConstantFalse %bool
%int_3 = OpConstant %int 3
%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uint
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input ; BuiltIn GlobalInvocationId
; The parameter block itself lives in push-constant storage.
%entryPointParams = OpVariable %_ptr_PushConstant_EntryPointParams_std430 PushConstant
; Function TestSphereAABB_Branched
; The sphere/AABB test is emitted inline (there is no OpFunctionCall). Each
; early "return false" in the source becomes a branch to the merge block %46,
; where an OpPhi picks the final boolean.
%TestSphereAABB_Branched = OpFunction %void None %10
%30 = OpLabel
; threadIndex = gl_GlobalInvocationID.x
%31 = OpLoad %v3uint %gl_GlobalInvocationID
%threadIndex = OpCompositeExtract %uint %31 0
; %35 = sphere[threadIndex]
%32 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_0
%33 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %32
%34 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %33 %threadIndex
%35 = OpLoad %v4float %34 Aligned 4
; %39 = aabbMin[threadIndex], %40 = its xyz
%36 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_1
%37 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %36
%38 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %37 %threadIndex
%39 = OpLoad %v4float %38 Aligned 4
%40 = OpVectorShuffle %v3float %39 %39 0 1 2
; %44 = aabbMax[threadIndex], %45 = its xyz
%41 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_2
%42 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %41
%43 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %42 %threadIndex
%44 = OpLoad %v4float %43 Aligned 4
%45 = OpVectorShuffle %v3float %44 %44 0 1 2
; Switch on a constant with only a default target: it wraps the inlined body
; so every return path can branch to the single merge block %46.
OpSelectionMerge %46 None
OpSwitch %uint_0 %47
%47 = OpLabel
; %48 = center (sphere.xyz), %49 = radius (sphere.w)
%48 = OpVectorShuffle %v3float %35 %35 0 1 2
%49 = OpCompositeExtract %float %35 3
; X axis, first half of the ||: center.x < aabbMin.x - radius
%50 = OpCompositeExtract %float %35 0
%51 = OpCompositeExtract %float %39 0
%52 = OpFSub %float %51 %49
%53 = OpFOrdLessThan %bool %50 %52
OpSelectionMerge %54 None
OpBranchConditional %53 %55 %56
%56 = OpLabel
; X axis, second half (evaluated only when the first half was false):
; center.x > aabbMax.x + radius
%57 = OpCompositeExtract %float %44 0
%58 = OpFAdd %float %57 %49
%59 = OpFOrdGreaterThan %bool %50 %58
OpBranch %54
%55 = OpLabel
OpBranch %54
%54 = OpLabel
; %60 = X-axis rejection condition; true sends control to %62 (result false)
%60 = OpPhi %bool %59 %56 %true %55
OpSelectionMerge %61 None
OpBranchConditional %60 %62 %61
%61 = OpLabel
; Y axis, first half: center.y < aabbMin.y - radius
%63 = OpCompositeExtract %float %35 1
%64 = OpCompositeExtract %float %39 1
%65 = OpFSub %float %64 %49
%66 = OpFOrdLessThan %bool %63 %65
OpSelectionMerge %67 None
OpBranchConditional %66 %68 %69
%69 = OpLabel
; Y axis, second half: center.y > aabbMax.y + radius
%70 = OpCompositeExtract %float %44 1
%71 = OpFAdd %float %70 %49
%72 = OpFOrdGreaterThan %bool %63 %71
OpBranch %67
%68 = OpLabel
OpBranch %67
%67 = OpLabel
; %73 = Y-axis rejection condition; true sends control to %75 (result false)
%73 = OpPhi %bool %72 %69 %true %68
OpSelectionMerge %74 None
OpBranchConditional %73 %75 %74
%74 = OpLabel
; Z axis, first half: center.z < aabbMin.z - radius
%76 = OpCompositeExtract %float %35 2
%77 = OpCompositeExtract %float %39 2
%78 = OpFSub %float %77 %49
%79 = OpFOrdLessThan %bool %76 %78
OpSelectionMerge %80 None
OpBranchConditional %79 %81 %82
%82 = OpLabel
; Z axis, second half: center.z > aabbMax.z + radius
%83 = OpCompositeExtract %float %44 2
%84 = OpFAdd %float %83 %49
%85 = OpFOrdGreaterThan %bool %76 %84
OpBranch %80
%81 = OpLabel
OpBranch %80
%80 = OpLabel
; %86 = Z-axis rejection condition; true sends control to %88 (result false)
%86 = OpPhi %bool %85 %82 %true %81
OpSelectionMerge %87 None
OpBranchConditional %86 %88 %87
%87 = OpLabel
; No axis rejected: closest point = clamp(center, aabbMin, aabbMax), then
; compare the squared distance (%91) against the squared radius (%92).
%89 = OpExtInst %v3float %1 FClamp %48 %40 %45
%90 = OpFSub %v3float %48 %89
%91 = OpDot %float %90 %90
%92 = OpFMul %float %49 %49
%93 = OpFOrdLessThanEqual %bool %91 %92
OpBranch %46
; %88, %75 and %62 are the three early-return paths; each contributes false below.
%88 = OpLabel
OpBranch %46
%75 = OpLabel
OpBranch %46
%62 = OpLabel
OpBranch %46
%46 = OpLabel
; %94 = result of the inlined test
%94 = OpPhi %bool %93 %87 %false %88 %false %75 %false %62
; output[threadIndex] = result ? 1u : 0u
%95 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint %entryPointParams %int_3
%96 = OpLoad %_ptr_PhysicalStorageBuffer_uint %95
%97 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_uint %96 %threadIndex
%98 = OpSelect %uint %94 %uint_1 %uint_0
OpStore %97 %98 Aligned 4
OpReturn
OpFunctionEnd
// Component-wise "lhs < rhs" mask: each component of the result is 1.0f where
// lhs is less than rhs in that component, and 0.0f otherwise.
internal float3 WhenLessThan(in const float3 lhs, in const float3 rhs)
{
    // sign() is +1 only where rhs exceeds lhs; clamping at zero discards the
    // -1 produced where lhs is the larger value.
    const float3 gap = rhs - lhs;
    const float3 zero = float3(0.0f);
    return max(sign(gap), zero);
}
// Component-wise "lhs > rhs" mask: each component of the result is 1.0f where
// lhs is greater than rhs in that component, and 0.0f otherwise.
internal float3 WhenGreaterThan(in const float3 lhs, in const float3 rhs)
{
    // sign() is +1 only where lhs exceeds rhs; clamping at zero discards the
    // -1 produced where rhs is the larger value.
    const float3 gap = lhs - rhs;
    const float3 zero = float3(0.0f);
    return max(sign(gap), zero);
}
// Branchless sphere/AABB overlap test.
// Accumulates, per axis, the squared distance by which the sphere center lies
// below aabb_min or above aabb_max, then compares the total against the
// squared radius. Despite "Inside" in the name, a sphere that merely touches
// or overlaps the box also passes.
//   sphere   - xyz = center, w = radius
//   aabb_min - minimum corner of the box
//   aabb_max - maximum corner of the box
internal bool SphereInsideAABBFast(in const float4 sphere, in const float3 aabb_min, in const float3 aabb_max)
{
    const float3 center = sphere.xyz;

    // Signed offsets of the center from each corner.
    const float3 fromMin = center - aabb_min;
    const float3 fromMax = center - aabb_max;

    // The 0/1 masks keep the squared offset only on axes where the center is
    // actually outside the corresponding face.
    float3 sqDist = float3(0.0f);
    sqDist += WhenLessThan(center, aabb_min) * fromMin * fromMin;
    sqDist += WhenGreaterThan(center, aabb_max) * fromMax * fromMax;

    const float radiusSq = sphere.w * sphere.w;
    return (sqDist.x + sqDist.y + sqDist.z) <= radiusSq;
}
// Scratch compute entry point exercising SphereInsideAABBFast.
// Each thread handles the element at index dispatchThreadID.x of every buffer
// and writes 1 to output when the test passes, 0 otherwise.
// No bounds check is performed on the index, so every buffer must have at
// least as many elements as there are dispatched threads.
[shader("compute")][NumThreads(256, 1, 1)]
public void TestSphereAABB_Fast(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform float4* sphere, // xyz = center, w = radius
uniform float4* aabbMin, // AABB minimum corner
uniform float4* aabbMax, // AABB maximum corner
uint* output)
{
    const uint i = dispatchThreadID.x;

    // Only xyz of the AABB corners is used; w is ignored.
    const float4 s = sphere[i];
    const float3 boxLo = aabbMin[i].xyz;
    const float3 boxHi = aabbMax[i].xyz;

    const bool hit = SphereInsideAABBFast(s, boxLo, boxHi);
    output[i] = hit ? 1u : 0u;
}
; SPIR-V
; Version: 1.5
; Generator: Khronos Slang Compiler; 0
; Bound: 77
; Schema: 0
; Module preamble for the TestSphereAABB_Fast compute shader.
; Buffers are reached through 64-bit physical storage buffer pointers
; (PhysicalStorageBuffer64 addressing model).
OpCapability PhysicalStorageBufferAddresses
OpCapability Shader
OpExtension "SPV_KHR_physical_storage_buffer"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel PhysicalStorageBuffer64 GLSL450
; The entry point is exported under the name "main" with a 256x1x1 workgroup.
OpEntryPoint GLCompute %TestSphereAABB_Fast "main" %entryPointParams %gl_GlobalInvocationID
OpExecutionMode %TestSphereAABB_Fast LocalSize 256 1 1
; Debug Information
OpSource Slang 1
OpName %threadIndex "threadIndex" ; id %5
OpName %EntryPointParams_std430 "EntryPointParams_std430" ; id %6
OpMemberName %EntryPointParams_std430 0 "sphere"
OpMemberName %EntryPointParams_std430 1 "aabbMin"
OpMemberName %EntryPointParams_std430 2 "aabbMax"
OpMemberName %EntryPointParams_std430 3 "output"
OpName %entryPointParams "entryPointParams" ; id %3
OpName %TestSphereAABB_Fast "TestSphereAABB_Fast" ; id %2
; Annotations
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
; Pointer strides follow the element types: 16 bytes per float4, 4 bytes per uint.
OpDecorate %_ptr_PhysicalStorageBuffer_v4float ArrayStride 16
OpDecorate %_ptr_PhysicalStorageBuffer_uint ArrayStride 4
; The four pointer parameters are packed back to back, 8 bytes apart.
OpDecorate %EntryPointParams_std430 Block
OpMemberDecorate %EntryPointParams_std430 0 Offset 0
OpMemberDecorate %EntryPointParams_std430 1 Offset 8
OpMemberDecorate %EntryPointParams_std430 2 Offset 16
OpMemberDecorate %EntryPointParams_std430 3 Offset 24
; Types, variables and constants
%void = OpTypeVoid
%10 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float ; ArrayStride 16
%_ptr_PhysicalStorageBuffer_uint = OpTypePointer PhysicalStorageBuffer %uint ; ArrayStride 4
; Parameter block: three float4 pointers (sphere, aabbMin, aabbMax) and one uint pointer (output).
%EntryPointParams_std430 = OpTypeStruct %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_uint ; Block
%_ptr_PushConstant_EntryPointParams_std430 = OpTypePointer PushConstant %EntryPointParams_std430
; %int_0..%int_3 are the member indices used to address the parameter block.
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_v4float
%int_1 = OpConstant %int 1
%v3float = OpTypeVector %float 3
%int_2 = OpConstant %int 2
%bool = OpTypeBool
; %v3int is the intermediate type of the float -> int -> float round trip
; applied to the FSign results in the function body.
%v3int = OpTypeVector %int 3
%float_0 = OpConstant %float 0
; %26 and %27 are two identical float3(0) constants, one per FMax in the function body.
%26 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%27 = OpConstantComposite %v3float %float_0 %float_0 %float_0
%int_3 = OpConstant %int 3
%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uint
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input ; BuiltIn GlobalInvocationId
; The parameter block itself lives in push-constant storage.
%entryPointParams = OpVariable %_ptr_PushConstant_EntryPointParams_std430 PushConstant
; Function TestSphereAABB_Fast
; The helper functions are emitted inline (there is no OpFunctionCall) and the
; whole body is a single basic block with no conditional branches.
%TestSphereAABB_Fast = OpFunction %void None %10
%32 = OpLabel
; threadIndex = gl_GlobalInvocationID.x
%33 = OpLoad %v3uint %gl_GlobalInvocationID
%threadIndex = OpCompositeExtract %uint %33 0
; %37 = sphere[threadIndex]
%34 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_0
%35 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %34
%36 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %35 %threadIndex
%37 = OpLoad %v4float %36 Aligned 4
; %41 = aabbMin[threadIndex], %42 = its xyz
%38 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_1
%39 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %38
%40 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %39 %threadIndex
%41 = OpLoad %v4float %40 Aligned 4
%42 = OpVectorShuffle %v3float %41 %41 0 1 2
; %46 = aabbMax[threadIndex], %47 = its xyz
%43 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_2
%44 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %43
%45 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %44 %threadIndex
%46 = OpLoad %v4float %45 Aligned 4
%47 = OpVectorShuffle %v3float %46 %46 0 1 2
; %48 = center (sphere.xyz)
%48 = OpVectorShuffle %v3float %37 %37 0 1 2
; WhenLessThan(center, aabbMin): max(sign(aabbMin - center), 0).
; The FSign result is converted to int and back to float before the FMax.
%49 = OpFSub %v3float %42 %48
%50 = OpExtInst %v3float %1 FSign %49
%51 = OpConvertFToS %v3int %50
%52 = OpConvertSToF %v3float %51
%53 = OpExtInst %v3float %1 FMax %52 %26
; %56 = mask * (center - aabbMin) * (center - aabbMin)
%54 = OpFSub %v3float %48 %42
%55 = OpFMul %v3float %53 %54
%56 = OpFMul %v3float %55 %54
; WhenGreaterThan(center, aabbMax): max(sign(center - aabbMax), 0).
; %57 is reused both as the FSign operand and as the offset that gets squared.
%57 = OpFSub %v3float %48 %47
%58 = OpExtInst %v3float %1 FSign %57
%59 = OpConvertFToS %v3int %58
%60 = OpConvertSToF %v3float %59
%61 = OpExtInst %v3float %1 FMax %60 %27
; %63 = mask * (center - aabbMax) * (center - aabbMax)
%62 = OpFMul %v3float %61 %57
%63 = OpFMul %v3float %62 %57
; %64 = sum of both per-axis contributions; %69 = x + y + z of that sum
%64 = OpFAdd %v3float %56 %63
%65 = OpCompositeExtract %float %64 0
%66 = OpCompositeExtract %float %64 1
%67 = OpFAdd %float %65 %66
%68 = OpCompositeExtract %float %64 2
%69 = OpFAdd %float %67 %68
; %72 = accumulated squared distance <= radius * radius
%70 = OpCompositeExtract %float %37 3
%71 = OpFMul %float %70 %70
%72 = OpFOrdLessThanEqual %bool %69 %71
; output[threadIndex] = result ? 1u : 0u
%73 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint %entryPointParams %int_3
%74 = OpLoad %_ptr_PhysicalStorageBuffer_uint %73
%75 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_uint %74 %threadIndex
%76 = OpSelect %uint %72 %uint_1 %uint_0
OpStore %75 %76 Aligned 4
OpReturn
OpFunctionEnd
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment