Last active
July 22, 2025 19:28
-
-
Save fuchstraumer/e36c6273db04cd0248df8f597c9c628e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Sphere vs. axis-aligned box overlap test written with explicit early-out branches. | |
| // sphere: xyz = center, w = radius; aabbMin / aabbMax: minimum and maximum corners of the box. | |
| // Returns true when the point of the box closest to the center lies within the radius, | |
| // i.e. the sphere touches or overlaps the box (an overlap test, not containment, despite the name). | |
| internal bool SphereInsideAABB_Branched(float4 sphere, float3 aabbMin, float3 aabbMax) | |
| { | |
|     const float3 c = sphere.xyz; | |
|     const float r = sphere.w; | |
|     // Per-axis rejection: the center lies more than r beyond the box's extent on that axis. | |
|     if (c.x < aabbMin.x - r || c.x > aabbMax.x + r) | |
|     { | |
|         return false; | |
|     } | |
|     if (c.y < aabbMin.y - r || c.y > aabbMax.y + r) | |
|     { | |
|         return false; | |
|     } | |
|     if (c.z < aabbMin.z - r || c.z > aabbMax.z + r) | |
|     { | |
|         return false; | |
|     } | |
|     // Exact test: squared distance from the center to its nearest point on the box vs. r squared. | |
|     const float3 offset = c - clamp(c, aabbMin, aabbMax); | |
|     return dot(offset, offset) <= (r * r); | |
| } | |
| // Scratch compute entry point that exercises the branched sphere-AABB test. | |
| // Each thread reads element dispatchThreadID.x of sphere / aabbMin / aabbMax and writes 1u to the | |
| // same index of output when the test passes, 0u otherwise. The index is not bounds-checked here. | |
| [shader("compute")][NumThreads(256, 1, 1)] | |
| public void TestSphereAABB_Branched( | |
|     uint3 dispatchThreadID : SV_DispatchThreadID, | |
|     uniform float4* sphere, // xyz = center, w = radius | |
|     uniform float4* aabbMin, // AABB minimum corner | |
|     uniform float4* aabbMax, // AABB maximum corner | |
|     uint* output) | |
| { | |
|     const uint threadIndex = dispatchThreadID.x; | |
|     // Only xyz of the two corner entries is consumed; their w component is ignored. | |
|     const float4 sphereData = sphere[threadIndex]; | |
|     const float3 boxMin = aabbMin[threadIndex].xyz; | |
|     const float3 boxMax = aabbMax[threadIndex].xyz; | |
|     const bool overlaps = SphereInsideAABB_Branched(sphereData, boxMin, boxMax); | |
|     output[threadIndex] = overlaps ? 1u : 0u; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; SPIR-V | |
| ; Version: 1.5 | |
| ; Generator: Khronos Slang Compiler; 0 | |
| ; Bound: 99 | |
| ; Schema: 0 | |
| OpCapability PhysicalStorageBufferAddresses | |
| OpCapability Shader | |
| OpExtension "SPV_KHR_physical_storage_buffer" | |
| %1 = OpExtInstImport "GLSL.std.450" | |
| OpMemoryModel PhysicalStorageBuffer64 GLSL450 | |
| OpEntryPoint GLCompute %TestSphereAABB_Branched "main" %entryPointParams %gl_GlobalInvocationID | |
| OpExecutionMode %TestSphereAABB_Branched LocalSize 256 1 1 | |
| ; Debug Information | |
| OpSource Slang 1 | |
| OpName %threadIndex "threadIndex" ; id %5 | |
| OpName %EntryPointParams_std430 "EntryPointParams_std430" ; id %6 | |
| OpMemberName %EntryPointParams_std430 0 "sphere" | |
| OpMemberName %EntryPointParams_std430 1 "aabbMin" | |
| OpMemberName %EntryPointParams_std430 2 "aabbMax" | |
| OpMemberName %EntryPointParams_std430 3 "output" | |
| OpName %entryPointParams "entryPointParams" ; id %3 | |
| OpName %TestSphereAABB_Branched "TestSphereAABB_Branched" ; id %2 | |
| ; Annotations | |
| OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId | |
| OpDecorate %_ptr_PhysicalStorageBuffer_v4float ArrayStride 16 | |
| OpDecorate %_ptr_PhysicalStorageBuffer_uint ArrayStride 4 | |
| OpDecorate %EntryPointParams_std430 Block | |
| OpMemberDecorate %EntryPointParams_std430 0 Offset 0 | |
| OpMemberDecorate %EntryPointParams_std430 1 Offset 8 | |
| OpMemberDecorate %EntryPointParams_std430 2 Offset 16 | |
| OpMemberDecorate %EntryPointParams_std430 3 Offset 24 | |
| ; Types, variables and constants | |
| %void = OpTypeVoid | |
| %10 = OpTypeFunction %void | |
| %uint = OpTypeInt 32 0 | |
| %v3uint = OpTypeVector %uint 3 | |
| %_ptr_Input_v3uint = OpTypePointer Input %v3uint | |
| %float = OpTypeFloat 32 | |
| %v4float = OpTypeVector %float 4 | |
| %_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float ; ArrayStride 16 | |
| %_ptr_PhysicalStorageBuffer_uint = OpTypePointer PhysicalStorageBuffer %uint ; ArrayStride 4 | |
| %EntryPointParams_std430 = OpTypeStruct %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_uint ; Block | |
| %_ptr_PushConstant_EntryPointParams_std430 = OpTypePointer PushConstant %EntryPointParams_std430 | |
| %int = OpTypeInt 32 1 | |
| %int_0 = OpConstant %int 0 | |
| %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_v4float | |
| %int_1 = OpConstant %int 1 | |
| %v3float = OpTypeVector %float 3 | |
| %int_2 = OpConstant %int 2 | |
| %bool = OpTypeBool | |
| %true = OpConstantTrue %bool | |
| %false = OpConstantFalse %bool | |
| %int_3 = OpConstant %int 3 | |
| %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uint | |
| %uint_0 = OpConstant %uint 0 | |
| %uint_1 = OpConstant %uint 1 | |
| %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input ; BuiltIn GlobalInvocationId | |
| %entryPointParams = OpVariable %_ptr_PushConstant_EntryPointParams_std430 PushConstant | |
| ; Function TestSphereAABB_Branched | |
| ; Entry point of type %10 (void, no arguments); the four buffer pointers are read from the %entryPointParams push-constant block. | |
| ; There is no separate OpFunction for SphereInsideAABB_Branched in this listing: its comparisons appear directly in this body. | |
| %TestSphereAABB_Branched = OpFunction %void None %10 | |
| %30 = OpLabel | |
| ; threadIndex = gl_GlobalInvocationID.x | |
| %31 = OpLoad %v3uint %gl_GlobalInvocationID | |
| %threadIndex = OpCompositeExtract %uint %31 0 | |
| ; %35 = sphere[threadIndex] (push-constant member 0) | |
| %32 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_0 | |
| %33 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %32 | |
| %34 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %33 %threadIndex | |
| %35 = OpLoad %v4float %34 Aligned 4 | |
| ; %39 = aabbMin[threadIndex] (member 1), %40 = its xyz | |
| %36 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_1 | |
| %37 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %36 | |
| %38 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %37 %threadIndex | |
| %39 = OpLoad %v4float %38 Aligned 4 | |
| %40 = OpVectorShuffle %v3float %39 %39 0 1 2 | |
| ; %44 = aabbMax[threadIndex] (member 2), %45 = its xyz | |
| %41 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_2 | |
| %42 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %41 | |
| %43 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %42 %threadIndex | |
| %44 = OpLoad %v4float %43 Aligned 4 | |
| %45 = OpVectorShuffle %v3float %44 %44 0 1 2 | |
| ; OpSwitch on constant 0 with only a default target: a wrapper region whose merge block %46 collects every exit path of the test. | |
| OpSelectionMerge %46 None | |
| OpSwitch %uint_0 %47 | |
| %47 = OpLabel | |
| ; %48 = sphere xyz (center), %49 = sphere w (radius) | |
| %48 = OpVectorShuffle %v3float %35 %35 0 1 2 | |
| %49 = OpCompositeExtract %float %35 3 | |
| ; X axis: %53 = center.x < aabbMin.x - radius. True goes to %55; false goes to %56, which evaluates center.x > aabbMax.x + radius. | |
| %50 = OpCompositeExtract %float %35 0 | |
| %51 = OpCompositeExtract %float %39 0 | |
| %52 = OpFSub %float %51 %49 | |
| %53 = OpFOrdLessThan %bool %50 %52 | |
| OpSelectionMerge %54 None | |
| OpBranchConditional %53 %55 %56 | |
| %56 = OpLabel | |
| %57 = OpCompositeExtract %float %44 0 | |
| %58 = OpFAdd %float %57 %49 | |
| %59 = OpFOrdGreaterThan %bool %50 %58 | |
| OpBranch %54 | |
| %55 = OpLabel | |
| OpBranch %54 | |
| %54 = OpLabel | |
| ; %60 = short-circuit OR of the two X comparisons; when true, exit through %62 (result false). | |
| %60 = OpPhi %bool %59 %56 %true %55 | |
| OpSelectionMerge %61 None | |
| OpBranchConditional %60 %62 %61 | |
| %61 = OpLabel | |
| ; Y axis: same pattern using component 1. | |
| %63 = OpCompositeExtract %float %35 1 | |
| %64 = OpCompositeExtract %float %39 1 | |
| %65 = OpFSub %float %64 %49 | |
| %66 = OpFOrdLessThan %bool %63 %65 | |
| OpSelectionMerge %67 None | |
| OpBranchConditional %66 %68 %69 | |
| %69 = OpLabel | |
| %70 = OpCompositeExtract %float %44 1 | |
| %71 = OpFAdd %float %70 %49 | |
| %72 = OpFOrdGreaterThan %bool %63 %71 | |
| OpBranch %67 | |
| %68 = OpLabel | |
| OpBranch %67 | |
| %67 = OpLabel | |
| ; %73 = short-circuit OR of the two Y comparisons; when true, exit through %75 (result false). | |
| %73 = OpPhi %bool %72 %69 %true %68 | |
| OpSelectionMerge %74 None | |
| OpBranchConditional %73 %75 %74 | |
| %74 = OpLabel | |
| ; Z axis: same pattern using component 2. | |
| %76 = OpCompositeExtract %float %35 2 | |
| %77 = OpCompositeExtract %float %39 2 | |
| %78 = OpFSub %float %77 %49 | |
| %79 = OpFOrdLessThan %bool %76 %78 | |
| OpSelectionMerge %80 None | |
| OpBranchConditional %79 %81 %82 | |
| %82 = OpLabel | |
| %83 = OpCompositeExtract %float %44 2 | |
| %84 = OpFAdd %float %83 %49 | |
| %85 = OpFOrdGreaterThan %bool %76 %84 | |
| OpBranch %80 | |
| %81 = OpLabel | |
| OpBranch %80 | |
| %80 = OpLabel | |
| ; %86 = short-circuit OR of the two Z comparisons; when true, exit through %88 (result false). | |
| %86 = OpPhi %bool %85 %82 %true %81 | |
| OpSelectionMerge %87 None | |
| OpBranchConditional %86 %88 %87 | |
| %87 = OpLabel | |
| ; No early-out taken: %89 = clamp(center, aabbMin, aabbMax), %91 = squared distance to it, %93 = %91 <= radius * radius. | |
| %89 = OpExtInst %v3float %1 FClamp %48 %40 %45 | |
| %90 = OpFSub %v3float %48 %89 | |
| %91 = OpDot %float %90 %90 | |
| %92 = OpFMul %float %49 %49 | |
| %93 = OpFOrdLessThanEqual %bool %91 %92 | |
| OpBranch %46 | |
| ; Early-out landing blocks; each contributes the constant false to the OpPhi in %46. | |
| %88 = OpLabel | |
| OpBranch %46 | |
| %75 = OpLabel | |
| OpBranch %46 | |
| %62 = OpLabel | |
| OpBranch %46 | |
| %46 = OpLabel | |
| ; %94 = test result: %93 from the full distance test, false from any of the three early-outs. | |
| %94 = OpPhi %bool %93 %87 %false %88 %false %75 %false %62 | |
| ; output[threadIndex] (member 3) = %94 ? 1 : 0 | |
| %95 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint %entryPointParams %int_3 | |
| %96 = OpLoad %_ptr_PhysicalStorageBuffer_uint %95 | |
| %97 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_uint %96 %threadIndex | |
| %98 = OpSelect %uint %94 %uint_1 %uint_0 | |
| OpStore %97 %98 Aligned 4 | |
| OpReturn | |
| OpFunctionEnd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Component-wise mask: each component of the result is 1.0f where lhs < rhs and 0.0f where lhs >= rhs. | |
| internal float3 WhenLessThan(in const float3 lhs, in const float3 rhs) | |
| { | |
|     // sign() is +1 only where rhs exceeds lhs; max() folds the 0 and -1 cases down to 0. | |
|     const float3 delta = rhs - lhs; | |
|     return max(sign(delta), float3(0.0f)); | |
| } | |
| // Component-wise mask: each component of the result is 1.0f where lhs > rhs and 0.0f where lhs <= rhs. | |
| internal float3 WhenGreaterThan(in const float3 lhs, in const float3 rhs) | |
| { | |
|     // sign() is +1 only where lhs exceeds rhs; max() folds the 0 and -1 cases down to 0. | |
|     const float3 delta = lhs - rhs; | |
|     return max(sign(delta), float3(0.0f)); | |
| } | |
| // Sphere vs. axis-aligned box overlap test written without explicit branches. | |
| // sphere.xyz is used as the center and sphere.w as the radius; aabb_min / aabb_max are the box corners. | |
| // Sums, per axis, the squared amount by which the center lies below aabb_min or above aabb_max, | |
| // and returns true when that sum does not exceed the squared radius. | |
| internal bool SphereInsideAABBFast(in const float4 sphere, in const float3 aabb_min, in const float3 aabb_max) | |
| { | |
|     const float3 center = sphere.xyz; | |
|     const float radius = sphere.w; | |
|     const float3 fromMin = center - aabb_min; | |
|     const float3 fromMax = center - aabb_max; | |
|     // The 0/1 masks keep only the components where the center is actually outside the box. | |
|     float3 excess = float3(0.0f); | |
|     excess += WhenLessThan(center, aabb_min) * fromMin * fromMin; | |
|     excess += WhenGreaterThan(center, aabb_max) * fromMax * fromMax; | |
|     return (excess.x + excess.y + excess.z) <= (radius * radius); | |
| } | |
| // Scratch compute entry point that exercises the branchless sphere-AABB test. | |
| // Each thread reads element dispatchThreadID.x of sphere / aabbMin / aabbMax and writes 1u to the | |
| // same index of output when the test passes, 0u otherwise. The index is not bounds-checked here. | |
| [shader("compute")][NumThreads(256, 1, 1)] | |
| public void TestSphereAABB_Fast( | |
|     uint3 dispatchThreadID : SV_DispatchThreadID, | |
|     uniform float4* sphere, // xyz = center, w = radius | |
|     uniform float4* aabbMin, // AABB minimum corner | |
|     uniform float4* aabbMax, // AABB maximum corner | |
|     uint* output) | |
| { | |
|     const uint threadIndex = dispatchThreadID.x; | |
|     // Only xyz of the two corner entries is consumed; their w component is ignored. | |
|     const float4 sphereData = sphere[threadIndex]; | |
|     const float3 boxMin = aabbMin[threadIndex].xyz; | |
|     const float3 boxMax = aabbMax[threadIndex].xyz; | |
|     const bool overlaps = SphereInsideAABBFast(sphereData, boxMin, boxMax); | |
|     output[threadIndex] = overlaps ? 1u : 0u; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; SPIR-V | |
| ; Version: 1.5 | |
| ; Generator: Khronos Slang Compiler; 0 | |
| ; Bound: 77 | |
| ; Schema: 0 | |
| OpCapability PhysicalStorageBufferAddresses | |
| OpCapability Shader | |
| OpExtension "SPV_KHR_physical_storage_buffer" | |
| %1 = OpExtInstImport "GLSL.std.450" | |
| OpMemoryModel PhysicalStorageBuffer64 GLSL450 | |
| OpEntryPoint GLCompute %TestSphereAABB_Fast "main" %entryPointParams %gl_GlobalInvocationID | |
| OpExecutionMode %TestSphereAABB_Fast LocalSize 256 1 1 | |
| ; Debug Information | |
| OpSource Slang 1 | |
| OpName %threadIndex "threadIndex" ; id %5 | |
| OpName %EntryPointParams_std430 "EntryPointParams_std430" ; id %6 | |
| OpMemberName %EntryPointParams_std430 0 "sphere" | |
| OpMemberName %EntryPointParams_std430 1 "aabbMin" | |
| OpMemberName %EntryPointParams_std430 2 "aabbMax" | |
| OpMemberName %EntryPointParams_std430 3 "output" | |
| OpName %entryPointParams "entryPointParams" ; id %3 | |
| OpName %TestSphereAABB_Fast "TestSphereAABB_Fast" ; id %2 | |
| ; Annotations | |
| OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId | |
| OpDecorate %_ptr_PhysicalStorageBuffer_v4float ArrayStride 16 | |
| OpDecorate %_ptr_PhysicalStorageBuffer_uint ArrayStride 4 | |
| OpDecorate %EntryPointParams_std430 Block | |
| OpMemberDecorate %EntryPointParams_std430 0 Offset 0 | |
| OpMemberDecorate %EntryPointParams_std430 1 Offset 8 | |
| OpMemberDecorate %EntryPointParams_std430 2 Offset 16 | |
| OpMemberDecorate %EntryPointParams_std430 3 Offset 24 | |
| ; Types, variables and constants | |
| %void = OpTypeVoid | |
| %10 = OpTypeFunction %void | |
| %uint = OpTypeInt 32 0 | |
| %v3uint = OpTypeVector %uint 3 | |
| %_ptr_Input_v3uint = OpTypePointer Input %v3uint | |
| %float = OpTypeFloat 32 | |
| %v4float = OpTypeVector %float 4 | |
| %_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float ; ArrayStride 16 | |
| %_ptr_PhysicalStorageBuffer_uint = OpTypePointer PhysicalStorageBuffer %uint ; ArrayStride 4 | |
| %EntryPointParams_std430 = OpTypeStruct %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_v4float %_ptr_PhysicalStorageBuffer_uint ; Block | |
| %_ptr_PushConstant_EntryPointParams_std430 = OpTypePointer PushConstant %EntryPointParams_std430 | |
| %int = OpTypeInt 32 1 | |
| %int_0 = OpConstant %int 0 | |
| %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_v4float | |
| %int_1 = OpConstant %int 1 | |
| %v3float = OpTypeVector %float 3 | |
| %int_2 = OpConstant %int 2 | |
| %bool = OpTypeBool | |
| %v3int = OpTypeVector %int 3 | |
| %float_0 = OpConstant %float 0 | |
| %26 = OpConstantComposite %v3float %float_0 %float_0 %float_0 | |
| %27 = OpConstantComposite %v3float %float_0 %float_0 %float_0 | |
| %int_3 = OpConstant %int 3 | |
| %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uint | |
| %uint_0 = OpConstant %uint 0 | |
| %uint_1 = OpConstant %uint 1 | |
| %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input ; BuiltIn GlobalInvocationId | |
| %entryPointParams = OpVariable %_ptr_PushConstant_EntryPointParams_std430 PushConstant | |
| ; Function TestSphereAABB_Fast | |
| ; Entry point of type %10 (void, no arguments); the four buffer pointers are read from the %entryPointParams push-constant block. | |
| ; The whole body is a single basic block (%32): no OpBranch or OpPhi, and no separate OpFunction for the helper routines. | |
| %TestSphereAABB_Fast = OpFunction %void None %10 | |
| %32 = OpLabel | |
| ; threadIndex = gl_GlobalInvocationID.x | |
| %33 = OpLoad %v3uint %gl_GlobalInvocationID | |
| %threadIndex = OpCompositeExtract %uint %33 0 | |
| ; %37 = sphere[threadIndex] (push-constant member 0) | |
| %34 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_0 | |
| %35 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %34 | |
| %36 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %35 %threadIndex | |
| %37 = OpLoad %v4float %36 Aligned 4 | |
| ; %41 = aabbMin[threadIndex] (member 1), %42 = its xyz | |
| %38 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_1 | |
| %39 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %38 | |
| %40 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %39 %threadIndex | |
| %41 = OpLoad %v4float %40 Aligned 4 | |
| %42 = OpVectorShuffle %v3float %41 %41 0 1 2 | |
| ; %46 = aabbMax[threadIndex] (member 2), %47 = its xyz | |
| %43 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_v4float %entryPointParams %int_2 | |
| %44 = OpLoad %_ptr_PhysicalStorageBuffer_v4float %43 | |
| %45 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_v4float %44 %threadIndex | |
| %46 = OpLoad %v4float %45 Aligned 4 | |
| %47 = OpVectorShuffle %v3float %46 %46 0 1 2 | |
| ; %48 = sphere xyz (center) | |
| %48 = OpVectorShuffle %v3float %37 %37 0 1 2 | |
| ; WhenLessThan(center, aabbMin): %53 = max(sign(aabbMin - center), 0). The FSign result is converted to v3int and back to v3float before FMax. | |
| %49 = OpFSub %v3float %42 %48 | |
| %50 = OpExtInst %v3float %1 FSign %49 | |
| %51 = OpConvertFToS %v3int %50 | |
| %52 = OpConvertSToF %v3float %51 | |
| %53 = OpExtInst %v3float %1 FMax %52 %26 | |
| ; %56 = mask * (center - aabbMin) * (center - aabbMin) | |
| %54 = OpFSub %v3float %48 %42 | |
| %55 = OpFMul %v3float %53 %54 | |
| %56 = OpFMul %v3float %55 %54 | |
| ; WhenGreaterThan(center, aabbMax): %61 = max(sign(center - aabbMax), 0); %57 is reused as the squared term below. | |
| %57 = OpFSub %v3float %48 %47 | |
| %58 = OpExtInst %v3float %1 FSign %57 | |
| %59 = OpConvertFToS %v3int %58 | |
| %60 = OpConvertSToF %v3float %59 | |
| %61 = OpExtInst %v3float %1 FMax %60 %27 | |
| ; %63 = mask * (center - aabbMax) * (center - aabbMax) | |
| %62 = OpFMul %v3float %61 %57 | |
| %63 = OpFMul %v3float %62 %57 | |
| ; %64 = per-axis sum of both terms; %69 = x + y + z of that sum | |
| %64 = OpFAdd %v3float %56 %63 | |
| %65 = OpCompositeExtract %float %64 0 | |
| %66 = OpCompositeExtract %float %64 1 | |
| %67 = OpFAdd %float %65 %66 | |
| %68 = OpCompositeExtract %float %64 2 | |
| %69 = OpFAdd %float %67 %68 | |
| ; %70 = sphere w (radius); %72 = %69 <= radius * radius | |
| %70 = OpCompositeExtract %float %37 3 | |
| %71 = OpFMul %float %70 %70 | |
| %72 = OpFOrdLessThanEqual %bool %69 %71 | |
| ; output[threadIndex] (member 3) = %72 ? 1 : 0 | |
| %73 = OpInBoundsAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uint %entryPointParams %int_3 | |
| %74 = OpLoad %_ptr_PhysicalStorageBuffer_uint %73 | |
| %75 = OpPtrAccessChain %_ptr_PhysicalStorageBuffer_uint %74 %threadIndex | |
| %76 = OpSelect %uint %72 %uint_1 %uint_0 | |
| OpStore %75 %76 Aligned 4 | |
| OpReturn | |
| OpFunctionEnd |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment