Skip to content

Instantly share code, notes, and snippets.

@zeux
Last active February 12, 2024 20:21
Show Gist options
  • Star 10 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zeux/1a67e8930df782d5474276e218831e22 to your computer and use it in GitHub Desktop.
Save zeux/1a67e8930df782d5474276e218831e22 to your computer and use it in GitHub Desktop.
Annotated source of fastComputeAABB and fasterComputeAABB with notes for expensive lines
local function fastComputeAABB(model)
local inf = math.huge
local axes = {'X','Y','Z'}
local min = { inf, inf, inf }
local max = { -inf, -inf, -inf }
for _,obj in pairs(model:GetDescendants()) do -- model:GetDescendants has to marshal an array of instances to Lua which is pretty expensive but there's no way around it
if obj:IsA("BasePart") then -- this uses Roblox __namecall optimization - no point caching IsA, it's fast enough (although does involve LuaBridge invocation)
local cf = obj.CFrame -- this causes a LuaBridge invocation + heap allocation to create CFrame object - expensive! - but no way around it. we need the cframe
local origin = cf.p -- this causes a Lua->C++ invocation (slightly cheaper than LuaBridge) and a heap allocation to create Vector3 object!
local up,right,look = cf.upVector,cf.rightVector,cf.lookVector -- this causes 3 Lua->C++ invocations and 3 heap allocations to get 3 Vector3 objects!
local halfSize = obj.Size/2 -- this causes a LuaBridge invocation, followed by 2 heap allocations to create 2 Vector3 objects (size, and result of division)
local hx,hy,hz = halfSize.X, halfSize.Y, halfSize.Z -- this causes 3 Lua->C++ invocations
local worldR,worldU,worldL,vertex
for x = -1,1,2 do -- loops aren't free - this costs some time to just do two iterations
worldR = right * (x * hx) -- this causes a heap allocation to create a Vector3 object (the multiplication cost is comparatively trivial)
for y = -1,1,2 do -- loops aren't free!
worldU = up * (y * hy) -- heap allocation!
for z = -1,1,2 do -- loops aren't free!
worldL = look * (z * hz) -- heap allocation!
vertex = (origin + worldR + worldU + worldL) -- this requires 3 heap allocations to get 3 Vector3 objects! remember - this is inner loop, repeats 8 times!
for axisId = 1,3 do -- loops aren't free!
local coord = vertex[axes[axisId]] -- this requires one array lookup (fast but non-trivial) and one Lua->C++ invocation
if coord < min[axisId] then -- this requires one array lookup (fast but non-trivial)
min[axisId] = coord -- same
end
if coord > max[axisId] then -- same
max[axisId] = coord -- same
end
end
end
end
end
end
end
min = Vector3.new(unpack(min))
max = Vector3.new(unpack(max))
return Region3.new(min,max)
end
local function fasterComputeAABB(model)
local abs = math.abs
local inf = math.huge
local minx, miny, minz = inf, inf, inf
local maxx, maxy, maxz = -inf, -inf, -inf
for _,obj in pairs(model:GetDescendants()) do -- model:GetDescendants has to marshal an array of instances to Lua which is pretty expensive but there's no way around it
if obj:IsA("BasePart") then -- this uses Roblox __namecall optimization - no point caching IsA, it's fast enough (although does involve LuaBridge invocation)
local cf = obj.CFrame -- this causes a LuaBridge invocation + heap allocation to create CFrame object - expensive! - but no way around it. we need the cframe
local size = obj.Size -- this causes a LuaBridge invocation + heap allocation to create Vector3 object - expensive! - but no way around it
local sx, sy, sz = size.X, size.Y, size.Z -- this causes 3 Lua->C++ invocations
local x, y, z, R00, R01, R02, R10, R11, R12, R20, R21, R22 = cf:components() -- this causes 1 Lua->C++ invocations and gets all components of cframe in one go, with no allocations
-- https://zeuxcg.org/2010/10/17/aabb-from-obb-with-component-wise-abs/
local wsx = 0.5 * (abs(R00) * sx + abs(R01) * sy + abs(R02) * sz) -- this requires 3 Lua->C++ invocations to call abs, but no hash lookups since we cached abs value above; otherwise this is just a bunch of local ops
local wsy = 0.5 * (abs(R10) * sx + abs(R11) * sy + abs(R12) * sz) -- same
local wsz = 0.5 * (abs(R20) * sx + abs(R21) * sy + abs(R22) * sz) -- same
-- just a bunch of local ops
if minx > x - wsx then minx = x - wsx end
if miny > y - wsy then miny = y - wsy end
if minz > z - wsz then minz = z - wsz end
if maxx < x + wsx then maxx = x + wsx end
if maxy < y + wsy then maxy = y + wsy end
if maxz < z + wsz then maxz = z + wsz end
end
end
return Region3.new(Vector3.new(minx, miny, minz), Vector3.new(maxx, maxy, maxz))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment