-
-
Save zeux/1a67e8930df782d5474276e218831e22 to your computer and use it in GitHub Desktop.
Annotated source of fastComputeAABB and fasterComputeAABB with notes for expensive lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local function fastComputeAABB(model) | |
local inf = math.huge | |
local axes = {'X','Y','Z'} | |
local min = { inf, inf, inf } | |
local max = { -inf, -inf, -inf } | |
for _,obj in pairs(model:GetDescendants()) do -- model:GetDescendants has to marshal an array of instances to Lua which is pretty expensive but there's no way around it | |
if obj:IsA("BasePart") then -- this uses Roblox __namecall optimization - no point caching IsA, it's fast enough (although does involve LuaBridge invocation) | |
local cf = obj.CFrame -- this causes a LuaBridge invocation + heap allocation to create CFrame object - expensive! - but no way around it. we need the cframe | |
local origin = cf.p -- this causes a Lua->C++ invocation (slightly cheaper than LuaBridge) and a heap allocation to create Vector3 object! | |
local up,right,look = cf.upVector,cf.rightVector,cf.lookVector -- this causes 3 Lua->C++ invocations and 3 heap allocations to get 3 Vector3 objects! | |
local halfSize = obj.Size/2 -- this causes a LuaBridge invocation, followed by 2 heap allocations to create 2 Vector3 objects (size, and result of division) | |
local hx,hy,hz = halfSize.X, halfSize.Y, halfSize.Z -- this causes 3 Lua->C++ invocations | |
local worldR,worldU,worldL,vertex | |
for x = -1,1,2 do -- loops aren't free - this costs some time to just do two iterations | |
worldR = right * (x * hx) -- this causes a heap allocation to create a Vector3 object (the multiplication cost is comparatively trivial) | |
for y = -1,1,2 do -- loops aren't free! | |
worldU = up * (y * hy) -- heap allocation! | |
for z = -1,1,2 do -- loops aren't free! | |
worldL = look * (z * hz) -- heap allocation! | |
vertex = (origin + worldR + worldU + worldL) -- this requires 3 heap allocations to get 3 Vector3 objects! remember - this is inner loop, repeats 8 times! | |
for axisId = 1,3 do -- loops aren't free! | |
local coord = vertex[axes[axisId]] -- this requires one array lookup (fast but non-trivial) and one Lua->C++ invocation | |
if coord < min[axisId] then -- this requires one array lookup (fast but non-trivial) | |
min[axisId] = coord -- same | |
end | |
if coord > max[axisId] then -- same | |
max[axisId] = coord -- same | |
end | |
end | |
end | |
end | |
end | |
end | |
end | |
min = Vector3.new(unpack(min)) | |
max = Vector3.new(unpack(max)) | |
return Region3.new(min,max) | |
end | |
local function fasterComputeAABB(model) | |
local abs = math.abs | |
local inf = math.huge | |
local minx, miny, minz = inf, inf, inf | |
local maxx, maxy, maxz = -inf, -inf, -inf | |
for _,obj in pairs(model:GetDescendants()) do -- model:GetDescendants has to marshal an array of instances to Lua which is pretty expensive but there's no way around it | |
if obj:IsA("BasePart") then -- this uses Roblox __namecall optimization - no point caching IsA, it's fast enough (although does involve LuaBridge invocation) | |
local cf = obj.CFrame -- this causes a LuaBridge invocation + heap allocation to create CFrame object - expensive! - but no way around it. we need the cframe | |
local size = obj.Size -- this causes a LuaBridge invocation + heap allocation to create Vector3 object - expensive! - but no way around it | |
local sx, sy, sz = size.X, size.Y, size.Z -- this causes 3 Lua->C++ invocations | |
local x, y, z, R00, R01, R02, R10, R11, R12, R20, R21, R22 = cf:components() -- this causes 1 Lua->C++ invocations and gets all components of cframe in one go, with no allocations | |
-- https://zeuxcg.org/2010/10/17/aabb-from-obb-with-component-wise-abs/ | |
local wsx = 0.5 * (abs(R00) * sx + abs(R01) * sy + abs(R02) * sz) -- this requires 3 Lua->C++ invocations to call abs, but no hash lookups since we cached abs value above; otherwise this is just a bunch of local ops | |
local wsy = 0.5 * (abs(R10) * sx + abs(R11) * sy + abs(R12) * sz) -- same | |
local wsz = 0.5 * (abs(R20) * sx + abs(R21) * sy + abs(R22) * sz) -- same | |
-- just a bunch of local ops | |
if minx > x - wsx then minx = x - wsx end | |
if miny > y - wsy then miny = y - wsy end | |
if minz > z - wsz then minz = z - wsz end | |
if maxx < x + wsx then maxx = x + wsx end | |
if maxy < y + wsy then maxy = y + wsy end | |
if maxz < z + wsz then maxz = z + wsz end | |
end | |
end | |
return Region3.new(Vector3.new(minx, miny, minz), Vector3.new(maxx, maxy, maxz)) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment