Skip to content

Instantly share code, notes, and snippets.

@AlexKordic
Last active August 29, 2015 14:05
Show Gist options
  • Save AlexKordic/99113326daeeb33b584a to your computer and use it in GitHub Desktop.
Save AlexKordic/99113326daeeb33b584a to your computer and use it in GitHub Desktop.
300,000 4x4 matrix multiply inversions
local ffi = require("ffi")
local new = ffi.new
--[[
All numeric calculations are performed with doubles. Using
floats for storage saves memory (for big arrays). But arithmetic
is usually slower due to the required float<->double conversions.
]]
ffi.cdef([[
struct Matrix {
double entries[16];
};
]])
local Matrix
local MatrixStructure = ffi.typeof("struct Matrix")
local MatrixMetatable = {}
-- MatrixMetatable.__new = function (_ctype)
-- local ret = new(_ctype)
-- for i=0, 15 do ret.entries[i] = 0 end
-- return ret
-- end
local function _new_matrix()
return Matrix()
-- return ffi.new(MatrixStructure)
-- return ffi.new("struct Matrix")
end
MatrixMetatable.__index = {}
function MatrixMetatable.__index.identity(self)
for i=0, 15, 5 do self.entries[i] = 1 end
end
-- function MatrixMetatable.__mul(self, m)
function MatrixMetatable.__index.mul(self, m)
local ret = _new_matrix()
local me, se = m.entries, self.entries
for i=0, 12, 4 do
for j=0, 3 do
ret.entries[i+j] = me[j] * se[i] + me[j+4] * se[i+1] + me[j+8] * se[i+2] + me[j+12] * se[i+3]
end
end
return ret
end
function MatrixMetatable.__index.clone(self)
local ret = _new_matrix()
for i=0, 15 do ret.entries[i] = self.entries[i] end
return ret
end
function MatrixMetatable.__index.inverse(self)
local ret = _new_matrix()
local e = self.entries
local rete = ret.entries
rete[0] = e[5] * e[10] * e[15] -
e[5] * e[11] * e[14] -
e[9] * e[6] * e[15] +
e[9] * e[7] * e[14] +
e[13] * e[6] * e[11] -
e[13] * e[7] * e[10]
rete[4] = -e[4] * e[10] * e[15] +
e[4] * e[11] * e[14] +
e[8] * e[6] * e[15] -
e[8] * e[7] * e[14] -
e[12] * e[6] * e[11] +
e[12] * e[7] * e[10]
rete[8] = e[4] * e[9] * e[15] -
e[4] * e[11] * e[13] -
e[8] * e[5] * e[15] +
e[8] * e[7] * e[13] +
e[12] * e[5] * e[11] -
e[12] * e[7] * e[9]
rete[12] = -e[4] * e[9] * e[14] +
e[4] * e[10] * e[13] +
e[8] * e[5] * e[14] -
e[8] * e[6] * e[13] -
e[12] * e[5] * e[10] +
e[12] * e[6] * e[9]
rete[1] = -e[1] * e[10] * e[15] +
e[1] * e[11] * e[14] +
e[9] * e[2] * e[15] -
e[9] * e[3] * e[14] -
e[13] * e[2] * e[11] +
e[13] * e[3] * e[10]
rete[5] = e[0] * e[10] * e[15] -
e[0] * e[11] * e[14] -
e[8] * e[2] * e[15] +
e[8] * e[3] * e[14] +
e[12] * e[2] * e[11] -
e[12] * e[3] * e[10]
rete[9] = -e[0] * e[9] * e[15] +
e[0] * e[11] * e[13] +
e[8] * e[1] * e[15] -
e[8] * e[3] * e[13] -
e[12] * e[1] * e[11] +
e[12] * e[3] * e[9]
rete[13] = e[0] * e[9] * e[14] -
e[0] * e[10] * e[13] -
e[8] * e[1] * e[14] +
e[8] * e[2] * e[13] +
e[12] * e[1] * e[10] -
e[12] * e[2] * e[9]
rete[2] = e[1] * e[6] * e[15] -
e[1] * e[7] * e[14] -
e[5] * e[2] * e[15] +
e[5] * e[3] * e[14] +
e[13] * e[2] * e[7] -
e[13] * e[3] * e[6]
rete[6] = -e[0] * e[6] * e[15] +
e[0] * e[7] * e[14] +
e[4] * e[2] * e[15] -
e[4] * e[3] * e[14] -
e[12] * e[2] * e[7] +
e[12] * e[3] * e[6]
rete[10] = e[0] * e[5] * e[15] -
e[0] * e[7] * e[13] -
e[4] * e[1] * e[15] +
e[4] * e[3] * e[13] +
e[12] * e[1] * e[7] -
e[12] * e[3] * e[5]
rete[14] = -e[0] * e[5] * e[14] +
e[0] * e[6] * e[13] +
e[4] * e[1] * e[14] -
e[4] * e[2] * e[13] -
e[12] * e[1] * e[6] +
e[12] * e[2] * e[5]
rete[3] = -e[1] * e[6] * e[11] +
e[1] * e[7] * e[10] +
e[5] * e[2] * e[11] -
e[5] * e[3] * e[10] -
e[9] * e[2] * e[7] +
e[9] * e[3] * e[6]
rete[7] = e[0] * e[6] * e[11] -
e[0] * e[7] * e[10] -
e[4] * e[2] * e[11] +
e[4] * e[3] * e[10] +
e[8] * e[2] * e[7] -
e[8] * e[3] * e[6]
rete[11] = -e[0] * e[5] * e[11] +
e[0] * e[7] * e[9] +
e[4] * e[1] * e[11] -
e[4] * e[3] * e[9] -
e[8] * e[1] * e[7] +
e[8] * e[3] * e[5]
rete[15] = e[0] * e[5] * e[10] -
e[0] * e[6] * e[9] -
e[4] * e[1] * e[10] +
e[4] * e[2] * e[9] +
e[8] * e[1] * e[6] -
e[8] * e[2] * e[5]
local det = e[0] * rete[0] + e[1] * rete[4] + e[2] * rete[8] + e[3] * rete[12]
if det == 0 then return self:clone() end -- << must return new matrix not self
det = 1.0 / det
for i=0, 15 do rete[i] = rete[i] * det end
return ret
end
Matrix = ffi.metatype(MatrixStructure, MatrixMetatable)
local mOut
local m = _new_matrix()
m:identity()
local ITERATIONS = 300000
local t = os.clock()
for i=1, ITERATIONS do
mOut = m:mul(m:inverse())
end
local time = os.clock() - t
print("time", time, "iterations per sec:", ITERATIONS / time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment