Skip to content

Instantly share code, notes, and snippets.

@Deco
Created September 8, 2012 10:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Deco/3673493 to your computer and use it in GitHub Desktop.
Save Deco/3673493 to your computer and use it in GitHub Desktop.
Benchmark of C callbacks in LuaJIT 2.0.0-beta10
-- Load LuaJIT's FFI library; everything below relies on it for C types and
-- for wrapping Lua functions as C function pointers.
local ffi = require"ffi"
-- Explicitly switch the JIT compiler on before any benchmark code runs.
jit.on()
-- Declare the three function-pointer types used by the benchmark:
--   getval_func   : takes nothing, returns an int
--   sum_push_func : receives the running total and a getval_func to call
--   sum_pull_func : receives the running total and an already-fetched int
ffi.cdef[[
typedef int (*getval_func)();
/* imagine these are C functions */
typedef void (*sum_push_func)(int* total, getval_func getval);
typedef void (*sum_pull_func)(int* total, int val);
]]
-- "Push" style accumulator: it is handed a getval_func pointer and calls it
-- itself to obtain the value that gets added to the running total.
-- The Lua function is cast to the sum_push_func pointer type declared above.
local sum_push = ffi.cast("sum_push_func", function(total, fetch)
  -- Read the current total first, then fetch, matching the original
  -- left-to-right evaluation of the addition.
  local running = total[0]
  total[0] = running + fetch()
end)
-- "Pull" style accumulator: the caller has already obtained the value, so
-- this only adds it to the running total stored at index 0.
-- The Lua function is cast to the sum_pull_func pointer type declared above.
local sum_pull = ffi.cast("sum_pull_func", function(total, addend)
  local running = total[0]
  total[0] = running + addend
end)
-- Value source shared by both benchmark variants; it always yields 6.
local function getval()
  return 6
end

-- The same function exposed through the getval_func pointer type, so it can
-- be passed to sum_push as an argument.
local getval_ptr = ffi.cast("getval_func", getval)
--- Push benchmark: call sum_push `n` times, passing it the getval_ptr
-- function pointer so that sum_push fetches each value itself.
-- @param n number of iterations
-- @return the accumulated total read back from the one-element int array
function test_push(n)
  -- One-element int array so sum_push can update the total in place.
  local accumulator = ffi.new("int[1]", 0)
  for _ = 1, n do
    sum_push(accumulator, getval_ptr)
  end
  return accumulator[0]
end
--- Pull benchmark: call the plain Lua getval `n` times and hand each result
-- to sum_pull as an int argument.
-- @param n number of iterations
-- @return the accumulated total read back from the one-element int array
function test_pull(n)
  -- One-element int array so sum_pull can update the total in place.
  local accumulator = ffi.new("int[1]", 0)
  for _ = 1, n do
    sum_pull(accumulator, getval())
  end
  return accumulator[0]
end
-- Alias for the clock function.
local gettime = os.clock
-- Row layout for the results table: one 10-wide column followed by four
-- 16-wide columns, all left-aligned.
local fmt = "%-10s %-16s %-16s %-16s %-16s"

-- jit.status() returns the on/off state first, followed by the flag names.
-- Keep the state in jit_on and the remaining values in jit_options.
local jit_on = (jit.status())
local jit_options = {select(2, jit.status())}

-- Banner: version line, JIT state line, then the column headings.
print(string.format("%s (%s %s)", jit.version, jit.os, jit.arch))
local jit_state = jit_on and "ON" or "OFF"
print("JIT: "..jit_state.." "..table.concat(jit_options, " "))
print(string.format(fmt, "n", "Push Time", "Pull Time", "Push Mem", "Pull Mem"))
--- Round `n` to the nearest multiple of `1/p` (halves round upward).
-- The previous implementation only floored, so despite its name it truncated
-- toward negative infinity (e.g. 1.5 with p = 1 gave 1 instead of 2).
-- @param n number to round
-- @param p scale factor, e.g. 10^6 for six decimal places; defaults to 1
-- @return the rounded number
function math.round(n, p)
  p = p or 1
  -- Adding 0.5 before flooring turns the floor into round-half-up.
  return math.floor(n*p + 0.5)/p
end
-- Number of repetitions averaged for every problem size.
local trials = 3

-- Run one timed trial of `bench` with `n` iterations.
-- Two full collections are forced first, then only the call to `bench` is
-- timed with os.clock. Returns the elapsed time, the collectgarbage("count")
-- reading taken right after the run, and the value `bench` returned.
local function run_trial(bench, n)
  collectgarbage("collect")
  collectgarbage("collect")
  local started = os.clock()
  local result = bench(n)
  local elapsed = os.clock() - started
  return elapsed, collectgarbage("count"), result
end

-- Problem sizes are 2^8, 2^12, ..., 2^24.
for exponent = 8, 24, 4 do
  local n = 2^exponent
  local push_time_sum, pull_time_sum = 0, 0
  local push_mem_sum, pull_mem_sum = 0, 0
  -- Results of the last trial; captured but not otherwise used here.
  local res_push, res_pull

  for _ = 1, trials do
    local elapsed, mem_count

    -- Push variant first, then pull, same order in every trial.
    elapsed, mem_count, res_push = run_trial(test_push, n)
    push_time_sum = push_time_sum + elapsed
    push_mem_sum = push_mem_sum + mem_count

    elapsed, mem_count, res_pull = run_trial(test_pull, n)
    pull_time_sum = pull_time_sum + elapsed
    pull_mem_sum = pull_mem_sum + mem_count
  end

  -- Report per-trial averages, passed through math.round with scale 10^6.
  local scale = 10^6
  print(fmt:format(
    n,
    math.round(push_time_sum/trials, scale),
    math.round(pull_time_sum/trials, scale),
    math.round(push_mem_sum/trials, scale),
    math.round(pull_mem_sum/trials, scale)
  ))
end
--[[
LuaJIT 2.0.0-beta10 (Windows x64)
JIT: ON CMOV SSE2 SSE3 SSE4.1 fold cse dce fwd dse narrow loop abc sink fuse
n Push Time Pull Time Push Mem Pull Mem
256 0 0 63.247721 59.346679
4096 0.002666 0.000999 183.346679 119.346679
65536 0.036666 0.015333 2103.346679 1079.346679
1048576 0.620666 0.272 32823.346679 16439.346679
16777216 9.439666 4.227666 524343.346679 262199.346679
]]
@linguofeng
Copy link

LuaJIT 2.0.1 (OSX x64)
JIT: ON CMOV SSE2 SSE3 SSE4.1 fold cse dce fwd dse narrow loop abc sink fuse
n          Push Time        Pull Time        Push Mem         Pull Mem
256        0.000391         0.000156         47.843424        43.942382
4096       0.002765         0.000862         167.942382       103.942382
65536      0.033591         0.013966         2087.942382      1063.942382
1048576    0.531986         0.22839          32807.942382     16423.942382
16777216   8.680506         3.688063         524327.942382    262183.942382

@bitRAKE
Copy link

bitRAKE commented Feb 26, 2021

LuaJIT 2.1.0-beta3 (Windows x64)
JIT: ON SSE2 SSE3 SSE4.1 AMD BMI2 fold cse dce fwd dse narrow loop abc sink fuse
n          Push Time        Pull Time        Push Mem         Pull Mem
256        0.000333         0                48.187174        44.293945
4096       0.002333         0.000333         69.856445        70.450195
65536      0.032666         0.014            66.668945        81.168945
1048576    0.521            0.217            64.981445        70.575195
16777216   8.422            3.470333         87.293945        83.168945

@exikyut
Copy link

exikyut commented Jul 24, 2021

i3-3220 (3.3GHz) + 1600MHz DDR3, no foreground processes:

LuaJIT 2.1.0-beta3 (Linux x64)
JIT: ON SSE2 SSE3 SSE4.1 fold cse dce fwd dse narrow loop abc sink fuse
n          Push Time        Pull Time        Push Mem         Pull Mem        
256        0.000391         0.000156         48.342447        44.449218       
4096       0.003357         0.001251         69.699218        70.605468       
65536      0.031069         0.0142           64.074218        76.636718       
1048576    0.496462         0.228811         72.824218        76.152343       
16777216   7.958449         3.643323         64.699218        68.402343       

@fenix272
Copy link

fenix272 commented Apr 4, 2024

LuaJIT 2.1.1710088188 (Windows x64)
JIT: ON SSE3 SSE4.1 fold cse dce fwd dse narrow loop abc sink fuse
n          Push Time        Pull Time        Push Mem         Pull Mem
256        0.000666         0.000333         65.730794        59.944335
4096       0.006            0.002666         113.381835       84.014648
65536      0.058333         0.026            72.881835        73.561523
1048576    0.903333         0.418999         88.631835        104.100585
16777216   14.506666        6.770666         75.131835        89.217773

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment