@cipri-tom
Forked from mkfmnn/callback-bench.lua
Last active August 20, 2021 08:23
local function printf(s, ...)
    io.write(s:format(...))
end
local ffi = require("ffi")
ffi.cdef[[
typedef void (*cb)(void);
void set_v(int n, void (*)(void ));
void set_i(int n, void (*)(int ));
void set_d(int n, void (*)(double));
int get_i(int n, int (*)(void));
double get_d(int n, double (*)(void));
void call_v(void );
void call_i(int );
void call_d(double);
void loop (int n);
]]
local callback = ffi.load("./callback.so")
local timeit = require("timeit")
local v = 0
local function lset_v( ) v = v + 1 end
local function lset_a(a) a = a + 1 end
local function lget ( ) return v*2 end
print("operation ", "reps ", "time(s)", "nsec/call")
local c2l = {
    {name='set_v', func=lset_v},
    {name='set_i', func=lset_a},
    {name='set_d', func=lset_a},
    {name='get_i', func=lget  },
    {name='get_d', func=lget  }
}
for _,test in ipairs(c2l) do
    local r = timeit(function(n)
        callback[test.name](n, test.func)
    end)
    printf("C into Lua %-12s %s\n", test.name, r)
end
print("Lua into C call(void) ", timeit(function(n)
for i = 1, n do callback.call_v() end
end))
print("Lua into C call(int) ", timeit(function(n)
for i = 1, n do callback.call_i(3) end
end))
print("Lua into C call(double)", timeit(function(n)
for i = 1, n do callback.call_d(3.5) end
end))
print("Lua into Lua ", timeit(function(n)
for i = 1, n do lset_v() end
end))
print("C empty loop ", timeit(function(n)
callback.loop(n)
end))
print("Lua empty loop ", timeit(function(n)
for i = 1, n do end
end))
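-- How to run (a sketch; the file names are assumptions: this script saved as
-- callback-bench.lua, the C source below as callback.c, the timing module as
-- timeit.lua, all in one directory):
--   1. build the shared library with the gcc invocation at the top of callback.c
--   2. run this script with LuaJIT:  luajit callback-bench.lua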
// gcc -std=c99 -Wall -pedantic -O3 -shared -static-libgcc -fPIC callback.c -o callback.so
// --- CALLS ------------------------------------------------------------------
void call_v(void) {
}
void call_i(int v) {
    v += 5;
}
void call_d(double v) {
    v += 5.0;
}
void loop(int n) {
    for (int i = 0; i < n; i++) {
        /* prevent compiler optimizations from skipping loop entirely */
        __asm__("");
    }
}
// --- SETTERS ----------------------------------------------------------------
void set_v(int n, void (*f)(void)) {
    for (int i = 0; i < n; i++) {
        f();
    }
}
void set_i(int n, void (*f)(int)) {
    for (int i = 0; i < n; i++) {
        f(i);
    }
}
void set_d(int n, void (*f)(double)) {
    double a = 3.0;
    for (int i = 0; i < n; i++) {
        f(a);
    }
}
// --- GETTERS ----------------------------------------------------------------
int get_i(int n, int (*f)(void)) {
    int v = 0;
    for (int i = 0; i < n; i++) {
        v = f();
    }
    return v; // return the last value so the signature matches the ffi.cdef declaration
}
double get_d(int n, double (*f)(void)) {
    double v = 0;
    for (int i = 0; i < n; i++) {
        v = f();
    }
    return v; // return the last value so the signature matches the ffi.cdef declaration
}
// --- PUSH vs PULL -----------------------------------------------------------
typedef double (*getter_fp)(int len, unsigned char mono[len]);
struct Arr {
    int size;
    double data[];
};
enum constants {MONO_LEN = 5};
unsigned char mono[MONO_LEN] = {1, 2, 3, 4, 5};
// --- --- PUSH style ---------------------------------------------------------
void push_style(struct Arr *a, getter_fp get_multiplier)
{
    for (int i = 0; i < a->size; ++i)
        a->data[i] *= get_multiplier(MONO_LEN, mono);
}
// --- --- PULL style ---------------------------------------------------------
int get_mono_len()
{
    return MONO_LEN;
}
unsigned char*
get_mono(int idx)
{
    return mono;
}
-- OBJECTIVE: apply a Lua function to all members of an array
-- -- PUSH style: do it on the C side, with a callback to the Lua function
-- -- PULL style: do it on the Lua side, with calls into C to get the necessary info
local ffi = require("ffi")
ffi.cdef[[
typedef double (*getter_fp)(int len, unsigned char mono[]);
struct Arr {
    int size;
    double data[?];
};
void push_style(struct Arr *a, getter_fp get_multiplier);
int get_mono_len();
unsigned char* get_mono(int idx);
]]
local callback = ffi.load("./callback.so")
local timeit = require("timeit")
local arr_t = ffi.typeof("struct Arr");
-- the callback
local function lget_multiplier(len, mono)
    local s = 0
    for i=0,len-1 do s = s + mono[i] end
    return s * 0.5
end
-- PUSH style -----------------------------------------------------------------
local function push_style(n)
    local a = arr_t(n, {n})
    for i=0,n-1 do a.data[i] = i end
    local cb = ffi.cast("getter_fp", lget_multiplier)
    callback.push_style(a, cb)
    cb:free()   -- release the callback slot (LuaJIT has a limited number of them)
    return a
end
-- PULL style -----------------------------------------------------------------
local function pull_style(n)
    local a = arr_t(n, {n})
    for i=0,n-1 do a.data[i] = i end
    local mono_len, mono = callback.get_mono_len()
    for i=0,n-1 do
        mono = callback.get_mono(i)
        a.data[i] = a.data[i] * lget_multiplier(mono_len, mono)
    end
    return a
end
-- CHECK ----------------------------------------------------------------------
local push_v, pull_v = push_style(100), pull_style(100)
assert(push_v.size == pull_v.size)
for i=0,push_v.size-1 do
    assert(push_v.data[i] == pull_v.data[i])
end
-- BENCH ----------------------------------------------------------------------
print("PUSH style", timeit(push_style))
print("PULL style", timeit(pull_style))
--- Call a function with a repeat-count argument.
-- Takes a single argument: a function which in turn takes one argument, a repeat
-- count. That function is called with increasingly large repeat counts until it
-- takes at least a certain amount of time to run; it is then called four more
-- times with the same repeat count, and the minimum elapsed time is recorded.
-- Modeled loosely on Python's timeit, except that the function passed in is
-- responsible for doing the actual repetition.
return function(func)
    local reps = 10000
    local elapsed
    repeat
        reps = reps * 10
        local start = os.clock()
        func(reps)
        elapsed = os.clock() - start
    until elapsed > 0.1 or reps >= 1e9
    for i = 1, 4 do
        local start = os.clock()
        func(reps)
        elapsed = math.min(elapsed, os.clock() - start)
    end
    return ("%10d\t%.3f\t%7.3f"):format(reps, elapsed, elapsed / reps * 1e9)
end
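-- Example usage (a sketch; assumes this module is saved as timeit.lua on package.path):
--   local timeit = require("timeit")
--   print("empty loop", timeit(function(n) for i = 1, n do end end))
-- The function passed in receives the repeat count and must do the looping itself.
-- The returned string is "<reps> <best elapsed seconds> <nanoseconds per rep>";
-- e.g. 1e9 reps in 0.303 s works out to 0.303 / 1e9 * 1e9 = 0.303 ns per rep, which
-- is how the nsec/call column in the results below should be read.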
@exikyut commented Jul 24, 2021

I happened to be curious about the real-world difference between push vs pull approaches - how much of a performance impact there is, but also what sort of timescale the slowdown is occurring at (which is arguably more important for orientation).

A quick google found https://stackoverflow.com/questions/12329128/luajit-ffi-callback-performance/12435278#12435278, an answer that referenced a source Gist with benchmark code. Unsure how to run it (-.-), I noticed it had a few forks - then stumbled on this interesting-looking, slightly extended version. (Edit: Yup, I didn't scroll slightly further down to find the additional answer directly underneath the one I found, pointing straight here...)

After figuring out that, y'know, you just run the two scripts ;) (but thanks very much for adding the gcc invocation, I might not have figured that out myself before giving up), I got sliiiightly sidetracked wondering what sort of performance differences might exist in various compute environments.

My own hardware is getting pretty old; it would be very interesting to see how much faster the latest chipsets perform, and whether they're significantly faster (eg, by 1-10%) or only slightly faster.

Without reference points I'm not sure if the free-tier public cloud resources I've tested against just happen to closely correlate with my own systems' performance, or whether things are broadly neck-and-neck.

It was particularly interesting to discover what hardware I was running on in some environments! EPYC's getting around... (duh)

All results below are from LuaJIT 2.1.0-beta3. I wondered about also comparing with Git HEAD, and while that does sound interesting, I didn't want to make this twice as long...


i3-3220 (3.3GHz) "server" w/ 1600MHz DDR3:

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.379    37.903
C into Lua set_i          10000000      0.490    48.960
C into Lua set_d          10000000      0.488    48.804
C into Lua get_i          10000000      0.489    48.913
C into Lua get_d          10000000      0.475    47.489
Lua into C call(void)    100000000      0.182     1.819
Lua into C call(int)     100000000      0.182     1.819
Lua into C call(double)  100000000      0.182     1.819
Lua into Lua            1000000000      0.909     0.909
C empty loop            1000000000      0.303     0.303
Lua empty loop          1000000000      0.303     0.303

PUSH style         1000000      0.112   112.246
PULL style         1000000      0.149   148.537

EliteBook 8470p (i5-3360M (2.8GHz), 1600MHz DDR3) + Chrome chewing ~12% CPU 🔥:

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.370    37.001
C into Lua set_i          10000000      0.477    47.652
C into Lua set_d          10000000      0.474    47.395
C into Lua get_i          10000000      0.475    47.507
C into Lua get_d          10000000      0.463    46.325
Lua into C call(void)    100000000      0.174     1.745
Lua into C call(int)     100000000      0.174     1.744
Lua into C call(double)  100000000      0.175     1.745
Lua into Lua            1000000000      0.872     0.872
C empty loop            1000000000      0.293     0.293
Lua empty loop          1000000000      0.293     0.293

PUSH style         1000000      0.112   112.162
PULL style         1000000      0.156   156.177

Google Cloud shell session (4 cores, EPYC 7B12):

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.383    38.350
C into Lua set_i          10000000      0.509    50.927
C into Lua set_d          10000000      0.503    50.275
C into Lua get_i          10000000      0.524    52.366
C into Lua get_d          10000000      0.522    52.226
Lua into C call(void)    100000000      0.189     1.892
Lua into C call(int)     100000000      0.189     1.888
Lua into C call(double)  100000000      0.189     1.890
Lua into Lua             100000000      0.151     1.508
C empty loop            1000000000      0.381     0.381
Lua empty loop          1000000000      0.377     0.377

PUSH style         1000000      0.131   131.022
PULL style         1000000      0.180   179.628

Google Cloud free tier f1-micro (1 core, "Xeon(R) CPU @ 2.20GHz"):

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.377    37.709
C into Lua set_i          10000000      0.522    52.171
C into Lua set_d          10000000      0.529    52.860
C into Lua get_i          10000000      0.510    50.979
C into Lua get_d          10000000      0.505    50.461
Lua into C call(void)    100000000      0.186     1.858
Lua into C call(int)     100000000      0.186     1.864
Lua into C call(double)  100000000      0.186     1.865
Lua into Lua             100000000      0.149     1.493
C empty loop            1000000000      0.373     0.373
Lua empty loop          1000000000      0.373     0.373

PUSH style         1000000      0.148   147.673
PULL style         1000000      0.197   197.097

AWS free tier t2-micro (1 core, E5-2676 v3 @ 2.4GHz):

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.439    43.869
C into Lua set_i          10000000      0.575    57.519
C into Lua set_d          10000000      0.572    57.180
C into Lua get_i          10000000      0.566    56.595
C into Lua get_d          10000000      0.552    55.163
Lua into C call(void)    100000000      0.187     1.870
Lua into C call(int)     100000000      0.187     1.869
Lua into C call(double)  100000000      0.224     2.238
Lua into Lua             100000000      0.112     1.118
C empty loop            1000000000      0.381     0.381
Lua empty loop          1000000000      0.377     0.377

PUSH style         1000000      0.134   134.483
PULL style         1000000      0.187   187.290

Contabo VPS (2 cores, E5-2620 v3 @ 2.4GHz) + some (I/O-throttled) background processing 💽:

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.478    47.840
C into Lua set_i          10000000      0.647    64.731
C into Lua set_d          10000000      0.603    60.268
C into Lua get_i          10000000      0.628    62.763
C into Lua get_d          10000000      0.622    62.220
Lua into C call(void)    100000000      0.205     2.045
Lua into C call(int)     100000000      0.204     2.036
Lua into C call(double)  100000000      0.200     1.998
Lua into Lua             100000000      0.119     1.190
C empty loop            1000000000      0.414     0.414
Lua empty loop          1000000000      0.393     0.393

PUSH style         1000000      0.174   173.705
PULL style         1000000      0.236   235.650

Oracle Cloud free tier, VM.Standard.E2.1.Micro (2 cores, EPYC 7551):

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.418    41.831
C into Lua set_i          10000000      0.602    60.198
C into Lua set_d          10000000      0.594    59.361
C into Lua get_i          10000000      0.518    51.774
C into Lua get_d          10000000      0.527    52.681
Lua into C call(void)    100000000      0.200     2.002
Lua into C call(int)     100000000      0.190     1.904
Lua into C call(double)  100000000      0.200     2.001
Lua into Lua             100000000      0.127     1.268
C empty loop            1000000000      0.400     0.400
Lua empty loop          1000000000      0.401     0.401

PUSH style         1000000      0.141   140.702
PULL style         1000000      0.192   191.596

Oracle Cloud free tier, VM.Standard.A1.Flex (4/4 cores enabled, Ampere Neoverse N1):

operation               reps            time(s) nsec/call
C into Lua set_v          10000000      0.390    39.000
C into Lua set_i          10000000      0.510    51.000
C into Lua set_d          10000000      0.500    50.000
C into Lua get_i          10000000      0.510    51.000
C into Lua get_d          10000000      0.490    49.000
Lua into C call(void)     10000000      0.730    73.000
Lua into C call(int)      10000000      0.940    94.000
Lua into C call(double)   10000000      0.930    93.000
Lua into Lua            1000000000      0.660     0.660
C empty loop            1000000000      0.370     0.370
Lua empty loop          1000000000      0.370     0.370

PUSH style        10000000      1.030   103.000
PULL style         1000000      0.420   420.000

Of note is that LuaJIT's AArch64 JIT (as of 2.1.0) doesn't optimize Lua-into-C yet. (Maybe Git HEAD does?)
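(A quick way to check on a newer build would presumably be to run the first script under the JIT's verbose mode, e.g. luajit -jv callback-bench.lua, and look for NYI / trace-abort messages around the FFI calls.)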


An extra tidbit: I had to download and build LuaJIT from source on a couple of environments to get the same version everywhere. It was a bit of a small-pause moment to realize how fast the ARM system was (you did read the last entry above, right? 😺) in comparison to the rest of the systems.

Using make -j32, I saw:

Contabo (2 cores):

real    0m55.110s
user    1m30.614s
sys     0m9.229s

AWS t2-micro (1 core):

real    0m32.216s
user    0m30.347s
sys     0m1.758s

Google Cloud shell session (4 cores):

real    0m14.639s
user    0m45.821s
sys     0m4.245s

Oracle Cloud VM.Standard.A1.Flex (4 cores):

real    0m5.815s
user    0m20.859s
sys     0m0.605s

It's been very interesting to realize what sorts of resources are being made available for free (with the Ampere offering).

It's very reasonable to posit that the current situation only exists to bootstrap developers' interest, and may change once there is sufficient mindshare (and tenancy!) saturation - but if this is the sort of performance on offer, once that mindshare has been established, it'll rapidly become entrenched and demand will only increase.

Scaling 4 cores and 24GB RAM out for free is actually kind of interesting - that's significant enough that, if the pricing structure *were* to change (c'est la vie Amazon Cloud Drive), other provider(s) would likely be able to step up and maintain this new status quo, because of the favorable economies of scale (the Ampere compute shape scales out to 80 cores and 512GB RAM).

This has been unexpectedly educational.

@cipri-tom (Author)

Thanks for the detailed analysis and for running it on very different architectures!
Really nice to see ARM in there too; we can only expect it to get better, indeed.

I had no idea people were still interested in this benchmark. I revisited the answers to the question and find that, indeed, this is measuring the CPU more than anything else. I really recommend people take Josh's suggestion and benchmark everything in context, since this empty benchmark may not carry over to one's real scenario.

But other than that, we can see that the performance is about the same regardless of the direction (Lua -> C, C -> Lua) except on ARM, so I'd write for readability and ease of use first, and only move code to the other side if it turns out to be a bottleneck.
