Skip to content

Instantly share code, notes, and snippets.

@ruoshan
Last active August 26, 2018 22:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ruoshan/11168592 to your computer and use it in GitHub Desktop.
Save ruoshan/11168592 to your computer and use it in GitHub Desktop.
better way to (un)pack double in luajit
local ffi = require "ffi"
local bit = require "bit"
local memcopy = ffi.copy
local string = require "string"
local s_reverse = string.reverse
local s_sub = string.sub
local s_byte = string.byte
local math = require "math"
local floor = math.floor
local frexp = math.frexp
local ldexp = math.ldexp
local huge = math.huge
-- Module-level scratch buffers shared by the encode/decode helpers below.
-- They are allocated once and reused on every call, so each call overwrites
-- whatever the previous call left in them.
local double = ffi.new("double [1]") -- one-element double buffer
local uint8_ptr = ffi.cast("uint8_t *", double) -- byte view of `double`, so it can be written byte-by-byte
local double_t = ffi.typeof("double [1]") -- ctype for a fresh buffer; in the code shown it is only named in a commented-out line
local double_u = ffi.new("union {double d; uint64_t i; char c[8];}") -- same 8 bytes viewed as a double, a 64-bit integer, or raw chars
local _M = {}
--- Decode a double whose 8 bytes are stored in reverse of the host byte order.
-- The bytes are copied into the shared union, the 64-bit integer view is
-- byte-swapped in one step, and the result is read back through the double view.
-- Needs a `bit.bswap` that accepts 64-bit cdata (LuaJIT 2.1).
-- @param cstr source of at least 8 bytes, anything `ffi.copy` accepts as a source
-- @return the decoded number
function _M.double_bswap(cstr)
  local u = double_u
  memcopy(u.c, cstr, 8)
  local swapped = bit.bswap(u.i)
  u.i = swapped
  return u.d
end
--- Encode a number as an 8-byte string, reversed relative to host byte order.
-- The value is written into the shared double buffer, its raw bytes are taken
-- as a Lua string, and that string is reversed.
-- @param number value to encode
-- @return 8-byte Lua string
function _M.double_encode(number)
  double[0] = number
  local raw = ffi.string(double, 8)
  return s_reverse(raw)
end
--- Decode a double from the first 8 bytes of a Lua string (string-based path).
-- Takes the leading 8 bytes, reverses them, and copies them into the shared
-- module-level double buffer. The shared buffer is used instead of allocating
-- a fresh `double [1]` per call, to avoid keeping the GC busy.
-- @param str Lua string holding the encoded bytes
-- @return the decoded number
function _M.double_slow(str)
  local head = s_sub(str, 1, 8)
  local flipped = s_reverse(head)
  memcopy(double, flipped, 8)
  return double[0]
end
--- Decode a double from an 8-byte FFI buffer by copying its bytes in reverse.
-- Source byte 7 goes to destination byte 0, byte 6 to byte 1, and so on, into
-- the byte view of the shared double buffer.
-- The eight copies are unrolled by hand: the author observed no automatic loop
-- expansion for the equivalent `for i=0,7` loop and measured this form as faster.
-- @param cstr cdata that supports pointer arithmetic (e.g. `uint8_t [?]`), at least 8 bytes
-- @return the decoded number
function _M.double_improved(cstr)
  local dst = uint8_ptr
  memcopy(dst, cstr + 7, 1)
  memcopy(dst + 1, cstr + 6, 1)
  memcopy(dst + 2, cstr + 5, 1)
  memcopy(dst + 3, cstr + 4, 1)
  memcopy(dst + 4, cstr + 3, 1)
  memcopy(dst + 5, cstr + 2, 1)
  memcopy(dst + 6, cstr + 1, 1)
  memcopy(dst + 7, cstr, 1)
  return double[0]
end
--- Decode a double from the first 8 bytes of a Lua string using arithmetic only.
-- The bytes are read most-significant first: 1 sign bit, 11 exponent bits,
-- then 52 mantissa bits (IEEE 754 binary64 layout).
--
-- Handles every encoding class:
--   * exponent 0x7FF, mantissa 0      -> +/- infinity
--   * exponent 0x7FF, mantissa ~= 0   -> NaN (sign is not preserved)
--   * exponent 0                      -> signed zero and subnormals
--   * anything else                   -> normal numbers
--
-- @param str Lua string with at least 8 bytes
-- @return the decoded number
function _M.double_fast(str)
  local b1, b2, b3, b4, b5, b6, b7, b8 = s_byte(str, 1, 8)
  local sign = b1 > 0x7F and -1 or 1
  -- 7 low bits of byte 1 plus the high nibble of byte 2 form the exponent.
  local expo = (b1 % 0x80) * 0x10 + floor(b2 / 0x10)
  -- Low nibble of byte 2 plus bytes 3..8 form the 52-bit mantissa; the value
  -- stays below 2^52, so it is exact in a double.
  local mant = ((((((b2 % 0x10) * 0x100 + b3) * 0x100 + b4) * 0x100 + b5) * 0x100 + b6) * 0x100 + b7) * 0x100 + b8

  if expo == 0x7FF then
    if mant == 0 then
      return sign * huge
    end
    return 0.0 / 0.0
  end

  if expo == 0 then
    -- Zero and subnormals have no implicit leading 1 and a fixed exponent of
    -- -1022, so the value is mant * 2^-52 * 2^-1022 = mant * 2^-1074.
    -- With mant == 0 this yields +0.0 or -0.0 depending on the sign.
    return sign * ldexp(mant, -1074)
  end

  -- Normal number: implicit leading 1, exponent biased by 0x3FF.
  return sign * ldexp(1.0 + mant / 0x10000000000000, expo - 0x3FF)
end
-- TEST
-- Micro-benchmark: encode one sample value, mirror the encoded bytes into an
-- FFI byte buffer, decode it 1e8 times with the decoder selected in the loop,
-- then print the last decoded value.
local profiler = require "jit.p" -- only used if the start/stop calls below are uncommented
--profiler.start("a")
local d_str = _M.double_encode(23.3333)
local len = #d_str
local d_cstr = ffi.new("uint8_t [?]", len)
ffi.copy(d_cstr, d_str, len)
local n
for _ = 1, 1e8 do
  -- Uncomment exactly one decoder to benchmark it.
  --n = _M.double_improved(d_cstr)
  --n = _M.double_fast(d_str)
  --n = _M.double_slow(d_str)
  n = _M.double_bswap(d_cstr) -- new version using bit.bswap; it works on 64-bit values in LuaJIT 2.1
end
--profiler.stop()
print(n)
@ruoshan
Copy link
Author

ruoshan commented Apr 24, 2014

Thanks to the helpful people on the LuaJIT mailing list.
Here are some points from them:

  • A lot of time can be saved by using an FFI `uint8_t []` buffer instead of calling string.sub on the buffer string.

  • The double_fast call in the test loop is actually executed only once after JIT compilation (the loop body is empty in the trace). Check out the machine code (luajit -jdump=m):

    ->LOOP:
    f125fff0  add ebp, +0x01
    f125fff3  cmp ebp, 0x05f5e100
    f125fff9  jle 0xf125fff0    ->LOOP
    f125fffb  jmp 0xf1250028    ->6
    

@ruoshan
Copy link
Author

ruoshan commented Apr 24, 2014

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment