ruoshan/luajit-double-unpacking.lua

## luajit-double-unpacking.lua
local ffi       = require "ffi"
local bit       = require "bit"
local memcopy   = ffi.copy
local string    = require "string"
local s_reverse = string.reverse
local s_sub     = string.sub
local s_byte    = string.byte
local math      = require "math"
local floor     = math.floor
local frexp     = math.frexp
local ldexp     = math.ldexp
local huge      = math.huge

-- Module-level FFI scratch objects. They are allocated once here and shared
-- by the conversion functions below instead of being created per call.
-- One-element double array used as the conversion target/source.
local double = ffi.new("double [1]")
local uint8_ptr = ffi.cast("uint8_t *", double)  -- byte-wise view of `double`'s storage, so it can be filled one byte at a time
-- ctype for a one-element double array; in this view it is only mentioned
-- from a commented-out line inside double_slow.
local double_t = ffi.typeof("double [1]")
-- The same 8 bytes exposed as a double (d), a 64-bit integer (i) and raw bytes (c).
local double_u = ffi.new("union {double d; uint64_t i; char c[8];}")

-- Table that collects the encode/decode functions defined below.
local _M = {}

--- Decode 8 bytes into a double by byte-swapping them as one 64-bit integer.
-- The bytes are copied into the shared union, swapped with bit.bswap and then
-- read back through the union's double member. The swap is unconditional, so
-- the input is expected in the byte order opposite to the host's.
-- bit.bswap must accept 64-bit cdata here (the benchmark note in this file
-- attributes that to LuaJIT 2.1).
-- @param cstr source of at least 8 bytes, in any form ffi.copy accepts
-- @return the decoded number
function _M.double_bswap(cstr)
    local scratch = double_u
    memcopy(scratch.c, cstr, 8)
    local swapped = bit.bswap(scratch.i)
    scratch.i = swapped
    return scratch.d
end

--- Encode a number as an 8-byte Lua string.
-- The number is stored in the shared scratch double, its 8 raw bytes are taken
-- as a string, and that string is returned with the byte order reversed.
-- @param number value to encode
-- @return 8-byte string: the host representation of `number`, reversed
function _M.double_encode(number)
    double[0] = number
    return s_reverse(ffi.string(double, 8))
end

--- Decode the first 8 bytes of a Lua string via string.sub + string.reverse.
-- The reversed bytes are copied into the shared scratch double. An earlier
-- variant allocated a fresh `double_t()` per call; the shared scratch is used
-- instead to avoid keeping the GC busy.
-- @param str Lua string holding at least 8 bytes
-- @return the decoded number
function _M.double_slow(str)
    local reversed = s_reverse(s_sub(str, 1, 8))
    memcopy(double, reversed, 8)
    return double[0]
end

--- Decode 8 bytes from an FFI byte buffer by copying them in reverse order.
-- Source byte 7 lands in scratch byte 0, source byte 6 in scratch byte 1, and
-- so on; the scratch double is then read back.
-- The copies are unrolled by hand on purpose: the original author measured the
-- equivalent `for i = 0, 7` loop as slower.
-- @param cstr FFI pointer/array supporting pointer arithmetic (a uint8_t
--   buffer in this file's benchmark), with at least 8 readable bytes
-- @return the decoded number
function _M.double_improved(cstr)
    local dst, src = uint8_ptr, cstr
    memcopy(dst,     src + 7, 1)
    memcopy(dst + 1, src + 6, 1)
    memcopy(dst + 2, src + 5, 1)
    memcopy(dst + 3, src + 4, 1)
    memcopy(dst + 4, src + 3, 1)
    memcopy(dst + 5, src + 2, 1)
    memcopy(dst + 6, src + 1, 1)
    memcopy(dst + 7, src,     1)
    return double[0]
end

--- Decode an 8-byte string into a number using plain Lua arithmetic (no FFI).
-- The first byte carries the sign bit and the top 7 exponent bits, the high
-- nibble of the second byte the remaining 4 exponent bits, and the other 52
-- bits form the mantissa (IEEE-754 binary64, most significant byte first).
-- Handles signed zero, subnormals, normal numbers, +/-infinity and NaN.
-- @param str Lua string; its first 8 bytes are decoded
-- @return the decoded number
function _M.double_fast(str)
    local b1, b2, b3, b4, b5, b6, b7, b8 = s_byte(str, 1, 8)
    local sign = (b1 > 0x7F) and -1 or 1
    local expo = (b1 % 0x80) * 0x10 + floor(b2 / 0x10)

    -- Assemble the 52-bit mantissa; every intermediate value is an exact
    -- integer below 2^52.
    local mant = (b2 % 0x10) * 0x100 + b3
    mant = mant * 0x100 + b4
    mant = mant * 0x100 + b5
    mant = mant * 0x100 + b6
    mant = mant * 0x100 + b7
    mant = mant * 0x100 + b8

    if expo == 0x7FF then
        -- All-ones exponent: infinity when the mantissa is zero, NaN otherwise.
        if mant == 0 then
            return sign * huge
        end
        return 0.0 / 0.0
    end

    if expo == 0 then
        -- Zero and subnormals have no implicit leading 1 and a fixed exponent
        -- of -1022, i.e. value = mant * 2^-52 * 2^-1022 = mant * 2^-1074.
        -- A zero mantissa yields +/-0.
        return sign * ldexp(mant, -1074)
    end

    -- Normal number: implicit leading 1, exponent bias 0x3FF.
    return sign * ldexp(1.0 + mant / 0x10000000000000, expo - 0x3FF)
end


-- TEST
-- Benchmark driver: encodes one sample value, copies the encoded bytes into a
-- uint8_t buffer and decodes it 1e8 times with the variant that is left
-- uncommented in the loop. The last decoded value is printed.
local profiler = require "jit.p"
--profiler.start("a")
local packed = _M.double_encode(23.3333)
local packed_len = #packed
local packed_buf = ffi.new("uint8_t [?]", packed_len)
ffi.copy(packed_buf, packed, packed_len)
local result
for _ = 1, 1e8 do
    --result = _M.double_improved(packed_buf)
    --result = _M.double_fast(packed)
    --result = _M.double_slow(packed)
    -- bit.bswap variant; relies on bit.bswap handling 64-bit values (LuaJIT 2.1).
    result = _M.double_bswap(packed_buf)
end
--profiler.stop()
print(result)
	local ffi = require "ffi"
	local bit = require "bit"
	local memcopy = ffi.copy
	local string = require "string"
	local s_reverse = string.reverse
	local s_sub = string.sub
	local s_byte = string.byte
	local math = require "math"
	local floor = math.floor
	local frexp = math.frexp
	local ldexp = math.ldexp
	local huge = math.huge

	-- Module-level FFI scratch objects. They are allocated once here and shared
	-- by the conversion functions below instead of being created per call.
	-- One-element double array used as the conversion target/source.
	local double = ffi.new("double [1]")
	local uint8_ptr = ffi.cast("uint8_t *", double) -- byte-wise view of `double`'s storage, so it can be filled one byte at a time
	-- ctype for a one-element double array; in this view it is only mentioned
	-- from a commented-out line inside double_slow.
	local double_t = ffi.typeof("double [1]")
	-- The same 8 bytes exposed as a double (d), a 64-bit integer (i) and raw bytes (c).
	local double_u = ffi.new("union {double d; uint64_t i; char c[8];}")

	-- Table that collects the encode/decode functions defined below.
	local _M = {}

	--- Decode 8 bytes into a double by byte-swapping them as one 64-bit integer.
	-- The bytes are copied into the shared union, swapped with bit.bswap and then
	-- read back through the union's double member. The swap is unconditional, so
	-- the input is expected in the byte order opposite to the host's.
	-- bit.bswap must accept 64-bit cdata here (the benchmark note in this file
	-- attributes that to LuaJIT 2.1).
	-- @param cstr source of at least 8 bytes, in any form ffi.copy accepts
	-- @return the decoded number
	function _M.double_bswap(cstr)
		local scratch = double_u
		memcopy(scratch.c, cstr, 8)
		local swapped = bit.bswap(scratch.i)
		scratch.i = swapped
		return scratch.d
	end

	--- Encode a number as an 8-byte Lua string.
	-- The number is stored in the shared scratch double, its 8 raw bytes are taken
	-- as a string, and that string is returned with the byte order reversed.
	-- @param number value to encode
	-- @return 8-byte string: the host representation of `number`, reversed
	function _M.double_encode(number)
		double[0] = number
		return s_reverse(ffi.string(double, 8))
	end

	--- Decode the first 8 bytes of a Lua string via string.sub + string.reverse.
	-- The reversed bytes are copied into the shared scratch double. An earlier
	-- variant allocated a fresh `double_t()` per call; the shared scratch is used
	-- instead to avoid keeping the GC busy.
	-- @param str Lua string holding at least 8 bytes
	-- @return the decoded number
	function _M.double_slow(str)
		local reversed = s_reverse(s_sub(str, 1, 8))
		memcopy(double, reversed, 8)
		return double[0]
	end

	--- Decode 8 bytes from an FFI byte buffer by copying them in reverse order.
	-- Source byte 7 lands in scratch byte 0, source byte 6 in scratch byte 1, and
	-- so on; the scratch double is then read back.
	-- The copies are unrolled by hand on purpose: the original author measured the
	-- equivalent `for i = 0, 7` loop as slower.
	-- @param cstr FFI pointer/array supporting pointer arithmetic (a uint8_t
	--   buffer in this file's benchmark), with at least 8 readable bytes
	-- @return the decoded number
	function _M.double_improved(cstr)
		local dst, src = uint8_ptr, cstr
		memcopy(dst,     src + 7, 1)
		memcopy(dst + 1, src + 6, 1)
		memcopy(dst + 2, src + 5, 1)
		memcopy(dst + 3, src + 4, 1)
		memcopy(dst + 4, src + 3, 1)
		memcopy(dst + 5, src + 2, 1)
		memcopy(dst + 6, src + 1, 1)
		memcopy(dst + 7, src,     1)
		return double[0]
	end

	--- Decode an 8-byte string into a number using plain Lua arithmetic (no FFI).
	-- The first byte carries the sign bit and the top 7 exponent bits, the high
	-- nibble of the second byte the remaining 4 exponent bits, and the other 52
	-- bits form the mantissa (IEEE-754 binary64, most significant byte first).
	-- Handles signed zero, subnormals, normal numbers, +/-infinity and NaN.
	-- @param str Lua string; its first 8 bytes are decoded
	-- @return the decoded number
	function _M.double_fast(str)
		local b1, b2, b3, b4, b5, b6, b7, b8 = s_byte(str, 1, 8)
		local sign = (b1 > 0x7F) and -1 or 1
		local expo = (b1 % 0x80) * 0x10 + floor(b2 / 0x10)

		-- Assemble the 52-bit mantissa; every intermediate value is an exact
		-- integer below 2^52.
		local mant = (b2 % 0x10) * 0x100 + b3
		mant = mant * 0x100 + b4
		mant = mant * 0x100 + b5
		mant = mant * 0x100 + b6
		mant = mant * 0x100 + b7
		mant = mant * 0x100 + b8

		if expo == 0x7FF then
			-- All-ones exponent: infinity when the mantissa is zero, NaN otherwise.
			if mant == 0 then
				return sign * huge
			end
			return 0.0 / 0.0
		end

		if expo == 0 then
			-- Zero and subnormals have no implicit leading 1 and a fixed exponent
			-- of -1022, i.e. value = mant * 2^-52 * 2^-1022 = mant * 2^-1074.
			-- A zero mantissa yields +/-0.
			return sign * ldexp(mant, -1074)
		end

		-- Normal number: implicit leading 1, exponent bias 0x3FF.
		return sign * ldexp(1.0 + mant / 0x10000000000000, expo - 0x3FF)
	end


	-- TEST
	-- Benchmark driver: encodes one sample value, copies the encoded bytes into a
	-- uint8_t buffer and decodes it 1e8 times with the variant that is left
	-- uncommented in the loop. The last decoded value is printed.
	local profiler = require "jit.p"
	--profiler.start("a")
	local packed = _M.double_encode(23.3333)
	local packed_len = #packed
	local packed_buf = ffi.new("uint8_t [?]", packed_len)
	ffi.copy(packed_buf, packed, packed_len)
	local result
	for _ = 1, 1e8 do
		--result = _M.double_improved(packed_buf)
		--result = _M.double_fast(packed)
		--result = _M.double_slow(packed)
		-- bit.bswap variant; relies on bit.bswap handling 64-bit values (LuaJIT 2.1).
		result = _M.double_bswap(packed_buf)
	end
	--profiler.stop()
	print(result)