edubart/mathx.lua

## mathx.lua
##[[
if GLSL then
  primtypes.number = primtypes.float32
  primtypes.integer = primtypes.cint
  primtypes.uinteger = primtypes.cuint
else
  primtypes.number = primtypes.float32
  primtypes.integer = primtypes.int32
  primtypes.uinteger = primtypes.uint32
end
]]

global float = @number
global int = @integer
global uint = @uinteger

local is_scalar = #[concept(function(x) return x.type.is_scalar end)]#

require 'math'

function math.rsqrt(x: is_scalar) <inline,nosideeffect>
  return 1.0 / sqrt(x)
end

-- Returns 0 if x < edge, and 1 otherwise.
function math.step(edge: is_scalar, x: is_scalar) <inline,nosideeffect>
  x = x < edge and 0 or 1
  return x
end

function math.fastexp(x: float32) <inline,nosideeffect>
  -- Fast exp (imprecise)
  -- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
  local u: union {f: float32, x: int32} = {x = (@int32)(12102203 * x) + 1064866805}
  return u.f
end

function math.fastpow(a: float32, b: float32): float32 <inline,nosideeffect>
  -- Fast pow (precise)
  -- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
  local flip: boolean = b < 0
  b = flip and -b or b
  local e: int32 = (@int32)(b)
  local u: union{f: float32, x: int32} = {f = a}
  u.x = (@int32)((b - e)*(u.x - 1065353216) + 1065353216)
  local r: float32 = 1.0
  ## for i=1,32 do
    if e == 0 then goto finish end
    r = e & 1 ~= 0 and r*a or r
    a = a * a
    e = e >>> 1
  ## end
::finish::
  r = r * u.f
  return flip and 1.0/r or r
end

function math.fasterpow(a: float32, b: float32): float32 <inline,nosideeffect>
  -- Very fast pow (imprecise)
  -- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
  local u: union {f: float32, x: int32} = {f = a}
  u.x = (@int32)(b * (u.x - 1064866805) + 1064866805)
  return u.f
end

function math.fastcbrt(a: float32): float32 <inline,nosideeffect>
  -- Cube root approximation using 1 iterations of Halley's method
  if unlikely(a == 0.0) then return 0.0 end
  local u: union{f: float32, x: uint32} = {f = a}
  u.x = u.x /// 3_u32 + 709921077_u32
  local x: float32 = u.f -- initial guess
  local x3: float32 = x*x*x
  x = u.f * (x3 + 2.0*a) / (2.0*x3 + a)
  x3 = x*x*x
  return x * (x3 + 2.0*a) / (2.0*x3 + a)
end

function math.fastercbrt(a: float32): float32 <inline,nosideeffect>
  if unlikely(a == 0.0) then return 0.0 end
  -- Cube root approximation using 2 iterations of Halley's method
  local u: union{f: float32, x: uint32} = {f = a}
  u.x = u.x /// 3_u32 + 709921077_u32
  local x: float32 = u.f -- initial guess
  local x3: float32 = x*x*x
  return x * (x3 + 2.0*a) / (2.0*x3 + a)
end

-- Convert float bit representation to an unsigned integer.
function math.floatbits2uint(x: float): uint <inline,nosideeffect>
  local u: union{f: float, i: uint} = {f = x}
  return u.i
end

-- Convert float bit representation to an integer.
function math.floatbits2int(x: float): int <inline,nosideeffect>
  local u: union{f: float, i: int} = {f = x}
  return u.i
end

## end

-- Returns -1 if x < 0, and 1 otherwise.
function math.signb(x: is_scalar) <inline,nosideeffect>
  return x < 0 and -1 or 1
end

function math.floorround(x: is_scalar) <inline,nosideeffect>
  return math.floor(x + 0.5)
end

-- Returns cos(acos(x)/3.0) but optimized, usually used to solve cubic equations.
-- Reference: https://iquilezles.org/www/articles/trisect/trisect.htm
function math.trisect(x: float): float <inline,nosideeffect>
  x = math.sqrt(0.5+0.5*x)
  return x*(x*(x*(x*-0.008972+0.039071)-0.107074)+0.576975)+0.5
end

-- Like `trisect`, but faster losing precision.
-- Reference: https://www.shadertoy.com/view/WltSD7
function math.fasttrisect(x: float): float <inline,nosideeffect>
  x = math.sqrt(0.5+0.5*x)
  return x*(-0.064916*x+0.564916)+0.5
end

-- Returns arccosine of an scalar.
-- Handbook of Mathematical Functions - M. Abramowitz and I.A. Stegun, Ed.
-- Reference: https://developer.download.nvidia.com/cg/acos.html
function math.fastacos(x: float): float <inline,nosideeffect>
  local negate: float = x < 0.0 and 1.0 or 0.0
  x = math.abs(x)
  local ret: float = -0.0187293
  ret = ret * x
  ret = ret + 0.0742610
  ret = ret * x
  ret = ret - 0.2121144
  ret = ret * x
  ret = ret + 1.5707288
  ret = ret * math.sqrt(1.0-x)
  ret = ret - 2.0 * negate * ret
  return negate * 3.14159265358979 + ret
end

-- Returns arccosine of an scalar. (imprecise)
-- Approximation by Sebastien Lagarde.
-- Reference: https://www.shadertoy.com/view/lsjXDc
function math.fasteracos(x: float): float <inline,nosideeffect>
  local y: float = math.abs(math.clamp(x,-1.0,1.0))
  local b: float = (-0.168577*y + 1.56723) * math.sqrt(1.0 - y)
  local t: float = math.sign(x)
  local a: float = 0.5*3.1415927
  return a * (1.0-t) + b * t
end

-- Reference: https://www.shadertoy.com/view/7d23D1
function math.fastsin(x: float): float <inline,nosideeffect>
  -- Quartic sin approximation (precise), using the following constrains:
  -- f(0) = 0, f(pi) = 0, f(pi/2) = 1, f'(0) = 1, f'(pi) = -1, f'(pi/2) = 0
  local line = x*#[1/math.pi]#
  local stair = math.floor(line)
  local saw = line - stair
  local wave = saw*(saw*(saw*(saw*#[16 - 4*math.pi]# + #[8*math.pi - 32]#) + #[16 - 5*math.pi]#) + #[math.pi]#)
  local signal = (1.0-2.0*(stair - 2.0*math.floor(0.5*line)))
  return signal*wave
end

function math.fastcos(x: float): float <inline,nosideeffect>
  return math.fastsin(x + #[math.pi/2]#)
end

function math.qsin(x: float): float <inline,nosideeffect>
  -- Quadratic sin approximation (imprecise), using the following constrains:
  -- f(0) = 0, f(pi) = 0, f(pi/2) = 1
  local line = x*#[1/math.pi]#
  local stair = math.floor(line)
  local saw = line - stair
  local wave = 4.0*saw*(1.0-saw)
  local signal = (1.0-2.0*(stair - 2.0*math.floor(0.5*line)))
  return signal*wave
end

function math.qcos(x: float): float <inline,nosideeffect>
  return math.qsin(x + #[math.pi/2]#)
end
	##[[
	if GLSL then
	primtypes.number = primtypes.float32
	primtypes.integer = primtypes.cint
	primtypes.uinteger = primtypes.cuint
	else
	primtypes.number = primtypes.float32
	primtypes.integer = primtypes.int32
	primtypes.uinteger = primtypes.uint32
	end
	]]

	global float = @number
	global int = @integer
	global uint = @uinteger

	local is_scalar = #[concept(function(x) return x.type.is_scalar end)]#

	require 'math'

	function math.rsqrt(x: is_scalar) <inline,nosideeffect>
	return 1.0 / sqrt(x)
	end

	-- Returns 0 if x < edge, and 1 otherwise.
	function math.step(edge: is_scalar, x: is_scalar) <inline,nosideeffect>
	x = x < edge and 0 or 1
	return x
	end

	function math.fastexp(x: float32) <inline,nosideeffect>
	-- Fast exp (imprecise)
	-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
	local u: union {f: float32, x: int32} = {x = (@int32)(12102203 * x) + 1064866805}
	return u.f
	end

	function math.fastpow(a: float32, b: float32): float32 <inline,nosideeffect>
	-- Fast pow (precise)
	-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
	local flip: boolean = b < 0
	b = flip and -b or b
	local e: int32 = (@int32)(b)
	local u: union{f: float32, x: int32} = {f = a}
	u.x = (@int32)((b - e)*(u.x - 1065353216) + 1065353216)
	local r: float32 = 1.0
	## for i=1,32 do
	if e == 0 then goto finish end
	r = e & 1 ~= 0 and r*a or r
	a = a * a
	e = e >>> 1
	## end
	::finish::
	r = r * u.f
	return flip and 1.0/r or r
	end

	function math.fasterpow(a: float32, b: float32): float32 <inline,nosideeffect>
	-- Very fast pow (imprecise)
	-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c
	local u: union {f: float32, x: int32} = {f = a}
	u.x = (@int32)(b * (u.x - 1064866805) + 1064866805)
	return u.f
	end

	function math.fastcbrt(a: float32): float32 <inline,nosideeffect>
	-- Cube root approximation using 1 iterations of Halley's method
	if unlikely(a == 0.0) then return 0.0 end
	local u: union{f: float32, x: uint32} = {f = a}
	u.x = u.x /// 3_u32 + 709921077_u32
	local x: float32 = u.f -- initial guess
	local x3: float32 = xxx
	x = u.f * (x3 + 2.0a) / (2.0x3 + a)
	x3 = xxx
	return x * (x3 + 2.0a) / (2.0x3 + a)
	end

	function math.fastercbrt(a: float32): float32 <inline,nosideeffect>
	if unlikely(a == 0.0) then return 0.0 end
	-- Cube root approximation using 2 iterations of Halley's method
	local u: union{f: float32, x: uint32} = {f = a}
	u.x = u.x /// 3_u32 + 709921077_u32
	local x: float32 = u.f -- initial guess
	local x3: float32 = xxx
	return x * (x3 + 2.0a) / (2.0x3 + a)
	end

	-- Convert float bit representation to an unsigned integer.
	function math.floatbits2uint(x: float): uint <inline,nosideeffect>
	local u: union{f: float, i: uint} = {f = x}
	return u.i
	end

	-- Convert float bit representation to an integer.
	function math.floatbits2int(x: float): int <inline,nosideeffect>
	local u: union{f: float, i: int} = {f = x}
	return u.i
	end

	## end

	-- Returns -1 if x < 0, and 1 otherwise.
	function math.signb(x: is_scalar) <inline,nosideeffect>
	return x < 0 and -1 or 1
	end

	function math.floorround(x: is_scalar) <inline,nosideeffect>
	return math.floor(x + 0.5)
	end

	-- Returns cos(acos(x)/3.0) but optimized, usually used to solve cubic equations.
	-- Reference: https://iquilezles.org/www/articles/trisect/trisect.htm
	function math.trisect(x: float): float <inline,nosideeffect>
	x = math.sqrt(0.5+0.5*x)
	return x(x(x(x-0.008972+0.039071)-0.107074)+0.576975)+0.5
	end

	-- Like `trisect`, but faster losing precision.
	-- Reference: https://www.shadertoy.com/view/WltSD7
	function math.fasttrisect(x: float): float <inline,nosideeffect>
	x = math.sqrt(0.5+0.5*x)
	return x(-0.064916x+0.564916)+0.5
	end

	-- Returns arccosine of an scalar.
	-- Handbook of Mathematical Functions - M. Abramowitz and I.A. Stegun, Ed.
	-- Reference: https://developer.download.nvidia.com/cg/acos.html
	function math.fastacos(x: float): float <inline,nosideeffect>
	local negate: float = x < 0.0 and 1.0 or 0.0
	x = math.abs(x)
	local ret: float = -0.0187293
	ret = ret * x
	ret = ret + 0.0742610
	ret = ret * x
	ret = ret - 0.2121144
	ret = ret * x
	ret = ret + 1.5707288
	ret = ret * math.sqrt(1.0-x)
	ret = ret - 2.0 * negate * ret
	return negate * 3.14159265358979 + ret
	end

	-- Returns arccosine of an scalar. (imprecise)
	-- Approximation by Sebastien Lagarde.
	-- Reference: https://www.shadertoy.com/view/lsjXDc
	function math.fasteracos(x: float): float <inline,nosideeffect>
	local y: float = math.abs(math.clamp(x,-1.0,1.0))
	local b: float = (-0.168577y + 1.56723) math.sqrt(1.0 - y)
	local t: float = math.sign(x)
	local a: float = 0.5*3.1415927
	return a * (1.0-t) + b * t
	end

	-- Reference: https://www.shadertoy.com/view/7d23D1
	function math.fastsin(x: float): float <inline,nosideeffect>
	-- Quartic sin approximation (precise), using the following constrains:
	-- f(0) = 0, f(pi) = 0, f(pi/2) = 1, f'(0) = 1, f'(pi) = -1, f'(pi/2) = 0
	local line = x*#[1/math.pi]#
	local stair = math.floor(line)
	local saw = line - stair
	local wave = saw(saw(saw(saw#[16 - 4math.pi]# + #[8math.pi - 32]#) + #[16 - 5*math.pi]#) + #[math.pi]#)
	local signal = (1.0-2.0(stair - 2.0math.floor(0.5*line)))
	return signal*wave
	end

	function math.fastcos(x: float): float <inline,nosideeffect>
	return math.fastsin(x + #[math.pi/2]#)
	end

	function math.qsin(x: float): float <inline,nosideeffect>
	-- Quadratic sin approximation (imprecise), using the following constrains:
	-- f(0) = 0, f(pi) = 0, f(pi/2) = 1
	local line = x*#[1/math.pi]#
	local stair = math.floor(line)
	local saw = line - stair
	local wave = 4.0saw(1.0-saw)
	local signal = (1.0-2.0(stair - 2.0math.floor(0.5*line)))
	return signal*wave
	end

	function math.qcos(x: float): float <inline,nosideeffect>
	return math.qsin(x + #[math.pi/2]#)
	end