ararslan/painfulkayak.jl

## painfulkayak.jl
# Based on the Jetfuel C# library

# Bits of 1/2π
const INV2PI = UInt64[
    (0x28be60db << 32) | 0x9391054a,
    (0x7f09d5f4 << 32) | 0x7d4d3770,
    (0x36d8a566 << 32) | 0x4f10e410,
    (0x7f9458ea << 32) | 0xf7aef158,
    (0x6dc91b8e << 32) | 0x909374b8,
    (0x01924bba << 32) | 0x82746487,
    (0x3f877ac7 << 32) | 0x2c4a69cf,
    (0xba208d7d << 32) | 0x4baed121,
    (0x3a671c09 << 32) | 0xad17df90,
    (0x4e64758e << 32) | 0x60d4ce7d,
    (0x272117e2 << 32) | 0xef7e4a0e,
    (0xc7fe25ff << 32) | 0xf7816603,
    (0xfbcbc462 << 32) | 0xd6829b47,
    (0xdb4d9fb3 << 32) | 0xc9f2c26d,
    (0xd3d18fd9 << 32) | 0xa797fa8b,
    (0x5d49eeb1 << 32) | 0xfaf97c5e,
    (0xcf41ce7d << 32) | 0xe294a4ba,
     0x9afed7ec << 32
]

# Bits of π/4
const PI04 = UInt64[
    (0xc90fdaa2 << 32) | 0x2168c234,
    (0xc4c6628b << 32) | 0x80dc1cd1
]

const TWOPOWER52 = 2^52

"""
    paynehanek(x) -> (quadrant, TwicePrecision{Float64}(hi, lo))

Reduce `x` using the Payne-Hanek method. This method is appropriate
for ``0 < x < ∞``. Returns the remainder of `x` divided by ``π/2``
as a tuple, where the first argument is the quadrant and the second
is a double-double value containing the upper and lower bits of the
remainder, `hi` and `lo`.
"""
function paynehanek(x::Float64)
    inbits = reinterpret(UInt64, x)
    exponent = ((inbits >> 52) & 0x7ff) - 1023

    # Convert to a fixed point representation
    inbits &= 0x000fffffffffffff
    inbits |= 0x0010000000000000

    # Normalize the input to be between 1/2 and 1
    exponent += 1
    inbits <<= 11

    # Based on the exponent, get a shifted copy of INV2PI
    idx = exponent >> 6
    shift = exponent - (idx << 6)

    if shift != 0
        shpi0 = idx == 0 ? UInt64(0) : INV2PI[idx] << shift
        shpi0 |= INV2PI[idx+1] >> (64 - shift)
        shpiA = (INV2PI[idx+1] << shift) | (INV2PI[idx+2] >> (64 - shift))
        shpiB = (INV2PI[idx+2] << shift) | (INV2PI[idx+3] >> (64 - shift))
    else
        shpi0 = (idx == 0) ? UInt(0) : INV2PI[idx]
        shpiA = INV2PI[idx+1]
        shpiB = INV2PI[idx+2]
    end

    # Multiply input by shpiA
    a = inbits >> 32
    b = inbits & 0xffffffff

    c = shpiA >> 32
    d = shpiA & 0xffffffff

    ac = a * c
    bd = b * d
    bc = b * c
    ad = a * d

    prodB = bd + (ad << 32)
    prodA = ac + (ad >> 32)

    bita = (bd & 0x8000000000000000) != 0
    bitb = (ad & 0x8000000000000000) != 0
    bitsum = (prodB & 0x8000000000000000) != 0

    # Carry
    prodA += (bita && bitb) || ((bita || bitb) && !bitsum)

    bita = (prodB & 0x8000000000000000) != 0
    bitb = (bc & 0x8000000000000000) != 0

    prodB += bc << 32
    prodA += bc >> 32

    bitsum = (prodB & 0x8000000000000000) != 0

    # Carry
    prodA += (bita && bitb) || ((bita || bitb) && !bitsum)

    # Multiply input by shipiB
    c = shpiB >> 32
    d = shpiB & 0xffffffff
    ac = a * c
    bc = b * c
    ad = a * d

    # Collect terms
    ac += (bc + ad) >> 32

    bita = (prodB & 0x8000000000000000) != 0
    bitb = (ac & 0x8000000000000000) != 0
    prodB += ac
    bitsum = (prodB & 0x8000000000000000) != 0

    # Carry
    prodA += (bita && bitb) || ((bita || bitb) && !bitsum)

    c = shpi0 >> 32
    d = shpi0 & 0xffffffff

    bd = b * d
    bc = b * c
    ad = a * d

    prodA += bd + ((bc + ad) << 32)

    # prodA, prodB now contain the remainder as a fraction of π. We
    # want that as a fraction of π/2, so use the following steps:
    # 1.) multiply by 4
    # 2.) do a fixed point multiply by π/4
    # 3.) convert to floating point
    # 4.) multiply by 2

    # This identifies the quadrant
    intpart = Int(prodA >> 62)

    # Multiply by 4
    prodA <<= 2
    prodA |= prodB >> 62
    prodB <<= 2

    # Multiply by π/4
    a = prodA >> 32
    b = prodA & 0xffffffff

    c = PIO4[1] >> 32
    d = PI04[1] & 0xffffffff

    ac = a * c
    bd = b * d
    bc = b * c
    ad = a * d

    prod2B = bd + (ad << 32)
    prod2A = ac + (ad >> 32)

    bita = (bd & 0x8000000000000000) != 0
    bitb = (ad & 0x8000000000000000) != 0
    bitsum = (prod2B & 0x8000000000000000) != 0

    # Carry
    prod2A += (bita && bitb) || ((bita || bitb) && !bitsum)

    bita = (prod2B & 0x8000000000000000) != 0
    bitb = (bc & 0x8000000000000000) != 0

    prod2B += bc << 32
    prod2A += bc >> 32

    bitsum = (prod2B & 0x8000000000000000) != 0

    # Carry
    prod2A += (bita && bitb) || ((bitb || bita) && !bitsum)

    # Multiply input by PIO4[2]
    c = PIO4[2] >> 32
    d = PIO4[2] & 0xffffffff
    ac = a * c
    bc = b * c
    ad = a * d

    # Collect terms
    ac += (bc + ad) >> 32

    bita = (prod2B & 0x8000000000000000) != 0
    bitb = (ac & 0x8000000000000000) != 0
    prod2B += ac
    bitsum = (prod2B & 0x8000000000000000) != 0

    # Carry
    prod2A += (bita && bitb) || ((bita || bitb) && !bitsum)

    # Multiply input by PIO4[1]
    a = prodB >> 32
    b = prodB & 0xffffffff
    c = PIO4[1] >> 32
    d = PIO4[1] & 0xffffffff
    ac = a * c
    bc = b * c
    ad = a * d

    # Collect terms
    ac += (bc + ad) >> 32

    bita = (prod2B & 0x8000000000000000) != 0
    bitb = (ac & 0x8000000000000000) != 0
    prod2B += ac
    bitsum = (prod2B & 0x8000000000000000) != 0

    # Carry
    prod2A += (bita && bitb) || ((bita || bitb) && !bitsum)

    # Convert back to Float64
    tmpA = (prod2A >> 12) / TWOPOWER52 # high order 52 bits
    tmpB = (((prod2A & 0xfff) << 40) + (prod2B >> 24)) / TWOPOWER52 / TWOPOWER52 # low bits

    sumA = tmpA + tmpB
    sumB = -(sumA - tmpA - tmpB)

    # Multiply by π/2 and return
    return (intpart, Base.TwicePrecision{Float64}(2sumA, 2sumB))
end
	# Based on the Jetfuel C# library

	# Bits of 1/2π
	const INV2PI = UInt64[
	(0x28be60db << 32) \| 0x9391054a,
	(0x7f09d5f4 << 32) \| 0x7d4d3770,
	(0x36d8a566 << 32) \| 0x4f10e410,
	(0x7f9458ea << 32) \| 0xf7aef158,
	(0x6dc91b8e << 32) \| 0x909374b8,
	(0x01924bba << 32) \| 0x82746487,
	(0x3f877ac7 << 32) \| 0x2c4a69cf,
	(0xba208d7d << 32) \| 0x4baed121,
	(0x3a671c09 << 32) \| 0xad17df90,
	(0x4e64758e << 32) \| 0x60d4ce7d,
	(0x272117e2 << 32) \| 0xef7e4a0e,
	(0xc7fe25ff << 32) \| 0xf7816603,
	(0xfbcbc462 << 32) \| 0xd6829b47,
	(0xdb4d9fb3 << 32) \| 0xc9f2c26d,
	(0xd3d18fd9 << 32) \| 0xa797fa8b,
	(0x5d49eeb1 << 32) \| 0xfaf97c5e,
	(0xcf41ce7d << 32) \| 0xe294a4ba,
	0x9afed7ec << 32
	]

	# Bits of π/4
	const PI04 = UInt64[
	(0xc90fdaa2 << 32) \| 0x2168c234,
	(0xc4c6628b << 32) \| 0x80dc1cd1
	]

	const TWOPOWER52 = 2^52

	"""
	paynehanek(x) -> (quadrant, TwicePrecision{Float64}(hi, lo))

	Reduce `x` using the Payne-Hanek method. This method is appropriate
	for ``0 < x < ∞``. Returns the remainder of `x` divided by ``π/2``
	as a tuple, where the first argument is the quadrant and the second
	is a double-double value containing the upper and lower bits of the
	remainder, `hi` and `lo`.
	"""
	function paynehanek(x::Float64)
	inbits = reinterpret(UInt64, x)
	exponent = ((inbits >> 52) & 0x7ff) - 1023

	# Convert to a fixed point representation
	inbits &= 0x000fffffffffffff
	inbits \|= 0x0010000000000000

	# Normalize the input to be between 1/2 and 1
	exponent += 1
	inbits <<= 11

	# Based on the exponent, get a shifted copy of INV2PI
	idx = exponent >> 6
	shift = exponent - (idx << 6)

	if shift != 0
	shpi0 = idx == 0 ? UInt64(0) : INV2PI[idx] << shift
	shpi0 \|= INV2PI[idx+1] >> (64 - shift)
	shpiA = (INV2PI[idx+1] << shift) \| (INV2PI[idx+2] >> (64 - shift))
	shpiB = (INV2PI[idx+2] << shift) \| (INV2PI[idx+3] >> (64 - shift))
	else
	shpi0 = (idx == 0) ? UInt(0) : INV2PI[idx]
	shpiA = INV2PI[idx+1]
	shpiB = INV2PI[idx+2]
	end

	# Multiply input by shpiA
	a = inbits >> 32
	b = inbits & 0xffffffff

	c = shpiA >> 32
	d = shpiA & 0xffffffff

	ac = a * c
	bd = b * d
	bc = b * c
	ad = a * d

	prodB = bd + (ad << 32)
	prodA = ac + (ad >> 32)

	bita = (bd & 0x8000000000000000) != 0
	bitb = (ad & 0x8000000000000000) != 0
	bitsum = (prodB & 0x8000000000000000) != 0

	# Carry
	prodA += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	bita = (prodB & 0x8000000000000000) != 0
	bitb = (bc & 0x8000000000000000) != 0

	prodB += bc << 32
	prodA += bc >> 32

	bitsum = (prodB & 0x8000000000000000) != 0

	# Carry
	prodA += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	# Multiply input by shipiB
	c = shpiB >> 32
	d = shpiB & 0xffffffff
	ac = a * c
	bc = b * c
	ad = a * d

	# Collect terms
	ac += (bc + ad) >> 32

	bita = (prodB & 0x8000000000000000) != 0
	bitb = (ac & 0x8000000000000000) != 0
	prodB += ac
	bitsum = (prodB & 0x8000000000000000) != 0

	# Carry
	prodA += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	c = shpi0 >> 32
	d = shpi0 & 0xffffffff

	bd = b * d
	bc = b * c
	ad = a * d

	prodA += bd + ((bc + ad) << 32)

	# prodA, prodB now contain the remainder as a fraction of π. We
	# want that as a fraction of π/2, so use the following steps:
	# 1.) multiply by 4
	# 2.) do a fixed point multiply by π/4
	# 3.) convert to floating point
	# 4.) multiply by 2

	# This identifies the quadrant
	intpart = Int(prodA >> 62)

	# Multiply by 4
	prodA <<= 2
	prodA \|= prodB >> 62
	prodB <<= 2

	# Multiply by π/4
	a = prodA >> 32
	b = prodA & 0xffffffff

	c = PIO4[1] >> 32
	d = PI04[1] & 0xffffffff

	ac = a * c
	bd = b * d
	bc = b * c
	ad = a * d

	prod2B = bd + (ad << 32)
	prod2A = ac + (ad >> 32)

	bita = (bd & 0x8000000000000000) != 0
	bitb = (ad & 0x8000000000000000) != 0
	bitsum = (prod2B & 0x8000000000000000) != 0

	# Carry
	prod2A += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	bita = (prod2B & 0x8000000000000000) != 0
	bitb = (bc & 0x8000000000000000) != 0

	prod2B += bc << 32
	prod2A += bc >> 32

	bitsum = (prod2B & 0x8000000000000000) != 0

	# Carry
	prod2A += (bita && bitb) \|\| ((bitb \|\| bita) && !bitsum)

	# Multiply input by PIO4[2]
	c = PIO4[2] >> 32
	d = PIO4[2] & 0xffffffff
	ac = a * c
	bc = b * c
	ad = a * d

	# Collect terms
	ac += (bc + ad) >> 32

	bita = (prod2B & 0x8000000000000000) != 0
	bitb = (ac & 0x8000000000000000) != 0
	prod2B += ac
	bitsum = (prod2B & 0x8000000000000000) != 0

	# Carry
	prod2A += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	# Multiply input by PIO4[1]
	a = prodB >> 32
	b = prodB & 0xffffffff
	c = PIO4[1] >> 32
	d = PIO4[1] & 0xffffffff
	ac = a * c
	bc = b * c
	ad = a * d

	# Collect terms
	ac += (bc + ad) >> 32

	bita = (prod2B & 0x8000000000000000) != 0
	bitb = (ac & 0x8000000000000000) != 0
	prod2B += ac
	bitsum = (prod2B & 0x8000000000000000) != 0

	# Carry
	prod2A += (bita && bitb) \|\| ((bita \|\| bitb) && !bitsum)

	# Convert back to Float64
	tmpA = (prod2A >> 12) / TWOPOWER52 # high order 52 bits
	tmpB = (((prod2A & 0xfff) << 40) + (prod2B >> 24)) / TWOPOWER52 / TWOPOWER52 # low bits

	sumA = tmpA + tmpB
	sumB = -(sumA - tmpA - tmpB)

	# Multiply by π/2 and return
	return (intpart, Base.TwicePrecision{Float64}(2sumA, 2sumB))
	end