Created
February 6, 2017 23:09
-
-
Save ararslan/961db8d286d225630f07c0729b67208e to your computer and use it in GitHub Desktop.
Payne-Hanek reduction based on the Jetfuel C# library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on the Jetfuel C# library | |
# Bits of 1/2π | |
const INV2PI = UInt64[ | |
(0x28be60db << 32) | 0x9391054a, | |
(0x7f09d5f4 << 32) | 0x7d4d3770, | |
(0x36d8a566 << 32) | 0x4f10e410, | |
(0x7f9458ea << 32) | 0xf7aef158, | |
(0x6dc91b8e << 32) | 0x909374b8, | |
(0x01924bba << 32) | 0x82746487, | |
(0x3f877ac7 << 32) | 0x2c4a69cf, | |
(0xba208d7d << 32) | 0x4baed121, | |
(0x3a671c09 << 32) | 0xad17df90, | |
(0x4e64758e << 32) | 0x60d4ce7d, | |
(0x272117e2 << 32) | 0xef7e4a0e, | |
(0xc7fe25ff << 32) | 0xf7816603, | |
(0xfbcbc462 << 32) | 0xd6829b47, | |
(0xdb4d9fb3 << 32) | 0xc9f2c26d, | |
(0xd3d18fd9 << 32) | 0xa797fa8b, | |
(0x5d49eeb1 << 32) | 0xfaf97c5e, | |
(0xcf41ce7d << 32) | 0xe294a4ba, | |
0x9afed7ec << 32 | |
] | |
# Bits of π/4 | |
const PI04 = UInt64[ | |
(0xc90fdaa2 << 32) | 0x2168c234, | |
(0xc4c6628b << 32) | 0x80dc1cd1 | |
] | |
const TWOPOWER52 = 2^52 | |
""" | |
paynehanek(x) -> (quadrant, TwicePrecision{Float64}(hi, lo)) | |
Reduce `x` using the Payne-Hanek method. This method is appropriate | |
for ``0 < x < ∞``. Returns the remainder of `x` divided by ``π/2`` | |
as a tuple, where the first argument is the quadrant and the second | |
is a double-double value containing the upper and lower bits of the | |
remainder, `hi` and `lo`. | |
""" | |
function paynehanek(x::Float64) | |
inbits = reinterpret(UInt64, x) | |
exponent = ((inbits >> 52) & 0x7ff) - 1023 | |
# Convert to a fixed point representation | |
inbits &= 0x000fffffffffffff | |
inbits |= 0x0010000000000000 | |
# Normalize the input to be between 1/2 and 1 | |
exponent += 1 | |
inbits <<= 11 | |
# Based on the exponent, get a shifted copy of INV2PI | |
idx = exponent >> 6 | |
shift = exponent - (idx << 6) | |
if shift != 0 | |
shpi0 = idx == 0 ? UInt64(0) : INV2PI[idx] << shift | |
shpi0 |= INV2PI[idx+1] >> (64 - shift) | |
shpiA = (INV2PI[idx+1] << shift) | (INV2PI[idx+2] >> (64 - shift)) | |
shpiB = (INV2PI[idx+2] << shift) | (INV2PI[idx+3] >> (64 - shift)) | |
else | |
shpi0 = (idx == 0) ? UInt(0) : INV2PI[idx] | |
shpiA = INV2PI[idx+1] | |
shpiB = INV2PI[idx+2] | |
end | |
# Multiply input by shpiA | |
a = inbits >> 32 | |
b = inbits & 0xffffffff | |
c = shpiA >> 32 | |
d = shpiA & 0xffffffff | |
ac = a * c | |
bd = b * d | |
bc = b * c | |
ad = a * d | |
prodB = bd + (ad << 32) | |
prodA = ac + (ad >> 32) | |
bita = (bd & 0x8000000000000000) != 0 | |
bitb = (ad & 0x8000000000000000) != 0 | |
bitsum = (prodB & 0x8000000000000000) != 0 | |
# Carry | |
prodA += (bita && bitb) || ((bita || bitb) && !bitsum) | |
bita = (prodB & 0x8000000000000000) != 0 | |
bitb = (bc & 0x8000000000000000) != 0 | |
prodB += bc << 32 | |
prodA += bc >> 32 | |
bitsum = (prodB & 0x8000000000000000) != 0 | |
# Carry | |
prodA += (bita && bitb) || ((bita || bitb) && !bitsum) | |
# Multiply input by shipiB | |
c = shpiB >> 32 | |
d = shpiB & 0xffffffff | |
ac = a * c | |
bc = b * c | |
ad = a * d | |
# Collect terms | |
ac += (bc + ad) >> 32 | |
bita = (prodB & 0x8000000000000000) != 0 | |
bitb = (ac & 0x8000000000000000) != 0 | |
prodB += ac | |
bitsum = (prodB & 0x8000000000000000) != 0 | |
# Carry | |
prodA += (bita && bitb) || ((bita || bitb) && !bitsum) | |
c = shpi0 >> 32 | |
d = shpi0 & 0xffffffff | |
bd = b * d | |
bc = b * c | |
ad = a * d | |
prodA += bd + ((bc + ad) << 32) | |
# prodA, prodB now contain the remainder as a fraction of π. We | |
# want that as a fraction of π/2, so use the following steps: | |
# 1.) multiply by 4 | |
# 2.) do a fixed point multiply by π/4 | |
# 3.) convert to floating point | |
# 4.) multiply by 2 | |
# This identifies the quadrant | |
intpart = Int(prodA >> 62) | |
# Multiply by 4 | |
prodA <<= 2 | |
prodA |= prodB >> 62 | |
prodB <<= 2 | |
# Multiply by π/4 | |
a = prodA >> 32 | |
b = prodA & 0xffffffff | |
c = PIO4[1] >> 32 | |
d = PI04[1] & 0xffffffff | |
ac = a * c | |
bd = b * d | |
bc = b * c | |
ad = a * d | |
prod2B = bd + (ad << 32) | |
prod2A = ac + (ad >> 32) | |
bita = (bd & 0x8000000000000000) != 0 | |
bitb = (ad & 0x8000000000000000) != 0 | |
bitsum = (prod2B & 0x8000000000000000) != 0 | |
# Carry | |
prod2A += (bita && bitb) || ((bita || bitb) && !bitsum) | |
bita = (prod2B & 0x8000000000000000) != 0 | |
bitb = (bc & 0x8000000000000000) != 0 | |
prod2B += bc << 32 | |
prod2A += bc >> 32 | |
bitsum = (prod2B & 0x8000000000000000) != 0 | |
# Carry | |
prod2A += (bita && bitb) || ((bitb || bita) && !bitsum) | |
# Multiply input by PIO4[2] | |
c = PIO4[2] >> 32 | |
d = PIO4[2] & 0xffffffff | |
ac = a * c | |
bc = b * c | |
ad = a * d | |
# Collect terms | |
ac += (bc + ad) >> 32 | |
bita = (prod2B & 0x8000000000000000) != 0 | |
bitb = (ac & 0x8000000000000000) != 0 | |
prod2B += ac | |
bitsum = (prod2B & 0x8000000000000000) != 0 | |
# Carry | |
prod2A += (bita && bitb) || ((bita || bitb) && !bitsum) | |
# Multiply input by PIO4[1] | |
a = prodB >> 32 | |
b = prodB & 0xffffffff | |
c = PIO4[1] >> 32 | |
d = PIO4[1] & 0xffffffff | |
ac = a * c | |
bc = b * c | |
ad = a * d | |
# Collect terms | |
ac += (bc + ad) >> 32 | |
bita = (prod2B & 0x8000000000000000) != 0 | |
bitb = (ac & 0x8000000000000000) != 0 | |
prod2B += ac | |
bitsum = (prod2B & 0x8000000000000000) != 0 | |
# Carry | |
prod2A += (bita && bitb) || ((bita || bitb) && !bitsum) | |
# Convert back to Float64 | |
tmpA = (prod2A >> 12) / TWOPOWER52 # high order 52 bits | |
tmpB = (((prod2A & 0xfff) << 40) + (prod2B >> 24)) / TWOPOWER52 / TWOPOWER52 # low bits | |
sumA = tmpA + tmpB | |
sumB = -(sumA - tmpA - tmpB) | |
# Multiply by π/2 and return | |
return (intpart, Base.TwicePrecision{Float64}(2sumA, 2sumB)) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
should probably be
or even simpler