Last active
February 27, 2020 13:01
-
-
Save Alhadis/3ab85958e046718a84463dde7d132bd9 to your computer and use it in GitHub Desktop.
IEEE 734.mjs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/** Marker returned as the representation (`r`) of a quiet NaN. */
export const qNaN = Symbol("qNaN");

/** Marker returned as the representation (`r`) of a signalling NaN. */
export const sNaN = Symbol("sNaN");

/**
 * Interpret the three fields of an IEEE 754 binary interchange format.
 *
 * Despite its name, this function goes from an encoding (sign, biased
 * exponent and trailing significand) to the value that encoding represents.
 *
 * Source: IEEE 754-2008, table 3.5 – “Binary interchange format parameters”
 *
 * The returned `r` (representation) is one of:
 *   - `[sign, exponent, significand]` for zeroes, subnormal and normal numbers;
 *   - `+Infinity` or `-Infinity` for infinities;
 *   - the {@link qNaN} or {@link sNaN} symbol for NaNs.
 *
 * The returned `v` is the JavaScript number closest to the represented value.
 * Both `r` and `v` are `undefined` if `E` lies outside the range of a w-bit field.
 *
 * @example encode(0, 0x7D, 0x400000).v === 0.375;
 * @param {Number} S - Sign (0 or 1)
 * @param {Number} E - Biased exponent (w bits)
 * @param {Number} T - Trailing significand field (t = p - 1 bits)
 * @param {Number} [size=32] - Either 16, 32, 64, or 128
 * @return {{r: (Number[]|Number|Symbol), v: Number}}
 * @throws {RangeError} If `size` is not one of the supported storage widths
 */
export function encode(S, E, T, size = 32){
	// Precision (p) and exponent field width (w) in bits, keyed by storage width.
	// A null prototype keeps inherited names such as "constructor" from matching.
	const format = {
		__proto__: null,
		16:  {p: 11,  w: 5},
		32:  {p: 24,  w: 8},
		64:  {p: 53,  w: 11},
		128: {p: 113, w: 15},
	}[size];
	if(undefined === format)
		throw new RangeError(`Unsupported format size: ${String(size)}`);

	const {p, w} = format;
	const t       = p - 1;              // Trailing significand field width in bits
	const allOnes = 2 ** w - 1;         // Exponent field reserved for NaN and infinity
	const bias    = 2 ** (w - 1) - 1;   // E - e
	const emin    = 1 - bias;           // Minimum exponent e
	const sign    = (-1) ** S;
	const ulp     = 2 ** (1 - p);       // Weight of the lowest bit of T
	let r; // Representation of the floating-point datum
	let v; // Value of the floating-point datum represented

	// NaN: the leading bit of T distinguishes quiet (1) from signalling (0)
	if(E === allOnes && 0 !== T){
		r = T >= 2 ** (t - 1) ? qNaN : sNaN;
		v = NaN;
	}

	// Infinity
	else if(E === allOnes && 0 === T)
		r = v = sign * Infinity;

	// Normal numbers (implicit leading significand bit of 1)
	else if(E >= 1 && E <= allOnes - 1){
		const significand = 1 + ulp * T;
		r = [S, E - bias, significand];
		v = sign * (2 ** (E - bias)) * significand;
	}

	// Subnormal numbers (implicit leading significand bit of 0)
	else if(0 === E && 0 !== T){
		const significand = ulp * T;
		r = [S, emin, significand];
		v = sign * (2 ** emin) * significand;
	}

	// Signed zero
	else if(0 === E && 0 === T){
		r = [S, emin, 0];
		v = sign * 0;
	}

	return {r, v};
}
/**
 * Turn a binary fraction back into a floating-point number.
 *
 * Bit `precision` of the input carries the weight 2⁻¹, the bit below it 2⁻²,
 * and so on down to bit 0, which carries the weight 2^-(precision + 1).
 *
 * @example bitsToFrac(0b11n << 62n) == 0.375;
 * @param {Number|BigInt} bits - Binary fraction returned by {@link fracToBits}
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number}
 */
export function bitsToFrac(bits, precision = 64){
	const field = BigInt(bits);
	const width = BigInt(precision);
	let sum = 0;
	for(let index = 0n; index <= width; ++index){
		const bit = (field >> (width - index)) & 1n;
		sum += Number(bit) * 2 ** -Number(index);
	}
	// The loop weighted the top bit as 2⁰; shift every weight down by one place
	return sum * 0.5;
}
/**
 * Express the fractional part of a number as a binary fraction.
 *
 * The first binary digit after the radix point is stored in bit `precision`,
 * the next one in the bit below it, and so on down to bit 0. Any integer
 * part of the input is discarded beforehand.
 *
 * @example fracToBits(0.375) == 0b11n << 62n;
 * @param {Number} frac - A floating-point value between 0 and 1
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number}
 */
export function fracToBits(frac, precision = 64){
	let remainder = frac % 1;
	const width = BigInt(precision);
	let bits = 0n;
	for(let index = 0n; remainder && index <= width; ++index){
		// Doubling moves the next binary digit into the integer position
		remainder *= 2;
		const digit = ~~remainder;
		bits |= BigInt(digit) << (width - index);
		remainder -= digit;
	}
	return Number(bits);
}
/**
 * Convert numbers to their 32-bit IEEE 754 (binary32) byte representation.
 *
 * Each value is rounded to the nearest representable binary32 value, with
 * ties going to the even significand (IEEE 754-2008 § 4.3.1). Magnitudes too
 * large for binary32 become infinities, and tiny ones become subnormals or
 * signed zeroes.
 *
 * NaN is always written as the bit pattern `0x7F800001`.
 *
 * @example float32ToBytes(0.375)   == [0x3E, 0xC0, 0x00, 0x00];
 * @example float32ToBytes(1 / 3)   == [0x3E, 0xAA, 0xAA, 0xAB];
 * @example float32ToBytes(Math.PI) == [0x40, 0x49, 0x0F, 0xDB];
 * @param {Number|Number[]} input - A single number, or an array-like list of them
 * @param {Boolean} [littleEndian=false] - Emit each value's bytes in reverse order
 * @return {Uint8Array} Four bytes per input value, in input order
 */
export function float32ToBytes(input, littleEndian = false){
	const values = "number" === typeof input ? [input] : input;
	const {length} = values;
	const bytes = new Uint8Array(length * 4);
	const view = new DataView(bytes.buffer);
	for(let i = 0; i < length; ++i){
		const float = Number(values[i]);

		// The NaN payload written by setFloat32() is implementation-defined,
		// so write a fixed bit pattern to keep the output stable everywhere
		if(Number.isNaN(float))
			view.setUint32(i * 4, 0x7F800001, littleEndian);

		// Normal, subnormal, zero and infinite values are handled natively
		else view.setFloat32(i * 4, float, littleEndian);
	}
	return bytes;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
describe("float32ToBytes()", () => {
	const {float32ToBytes} = utils;

	/**
	 * Assert that a number is serialised to the expected binary32 bytes:
	 * on its own in big-endian order, on its own in little-endian order,
	 * and twice in a row when passed as a two-element list.
	 *
	 * @param {Number} input - Value handed to float32ToBytes()
	 * @param {Number[]} expected - Big-endian bytes of the binary32 encoding
	 */
	const decode = (input, expected) => {
		expect(float32ToBytes(input)).to.eql(Uint8Array.from(expected));
		expect(float32ToBytes(input, true)).to.eql(Uint8Array.from([...expected].reverse()));
		expect(float32ToBytes([input, input])).to.eql(Uint8Array.from(expected.concat(expected)));
	};

	it("decodes normal numbers", () => {
		decode(1,                         [0x3F, 0x80, 0x00, 0x00]);
		decode(6.1,                       [0x40, 0xC3, 0x33, 0x33]);
		decode(0.25,                      [0x3E, 0x80, 0x00, 0x00]);
		decode(0.375,                     [0x3E, 0xC0, 0x00, 0x00]);
		decode(0.0244140625,              [0x3C, 0xC8, 0x00, 0x00]);
		decode(-91.6875,                  [0xC2, 0xB7, 0x60, 0x00]);
		decode(2 ** -126,                 [0x00, 0x80, 0x00, 0x00]);
		decode(2 ** +127 * (2 - 2 ** -23), [0x7F, 0x7F, 0xFF, 0xFF]);
		decode(1 - 2 ** -24,              [0x3F, 0x7F, 0xFF, 0xFF]);
		decode(1 + 2 ** -23,              [0x3F, 0x80, 0x00, 0x01]);
	});

	it("decodes subnormal numbers", () => {
		decode(2 ** -126 * (2 ** -23),     [0x00, 0x00, 0x00, 0x01]);
		decode(2 ** -126 * (1 - 2 ** -23), [0x00, 0x7F, 0xFF, 0xFF]);
	});

	it("decodes NaN",               () => decode(NaN,       [0x7F, 0x80, 0x00, 0x01]));
	it("decodes positive infinity", () => decode(+Infinity, [0x7F, 0x80, 0x00, 0x00]));

	// Binary32 -Infinity is 0xFF800000: sign bit set, all-ones exponent, zero
	// significand. The bytes 0x80 0x80 0x00 0x00 would instead denote -(2 ** -126).
	it("decodes negative infinity", () => decode(-Infinity, [0xFF, 0x80, 0x00, 0x00]));
	it("decodes positive zero",     () => decode(+0,        [0x00, 0x00, 0x00, 0x00]));
	it("decodes negative zero",     () => decode(-0,        [0x80, 0x00, 0x00, 0x00]));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment