Skip to content

Instantly share code, notes, and snippets.

@Alhadis
Last active February 27, 2020 13:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Alhadis/3ab85958e046718a84463dde7d132bd9 to your computer and use it in GitHub Desktop.
Save Alhadis/3ab85958e046718a84463dde7d132bd9 to your computer and use it in GitHub Desktop.
IEEE 734.mjs
#!/usr/bin/env node
// Unique symbols exported as tags for NaN representations. Within this
// file, encode() is the only consumer: it uses them as the `r` field of a NaN result.
// The names presumably abbreviate "quiet NaN" and "signalling NaN".
export const qNaN = Symbol("qNaN");
export const sNaN = Symbol("sNaN");
/**
 * Compute the value represented by the three fields of a binary
 * floating-point datum (sign, biased exponent, trailing significand).
 *
 * Source: IEEE 754-2008, table 3.5 – “Binary interchange format parameters”
 *
 * All arithmetic is performed with ordinary JavaScript numbers, so fields
 * wider than a double can hold exactly (e.g. a full binary128 significand)
 * lose precision.
 *
 * @example encode(0, 125, 0x400000).v === 0.375;
 * @example encode(1, 255, 0).v === -Infinity;
 * @param {Number} S - Sign (0 or 1)
 * @param {Number} E - Biased exponent (w bits)
 * @param {Number} T - Trailing significand field (t = p - 1 bits)
 * @param {Number} [size=32] - Either 16, 32, 64, or 128
 * @return {{r: (number[]|number|symbol), v: number}}
 *    `v` is the numeric value. `r` is the representation: a
 *    `[sign, exponent, significand]` triple for finite data, ±Infinity for
 *    infinities, and the `qNaN` symbol for NaNs. Both properties are
 *    `undefined` when the fields match none of the recognised cases.
 */
export function encode(S, E, T, size = 32){
	const p    = {16: 11, 32: 24, 64: 53,   128: 113}   [size]; // Precision in bits
	const bias = {16: 15, 32: 127, 64: 1023, 128: 16383} [size]; // E - e
	const w    = {16: 5,  32: 8,  64: 11,   128: 15}    [size]; // Exponent field width in bits
	const emin = 2 - (2 ** (w - 1));  // Minimum exponent e
	const reserved = (2 ** w) - 1;    // All-ones exponent: NaN or infinity
	const ulp = 2 ** (1 - p);         // Weight of the lowest bit of T
	const sign = (-1) ** S;
	let r; // Representation of the floating-point datum
	let v; // Value of the floating-point datum represented

	// NaN: reserved exponent with a non-zero payload
	if(E === reserved && T !== 0){
		// The payload is not inspected; this expression always yields `qNaN`
		r = qNaN || sNaN;
		v = NaN;
	}

	// Infinity: reserved exponent with a zero payload
	else if(E === reserved && 0 === T)
		r = v = sign * Infinity;

	// Normal numbers (implicit leading significand bit of 1)
	else if(E >= 1 && E <= reserved - 1){
		r = [S, (E - bias), (1 + ulp * T)];
		v = sign * (2 ** (E - bias)) * (1 + ulp * T);
	}

	// Subnormal numbers (implicit leading significand bit of 0)
	else if(0 === E && 0 !== T){
		r = [S, emin, (0 + ulp * T)];
		v = sign * (2 ** emin) * (0 + ulp * T);
	}

	// Signed zero
	else if(0 === E && 0 === T){
		r = [S, emin, 0];
		v = sign * 0;
	}
	return {r, v};
}
/**
 * Turn a binary fraction (as produced by {@link fracToBits}) back into a number.
 *
 * Bit number `precision` carries a weight of 1/2, and every lower bit is
 * worth half of the one above it; `precision + 1` bits are read in total.
 *
 * @example bitsToFrac(0b11n << 62n) == 0.375;
 * @param {Number|BigInt} bits - Binary fraction returned by {@link fracToBits}
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number}
 */
export function bitsToFrac(bits, precision = 64){
	const pattern = BigInt(bits);
	const width = BigInt(precision);
	let sum = 0;

	// Walk from the most significant bit downwards, accumulating 2^-index per set bit
	for(let index = 0n; index <= width; ++index){
		const digit = (pattern >> (width - index)) & 1n;
		sum += Number(digit) * 2 ** -Number(index);
	}

	// The top bit was weighted as 1 above; scale so that it is worth 1/2
	return sum * 0.5;
}
/**
 * Express the fractional part of a number as a binary fraction.
 *
 * The first fractional digit (worth 1/2) is stored in bit number `precision`,
 * with each following digit one bit lower; at most `precision + 1` digits
 * are produced. Any integer part of the input is discarded.
 *
 * @example fracToBits(0.375) == 0b11n << 62n;
 * @param {Number} frac - A floating-point value between 0 and 1
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number}
 */
export function fracToBits(frac, precision = 64){
	let remainder = frac % 1;
	const width = BigInt(precision);
	let pattern = 0n;
	let index = 0n;

	// Repeated doubling: whatever spills into the units place is the next digit
	while(remainder && index <= width){
		remainder *= 2;
		const digit = ~~remainder;
		pattern |= BigInt(digit) << (width - index);
		remainder -= digit;
		++index;
	}
	return Number(pattern);
}
/**
 * Convert numbers to their 32-bit IEEE 754 (binary32) byte representation.
 *
 * Conversion is delegated to `DataView.prototype.setFloat32`, which rounds
 * to nearest (ties to even), produces subnormal encodings for tiny values,
 * and turns values beyond the binary32 range into infinities. NaN is always
 * written as the fixed bit pattern 0x7F800001.
 *
 * @example float32ToBytes(1 / 3)     == [0x3E, 0xAA, 0xAA, 0xAB];
 * @example float32ToBytes(Math.PI)   == [0x40, 0x49, 0x0F, 0xDB];
 * @example float32ToBytes(0.375)     == [0x3E, 0xC0, 0x00, 0x00];
 * @example float32ToBytes(-Infinity) == [0xFF, 0x80, 0x00, 0x00];
 * @param {Number|Number[]} input - A single value, or a list of values
 * @param {Boolean} [littleEndian=false] - Emit each value's bytes in reverse (little-endian) order
 * @return {Uint8Array} Four bytes per input value, in input order
 */
export function float32ToBytes(input, littleEndian = false){
	const values = "number" === typeof input ? [input] : input;
	const {length} = values;
	const bytes = new Uint8Array(length * 4);
	const view = new DataView(bytes.buffer);
	for(let i = 0; i < length; ++i){
		const float = Number(values[i]);

		// The bytes written for NaN by setFloat32 are implementation-defined,
		// so the pattern this function has always emitted is written explicitly
		if(Number.isNaN(float))
			view.setUint32(i * 4, 0x7F800001, littleEndian);
		else
			view.setFloat32(i * 4, float, littleEndian);
	}
	return bytes;
}
// Specs for float32ToBytes(). `describe`, `it`, `expect` and `utils` are not
// defined in this file; presumably they come from a Mocha/Chai-style harness
// and a module re-exporting the function (TODO confirm).
describe("float32ToBytes()", () => {
	const {float32ToBytes} = utils;

	// Checks one value three ways: big-endian, little-endian, and as a two-element list
	const decode = (input, expected) => {
		expect(float32ToBytes(input)).to.eql(Uint8Array.from(expected));
		expect(float32ToBytes(input, true)).to.eql(Uint8Array.from([...expected].reverse()));
		expect(float32ToBytes([input, input])).to.eql(Uint8Array.from(expected.concat(expected)));
	};
	it("decodes normal numbers", () => {
		decode(1, [0x3F, 0x80, 0x00, 0x00]);
		decode(6.1, [0x40, 0xC3, 0x33, 0x33]);
		decode(0.25, [0x3E, 0x80, 0x00, 0x00]);
		decode(0.375, [0x3E, 0xC0, 0x00, 0x00]);
		decode(0.0244140625, [0x3C, 0xC8, 0x00, 0x00]);
		decode(-91.6875, [0xC2, 0xB7, 0x60, 0x00]);
		decode(2 ** -126, [0x00, 0x80, 0x00, 0x00]);
		decode(2 ** +127 * (2 - 2 ** -23), [0x7F, 0x7F, 0xFF, 0xFF]);
		decode(1 - 2 ** -24, [0x3F, 0x7F, 0xFF, 0xFF]);
		decode(1 + 2 ** -23, [0x3F, 0x80, 0x00, 0x01]);
	});
	it("decodes subnormal numbers", () => {
		decode(2 ** -126 * (2 ** -23), [0x00, 0x00, 0x00, 0x01]);
		decode(2 ** -126 * (1 - 2 ** -23), [0x00, 0x7F, 0xFF, 0xFF]);
	});
	it("decodes NaN", () => decode(NaN, [0x7F, 0x80, 0x00, 0x01]));
	it("decodes positive infinity", () => decode(+Infinity, [0x7F, 0x80, 0x00, 0x00]));
	// Sign bit set + all-ones exponent + zero significand = 0xFF800000
	it("decodes negative infinity", () => decode(-Infinity, [0xFF, 0x80, 0x00, 0x00]));
	it("decodes positive zero", () => decode(+0, [0x00, 0x00, 0x00, 0x00]));
	it("decodes negative zero", () => decode(-0, [0x80, 0x00, 0x00, 0x00]));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment