Alhadis/ieee-754.mjs

## ieee-754.mjs
#!/usr/bin/env node

/** Unique marker that `encode()` stores in `r` when the decoded datum is a NaN. */
const qNaN = Symbol("qNaN");

/** Companion marker to `qNaN`; presumably denotes a signalling NaN (IEEE 754 "sNaN"). */
const sNaN = Symbol("sNaN");

export {qNaN, sNaN};


/**
 * Convert a binary floating-point representation to a number.
 *
 * Source: IEEE 754-2008, table 3.5 – “Binary interchange format parameters”
 *
 * @param {Number} S - Sign
 * @param {Number} E - Biased exponent (w bits)
 * @param {Number} T - Trailing significand field (t = p - 1 bits)
 * @param {Number} [size=32] - Either 16, 32, 64, or 128
 * @return {{r: number[], v: number}}
 */
export function encode(S, E, T, size = 32){
	const k       = {16: 16,   32: 32,    64: 64,    128: 128}   [size]; // Storage width in bits
	const p       = {16: 11,   32: 24,    64: 53,    128: 113}   [size]; // Precision in bits
	const emax    = {16: 15,   32: 127,   64: 1023,  128: 16383} [size]; // Maximum exponent e
	const bias    = {16: 15,   32: 127,   64: 1023,  128: 16383} [size]; // E - e
	const signBit = {16: 1,    32: 1,     64: 1,     128: 1}     [size];
	const w       = {16: 5,    32: 8,     64: 11,    128: 15}    [size]; // Exponent field width in bits
	const t       = {16: 10,   32: 23,    64: 52,    128: 112}   [size]; // Trailing significand field width in bits
	const emin    = 2 - (2 ** (w - 1));

	let r;  // Representation of the floating-point datum
	let v;  // Value of the floating-point datum represented

	// NaN
	if(E === (2 ** w) - 1 && T !== 0){
		r = qNaN || sNaN;
		v = NaN;
	}

	// Infinity
	else if(E === (2 ** w) - 1 && 0 !== T)
		r = v = ((-1) ** S) * Infinity;

	// Normal numbers (implicit leading significand bit of 1)
	else if(E >= 1 && E <= (2 ** w) - 2){
		r = [S, (E - bias), (1 + (2 ** (1 - p)) * T)];
		v = ((-1) ** S) * (2 ** (E - bias)) * (1 + (2 ** (1 - p)) * T);
	}

	// Subnormal numbers (implicit leading significand bit of 0)
	else if(0 === E && 0 !== T){
		r = [S, emin, (0 + (2 ** (1 - p)) * T)];
		v = ((-1) ** S) * (2 ** emin) * (0 + (2 ** (1 - p)) * T);
	}

	// Signed zero
	else if(0 === E && 0 === T){
		r = [S, emin, 0];
		v = ((-1) ** S) * 0;
	}
	return {r, v};
}


/**
 * Turn a binary fraction (as produced by {@link fracToBits}) back into a number.
 *
 * Bit number `precision` carries the weight 2⁻¹, the bit below it 2⁻², and
 * so on down to bit 0.
 *
 * @example bitsToFrac(0b11n << 62n) == 0.375;
 * @param {Number|BigInt} bits - Binary fraction returned by {@link fracToBits}
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number}
 */
export function bitsToFrac(bits, precision = 64){
	const field = BigInt(bits);
	const width = BigInt(precision);
	let sum = 0;
	// Walk from the highest bit downwards, accumulating bit × 2^-position.
	for(let pos = 0n; pos <= width; ++pos){
		const bit = (field >> (width - pos)) & 1n;
		sum += Number(bit) * 2 ** -Number(pos);
	}
	// The top bit was weighted 2⁰ above; halve so that it is worth 2⁻¹.
	return sum * 0.5;
}


/**
 * Express the fractional part of a number as a binary fraction.
 *
 * The first fractional digit (weight 2⁻¹) is stored in bit number
 * `precision`, the next one in the bit below it, and so on. Any integer
 * part of the argument is discarded first.
 *
 * @example fracToBits(0.375) == 0b11n << 62n;
 * @param {Number} frac - A floating-point value between 0 and 1
 * @param {Number|BigInt} [precision=64] - Significand precision in bits
 * @return {Number} The bit pattern, converted from BigInt to a Number
 */
export function fracToBits(frac, precision = 64){
	let rest = frac % 1;
	const width = BigInt(precision);
	let bits = 0n;
	let pos = 0n;
	// Repeated doubling: the integer part that pops out is the next binary digit.
	while(rest && pos <= width){
		rest *= 2;
		const digit = ~~rest;
		bits |= BigInt(digit) << (width - pos);
		++pos;
		rest -= digit;
	}
	return Number(bits);
}


/**
 * Convert 32-bit IEEE 754 floating-point values to bytes.
 *
 * Each value is written with `DataView.prototype.setFloat32`, which rounds
 * to the nearest binary32 value (ties to even). Subnormals, signed zeroes
 * and infinities are therefore encoded exactly, and magnitudes too large
 * for binary32 become ±Infinity.
 *
 * NaN is always emitted as `7F 80 00 01`: the bit pattern `setFloat32`
 * writes for NaN is implementation-defined, so it is pinned explicitly.
 *
 * @example float32ToBytes(0.375)     == [0x3E, 0xC0, 0x00, 0x00];
 * @example float32ToBytes(1 / 3)     == [0x3E, 0xAA, 0xAA, 0xAB];
 * @example float32ToBytes(Math.PI)   == [0x40, 0x49, 0x0F, 0xDB];
 * @example float32ToBytes(-Infinity) == [0xFF, 0x80, 0x00, 0x00];
 * @param {Number|Number[]} input - A single value, or a list of values
 * @param {Boolean} [littleEndian=false] - Emit each value's bytes least-significant first
 * @return {Uint8Array} Four bytes per input value, in input order
 */
export function float32ToBytes(input, littleEndian = false){
	const values = "number" === typeof input ? [input] : input;
	const {length} = values;
	const bytes = new Uint8Array(length * 4);
	const view = new DataView(bytes.buffer);
	for(let i = 0; i < length; ++i){
		const value = Number(values[i]);
		if(Number.isNaN(value))
			view.setUint32(i * 4, 0x7F800001, littleEndian);
		else
			view.setFloat32(i * 4, value, littleEndian);
	}
	return bytes;
}

## test.mjs
describe("float32ToBytes()", () => {
	const {float32ToBytes} = utils;

	// Checks three things for one value: its big-endian bytes, the mirrored
	// little-endian bytes, and that array input yields the bytes of every
	// element back-to-back.
	const decode = (input, expected) => {
		expect(float32ToBytes(input)).to.eql(Uint8Array.from(expected));
		expect(float32ToBytes(input, true)).to.eql(Uint8Array.from([...expected].reverse()));
		expect(float32ToBytes([input, input])).to.eql(Uint8Array.from(expected.concat(expected)));
	};
	it("decodes normal numbers", () => {
		decode(1,            [0x3F, 0x80, 0x00, 0x00]);
		decode(6.1,          [0x40, 0xC3, 0x33, 0x33]);
		decode(0.25,         [0x3E, 0x80, 0x00, 0x00]);
		decode(0.375,        [0x3E, 0xC0, 0x00, 0x00]);
		decode(0.0244140625, [0x3C, 0xC8, 0x00, 0x00]);
		decode(-91.6875,     [0xC2, 0xB7, 0x60, 0x00]);
		decode(2 ** -126,    [0x00, 0x80, 0x00, 0x00]); // Smallest normal number
		decode(2 ** +127 * (2 - 2 ** -23), [0x7F, 0x7F, 0xFF, 0xFF]); // Largest finite number
		decode(1 - 2 ** -24, [0x3F, 0x7F, 0xFF, 0xFF]);
		decode(1 + 2 ** -23, [0x3F, 0x80, 0x00, 0x01]);
	});
	it("decodes subnormal numbers", () => {
		decode(2 ** -126 * (2 ** -23),     [0x00, 0x00, 0x00, 0x01]); // Smallest subnormal
		decode(2 ** -126 * (1 - 2 ** -23), [0x00, 0x7F, 0xFF, 0xFF]); // Largest subnormal
	});
	it("decodes NaN",                () => decode(NaN,       [0x7F, 0x80, 0x00, 0x01]));
	it("decodes positive infinity",  () => decode(+Infinity, [0x7F, 0x80, 0x00, 0x00]));
	// Negative infinity is sign = 1, exponent = all ones, fraction = 0: FF 80 00 00.
	// (80 80 00 00 would be the negative number -2^-126 instead.)
	it("decodes negative infinity",  () => decode(-Infinity, [0xFF, 0x80, 0x00, 0x00]));
	it("decodes positive zero",      () => decode(+0,        [0x00, 0x00, 0x00, 0x00]));
	it("decodes negative zero",      () => decode(-0,        [0x80, 0x00, 0x00, 0x00]));
});
	#!/usr/bin/env node

	/** Unique marker that `encode()` stores in `r` when the decoded datum is a NaN. */
	const qNaN = Symbol("qNaN");

	/** Companion marker to `qNaN`; presumably denotes a signalling NaN (IEEE 754 "sNaN"). */
	const sNaN = Symbol("sNaN");

	export {qNaN, sNaN};


	/**
	 * Interpret the three fields of an IEEE 754 binary interchange format
	 * and return the floating-point datum they describe.
	 *
	 * Source: IEEE 754-2008, § 3.4 and table 3.5 – “Binary interchange format parameters”
	 *
	 * All arithmetic is performed with ordinary JavaScript numbers (doubles), so
	 * binary128 data outside the range of a double overflow or underflow in `v`.
	 *
	 * @example encode(0, 127, 0).v === 1;
	 * @example encode(1, 255, 0).v === -Infinity;
	 * @param {Number} S - Sign (0 or 1)
	 * @param {Number} E - Biased exponent (w bits)
	 * @param {Number} T - Trailing significand field (t = p - 1 bits)
	 * @param {Number} [size=32] - Either 16, 32, 64, or 128
	 * @return {{r: (number[]|number|symbol), v: number}}
	 *    `r` is the representation of the datum: `[sign, exponent, significand]`
	 *    for finite numbers, ±Infinity for infinities, and the `qNaN` or `sNaN`
	 *    symbol for NaNs. `v` is the numeric value. Both are `undefined` when the
	 *    fields match no class of datum (e.g. an unsupported `size`).
	 */
	export function encode(S, E, T, size = 32){
		const p    = {16: 11,   32: 24,    64: 53,    128: 113}   [size]; // Precision in bits
		const bias = {16: 15,   32: 127,   64: 1023,  128: 16383} [size]; // E - e (numerically equal to emax)
		const w    = {16: 5,    32: 8,     64: 11,    128: 15}    [size]; // Exponent field width in bits
		const t    = p - 1;             // Trailing significand field width in bits
		const emin = 1 - bias;          // Minimum exponent e
		const Emax = (2 ** w) - 1;      // Exponent field with every bit set (NaN / Infinity)
		const sign = (-1) ** S;
		const ulp  = 2 ** (1 - p);      // Weight of the lowest bit of T

		let r;  // Representation of the floating-point datum
		let v;  // Value of the floating-point datum represented

		// NaN: the leading bit of T separates quiet (1) from signalling (0) NaNs.
		// Compared arithmetically because T may be wider than 32 bits.
		if(E === Emax && 0 !== T){
			r = T >= 2 ** (t - 1) ? qNaN : sNaN;
			v = NaN;
		}

		// Infinity
		else if(E === Emax && 0 === T)
			r = v = sign * Infinity;

		// Normal numbers (implicit leading significand bit of 1)
		else if(E >= 1 && E <= Emax - 1){
			const significand = 1 + ulp * T;
			r = [S, E - bias, significand];
			v = sign * (2 ** (E - bias)) * significand;
		}

		// Subnormal numbers (implicit leading significand bit of 0)
		else if(0 === E && 0 !== T){
			const significand = ulp * T;
			r = [S, emin, significand];
			v = sign * (2 ** emin) * significand;
		}

		// Signed zero
		else if(0 === E && 0 === T){
			r = [S, emin, 0];
			v = sign * 0;
		}
		return {r, v};
	}


	/**
	 * Turn a binary fraction (as produced by {@link fracToBits}) back into a number.
	 *
	 * Bit number `precision` carries the weight 2⁻¹, the bit below it 2⁻², and
	 * so on down to bit 0.
	 *
	 * @example bitsToFrac(0b11n << 62n) == 0.375;
	 * @param {Number|BigInt} bits - Binary fraction returned by {@link fracToBits}
	 * @param {Number|BigInt} [precision=64] - Significand precision in bits
	 * @return {Number}
	 */
	export function bitsToFrac(bits, precision = 64){
		const field = BigInt(bits);
		const width = BigInt(precision);
		let sum = 0;
		// Walk from the highest bit downwards, accumulating bit × 2^-position.
		for(let pos = 0n; pos <= width; ++pos){
			const bit = (field >> (width - pos)) & 1n;
			sum += Number(bit) * 2 ** -Number(pos);
		}
		// The top bit was weighted 2⁰ above; halve so that it is worth 2⁻¹.
		return sum * 0.5;
	}


	/**
	 * Express the fractional part of a number as a binary fraction.
	 *
	 * The first fractional digit (weight 2⁻¹) is stored in bit number
	 * `precision`, the next one in the bit below it, and so on. Any integer
	 * part of the argument is discarded first.
	 *
	 * @example fracToBits(0.375) == 0b11n << 62n;
	 * @param {Number} frac - A floating-point value between 0 and 1
	 * @param {Number|BigInt} [precision=64] - Significand precision in bits
	 * @return {Number} The bit pattern, converted from BigInt to a Number
	 */
	export function fracToBits(frac, precision = 64){
		let rest = frac % 1;
		const width = BigInt(precision);
		let bits = 0n;
		let pos = 0n;
		// Repeated doubling: the integer part that pops out is the next binary digit.
		while(rest && pos <= width){
			rest *= 2;
			const digit = ~~rest;
			bits |= BigInt(digit) << (width - pos);
			++pos;
			rest -= digit;
		}
		return Number(bits);
	}


	/**
	 * Convert 32-bit IEEE 754 floating-point values to bytes.
	 *
	 * Each value is written with `DataView.prototype.setFloat32`, which rounds
	 * to the nearest binary32 value (ties to even). Subnormals, signed zeroes
	 * and infinities are therefore encoded exactly, and magnitudes too large
	 * for binary32 become ±Infinity.
	 *
	 * NaN is always emitted as `7F 80 00 01`: the bit pattern `setFloat32`
	 * writes for NaN is implementation-defined, so it is pinned explicitly.
	 *
	 * @example float32ToBytes(0.375)     == [0x3E, 0xC0, 0x00, 0x00];
	 * @example float32ToBytes(1 / 3)     == [0x3E, 0xAA, 0xAA, 0xAB];
	 * @example float32ToBytes(Math.PI)   == [0x40, 0x49, 0x0F, 0xDB];
	 * @example float32ToBytes(-Infinity) == [0xFF, 0x80, 0x00, 0x00];
	 * @param {Number|Number[]} input - A single value, or a list of values
	 * @param {Boolean} [littleEndian=false] - Emit each value's bytes least-significant first
	 * @return {Uint8Array} Four bytes per input value, in input order
	 */
	export function float32ToBytes(input, littleEndian = false){
		const values = "number" === typeof input ? [input] : input;
		const {length} = values;
		const bytes = new Uint8Array(length * 4);
		const view = new DataView(bytes.buffer);
		for(let i = 0; i < length; ++i){
			const value = Number(values[i]);
			if(Number.isNaN(value))
				view.setUint32(i * 4, 0x7F800001, littleEndian);
			else
				view.setFloat32(i * 4, value, littleEndian);
		}
		return bytes;
	}
	describe("float32ToBytes()", () => {
		const {float32ToBytes} = utils;

		// Checks three things for one value: its big-endian bytes, the mirrored
		// little-endian bytes, and that array input yields the bytes of every
		// element back-to-back.
		const decode = (input, expected) => {
			expect(float32ToBytes(input)).to.eql(Uint8Array.from(expected));
			expect(float32ToBytes(input, true)).to.eql(Uint8Array.from([...expected].reverse()));
			expect(float32ToBytes([input, input])).to.eql(Uint8Array.from(expected.concat(expected)));
		};
		it("decodes normal numbers", () => {
			decode(1,            [0x3F, 0x80, 0x00, 0x00]);
			decode(6.1,          [0x40, 0xC3, 0x33, 0x33]);
			decode(0.25,         [0x3E, 0x80, 0x00, 0x00]);
			decode(0.375,        [0x3E, 0xC0, 0x00, 0x00]);
			decode(0.0244140625, [0x3C, 0xC8, 0x00, 0x00]);
			decode(-91.6875,     [0xC2, 0xB7, 0x60, 0x00]);
			decode(2 ** -126,    [0x00, 0x80, 0x00, 0x00]); // Smallest normal number
			decode(2 ** +127 * (2 - 2 ** -23), [0x7F, 0x7F, 0xFF, 0xFF]); // Largest finite number
			decode(1 - 2 ** -24, [0x3F, 0x7F, 0xFF, 0xFF]);
			decode(1 + 2 ** -23, [0x3F, 0x80, 0x00, 0x01]);
		});
		it("decodes subnormal numbers", () => {
			decode(2 ** -126 * (2 ** -23),     [0x00, 0x00, 0x00, 0x01]); // Smallest subnormal
			decode(2 ** -126 * (1 - 2 ** -23), [0x00, 0x7F, 0xFF, 0xFF]); // Largest subnormal
		});
		it("decodes NaN",                () => decode(NaN,       [0x7F, 0x80, 0x00, 0x01]));
		it("decodes positive infinity",  () => decode(+Infinity, [0x7F, 0x80, 0x00, 0x00]));
		// Negative infinity is sign = 1, exponent = all ones, fraction = 0: FF 80 00 00.
		// (80 80 00 00 would be the negative number -2^-126 instead.)
		it("decodes negative infinity",  () => decode(-Infinity, [0xFF, 0x80, 0x00, 0x00]));
		it("decodes positive zero",      () => decode(+0,        [0x00, 0x00, 0x00, 0x00]));
		it("decodes negative zero",      () => decode(-0,        [0x80, 0x00, 0x00, 0x00]));
	});