Skip to content

Instantly share code, notes, and snippets.

@LiosK
Last active March 8, 2022 04:07
Show Gist options
  • Save LiosK/92cbbc477fe64e0bd4f1604daa09dcfa to your computer and use it in GitHub Desktop.
Save LiosK/92cbbc477fe64e0bd4f1604daa09dcfa to your computer and use it in GitHub Desktop.
Experimental base36 (modulo) and base32 (bitwise) binary-to-text encoders and decoders
const DIGITS = "0123456789abcdefghijklmnopqrstuv";
/** O(1) map from ASCII code points to digit values. */
const DECODE_MAP = new Uint8Array(128).fill(0xff);
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) {
DECODE_MAP[DIGITS.charCodeAt(i)] = i;
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive
}
/** Encodes a byte array to text. */
export const encode = (bytes) => {
const dst = convertRadix(bytes, 256, DIGITS.length);
let text = "";
for (let i = 0; i < dst.length; i++) {
text += DIGITS[dst[i]];
}
return text;
};
/** Decodes text to a byte array. */
export const decode = (text) => {
const src = new Uint8Array(text.length);
for (let i = 0; i < text.length; i++) {
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff;
if (c >= DIGITS.length) {
throw new SyntaxError("invalid character");
}
src[i] = c;
}
return convertRadix(src, DIGITS.length, 256);
};
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */
const convertRadix = (src, srcRadix, dstRadix) => {
const srcRadixBitLen = Math.log2(srcRadix);
const dstRadixBitLen = Math.log2(dstRadix);
const dstSize = Math.ceil(src.length * (srcRadixBitLen / dstRadixBitLen));
const dst = new Uint8Array(dstSize);
let carryBitLen = 0;
let carry = 0;
let dstCursor = 0;
for (let i = 0; i < src.length; i++) {
carryBitLen += srcRadixBitLen;
carry = (carry << srcRadixBitLen) | src[i];
while (carryBitLen >= dstRadixBitLen) {
carryBitLen -= dstRadixBitLen;
dst[dstCursor++] = carry >>> carryBitLen;
carry &= (1 << carryBitLen) - 1;
}
}
dst[dstCursor] = carry << (dstRadixBitLen - carryBitLen);
// assert(dstCursor === dst.length - 1);
return dst;
};
const DIGITS = "0123456789abcdefghijklmnopqrstuvwxyz";
/** O(1) map from ASCII code points to digit values. */
const DECODE_MAP = new Uint8Array(128).fill(0xff);
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) {
DECODE_MAP[DIGITS.charCodeAt(i)] = i;
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive
}
/** Encodes a byte array to text. */
export const encode = (bytes) => {
const dst = convertRadix(bytes, 256, DIGITS.length);
let text = "";
for (let i = 0; i < dst.length; i++) {
text += DIGITS[dst[i]];
}
return text;
};
/** Decodes text to a byte array. */
export const decode = (text) => {
const src = new Uint8Array(text.length);
for (let i = 0; i < text.length; i++) {
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff;
if (c >= DIGITS.length) {
throw new SyntaxError("invalid character");
}
src[i] = c;
}
return convertRadix(src, DIGITS.length, 256);
};
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */
const convertRadix = (src, srcRadix, dstRadix) => {
const dstSize = Math.ceil(
src.length * (Math.log2(srcRadix) / Math.log2(dstRadix))
);
const dst = new Uint8Array(dstSize);
for (let i = 0; i < src.length; i++) {
let carry = src[i];
for (let j = dst.length - 1; j >= 0; j--) {
carry += dst[j] * srcRadix;
dst[j] = carry % dstRadix;
carry = Math.trunc(carry / dstRadix);
}
// assert(carry === 0);
}
return dst;
};
const DIGITS = "0123456789abcdefghijklmnopqrstuvwxyz";
/** O(1) map from ASCII code points to digit values. */
const DECODE_MAP = new Uint8Array(128).fill(0xff);
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) {
DECODE_MAP[DIGITS.charCodeAt(i)] = i;
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive
}
/** Encodes a byte array to text. */
export const encode = (bytes) => {
const dst = convertRadix(bytes, 256, DIGITS.length);
let text = "";
for (let i = 0; i < dst.length; i++) {
text += DIGITS[dst[i]];
}
return text;
};
/** Decodes text to a byte array. */
export const decode = (text) => {
const src = new Uint8Array(text.length);
for (let i = 0; i < text.length; i++) {
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff;
if (c >= DIGITS.length) {
throw new SyntaxError("invalid character");
}
src[i] = c;
}
return convertRadix(src, DIGITS.length, 256);
};
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */
const convertRadix = (src, srcRadix, dstRadix) => {
const dstSize = Math.ceil(
src.length * (Math.log2(srcRadix) / Math.log2(dstRadix))
);
const dst = new Uint8Array(dstSize);
let dstUsed = dst.length - 1;
for (let i = 0, carry = 0; i < src.length; ) {
// Reset carry to input (read multiple digits for optimization)
let power = 1; // Set to srcRadix ** number of digits read
while (power < 2 ** 36 && i < src.length) {
carry = carry * srcRadix + src[i++];
power *= srcRadix;
}
// Iterate over dst from right while carry != 0 or up to place already used
let j = dst.length - 1;
for (; carry > 0 || j >= dstUsed; j--) {
// assert(j >= 0);
carry += dst[j] * power;
dst[j] = carry % dstRadix;
carry = Math.trunc(carry / dstRadix);
}
dstUsed = j + 1;
// assert(carry === 0);
}
return dst;
};
import * as base32 from "./base32.mjs";
import * as base36 from "./base36.mjs";
import * as base36_optimized from "./base36_optimized.mjs";
const cases = new Array(100_000);
for (let i = 0; i < cases.length; i++) {
cases[i] = new Uint8Array(16);
for (let j = 0; j < cases[i].length; j++) {
cases[i][j] = Math.random() * 256;
}
}
{
const encoded = new Array(cases.length);
const decoded = new Array(cases.length);
console.time("base32.encode()");
for (let i = 0; i < cases.length; i++) {
encoded[i] = base32.encode(cases[i]);
}
console.timeEnd("base32.encode()");
console.time("base32.decode()");
for (let i = 0; i < cases.length; i++) {
decoded[i] = base32.decode(encoded[i]);
}
console.timeEnd("base32.decode()");
}
{
const encoded = new Array(cases.length);
const decoded = new Array(cases.length);
console.time("base36.encode()");
for (let i = 0; i < cases.length; i++) {
encoded[i] = base36.encode(cases[i]);
}
console.timeEnd("base36.encode()");
console.time("base36.decode()");
for (let i = 0; i < cases.length; i++) {
decoded[i] = base36.decode(encoded[i]);
}
console.timeEnd("base36.decode()");
}
{
const encoded = new Array(cases.length);
const decoded = new Array(cases.length);
console.time("base36_optimized.encode()");
for (let i = 0; i < cases.length; i++) {
encoded[i] = base36_optimized.encode(cases[i]);
}
console.timeEnd("base36_optimized.encode()");
console.time("base36_optimized.decode()");
for (let i = 0; i < cases.length; i++) {
decoded[i] = base36_optimized.decode(encoded[i]);
}
console.timeEnd("base36_optimized.decode()");
}
import { encode, decode } from "./base32.mjs";
const N = 100_000;
console.time(`${N} test cases`);
for (let i = 0; i < N; i++) {
const bytes = new Uint8Array(16);
let num = 0n;
for (let j = 0; j < bytes.length; j++) {
bytes[j] = Math.random() * 256;
num = num * 256n + BigInt(bytes[j]);
}
num <<= 2n;
const encoded = encode(bytes);
if (encoded !== num.toString(32).padStart(26, "0")) {
throw new Error("encode: disagreement with BigInt.prototype.toString(32)");
}
const decoded = decode(encoded);
if (decoded.length !== 17) {
throw new Error("decode: wrong output size");
}
for (let j = 0; j < bytes.length; j++) {
if (bytes[j] !== decoded[j]) {
throw new Error("decode: disagreement with original byte array");
}
}
if (decoded[bytes.length] !== 0) {
throw new Error("decode: disagreement with original byte array");
}
}
console.timeEnd(`${N} test cases`);
import { encode, decode } from "./base36.mjs";
const N = 100_000;
console.time(`${N} test cases`);
for (let i = 0; i < N; i++) {
const bytes = new Uint8Array(16);
let num = 0n;
for (let j = 0; j < bytes.length; j++) {
bytes[j] = Math.random() * 256;
num = num * 256n + BigInt(bytes[j]);
}
const encoded = encode(bytes);
if (encoded !== num.toString(36).padStart(25, "0")) {
throw new Error("encode: disagreement with BigInt.prototype.toString(36)");
}
const decoded = decode(encoded);
if (decoded.length !== 17) {
throw new Error("decode: wrong output size");
}
if (decoded[0] !== 0) {
throw new Error("decode: disagreement with original byte array");
}
for (let j = 0; j < bytes.length; j++) {
if (bytes[j] !== decoded[j + 1]) {
throw new Error("decode: disagreement with original byte array");
}
}
}
console.timeEnd(`${N} test cases`);
import { encode, decode } from "./base36_optimized.mjs";
const N = 100_000;
console.time(`${N} test cases`);
for (let i = 0; i < N; i++) {
const bytes = new Uint8Array(16);
let num = 0n;
for (let j = 0; j < bytes.length; j++) {
bytes[j] = Math.random() * 256;
num = num * 256n + BigInt(bytes[j]);
}
const encoded = encode(bytes);
if (encoded !== num.toString(36).padStart(25, "0")) {
throw new Error("encode: disagreement with BigInt.prototype.toString(36)");
}
const decoded = decode(encoded);
if (decoded.length !== 17) {
throw new Error("decode: wrong output size");
}
if (decoded[0] !== 0) {
throw new Error("decode: disagreement with original byte array");
}
for (let j = 0; j < bytes.length; j++) {
if (bytes[j] !== decoded[j + 1]) {
throw new Error("decode: disagreement with original byte array");
}
}
}
console.timeEnd(`${N} test cases`);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment