Last active
March 8, 2022 04:07
-
-
Save LiosK/92cbbc477fe64e0bd4f1604daa09dcfa to your computer and use it in GitHub Desktop.
Experimental base36 (modulo) and base32 (bitwise) binary-to-text encoders and decoders
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const DIGITS = "0123456789abcdefghijklmnopqrstuv"; | |
/** O(1) map from ASCII code points to digit values. */ | |
const DECODE_MAP = new Uint8Array(128).fill(0xff); | |
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) { | |
DECODE_MAP[DIGITS.charCodeAt(i)] = i; | |
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive | |
} | |
/** Encodes a byte array to text. */ | |
export const encode = (bytes) => { | |
const dst = convertRadix(bytes, 256, DIGITS.length); | |
let text = ""; | |
for (let i = 0; i < dst.length; i++) { | |
text += DIGITS[dst[i]]; | |
} | |
return text; | |
}; | |
/** Decodes text to a byte array. */ | |
export const decode = (text) => { | |
const src = new Uint8Array(text.length); | |
for (let i = 0; i < text.length; i++) { | |
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff; | |
if (c >= DIGITS.length) { | |
throw new SyntaxError("invalid character"); | |
} | |
src[i] = c; | |
} | |
return convertRadix(src, DIGITS.length, 256); | |
}; | |
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */ | |
const convertRadix = (src, srcRadix, dstRadix) => { | |
const srcRadixBitLen = Math.log2(srcRadix); | |
const dstRadixBitLen = Math.log2(dstRadix); | |
const dstSize = Math.ceil(src.length * (srcRadixBitLen / dstRadixBitLen)); | |
const dst = new Uint8Array(dstSize); | |
let carryBitLen = 0; | |
let carry = 0; | |
let dstCursor = 0; | |
for (let i = 0; i < src.length; i++) { | |
carryBitLen += srcRadixBitLen; | |
carry = (carry << srcRadixBitLen) | src[i]; | |
while (carryBitLen >= dstRadixBitLen) { | |
carryBitLen -= dstRadixBitLen; | |
dst[dstCursor++] = carry >>> carryBitLen; | |
carry &= (1 << carryBitLen) - 1; | |
} | |
} | |
dst[dstCursor] = carry << (dstRadixBitLen - carryBitLen); | |
// assert(dstCursor === dst.length - 1); | |
return dst; | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const DIGITS = "0123456789abcdefghijklmnopqrstuvwxyz"; | |
/** O(1) map from ASCII code points to digit values. */ | |
const DECODE_MAP = new Uint8Array(128).fill(0xff); | |
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) { | |
DECODE_MAP[DIGITS.charCodeAt(i)] = i; | |
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive | |
} | |
/** Encodes a byte array to text. */ | |
export const encode = (bytes) => { | |
const dst = convertRadix(bytes, 256, DIGITS.length); | |
let text = ""; | |
for (let i = 0; i < dst.length; i++) { | |
text += DIGITS[dst[i]]; | |
} | |
return text; | |
}; | |
/** Decodes text to a byte array. */ | |
export const decode = (text) => { | |
const src = new Uint8Array(text.length); | |
for (let i = 0; i < text.length; i++) { | |
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff; | |
if (c >= DIGITS.length) { | |
throw new SyntaxError("invalid character"); | |
} | |
src[i] = c; | |
} | |
return convertRadix(src, DIGITS.length, 256); | |
}; | |
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */ | |
const convertRadix = (src, srcRadix, dstRadix) => { | |
const dstSize = Math.ceil( | |
src.length * (Math.log2(srcRadix) / Math.log2(dstRadix)) | |
); | |
const dst = new Uint8Array(dstSize); | |
for (let i = 0; i < src.length; i++) { | |
let carry = src[i]; | |
for (let j = dst.length - 1; j >= 0; j--) { | |
carry += dst[j] * srcRadix; | |
dst[j] = carry % dstRadix; | |
carry = Math.trunc(carry / dstRadix); | |
} | |
// assert(carry === 0); | |
} | |
return dst; | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const DIGITS = "0123456789abcdefghijklmnopqrstuvwxyz"; | |
/** O(1) map from ASCII code points to digit values. */ | |
const DECODE_MAP = new Uint8Array(128).fill(0xff); | |
for (let i = 0, uc = DIGITS.toUpperCase(); i < DIGITS.length; i++) { | |
DECODE_MAP[DIGITS.charCodeAt(i)] = i; | |
DECODE_MAP[uc.charCodeAt(i)] = i; // omit this to make decoder case-sensitive | |
} | |
/** Encodes a byte array to text. */ | |
export const encode = (bytes) => { | |
const dst = convertRadix(bytes, 256, DIGITS.length); | |
let text = ""; | |
for (let i = 0; i < dst.length; i++) { | |
text += DIGITS[dst[i]]; | |
} | |
return text; | |
}; | |
/** Decodes text to a byte array. */ | |
export const decode = (text) => { | |
const src = new Uint8Array(text.length); | |
for (let i = 0; i < text.length; i++) { | |
const c = DECODE_MAP[text.charCodeAt(i)] ?? 0xff; | |
if (c >= DIGITS.length) { | |
throw new SyntaxError("invalid character"); | |
} | |
src[i] = c; | |
} | |
return convertRadix(src, DIGITS.length, 256); | |
}; | |
/** Converts a digit value array in `srcRadix` to that in `dstRadix`. */ | |
const convertRadix = (src, srcRadix, dstRadix) => { | |
const dstSize = Math.ceil( | |
src.length * (Math.log2(srcRadix) / Math.log2(dstRadix)) | |
); | |
const dst = new Uint8Array(dstSize); | |
let dstUsed = dst.length - 1; | |
for (let i = 0, carry = 0; i < src.length; ) { | |
// Reset carry to input (read multiple digits for optimization) | |
let power = 1; // Set to srcRadix ** number of digits read | |
while (power < 2 ** 36 && i < src.length) { | |
carry = carry * srcRadix + src[i++]; | |
power *= srcRadix; | |
} | |
// Iterate over dst from right while carry != 0 or up to place already used | |
let j = dst.length - 1; | |
for (; carry > 0 || j >= dstUsed; j--) { | |
// assert(j >= 0); | |
carry += dst[j] * power; | |
dst[j] = carry % dstRadix; | |
carry = Math.trunc(carry / dstRadix); | |
} | |
dstUsed = j + 1; | |
// assert(carry === 0); | |
} | |
return dst; | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as base32 from "./base32.mjs"; | |
import * as base36 from "./base36.mjs"; | |
import * as base36_optimized from "./base36_optimized.mjs"; | |
const cases = new Array(100_000); | |
for (let i = 0; i < cases.length; i++) { | |
cases[i] = new Uint8Array(16); | |
for (let j = 0; j < cases[i].length; j++) { | |
cases[i][j] = Math.random() * 256; | |
} | |
} | |
{ | |
const encoded = new Array(cases.length); | |
const decoded = new Array(cases.length); | |
console.time("base32.encode()"); | |
for (let i = 0; i < cases.length; i++) { | |
encoded[i] = base32.encode(cases[i]); | |
} | |
console.timeEnd("base32.encode()"); | |
console.time("base32.decode()"); | |
for (let i = 0; i < cases.length; i++) { | |
decoded[i] = base32.decode(encoded[i]); | |
} | |
console.timeEnd("base32.decode()"); | |
} | |
{ | |
const encoded = new Array(cases.length); | |
const decoded = new Array(cases.length); | |
console.time("base36.encode()"); | |
for (let i = 0; i < cases.length; i++) { | |
encoded[i] = base36.encode(cases[i]); | |
} | |
console.timeEnd("base36.encode()"); | |
console.time("base36.decode()"); | |
for (let i = 0; i < cases.length; i++) { | |
decoded[i] = base36.decode(encoded[i]); | |
} | |
console.timeEnd("base36.decode()"); | |
} | |
{ | |
const encoded = new Array(cases.length); | |
const decoded = new Array(cases.length); | |
console.time("base36_optimized.encode()"); | |
for (let i = 0; i < cases.length; i++) { | |
encoded[i] = base36_optimized.encode(cases[i]); | |
} | |
console.timeEnd("base36_optimized.encode()"); | |
console.time("base36_optimized.decode()"); | |
for (let i = 0; i < cases.length; i++) { | |
decoded[i] = base36_optimized.decode(encoded[i]); | |
} | |
console.timeEnd("base36_optimized.decode()"); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { encode, decode } from "./base32.mjs"; | |
const N = 100_000; | |
console.time(`${N} test cases`); | |
for (let i = 0; i < N; i++) { | |
const bytes = new Uint8Array(16); | |
let num = 0n; | |
for (let j = 0; j < bytes.length; j++) { | |
bytes[j] = Math.random() * 256; | |
num = num * 256n + BigInt(bytes[j]); | |
} | |
num <<= 2n; | |
const encoded = encode(bytes); | |
if (encoded !== num.toString(32).padStart(26, "0")) { | |
throw new Error("encode: disagreement with BigInt.prototype.toString(32)"); | |
} | |
const decoded = decode(encoded); | |
if (decoded.length !== 17) { | |
throw new Error("decode: wrong output size"); | |
} | |
for (let j = 0; j < bytes.length; j++) { | |
if (bytes[j] !== decoded[j]) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
} | |
if (decoded[bytes.length] !== 0) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
} | |
console.timeEnd(`${N} test cases`); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { encode, decode } from "./base36.mjs"; | |
const N = 100_000; | |
console.time(`${N} test cases`); | |
for (let i = 0; i < N; i++) { | |
const bytes = new Uint8Array(16); | |
let num = 0n; | |
for (let j = 0; j < bytes.length; j++) { | |
bytes[j] = Math.random() * 256; | |
num = num * 256n + BigInt(bytes[j]); | |
} | |
const encoded = encode(bytes); | |
if (encoded !== num.toString(36).padStart(25, "0")) { | |
throw new Error("encode: disagreement with BigInt.prototype.toString(36)"); | |
} | |
const decoded = decode(encoded); | |
if (decoded.length !== 17) { | |
throw new Error("decode: wrong output size"); | |
} | |
if (decoded[0] !== 0) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
for (let j = 0; j < bytes.length; j++) { | |
if (bytes[j] !== decoded[j + 1]) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
} | |
} | |
console.timeEnd(`${N} test cases`); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { encode, decode } from "./base36_optimized.mjs"; | |
const N = 100_000; | |
console.time(`${N} test cases`); | |
for (let i = 0; i < N; i++) { | |
const bytes = new Uint8Array(16); | |
let num = 0n; | |
for (let j = 0; j < bytes.length; j++) { | |
bytes[j] = Math.random() * 256; | |
num = num * 256n + BigInt(bytes[j]); | |
} | |
const encoded = encode(bytes); | |
if (encoded !== num.toString(36).padStart(25, "0")) { | |
throw new Error("encode: disagreement with BigInt.prototype.toString(36)"); | |
} | |
const decoded = decode(encoded); | |
if (decoded.length !== 17) { | |
throw new Error("decode: wrong output size"); | |
} | |
if (decoded[0] !== 0) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
for (let j = 0; j < bytes.length; j++) { | |
if (bytes[j] !== decoded[j + 1]) { | |
throw new Error("decode: disagreement with original byte array"); | |
} | |
} | |
} | |
console.timeEnd(`${N} test cases`); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment