Skip to content

Instantly share code, notes, and snippets.

@thoughtpolice
Created June 9, 2023 02:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thoughtpolice/317c9306ca79a2fe400a84f0c463cd82 to your computer and use it in GitHub Desktop.
Save thoughtpolice/317c9306ca79a2fe400a84f0c463cd82 to your computer and use it in GitHub Desktop.
Standalone implementation of KSUIDs for TypeScript
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2017-2021 Mark Wubben <mark@novemberborn.net>
// SPDX-FileCopyrightText: 2017-2023 Segment.io
// SPDX-FileCopyrightText: 2023 Austin Seipp <aseipp@pobox.com>
// Implementation of KSUIDs, a K-Sortable Globally Unique IDentifier, as defined
// by https://github.com/segmentio/ksuid -- this is a port of the upstream
// project to typescript/deno, with some modifications.
//
// External API based on the one from https://github.com/novemberborn/ksuid,
// with some code copied; but with some modifications to be simpler and ported
// to typescript
//
// Base62 algorithms are basically direct translaterations of upstream golang to
// javascript: https://github.com/segmentio/ksuid/blob/master/base62.go -- this
// is less efficient than e.g. base-x it seems, but actually seems to work
// better in practice with fewer edge cases; I trust it as more correct wrt. the
// upstream implementation, while almost every other base62 encoder I found
// couldn't handle original ksuid strings correctly (e.g. they would fail to
// decode them), or would decode incorrectly (e.g. 21 byte buffers returned),
// and so on.
//
// However, this at least makes the implementation completely standalone and
// isolated from any other dependencies, which is nice. it should work on deno,
// node, and the browser.
//
// Includes tests and benchmarks for deno, and a command line program that can
// be used to generate ksuids and inspect them, like upstream tools:
//
// $ deno run util/ksuid.ts
// $ deno run util/ksuid.ts -f inspect 0ujtsYcgvSTl8PAuAdqWYSMnLOv
// $ deno run util/ksuid.ts -f inspect $(deno run util/ksuid.ts -n 4)
import * as flags from "$std/flags/mod.ts";
// ---------------------------------------------------------------------------------------------------------------------
// KSUID's epoch starts more recently so that the 32-bit number space gives a
// significantly higher useful lifetime of around 136 years from March 2014.
// This number (14e11) was picked to be easy to remember.
const EPOCH_IN_MS = 14e11;
const MAX_TIME_IN_MS = 1e3 * (2 ** 32 - 1) + EPOCH_IN_MS;
const TIMESTAMP_BYTE_LENGTH = 4; // uint32 timestamp
const PAYLOAD_BYTE_LENGTH = 16; // 16-byte payload, so...
const BYTE_LENGTH = 4 + 16; // 20 bytes total!
const STRING_ENCODED_LENGTH = 27; // 20 bytes (160 bits) = 27 base62 characters, exactly
const TIME_IN_MS_ASSERTION =
`Valid KSUID timestamps must be in milliseconds since ${
new Date(0).toISOString()
},
no earlier than ${new Date(EPOCH_IN_MS).toISOString()} and no later than ${
new Date(MAX_TIME_IN_MS).toISOString()
}
`.trim().replace(/(\n|\s)+/g, " ").replace(/\.000Z/g, "Z");
const VALID_ENCODING_ASSERTION =
`Valid encoded KSUIDs are ${STRING_ENCODED_LENGTH} characters`;
const VALID_BUFFER_ASSERTION = `Valid KSUID buffers are ${BYTE_LENGTH} bytes`;
const VALID_PAYLOAD_ASSERTION =
`Valid KSUID payloads are ${PAYLOAD_BYTE_LENGTH} bytes`;
function concatArrayBuffers(bufs: ArrayBuffer[]): ArrayBuffer {
let total = 0;
for (const buf of bufs) {
total += buf.byteLength;
}
const result = new Uint8Array(total);
let offset = 0;
for (const buf of bufs) {
result.set(new Uint8Array(buf), offset);
offset += buf.byteLength;
}
return result.buffer;
}
function validateParts(ms: number, payload: ArrayBuffer): number {
if (!Number.isInteger(ms) || ms < EPOCH_IN_MS || ms > MAX_TIME_IN_MS) {
return 1;
}
if (payload.byteLength !== PAYLOAD_BYTE_LENGTH) {
return 2;
}
return 0;
}
// ---------------------------------------------------------------------------------------------------------------------
const BASE62_ALPHABET =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
function base62Value(digit: number): number {
const offsetUppercase = 10;
const offsetLowercase = 36;
if (digit >= 48 && digit <= 57) {
return digit - 48; // '0'-'9'
} else if (digit >= 65 && digit <= 90) {
return offsetUppercase + (digit - 65); // 'A'-'Z'
} else {
return offsetLowercase + (digit - 97); // 'a'-'z'
}
}
export function fastEncodeBase62(dst: Uint8Array, src: Uint8Array) {
if (src.length !== 20) throw new Error("Invalid base62 string");
if (dst.length !== 27) throw new Error("Invalid buffer length");
const srcBase = 4294967296;
const dstBase = 62;
// Split src into 5 4-byte words, this is where most of the efficiency comes
// from because this is a O(N^2) algorithm, and we make N = N / 4 by working
// on 32 bits at a time.
const parts = new Uint32Array([
(new DataView(src.buffer)).getUint32(0),
(new DataView(src.buffer)).getUint32(4),
(new DataView(src.buffer)).getUint32(8),
(new DataView(src.buffer)).getUint32(12),
(new DataView(src.buffer)).getUint32(16),
]);
let n = 27;
let bp = parts.slice();
const bq = new Uint32Array(5);
while (n !== 0) {
let off = 0;
const quotient = bq.slice(0);
let remainder = 0;
for (const c of bp) {
const value = c + remainder * srcBase;
const digit = Math.floor(value / dstBase);
remainder = value % dstBase;
if (quotient.length !== 0 || digit !== 0) {
quotient[off++] = digit;
}
}
// Writes at the end of the destination buffer because we computed the
// lowest bits first.
dst[--n] = BASE62_ALPHABET.charCodeAt(remainder);
bp = quotient;
}
return dst;
}
export function fastDecodeBase62(dst: Uint8Array, src: Uint8Array) {
if (src.length !== 27) throw new Error("Invalid base62 string");
if (dst.length !== 20) throw new Error("Invalid buffer length");
const srcBase = 62;
const dstBase = 4294967296;
const parts = new Uint8Array([
base62Value(src[0]),
base62Value(src[1]),
base62Value(src[2]),
base62Value(src[3]),
base62Value(src[4]),
base62Value(src[5]),
base62Value(src[6]),
base62Value(src[7]),
base62Value(src[8]),
base62Value(src[9]),
base62Value(src[10]),
base62Value(src[11]),
base62Value(src[12]),
base62Value(src[13]),
base62Value(src[14]),
base62Value(src[15]),
base62Value(src[16]),
base62Value(src[17]),
base62Value(src[18]),
base62Value(src[19]),
base62Value(src[20]),
base62Value(src[21]),
base62Value(src[22]),
base62Value(src[23]),
base62Value(src[24]),
base62Value(src[25]),
base62Value(src[26]),
]);
let n = 20;
let bp = parts.slice();
const bq = new Uint8Array(27);
while (n > 0) {
const quotient = bq.slice();
let remainder = 0, off = 0;
for (let i = 0; i < bp.length; i++) {
const c = bp[i];
const value = c + remainder * srcBase;
const digit = Math.floor(value / dstBase);
remainder = value % dstBase;
if (quotient.length !== 0 || digit !== 0) {
quotient[off++] = digit;
}
}
if (n < 4) {
throw new Error("Short buffer");
}
dst[n - 4] = remainder >> 24;
dst[n - 3] = remainder >> 16;
dst[n - 2] = remainder >> 8;
dst[n - 1] = remainder;
n -= 4;
bp = quotient;
}
return dst;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
* Validate a KSUID. This ensures it has the proper binary format and timestamp range,
* but otherwise does not validate the payload.
*
* @param blob Buffer containing a KSUID
* @returns False if invalid; true otherwise.
*/
export function validate(blob: ArrayBuffer): boolean {
if (blob.byteLength !== BYTE_LENGTH) {
return false;
}
if (
0 != validateParts(
1e3 * (new DataView(blob)).getUint32(0) + EPOCH_IN_MS,
blob.slice(TIMESTAMP_BYTE_LENGTH),
)
) {
return false;
}
return true;
}
/**
* Construct a KSUID from the given timestamp and payload.
*
* @param ms Milliseconds since unix epoch. Typically `Date.now()`.
* @param payload 16-byte payload. Use `crypto.getRandomValues(16)`.
* @returns A KSUID.
*/
export function fromParts(ms: number, payload: ArrayBuffer): KSUID {
switch (validateParts(ms, payload)) {
case 1:
throw new TypeError(TIME_IN_MS_ASSERTION);
case 2:
throw new TypeError(VALID_PAYLOAD_ASSERTION);
default:
}
const timestamp = Math.floor((ms - EPOCH_IN_MS) / 1e3);
const buffer = new ArrayBuffer(TIMESTAMP_BYTE_LENGTH);
const view = new DataView(buffer);
view.setUint32(0, timestamp);
const result = concatArrayBuffers([buffer, payload]);
if (result.byteLength !== BYTE_LENGTH) {
throw new Error(VALID_BUFFER_ASSERTION);
}
return new KSUID(result);
}
/**
* Create a KSUID based on the current Unix timestamp and a securely generated
* random payload.
*
* @returns A KSUID.
*/
export function create() {
return fromParts(
Date.now(),
crypto.getRandomValues(new Uint8Array(PAYLOAD_BYTE_LENGTH)),
);
}
/**
* Parse the string representation of a KSUID; this should be in base62 format, and
* is always a string exactly 27 characters long.
*
* @param str Serialized KSUID input
* @returns A KSUID.
*/
export function parse(str: string): KSUID {
if (str.length !== STRING_ENCODED_LENGTH) {
throw new TypeError(VALID_ENCODING_ASSERTION);
}
const bytes = new Uint8Array(20);
fastDecodeBase62(
bytes,
new TextEncoder().encode(str),
);
return new KSUID(bytes.buffer);
}
// ---------------------------------------------------------------------------------------------------------------------
export class KSUID {
private blob: ArrayBuffer;
constructor(blob: ArrayBuffer) {
if (!validate(blob)) {
throw new TypeError(VALID_BUFFER_ASSERTION);
}
const copy = new ArrayBuffer(20);
new Uint8Array(copy).set(new Uint8Array(blob));
this.blob = copy;
}
/**
* Get the raw bytes of the KSUID.
*/
get raw(): ArrayBuffer {
return this.blob;
}
get rawHex(): string {
return KSUID.buf2hex(this.blob);
}
/**
* Get the string representation of the KSUID; a 27-character base62 string.
*/
get string(): string {
const result = new Uint8Array(27);
fastEncodeBase62(result, new Uint8Array(this.blob));
return new TextDecoder().decode(result);
}
/**
* Get the timestamp of the KSUID as a Date object.
*/
get date(): Date {
return new Date(this.timestamp);
}
/**
* Get the timestamp of the KSUID in milliseconds since unix epoch.
*/
get timestamp(): number {
return 1e3 * this.timestampRaw + EPOCH_IN_MS;
}
/**
* Get the raw timestamp of the KSUID in seconds since unix epoch; this is not
* actually an exact date, but a scaled value that is adjusted to March 2014.
* You normally should not need this.
*/
get timestampRaw(): number {
return (new DataView(this.blob)).getUint32(0);
}
/**
* Get the binary payload of the KSUID.
*
* @returns A 16-byte ArrayBuffer.
*/
get payload(): ArrayBuffer {
return this.blob.slice(TIMESTAMP_BYTE_LENGTH);
}
/**
* Get the hex-encoded payload of the 16-byte payload.
*
* @returns A 32-character hex string.
*/
get payloadHex(): string {
return KSUID.buf2hex(this.payload);
}
/**
* Compare two KSUIDs for equality.
*
* @param other Other KSUID to compare to.
* @returns True if equal; false otherwise.
*/
equals(other: KSUID): boolean {
return this.string === other.string; // FIXME: optimize
}
/**
* Get the string representation of the KSUID; a 27-character base62 string.
*
* @returns String representation of the KSUID.
*/
toString(): string {
return this.string;
}
/* Convert an ArrayBuffer to a hex string. */
private static buf2hex(buffer: ArrayBuffer): string {
return [...new Uint8Array(buffer)]
.map((x) => x.toString(16).padStart(2, "0"))
.join("")
.toUpperCase();
}
}
// ---------------------------------------------------------------------------------------------------------------------
// simple port of upstream ksuid inspection program from segmentio/ksuid
if (import.meta.main) {
const args = flags.parse(Deno.args, {
string: ["n", "f"],
default: { n: "1" },
});
if (parseInt(args.n, 10) < 1) {
throw new Error("invalid number of ksuids to generate");
}
const n = parseInt(args.n, 10);
if (args.f !== undefined) {
switch (args.f) {
case "inspect": {
const printKSUID = (ksuid: KSUID) => {
console.log(`
REPRESENTATION:
String: ${ksuid.string}
Raw: ${ksuid.raw}
COMPONENTS:
Time: ${ksuid.date}
Timestamp: ${ksuid.timestampRaw}
Payload: ${ksuid.payloadHex}
`);
};
if (args._.length !== 0) {
for (const str of args._) {
printKSUID(parse(str.toString()));
}
} else {
for (let i = 0; i < n; i++) {
printKSUID(create());
}
}
break;
}
default: {
throw new Error(`unknown flag: ${args.f}`);
}
}
Deno.exit(0);
}
for (let i = 0; i < n; i++) {
console.log(create().string);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment