sebinsua/smooth-with-array.ts

## smooth-with-array.ts
function sleep(ms: number) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

class AsyncQueue<T> {
  queuedItems: (T | Error)[];
  queuedProcessors: [(item: T) => void, (error: Error) => void][];

  constructor() {
    // Note: The FIFO `shift` operations we do on these arrays are `O(n)`.
    //       The performance is acceptable to us for now. If we ever need
    //       to optimize this we can swap the arrays for linked lists.
    //       However, without benchmarking it's hard to know whether we
    //       would benefit more from keeping the contiguous memory layout
    //       of an array or from moving to linked lists and getting `shift`
    //       operations with a time complexity of `O(1)` instead of `O(N)`.
    //
    // Note: We've implemented this here already:
    //       https://gist.github.com/sebinsua/76fc5eb6fc498636bc637b9f10b7e6bf
    this.queuedItems = [];
    this.queuedProcessors = [];
  }

  enqueue(item: T | Error) {
    if (this.queuedProcessors.length > 0) {
      const [resolve, reject] = this.queuedProcessors.shift()!;
      if (item instanceof Error) {
        reject(item);
      } else {
        resolve(item);
      }
    } else {
      this.queuedItems.push(item);
    }
  }

  async dequeue(): Promise<T> {
    if (this.queuedItems.length > 0) {
      const item = this.queuedItems.shift()!;
      if (item instanceof Error) {
        throw item;
      }
      return item;
    } else {
      return new Promise((resolve, reject) =>
        this.queuedProcessors.push([resolve, reject])
      );
    }
  }

  size() {
    return this.queuedItems.length;
  }
}

interface CalculateDelayOptions {
  initialDelay?: number;
  zeroDelayQueueSize?: number;
}

function calculateDelay(
  queueSize: number,
  { initialDelay = 32, zeroDelayQueueSize = 64 }: CalculateDelayOptions = {}
): number {
  return Math.max(
    0,
    Math.floor(initialDelay - (initialDelay / zeroDelayQueueSize) * queueSize)
  );
}

export type TokenizeFn = (
  text: string,
  inclusive?: boolean,
  eof?: boolean
) => (readonly [token: string, index: number])[];
export type TokenizeType = "preserve" | "chars" | "words";

export type SmoothOptions = CalculateDelayOptions & {
  tokenize?: TokenizeType | TokenizeFn;
};

function preserve(buffer: string) {
  return [[buffer, buffer.length] as const];
}

function chars(buffer: string) {
  return buffer.split("").map((token, index) => [token, index + 1] as const);
}

function chunks(buffer: string, regex: RegExp, inclusive = false, eof = false) {
  const ws = [];

  let lastIndex = 0;
  for (let currentIndex = 0; currentIndex < buffer.length; currentIndex++) {
    if (regex.test(buffer[currentIndex]!)) {
      ws.push([
        buffer.slice(lastIndex, currentIndex + (inclusive ? 1 : 0)),
        currentIndex + (inclusive ? 1 : 0),
      ] as const);
      lastIndex = currentIndex;
    }
  }

  if (eof) {
    ws.push([buffer.slice(lastIndex), buffer.length] as const);
  }

  return ws;
}

function words(buffer: string, eof = false) {
  return chunks(buffer, /\s/, false, eof);
}

function clauses(buffer: string, eof = false) {
  return chunks(buffer, /[.,!?;]/, true, eof);
}

const tokenizers = {
  chars,
  words,
  clauses,
  preserve,
} as const;

/**
 * Smooth a stream of LLM tokens into a stream of characters or semantic chunks
 * while reducing jitter by stabilising output timing.
 *
 * @param streamingData A stream of LLM tokens.
 * @param options Options for the smoothing algorithm.
 */
export async function* smooth(
  streamingData: AsyncGenerator<string | undefined>,
  { tokenize: _tokenize = chars, ...options }: SmoothOptions = {}
) {
  const tokenize =
    typeof _tokenize === "function" ? _tokenize : tokenizers[_tokenize];

  const queue = new AsyncQueue<string | undefined>();
  void (async () => {
    let buffer = "";
    let lastIndex: number | undefined;
    try {
      for await (const oldToken of streamingData) {
        buffer += oldToken ?? "";
        for (const [newToken, index] of tokenize(buffer)) {
          queue.enqueue(newToken);
          lastIndex = index;
        }
        if (typeof lastIndex === "number") {
          buffer = buffer.slice(lastIndex);
          lastIndex = undefined;
        }
      }

      // Flush the buffer.
      for (const [newToken] of tokenize(buffer, true)) {
        queue.enqueue(newToken);
      }
    } catch (error) {
      queue.enqueue(error as Error);
    } finally {
      queue.enqueue(undefined);
    }
  })();

  while (true) {
    const newToken = await queue.dequeue();
    if (newToken === undefined) {
      break;
    }

    yield newToken;

    const delay = calculateDelay(queue.size(), options);
    if (delay === 0) {
      continue;
    }

    await sleep(delay);
  }
}


## smooth-with-linked-list.ts
function sleep(ms: number) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

class ListNode<T> {
  public value: T;
  public next: ListNode<T> | null = null;

  constructor(value: T) {
    this.value = value;
  }
}

class LinkedList<T> {
  private head: ListNode<T> | null = null;
  private tail: ListNode<T> | null = null;
  private _length: number = 0;

  public isEmpty(): boolean {
    return this._length === 0;
  }

  public size(): number {
    return this._length;
  }

  public get length(): number {
    return this._length;
  }

  public push(value: T): void {
    const newNode = new ListNode(value);
    if (this.tail) {
      this.tail.next = newNode;
    } else {
      this.head = newNode;
    }
    this.tail = newNode;
    this._length++;
  }

  public shift(): T | null {
    if (!this.head) {
      return null;
    }
    const headValue = this.head.value;
    this.head = this.head.next;
    if (!this.head) {
      this.tail = null;
    }
    this._length--;
    return headValue;
  }
}

class AsyncQueue<T> {
  queuedItems: LinkedList<T | Error>;
  queuedProcessors: LinkedList<[(item: T) => void, (error: Error) => void]>;

  constructor() {
    // Note: The FIFO `shift` operations we do are `O(n)` on arrays.
    //       Therefore, we are using linked lists, however, without
    //       benchmarking it's hard to know whether we would benefit
    //       more from keeping the contiguous memory layout of an array
    //       or from continuing to use linked lists in order to get
    //       `shift` operations with a time complexity of `O(1)` instead
    //       of `O(N)`.
    this.queuedItems = new LinkedList();
    this.queuedProcessors = new LinkedList();
  }

  enqueue(item: T | Error) {
    if (this.queuedProcessors.length > 0) {
      const [resolve, reject] = this.queuedProcessors.shift()!;
      if (item instanceof Error) {
        reject(item);
      } else {
        resolve(item);
      }
    } else {
      this.queuedItems.push(item);
    }
  }

  async dequeue(): Promise<T> {
    if (this.queuedItems.length > 0) {
      const item = this.queuedItems.shift()!;
      if (item instanceof Error) {
        throw item;
      }
      return item;
    } else {
      return new Promise((resolve, reject) =>
        this.queuedProcessors.push([resolve, reject])
      );
    }
  }

  size() {
    return this.queuedItems.length;
  }
}

interface CalculateDelayOptions {
  initialDelay?: number;
  zeroDelayQueueSize?: number;
}

function calculateDelay(
  queueSize: number,
  { initialDelay = 32, zeroDelayQueueSize = 64 }: CalculateDelayOptions = {}
): number {
  return Math.max(
    0,
    Math.floor(initialDelay - (initialDelay / zeroDelayQueueSize) * queueSize)
  );
}

export type TokenizeFn = (
  text: string,
  inclusive?: boolean,
  eof?: boolean
) => (readonly [token: string, index: number])[];
export type TokenizeType = "preserve" | "chars" | "words";

export type SmoothOptions = CalculateDelayOptions & {
  tokenize?: TokenizeType | TokenizeFn;
};

function preserve(buffer: string) {
  return [[buffer, buffer.length] as const];
}

function chars(buffer: string) {
  return buffer.split("").map((token, index) => [token, index + 1] as const);
}

function chunks(buffer: string, regex: RegExp, inclusive = false, eof = false) {
  const ws = [];

  let lastIndex = 0;
  for (let currentIndex = 0; currentIndex < buffer.length; currentIndex++) {
    if (regex.test(buffer[currentIndex]!)) {
      ws.push([
        buffer.slice(lastIndex, currentIndex + (inclusive ? 1 : 0)),
        currentIndex + (inclusive ? 1 : 0),
      ] as const);
      lastIndex = currentIndex;
    }
  }

  if (eof) {
    ws.push([buffer.slice(lastIndex), buffer.length] as const);
  }

  return ws;
}

function words(buffer: string, eof = false) {
  return chunks(buffer, /\s/, false, eof);
}

function clauses(buffer: string, eof = false) {
  return chunks(buffer, /[.,!?;]/, true, eof);
}

const tokenizers = {
  chars,
  words,
  clauses,
  preserve,
} as const;

/**
 * Smooth a stream of LLM tokens into a stream of characters or semantic chunks
 * while reducing jitter by stabilising output timing.
 *
 * @param streamingData A stream of LLM tokens.
 * @param options Options for the smoothing algorithm.
 */
export async function* smooth(
  streamingData: AsyncGenerator<string | undefined>,
  { tokenize: _tokenize = chars, ...options }: SmoothOptions = {}
) {
  const tokenize =
    typeof _tokenize === "function" ? _tokenize : tokenizers[_tokenize];

  const queue = new AsyncQueue<string | undefined>();
  void (async () => {
    let buffer = "";
    let lastIndex: number | undefined;
    try {
      for await (const oldToken of streamingData) {
        buffer += oldToken ?? "";
        for (const [newToken, index] of tokenize(buffer)) {
          queue.enqueue(newToken);
          lastIndex = index;
        }
        if (typeof lastIndex === "number") {
          buffer = buffer.slice(lastIndex);
          lastIndex = undefined;
        }
      }

      // Flush the buffer.
      for (const [newToken] of tokenize(buffer, true)) {
        queue.enqueue(newToken);
      }
    } catch (error) {
      queue.enqueue(error as Error);
    } finally {
      queue.enqueue(undefined);
    }
  })();

  while (true) {
    const newToken = await queue.dequeue();
    if (newToken === undefined) {
      break;
    }

    yield newToken;

    const delay = calculateDelay(queue.size(), options);
    if (delay === 0) {
      continue;
    }

    await sleep(delay);
  }
}
	function sleep(ms: number) {
	return new Promise((resolve) => setTimeout(resolve, ms));
	}

	class AsyncQueue<T> {
	queuedItems: (T \| Error)[];
	queuedProcessors: [(item: T) => void, (error: Error) => void][];

	constructor() {
	// Note: The FIFO `shift` operations we do on these arrays are `O(n)`.
	// The performance is acceptable to us for now. If we ever need
	// to optimize this we can swap the arrays for linked lists.
	// However, without benchmarking it's hard to know whether we
	// would benefit more from keeping the contiguous memory layout
	// of an array or from moving to linked lists and getting `shift`
	// operations with a time complexity of `O(1)` instead of `O(N)`.
	//
	// Note: We've implemented this here already:
	// https://gist.github.com/sebinsua/76fc5eb6fc498636bc637b9f10b7e6bf
	this.queuedItems = [];
	this.queuedProcessors = [];
	}

	enqueue(item: T \| Error) {
	if (this.queuedProcessors.length > 0) {
	const [resolve, reject] = this.queuedProcessors.shift()!;
	if (item instanceof Error) {
	reject(item);
	} else {
	resolve(item);
	}
	} else {
	this.queuedItems.push(item);
	}
	}

	async dequeue(): Promise<T> {
	if (this.queuedItems.length > 0) {
	const item = this.queuedItems.shift()!;
	if (item instanceof Error) {
	throw item;
	}
	return item;
	} else {
	return new Promise((resolve, reject) =>
	this.queuedProcessors.push([resolve, reject])
	);
	}
	}

	size() {
	return this.queuedItems.length;
	}
	}

	interface CalculateDelayOptions {
	initialDelay?: number;
	zeroDelayQueueSize?: number;
	}

	function calculateDelay(
	queueSize: number,
	{ initialDelay = 32, zeroDelayQueueSize = 64 }: CalculateDelayOptions = {}
	): number {
	return Math.max(
	0,
	Math.floor(initialDelay - (initialDelay / zeroDelayQueueSize) * queueSize)
	);
	}

	export type TokenizeFn = (
	text: string,
	inclusive?: boolean,
	eof?: boolean
	) => (readonly [token: string, index: number])[];
	export type TokenizeType = "preserve" \| "chars" \| "words";

	export type SmoothOptions = CalculateDelayOptions & {
	tokenize?: TokenizeType \| TokenizeFn;
	};

	function preserve(buffer: string) {
	return [[buffer, buffer.length] as const];
	}

	function chars(buffer: string) {
	return buffer.split("").map((token, index) => [token, index + 1] as const);
	}

	function chunks(buffer: string, regex: RegExp, inclusive = false, eof = false) {
	const ws = [];

	let lastIndex = 0;
	for (let currentIndex = 0; currentIndex < buffer.length; currentIndex++) {
	if (regex.test(buffer[currentIndex]!)) {
	ws.push([
	buffer.slice(lastIndex, currentIndex + (inclusive ? 1 : 0)),
	currentIndex + (inclusive ? 1 : 0),
	] as const);
	lastIndex = currentIndex;
	}
	}

	if (eof) {
	ws.push([buffer.slice(lastIndex), buffer.length] as const);
	}

	return ws;
	}

	function words(buffer: string, eof = false) {
	return chunks(buffer, /\s/, false, eof);
	}

	function clauses(buffer: string, eof = false) {
	return chunks(buffer, /[.,!?;]/, true, eof);
	}

	const tokenizers = {
	chars,
	words,
	clauses,
	preserve,
	} as const;

	/**
	* Smooth a stream of LLM tokens into a stream of characters or semantic chunks
	* while reducing jitter by stabilising output timing.
	*
	* @param streamingData A stream of LLM tokens.
	* @param options Options for the smoothing algorithm.
	*/
	export async function* smooth(
	streamingData: AsyncGenerator<string \| undefined>,
	{ tokenize: _tokenize = chars, ...options }: SmoothOptions = {}
	) {
	const tokenize =
	typeof _tokenize === "function" ? _tokenize : tokenizers[_tokenize];

	const queue = new AsyncQueue<string \| undefined>();
	void (async () => {
	let buffer = "";
	let lastIndex: number \| undefined;
	try {
	for await (const oldToken of streamingData) {
	buffer += oldToken ?? "";
	for (const [newToken, index] of tokenize(buffer)) {
	queue.enqueue(newToken);
	lastIndex = index;
	}
	if (typeof lastIndex === "number") {
	buffer = buffer.slice(lastIndex);
	lastIndex = undefined;
	}
	}

	// Flush the buffer.
	for (const [newToken] of tokenize(buffer, true)) {
	queue.enqueue(newToken);
	}
	} catch (error) {
	queue.enqueue(error as Error);
	} finally {
	queue.enqueue(undefined);
	}
	})();

	while (true) {
	const newToken = await queue.dequeue();
	if (newToken === undefined) {
	break;
	}

	yield newToken;

	const delay = calculateDelay(queue.size(), options);
	if (delay === 0) {
	continue;
	}

	await sleep(delay);
	}
	}
	function sleep(ms: number) {
	return new Promise((resolve) => setTimeout(resolve, ms));
	}

	class ListNode<T> {
	public value: T;
	public next: ListNode<T> \| null = null;

	constructor(value: T) {
	this.value = value;
	}
	}

	class LinkedList<T> {
	private head: ListNode<T> \| null = null;
	private tail: ListNode<T> \| null = null;
	private _length: number = 0;

	public isEmpty(): boolean {
	return this._length === 0;
	}

	public size(): number {
	return this._length;
	}

	public get length(): number {
	return this._length;
	}

	public push(value: T): void {
	const newNode = new ListNode(value);
	if (this.tail) {
	this.tail.next = newNode;
	} else {
	this.head = newNode;
	}
	this.tail = newNode;
	this._length++;
	}

	public shift(): T \| null {
	if (!this.head) {
	return null;
	}
	const headValue = this.head.value;
	this.head = this.head.next;
	if (!this.head) {
	this.tail = null;
	}
	this._length--;
	return headValue;
	}
	}

	class AsyncQueue<T> {
	queuedItems: LinkedList<T \| Error>;
	queuedProcessors: LinkedList<[(item: T) => void, (error: Error) => void]>;

	constructor() {
	// Note: The FIFO `shift` operations we do are `O(n)` on arrays.
	// Therefore, we are using linked lists, however, without
	// benchmarking it's hard to know whether we would benefit
	// more from keeping the contiguous memory layout of an array
	// or from continuing to use linked lists in order to get
	// `shift` operations with a time complexity of `O(1)` instead
	// of `O(N)`.
	this.queuedItems = new LinkedList();
	this.queuedProcessors = new LinkedList();
	}

	enqueue(item: T \| Error) {
	if (this.queuedProcessors.length > 0) {
	const [resolve, reject] = this.queuedProcessors.shift()!;
	if (item instanceof Error) {
	reject(item);
	} else {
	resolve(item);
	}
	} else {
	this.queuedItems.push(item);
	}
	}

	async dequeue(): Promise<T> {
	if (this.queuedItems.length > 0) {
	const item = this.queuedItems.shift()!;
	if (item instanceof Error) {
	throw item;
	}
	return item;
	} else {
	return new Promise((resolve, reject) =>
	this.queuedProcessors.push([resolve, reject])
	);
	}
	}

	size() {
	return this.queuedItems.length;
	}
	}

	interface CalculateDelayOptions {
	initialDelay?: number;
	zeroDelayQueueSize?: number;
	}

	function calculateDelay(
	queueSize: number,
	{ initialDelay = 32, zeroDelayQueueSize = 64 }: CalculateDelayOptions = {}
	): number {
	return Math.max(
	0,
	Math.floor(initialDelay - (initialDelay / zeroDelayQueueSize) * queueSize)
	);
	}

	export type TokenizeFn = (
	text: string,
	inclusive?: boolean,
	eof?: boolean
	) => (readonly [token: string, index: number])[];
	export type TokenizeType = "preserve" \| "chars" \| "words";

	export type SmoothOptions = CalculateDelayOptions & {
	tokenize?: TokenizeType \| TokenizeFn;
	};

	function preserve(buffer: string) {
	return [[buffer, buffer.length] as const];
	}

	function chars(buffer: string) {
	return buffer.split("").map((token, index) => [token, index + 1] as const);
	}

	function chunks(buffer: string, regex: RegExp, inclusive = false, eof = false) {
	const ws = [];

	let lastIndex = 0;
	for (let currentIndex = 0; currentIndex < buffer.length; currentIndex++) {
	if (regex.test(buffer[currentIndex]!)) {
	ws.push([
	buffer.slice(lastIndex, currentIndex + (inclusive ? 1 : 0)),
	currentIndex + (inclusive ? 1 : 0),
	] as const);
	lastIndex = currentIndex;
	}
	}

	if (eof) {
	ws.push([buffer.slice(lastIndex), buffer.length] as const);
	}

	return ws;
	}

	function words(buffer: string, eof = false) {
	return chunks(buffer, /\s/, false, eof);
	}

	function clauses(buffer: string, eof = false) {
	return chunks(buffer, /[.,!?;]/, true, eof);
	}

	const tokenizers = {
	chars,
	words,
	clauses,
	preserve,
	} as const;

	/**
	* Smooth a stream of LLM tokens into a stream of characters or semantic chunks
	* while reducing jitter by stabilising output timing.
	*
	* @param streamingData A stream of LLM tokens.
	* @param options Options for the smoothing algorithm.
	*/
	export async function* smooth(
	streamingData: AsyncGenerator<string \| undefined>,
	{ tokenize: _tokenize = chars, ...options }: SmoothOptions = {}
	) {
	const tokenize =
	typeof _tokenize === "function" ? _tokenize : tokenizers[_tokenize];

	const queue = new AsyncQueue<string \| undefined>();
	void (async () => {
	let buffer = "";
	let lastIndex: number \| undefined;
	try {
	for await (const oldToken of streamingData) {
	buffer += oldToken ?? "";
	for (const [newToken, index] of tokenize(buffer)) {
	queue.enqueue(newToken);
	lastIndex = index;
	}
	if (typeof lastIndex === "number") {
	buffer = buffer.slice(lastIndex);
	lastIndex = undefined;
	}
	}

	// Flush the buffer.
	for (const [newToken] of tokenize(buffer, true)) {
	queue.enqueue(newToken);
	}
	} catch (error) {
	queue.enqueue(error as Error);
	} finally {
	queue.enqueue(undefined);
	}
	})();

	while (true) {
	const newToken = await queue.dequeue();
	if (newToken === undefined) {
	break;
	}

	yield newToken;

	const delay = calculateDelay(queue.size(), options);
	if (delay === 0) {
	continue;
	}

	await sleep(delay);
	}
	}