marcogrcr/1-stateless-percentage.js

## 1-stateless-percentage.js
const { createHash } = require("node:crypto");

/**
 * Maps an input value to a deterministic, pseudo-random integer between 0 and
 * 99 (inclusive) that is quasi-evenly distributed across inputs.
 *
 * Because the same input always produces the same number, this function is
 * useful for incrementally rolling out a feature to customers based on a
 * percentage without keeping any state.
 *
 * @param value {string} The input value to derive the output from.
 * @param seed {string} An optional seed (defaults to `""`) used to bucket
 * percentages together, e.g. a feature name. It is appended to `value` before
 * hashing, so `("ab", "c")` and `("a", "bc")` produce the same result.
 * @returns {number} An integer between 0 and 99 (inclusive).
 *
 * ---
 *
 * This function hashes the concatenation `value + seed` with SHAKE-256 using a
 * 4-byte output, reads those 4 bytes as an unsigned big-endian integer and
 * returns that integer modulo 100.
 *
 * See: https://en.wikipedia.org/wiki/SHA-3
 *
 * ---
 *
 * IMPORTANT note about modulo bias:
 * https://research.kudelskisecurity.com/2020/07/28/the-definitive-guide-to-modulo-bias-and-how-to-avoid-it/
 *
 * A SHAKE-256 output of 1 byte (0-255) would already cover the 0-99 range, but
 * 56 of its 256 possible values (200-255, ~21.88%) lie past the last whole
 * multiple of 100, which makes the results 0-55 more likely than 56-99.
 *
 * To see the bias in action, run the following in https://jupyter.org/try-jupyter/lab/
 *
 * ```py
 * from hashlib import shake_256
 * import matplotlib.pyplot as plt
 *
 * r = dict()
 * for v in range(1_000_000):
 *     h = shake_256()
 *     h.update(bytes(str(v).rjust(6, '0'), 'utf8'))
 *     d = h.digest(1)
 *     p = int.from_bytes(d, 'big') % 100
 *     r[p] = r.get(p, 0) + 1
 *
 * plt.subplots(figsize=(20,5))
 * plt.bar(r.keys(), r.values())
 * plt.show()
 * ```
 *
 * With an output length of 4 bytes, only 96 out of 2^32 (~0.00%) values are
 * impacted by modulo bias (2^32 mod 100 = 96), which is negligible.
 *
 * Try running the previous example with `h.digest(4)` and see the difference.
 */
export function statelessPercentage(value, seed = "") {
  const BUCKET_COUNT = 100;
  const DIGEST_BYTES = 4;

  const hasher = createHash("shake256", { outputLength: DIGEST_BYTES });
  hasher.update(value + seed);

  // Read the 4 digest bytes as one unsigned big-endian integer; this is the
  // same number that the digest's hex representation denotes.
  const digestAsInteger = hasher.digest().readUInt32BE(0);

  return digestAsInteger % BUCKET_COUNT;
}

## 2-example-usage.js
import { statelessPercentage } from "./stateless-percentage";

/**
 * Tells whether feature-1 is enabled for the given customer.
 *
 * @param customer {string} The customer identifier, forwarded as the input
 * value of `statelessPercentage` together with the "feature-1" seed.
 * @returns {boolean} `true` when the customer's percentage is below 50.
 */
function hasFeature1Enabled(customer) {
  // feature-1 is rolled out to 50 percent of the customers
  const rolloutPercentage = 50;
  const customerPercentage = statelessPercentage(customer, "feature-1");

  return customerPercentage < rolloutPercentage;
}


// Report, for ten sample customers (customer-1 ... customer-10), whether
// feature-1 is enabled for each of them.
for (let customerNumber = 1; customerNumber <= 10; customerNumber += 1) {
  const customer = `customer-${customerNumber}@example.com`;
  const enabled = hasFeature1Enabled(customer);

  /*
   * outputs (as recorded by the original author):
   *
   * customer-1@example.com has feature-1 enabled? false
   * customer-2@example.com has feature-1 enabled? true
   * customer-3@example.com has feature-1 enabled? false
   * customer-4@example.com has feature-1 enabled? true
   * customer-5@example.com has feature-1 enabled? false
   * customer-6@example.com has feature-1 enabled? true
   * customer-7@example.com has feature-1 enabled? true
   * customer-8@example.com has feature-1 enabled? false
   * customer-9@example.com has feature-1 enabled? false
   * customer-10@example.com has feature-1 enabled? true
   */
  console.log(`${customer} has feature-1 enabled? ${enabled}`);
}
	const { createHash } = require("node:crypto");

	/**
	 * Maps an input value to a deterministic, pseudo-random integer between 0 and
	 * 99 (inclusive) that is quasi-evenly distributed across inputs.
	 *
	 * Because the same input always produces the same number, this function is
	 * useful for incrementally rolling out a feature to customers based on a
	 * percentage without keeping any state.
	 *
	 * @param value {string} The input value to derive the output from.
	 * @param seed {string} An optional seed (defaults to `""`) used to bucket
	 * percentages together, e.g. a feature name. It is appended to `value` before
	 * hashing, so `("ab", "c")` and `("a", "bc")` produce the same result.
	 * @returns {number} An integer between 0 and 99 (inclusive).
	 *
	 * ---
	 *
	 * This function hashes the concatenation `value + seed` with SHAKE-256 using a
	 * 4-byte output, reads those 4 bytes as an unsigned big-endian integer and
	 * returns that integer modulo 100.
	 *
	 * See: https://en.wikipedia.org/wiki/SHA-3
	 *
	 * ---
	 *
	 * IMPORTANT note about modulo bias:
	 * https://research.kudelskisecurity.com/2020/07/28/the-definitive-guide-to-modulo-bias-and-how-to-avoid-it/
	 *
	 * A SHAKE-256 output of 1 byte (0-255) would already cover the 0-99 range, but
	 * 56 of its 256 possible values (200-255, ~21.88%) lie past the last whole
	 * multiple of 100, which makes the results 0-55 more likely than 56-99.
	 *
	 * To see the bias in action, run the following in https://jupyter.org/try-jupyter/lab/
	 *
	 * ```py
	 * from hashlib import shake_256
	 * import matplotlib.pyplot as plt
	 *
	 * r = dict()
	 * for v in range(1_000_000):
	 *     h = shake_256()
	 *     h.update(bytes(str(v).rjust(6, '0'), 'utf8'))
	 *     d = h.digest(1)
	 *     p = int.from_bytes(d, 'big') % 100
	 *     r[p] = r.get(p, 0) + 1
	 *
	 * plt.subplots(figsize=(20,5))
	 * plt.bar(r.keys(), r.values())
	 * plt.show()
	 * ```
	 *
	 * With an output length of 4 bytes, only 96 out of 2^32 (~0.00%) values are
	 * impacted by modulo bias (2^32 mod 100 = 96), which is negligible.
	 *
	 * Try running the previous example with `h.digest(4)` and see the difference.
	 */
	export function statelessPercentage(value, seed = "") {
		const BUCKET_COUNT = 100;
		const DIGEST_BYTES = 4;

		const hasher = createHash("shake256", { outputLength: DIGEST_BYTES });
		hasher.update(value + seed);

		// Read the 4 digest bytes as one unsigned big-endian integer; this is the
		// same number that the digest's hex representation denotes.
		const digestAsInteger = hasher.digest().readUInt32BE(0);

		return digestAsInteger % BUCKET_COUNT;
	}
	import { statelessPercentage } from "./stateless-percentage";

	/**
	 * Tells whether feature-1 is enabled for the given customer.
	 *
	 * @param customer {string} The customer identifier, forwarded as the input
	 * value of `statelessPercentage` together with the "feature-1" seed.
	 * @returns {boolean} `true` when the customer's percentage is below 50.
	 */
	function hasFeature1Enabled(customer) {
		// feature-1 is rolled out to 50 percent of the customers
		const rolloutPercentage = 50;
		const customerPercentage = statelessPercentage(customer, "feature-1");

		return customerPercentage < rolloutPercentage;
	}


	// Report, for ten sample customers (customer-1 ... customer-10), whether
	// feature-1 is enabled for each of them.
	for (let customerNumber = 1; customerNumber <= 10; customerNumber += 1) {
		const customer = `customer-${customerNumber}@example.com`;
		const enabled = hasFeature1Enabled(customer);

		/*
		 * outputs (as recorded by the original author):
		 *
		 * customer-1@example.com has feature-1 enabled? false
		 * customer-2@example.com has feature-1 enabled? true
		 * customer-3@example.com has feature-1 enabled? false
		 * customer-4@example.com has feature-1 enabled? true
		 * customer-5@example.com has feature-1 enabled? false
		 * customer-6@example.com has feature-1 enabled? true
		 * customer-7@example.com has feature-1 enabled? true
		 * customer-8@example.com has feature-1 enabled? false
		 * customer-9@example.com has feature-1 enabled? false
		 * customer-10@example.com has feature-1 enabled? true
		 */
		console.log(`${customer} has feature-1 enabled? ${enabled}`);
	}