developmentalmadness/CondensedTable.cs

## CondensedTable.cs
using System;
using System.Collections.Generic;

namespace WeightedSetSelection
{
    /// <summary>
	/// Creates a table by duplicating entries propotionate to
	/// their weight. When samples are requested they will be
	/// selected randomly according to weight.
	/// </summary>
	/// <remarks>
	/// While the entries are generic (T) because of the amount of copying
	/// that will happen interally it is suggested that T be
	/// and Int32 representing the index of the item in the collection
	/// rather than the actual object reference. When a value is returned
	/// from the Sample() method it can be used to lookup the index
	/// in the original collection.
	/// </remarks>
	/// <see cref="http://www.jstatsoft.org/v11/i03"/>
	/// <typeparam name="T">The referece type or index to the type in another collection</typeparam>
	public class CondensedTable<T>
	{
		Random random;
		T[] A;
		T[] B;
		int tableSpace;
		int tableSplit;

		private const double PRECISION = 10000.0;

		/// <summary>
		/// Initialization of the table
		/// </summary>
		/// <remarks>
		/// Precision of the probability is expected to be 4 digits.
		/// </remarks>
		/// <param name="pp">Tuple of the type or collection index and the probability of each entry.</param>
		public CondensedTable(Tuple<T, double>[] pp)
		{
			random = new Random();
			byte[,] sizes = new byte[pp.Length, 2];
			int aTot = 0, bTot = 0;

			for(int i = 0; i < pp.Length; i++)
			{
				var pair = pp[i];

				// convert the probability to an integer value
				int prob = Convert.ToInt32(pair.Item2 * PRECISION);

				// sum the total probability of all items (used as MAX random value)
				tableSpace += prob;

				// split the left and right halves of the probability value
				int b = prob % 100;
				int a = (prob - b) / 100;

				// store the probabilities as integer (byte) values
				sizes[i, 0] = Convert.ToByte(a);
				sizes[i, 1] = Convert.ToByte(b);

				// sum the probabilities of each half for all items
				aTot += a;
				bTot += b;
			}

			// init an array for each half based on the sum of probabilities
			A = new T[aTot];
			B = new T[bTot];

			// shift the decimal to the right on the A side (ex: 56 -> 5600)
			tableSplit = aTot * 100;

			// build the table based on the sizes (this creates a distribution table)
			int aIdx = 0, bIdx = 0;
			for(int i= 0; i < pp.Length; i++)
			{
				var pair = pp[i];

				// get the size
				int a = sizes[i, 0], b = sizes[i, 1];
				for (int j = 0; j < Math.Max(a, b); j++)
				{
					// fill the table based on the probabilities (distirbution)
					if (j < a) A[aIdx++] = pair.Item1;
					if (j < b) B[bIdx++] = pair.Item1;
				}
			}
		}

		public T Sample()
		{
			// get a new random value (with MAX of tableSpace)
			int i = random.Next(tableSpace);

			// based on mid-value return the selected value from the appropriate table
			if (i < tableSplit) return A[i / 100];
			return B[i - tableSplit];
		}
	}
}
	using System;
	using System.Collections.Generic;

	namespace WeightedSetSelection
	{
	/// <summary>
	/// Creates a table by duplicating entries propotionate to
	/// their weight. When samples are requested they will be
	/// selected randomly according to weight.
	/// </summary>
	/// <remarks>
	/// While the entries are generic (T) because of the amount of copying
	/// that will happen interally it is suggested that T be
	/// and Int32 representing the index of the item in the collection
	/// rather than the actual object reference. When a value is returned
	/// from the Sample() method it can be used to lookup the index
	/// in the original collection.
	/// </remarks>
	/// <see cref="http://www.jstatsoft.org/v11/i03"/>
	/// <typeparam name="T">The referece type or index to the type in another collection</typeparam>
	public class CondensedTable<T>
	{
	Random random;
	T[] A;
	T[] B;
	int tableSpace;
	int tableSplit;

	private const double PRECISION = 10000.0;

	/// <summary>
	/// Initialization of the table
	/// </summary>
	/// <remarks>
	/// Precision of the probability is expected to be 4 digits.
	/// </remarks>
	/// <param name="pp">Tuple of the type or collection index and the probability of each entry.</param>
	public CondensedTable(Tuple<T, double>[] pp)
	{
	random = new Random();
	byte[,] sizes = new byte[pp.Length, 2];
	int aTot = 0, bTot = 0;

	for(int i = 0; i < pp.Length; i++)
	{
	var pair = pp[i];

	// convert the probability to an integer value
	int prob = Convert.ToInt32(pair.Item2 * PRECISION);

	// sum the total probability of all items (used as MAX random value)
	tableSpace += prob;

	// split the left and right halves of the probability value
	int b = prob % 100;
	int a = (prob - b) / 100;

	// store the probabilities as integer (byte) values
	sizes[i, 0] = Convert.ToByte(a);
	sizes[i, 1] = Convert.ToByte(b);

	// sum the probabilities of each half for all items
	aTot += a;
	bTot += b;
	}

	// init an array for each half based on the sum of probabilities
	A = new T[aTot];
	B = new T[bTot];

	// shift the decimal to the right on the A side (ex: 56 -> 5600)
	tableSplit = aTot * 100;

	// build the table based on the sizes (this creates a distribution table)
	int aIdx = 0, bIdx = 0;
	for(int i= 0; i < pp.Length; i++)
	{
	var pair = pp[i];

	// get the size
	int a = sizes[i, 0], b = sizes[i, 1];
	for (int j = 0; j < Math.Max(a, b); j++)
	{
	// fill the table based on the probabilities (distirbution)
	if (j < a) A[aIdx++] = pair.Item1;
	if (j < b) B[bIdx++] = pair.Item1;
	}
	}
	}

	public T Sample()
	{
	// get a new random value (with MAX of tableSpace)
	int i = random.Next(tableSpace);

	// based on mid-value return the selected value from the appropriate table
	if (i < tableSplit) return A[i / 100];
	return B[i - tableSplit];
	}
	}
	}