Created
May 7, 2013 17:10
-
-
Save developmentalmadness/5534308 to your computer and use it in GitHub Desktop.
Condensed table (see: http://www.jstatsoft.org/v11/i03) implementation used to provide weighted, random selection of items in a set.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
namespace WeightedSetSelection | |
{ | |
/// <summary> | |
/// Creates a table by duplicating entries propotionate to | |
/// their weight. When samples are requested they will be | |
/// selected randomly according to weight. | |
/// </summary> | |
/// <remarks> | |
/// While the entries are generic (T) because of the amount of copying | |
/// that will happen interally it is suggested that T be | |
/// and Int32 representing the index of the item in the collection | |
/// rather than the actual object reference. When a value is returned | |
/// from the Sample() method it can be used to lookup the index | |
/// in the original collection. | |
/// </remarks> | |
/// <see cref="http://www.jstatsoft.org/v11/i03"/> | |
/// <typeparam name="T">The referece type or index to the type in another collection</typeparam> | |
public class CondensedTable<T> | |
{ | |
Random random; | |
T[] A; | |
T[] B; | |
int tableSpace; | |
int tableSplit; | |
private const double PRECISION = 10000.0; | |
/// <summary> | |
/// Initialization of the table | |
/// </summary> | |
/// <remarks> | |
/// Precision of the probability is expected to be 4 digits. | |
/// </remarks> | |
/// <param name="pp">Tuple of the type or collection index and the probability of each entry.</param> | |
public CondensedTable(Tuple<T, double>[] pp) | |
{ | |
random = new Random(); | |
byte[,] sizes = new byte[pp.Length, 2]; | |
int aTot = 0, bTot = 0; | |
for(int i = 0; i < pp.Length; i++) | |
{ | |
var pair = pp[i]; | |
// convert the probability to an integer value | |
int prob = Convert.ToInt32(pair.Item2 * PRECISION); | |
// sum the total probability of all items (used as MAX random value) | |
tableSpace += prob; | |
// split the left and right halves of the probability value | |
int b = prob % 100; | |
int a = (prob - b) / 100; | |
// store the probabilities as integer (byte) values | |
sizes[i, 0] = Convert.ToByte(a); | |
sizes[i, 1] = Convert.ToByte(b); | |
// sum the probabilities of each half for all items | |
aTot += a; | |
bTot += b; | |
} | |
// init an array for each half based on the sum of probabilities | |
A = new T[aTot]; | |
B = new T[bTot]; | |
// shift the decimal to the right on the A side (ex: 56 -> 5600) | |
tableSplit = aTot * 100; | |
// build the table based on the sizes (this creates a distribution table) | |
int aIdx = 0, bIdx = 0; | |
for(int i= 0; i < pp.Length; i++) | |
{ | |
var pair = pp[i]; | |
// get the size | |
int a = sizes[i, 0], b = sizes[i, 1]; | |
for (int j = 0; j < Math.Max(a, b); j++) | |
{ | |
// fill the table based on the probabilities (distirbution) | |
if (j < a) A[aIdx++] = pair.Item1; | |
if (j < b) B[bIdx++] = pair.Item1; | |
} | |
} | |
} | |
public T Sample() | |
{ | |
// get a new random value (with MAX of tableSpace) | |
int i = random.Next(tableSpace); | |
// based on mid-value return the selected value from the appropriate table | |
if (i < tableSplit) return A[i / 100]; | |
return B[i - tableSplit]; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment