Skip to content

Instantly share code, notes, and snippets.

@anandankm
Created December 12, 2012 16:03
Show Gist options
  • Save anandankm/4268992 to your computer and use it in GitHub Desktop.
Save anandankm/4268992 to your computer and use it in GitHub Desktop.
Stream sample: based on an evenly distributed random sample.
/**
* Stream sample: based on an evenly distributed random sample.
*/
public class StreamSample
{
private static int sampleSize = 500;
private static int streamCount = 0;
private static int sampleIndex = 0;
private StreamElement[] streamSample = new StreamElement[sampleSize];
public void nextElement(StreamElement x) {
/**
* Initially fill in the sample until the specified
* sampleSize is reached.
*/
streamCount++;
if (sampleIndex <= sampleSize - 1) {
streamSample[sampleIndex] = x;
sampleIndex++;
return;
}
/**
* Probability that a stream element at (sampleSize + i)th instant is chosen to
* be added to the stream sample is probabilityForX = sampleSize/(sampleSize + i).
*
*/
double probabilityForX = sampleSize/(double)streamCount;
double randomProbability = Math.random(); // random probability [0.0, 1.0)
/**
* if random probability falls under probability for stream element at
* (sampleSize + i)th instant, add the element to the stream array.
*/
if (randomProbability <= probabilityForX) {
/**
* Choose a random index in the sample size range.
* #randomRange(s, e) - see below.
*/
int randomIndex = randomRange(0, sampleSize-1);
streamSample[randomIndex] = x;
/**
* 1: Probability that any element in the array would be replaced
* by the newer element is (probabilityForX * 1/sampleSize)
* 2: Probability that any element will not be replaced is
* 1 - (probabilityForX * 1/sampleSize)
*
* @example - 1. For 501th element to be chosen for sampling, the probability is 500/501.
* 2. For any element in the array to be replaced, the probability is 500/501 * 1/500 = 1/501.
* 3. For any element in the array to remain in the array, the probability is 1 - (1/501) = 500/501.
* From 1 and 2, at the end of the 501th instant, we have same probabilities for all the elements
* in the sample, ie, 500/501.
*/
}
}
public StreamElement[] getSampleStream() {
return this.streamSample;
}
/**
* Get a random number in the range of [s, e]
*/
private int randomRange(int s, int e) {
if (s >= e) {
return 0;
}
long range = (long) e - (long) s + 1;
range = (long) (range * Math.random());
return (int) (range + s);
}
/**
* Corresponds to a stream element object streamed at
* a particular instant 't'.
* This can have any data that is streamed.
*/
public class StreamElement {
long streamID;
long data1;
String data2;
// ..
// .. so on and so forth
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment