Skip to content

Instantly share code, notes, and snippets.

@albans
Created April 16, 2014 12:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save albans/10864989 to your computer and use it in GitHub Desktop.
Save albans/10864989 to your computer and use it in GitHub Desktop.
Java implementation of an algorithm for sampling without replacement by Donald Knuth
import static java.util.Arrays.asList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import com.google.common.collect.AbstractIterator;
public class Sample {
public static <T> Iterator<T> sOfN(final Integer sampleSize, final Integer populationSize, final Iterator<T> populationItems) {
return new AbstractIterator<T>() {
Integer n = sampleSize;
Integer N = populationSize;
Integer t = 0;
Integer m = 0;
Random random = new Random();
@Override
protected T computeNext() {
while (m<n) {
T next = populationItems.next();
double u = random.nextDouble();
if ((N-t)*u >= n-m) {
t++;
} else {
t++; m++;
return next;
}
}
return endOfData();
}
};
}
public static void main(String[] args) {
int[] result = new int[10];
List<Integer> digits = asList(0,1,2,3,4,5,6,7,8,9);
for (int i=0; i<100000; i++) {
Iterator<Integer> sample = Sample.sOfN(3, 10, digits.iterator());
while(sample.hasNext()) {
result[sample.next()]++;
}
}
System.out.println(Arrays.toString(result));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment