Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
"""
Vitter JS (1987) 'An efficient algorithm for sequential
random sampling.' ACM T. Math. Softw. 13(1): 58--67.
Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0
"""
from math import exp, log
import random
import numpy as np
from numpy import arange
def vitter_A(N, n, sample_out, last=0):
"""
Select `n` samples from the integers in population [0, `N`).
The result is placed in integer array `sample_out` (convenient
when testing with Numba).
Optionally start "counting" from integer `last`, to continue
from Vitter_D. I.e. the population has range [last, N+last).
"""
# cdef:
# float V, quot, Nreal, top
# int -> all others?
idx = 0
top = float(N - n); Nreal = float(N)
while n >= 2:
V = np.random.random(); S = 0; quot = top / Nreal
while quot > V:
S += 1; top -= 1.0; Nreal -= 1.0
quot = (quot * top) / Nreal
last = last + S
sample_out[idx] = last
idx += 1
last = last + 1
Nreal -= 1.0
n -= 1
S = int(np.round(Nreal) * np.random.random())
last += S
sample_out[idx] = last
def test_python(n,k):
return random.sample(xrange(n), k)
def test_vitter(n, k):
sample_out = [0] * k
vitter_A(n, k, sample_out)
return sample_out
# timeit -n 10 test_python(10000000, 400000)
# 10 loops, best of 3: 250 ms per loop
#
# timeit -n 10 test_vitter(10000000, 400000)
# 10 loops, best of 3: 1.72 s per loop
@loisaidasam

This comment has been minimized.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.