Skip to content

Instantly share code, notes, and snippets.

@danielhao5
Forked from mesejo/reservoir_sampling.py
Created October 9, 2021 16:47
Show Gist options
  • Save danielhao5/33095e3180d77c169dd125e32e7736ec to your computer and use it in GitHub Desktop.
Save danielhao5/33095e3180d77c169dd125e32e7736ec to your computer and use it in GitHub Desktop.
Implementation of the L algorithm for reservoir sampling
import random
import numpy as np
from numpy.random import default_rng
from itertools import islice
def reservoir_sampling(pop, k):
reservoir = []
stream = iter(pop)
for e in islice(stream, k):
reservoir.append(e)
rng = default_rng()
w = np.exp(np.log(rng.random()) / k)
nxt = (k - 1) + rng.geometric(w)
for i, e in enumerate(stream, k):
if i == nxt:
reservoir[rng.integers(k)] = e
w *= np.exp(np.log(rng.random()) / k)
nxt += rng.geometric(w)
return reservoir
def reservoir_sampling_with_replacement(pop, k):
stream = iter(pop)
e = next(stream)
reservoir = np.repeat(e, k)
rng = default_rng()
w = rng.random(k)
nxt = rng.geometric(w)
min_nxt = np.amin(nxt)
for i, e in enumerate(stream, 1):
if i == min_nxt:
mask = (i == nxt)
reservoir[mask] = e
w[mask] = w[mask] * rng.random(np.count_nonzero(mask))
nxt[mask] = nxt[mask] + rng.geometric(w[mask])
min_nxt = np.amin(nxt)
return reservoir.tolist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment