Skip to content

Instantly share code, notes, and snippets.

@crowsonkb
Last active June 1, 2023 20:38
Show Gist options
  • Save crowsonkb/078a664989bd1d7d13ac81cfb449e328 to your computer and use it in GitHub Desktop.
Save crowsonkb/078a664989bd1d7d13ac81cfb449e328 to your computer and use it in GitHub Desktop.
import itertools
import random
class WeightedSampler:
"""Samples k elements from a stream of weighted items without replacement.
See Weighted Random Sampling (Efraimidis, Spirakis 2005).
"""
def __init__(self, k=1):
self.k = k
self.topk = []
def update(self, iterable):
"""Updates the sample with a new iterable of (item, weight) tuples."""
for item, weight in iterable:
e = weight / random.expovariate(1)
self.topk.append((e, item))
self.topk.sort(key=lambda x: x[0], reverse=True)
self.topk = self.topk[: self.k]
def sample(self):
"""Returns the current sample."""
return [x[1] for x in self.topk]
def sample_weighted(population, k=1, weights=None):
"""Samples k elements from the population sequence without replacement.
If a `weights` sequence is specified, selections are made according to the
relative weights.
"""
if weights is None:
weights = itertools.repeat(1.0, len(population))
sampler = WeightedSampler(k)
sampler.update(zip(population, weights))
return sampler.sample()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment