Skip to content

Instantly share code, notes, and snippets.

@simonhayward
Created August 17, 2020 09:55
Show Gist options
  • Save simonhayward/2c4245321c94502c3a27015aca90b48f to your computer and use it in GitHub Desktop.
Save simonhayward/2c4245321c94502c3a27015aca90b48f to your computer and use it in GitHub Desktop.
reservoir sampling
"""
https://florian.github.io/reservoir-sampling/
- Store the first k elements as the reservoir elements
- For the i-th element, add it to the reservoir with a probability of k/i.
This is done by replacing a randomly selected element in the reservoir
"""
import random
import sys
def decision(probability):
r = random.random()
result = r < probability
if result is True:
print(f"chosen: {r} | {probability}")
return result
def sample(stream, k):
s = []
for i, element in enumerate(stream, 1):
if i < (k+1):
s.append(element)
else:
if i == (k+1):
print(f"sample: {s}\n")
print(f"element: {element} | probability={k}/{i}")
if decision(k/i) is True:
index = random.randint(0, k-1)
print(f"replacing: {s[index]} with: {element}")
s[index] = element
print(f"sample: {s}\n")
return s
if __name__ == "__main__":
try:
k = int(sys.argv[1])
except (IndexError, ValueError):
k = 3
stream = random.sample(range(1, 1000), 10)
print(f"size: {k}")
print(f"stream: {stream}")
print(f"sample result: {sample(stream, k)}")
@simonhayward
Copy link
Author

size: 3
stream: [723, 52, 884, 268, 761, 949, 151, 930, 4, 586]
sample: [723, 52, 884]

element: 268 | probability=3/4
chosen: 0.7123548512254824 | 0.75
replacing: 884 with: 268
sample: [723, 52, 268]

element: 761 | probability=3/5
element: 949 | probability=3/6
element: 151 | probability=3/7
element: 930 | probability=3/8
element: 4 | probability=3/9
chosen: 0.3317526888912905 | 0.3333333333333333
replacing: 268 with: 4
sample: [723, 52, 4]

element: 586 | probability=3/10
chosen: 0.033747990638995695 | 0.3
replacing: 723 with: 586
sample: [586, 52, 4]

sample result: [586, 52, 4]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment