Skip to content

Instantly share code, notes, and snippets.

@jamesduffy
Forked from miguelgrinberg/retry_simulation.py
Created November 8, 2016 19:00
Show Gist options
  • Save jamesduffy/69f5707820e9958d641edc1a46a6c12d to your computer and use it in GitHub Desktop.
Save jamesduffy/69f5707820e9958d641edc1a46a6c12d to your computer and use it in GitHub Desktop.
#!/bin/python
# This simple Python script runs simulations for different retry algorithms
# The results are printed as CSV data to the console
# See this article on my blog for details:
# https://blog.miguelgrinberg.com/post/how-to-retry-with-class
from random import random, choice
def fixed_retry(previous_sleep):
# retry every second
return 1
def exponential_backoff(previous_sleep):
# start with one second, double with each additional attempt
if previous_sleep == 0:
return 1
return previous_sleep * 2
def jitter1(sleep):
# add up to 25% random time to sleep time
return sleep * (1 + random() / 4)
def jitter2(sleep):
# randomize the full sleep time
return sleep * random()
def simulate_failure(retry_func, sleep_adjust_func=None,
requests_per_second=100, max_requests_per_second=200,
failure_length=10, simulate_from=-2, simulate_to=30):
requests = []
time = simulate_from
rps = 0
def fail_request(req):
sleep_amount = req['sleep']
if sleep_adjust_func:
sleep_amount = sleep_adjust_func(sleep_amount)
if req['time'] <= time:
req['time'] += sleep_amount
req['sleep'] = retry_func(req['sleep'])
print("time,ok,failed")
while time < simulate_to:
service_is_down = time >= 0 and time < failure_length
# new requests added in this slot
new = requests_per_second
# count how many outstanding requests retry in this time slot
retries = len([req for req in requests if req['time'] <= time])
# add new requests
for i in range(new):
requests.append({'time': time - random(), 'sleep': retry_func(0)})
if service_is_down:
num_ok = 0
else:
num_ok = new + retries \
if new + retries < max_requests_per_second \
else max_requests_per_second
print('{},{},{}'.format(time, num_ok, new + retries - num_ok))
# update outstanding requests
if service_is_down:
# requests are failing in this time slot
# any requests that run in this time slot need to be retried later
for req in requests:
fail_request(req)
else:
# requests are succeeding in this time slot, so we remove as many
# as we have capacity for
finished_requests = [req for req in requests
if req['time'] <= time]
requests = [req for req in requests if req['time'] > time]
while len(finished_requests) > max_requests_per_second:
req = choice(finished_requests)
finished_requests.remove(req)
fail_request(req)
requests.append(req)
time += 1
print("# Retries at fixed intervals")
simulate_failure(fixed_retry)
print("# Retries with exponential backoff")
simulate_failure(exponential_backoff)
print("# Retries with exponential backoff and 25% jitter")
simulate_failure(exponential_backoff, jitter1)
print("# Retries with exponential backoff and full jitter")
simulate_failure(exponential_backoff, jitter2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment