Skip to content

Instantly share code, notes, and snippets.

@lytex
Created April 3, 2020 18:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lytex/a52fd9451061da8ca30bb14d77d160e3 to your computer and use it in GitHub Desktop.
Save lytex/a52fd9451061da8ca30bb14d77d160e3 to your computer and use it in GitHub Desktop.
Rough benchmarking to estimate overhead of spawning a process vs time of execution
import ray
from time import sleep, time
import numpy as np
import pandas as pd
# --- Benchmark configuration -------------------------------------------
thread_number = 100                 # how many tasks get submitted
pids = [*range(thread_number)]      # sequential id for each task
min_wait, max_wait = 0, 100         # bounds of the raw integer draw
n = 1                               # the raw draw is divided by 10**n
# One requested sleep duration per task: integers drawn from
# [min_wait, max_wait) and scaled down by 10**n.
waits = np.random.randint(low=min_wait, high=max_wait, size=thread_number) / 10 ** n
def pad_format(x, max_x):
    """Format ``x`` as an integer zero-padded to the width implied by ``max_x``.

    The pad width is ``ceil(log10(max_x))``, which is enough digits for every
    non-negative integer below ``max_x`` (e.g. ``max_x=50`` pads to two
    digits). When ``max_x`` is 1 or less no meaningful width can be derived,
    so a single digit is used.

    Args:
        x: Number to format. Non-integers are truncated by the ``%i``
            conversion.
        max_x: Bound from which the digit count is derived.

    Returns:
        The zero-padded decimal string.
    """
    if max_x > 1:
        width = int(np.ceil(np.log10(max_x)))
    else:
        # log10 would be -inf, NaN or <= 0 here, which previously ended in an
        # OverflowError/ValueError or an invalid '%.-Ni' format spec.
        width = 1
    # '*' reads the precision from the argument tuple, so the format string
    # does not have to be assembled by concatenation.
    return '%.*i' % (width, x)
@ray.remote
def wait(x, pid):
    """Sleep for ``x`` seconds and record how long the task actually took.

    The elapsed time is measured against the module-level ``start`` timestamp,
    rounded to ``n + 1`` decimals, and appended as one line to the ``result``
    file. ``start`` is then reset to the current time so that the next call
    in the same interpreter measures from the end of this one.

    NOTE(review): under Ray each worker process presumably keeps its own copy
    of ``start`` - confirm against the Ray execution model.

    Args:
        x: Requested sleep duration in seconds.
        pid: Task id, used only to label the log line.

    Returns:
        Tuple ``(x, actual)``: the requested and the measured duration.
    """
    global start
    sleep(x)
    fmt_pid = pad_format(pid, thread_number)
    actual = round(time() - start, n + 1)
    # Use a context manager so the append handle is closed (and flushed)
    # deterministically instead of being leaked to the garbage collector.
    with open('result', 'a') as log:
        print(f'pid {fmt_pid} said {x}\twas\t{actual}', file=log)
    start = time()
    return x, actual
# Start every run from an empty 'result' file: opening in 'w' mode creates
# or truncates it, and leaving the block closes the handle.
with open('result', 'w') as fid:
    pass
# Benchmark driver: submit one `wait` task per (duration, id) pair, collect
# the (said, actual) results and summarize the per-task overhead.
ray.init()
try:
    # NOTE(review): `wait` reads the global `start`; presumably Ray ships the
    # function together with the globals it references on first submission,
    # so `start` has to be bound before the first `.remote()` call - confirm.
    start = time()
    processes = [wait.remote(seconds, pid) for seconds, pid in zip(waits, pids)]

    # A single batch `ray.get` blocks until every task has finished.
    # `ray.wait(processes)` defaults to num_returns=1 and therefore only
    # waited for the first task to become ready.
    results = ray.get(processes)

    # Naming the columns up front keeps `df.said` / `df.actual` available
    # even when no task was submitted.
    df = pd.DataFrame(results, columns=['said', 'actual'])
    print(df.sum())
    print('actual - said')
    overhead = df.actual - df.said
    print(overhead.describe())
    print(f'sum: {overhead.sum()}')
finally:
    # Always tear the Ray runtime down, even if a task or the report failed.
    ray.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment