Created
March 8, 2022 20:06
-
-
Save jcrousse/9e26506ec9f6c12c5ecd203645f91caf to your computer and use it in GitHub Desktop.
Using CUDA streams with CuPy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cupy as cp | |
import time | |
import asyncio | |
async def predict(N, power):
    """Raise a random ``N x N`` matrix to ``power`` on a dedicated CUDA stream.

    The matrix products are issued on a private non-blocking stream. The
    coroutine then yields to the event loop for five seconds before blocking
    on that stream, which lets other coroutines issue their own work in the
    meantime.

    Args:
        N: Side length of the square float64 matrix.
        power: Exponent; ``power - 1`` timed products are issued after one
            untimed product.

    Returns:
        A ``(cpu_time, gpu_time)`` tuple in seconds. ``cpu_time`` is the host
        time spent issuing the timed products. ``gpu_time`` is the time from
        the end of issuing until the stream has been synchronized, so it
        includes the five-second sleep.
    """
    compute_stream = cp.cuda.stream.Stream(non_blocking=True)
    # Scope the stream with ``with`` instead of ``use()`` so the thread's
    # previous current stream is restored once the work has been issued.
    # The block deliberately contains no ``await``: concurrent coroutines
    # share one thread, and suspending inside it would let them restore
    # each other's streams out of order.
    with compute_stream:
        d_mat = cp.random.randn(N * N, dtype=cp.float64).reshape(N, N)
        d_ret = d_mat
        # Untimed product whose result is discarded; presumably a warm-up
        # that keeps one-time setup cost out of the measurement.
        cp.matmul(d_ret, d_mat)
        start = time.perf_counter()
        for _ in range(power - 1):
            d_ret = cp.matmul(d_ret, d_mat)
        pre_synch = time.perf_counter()
    # Hand control back to the event loop before blocking on the stream.
    await asyncio.sleep(5)
    compute_stream.synchronize()
    cpu_time = pre_synch - start
    gpu_time = time.perf_counter() - pre_synch
    print(f"CPU time: {cpu_time}, GPU time: {gpu_time}")
    return cpu_time, gpu_time
async def main(n):
    """Compare one ``predict`` request against four concurrent ones.

    Runs a single request first to get a baseline, then awaits four
    identical requests together and prints how the combined wall time
    compares with four times the baseline.

    Args:
        n: Exponent forwarded to ``predict`` as its ``power`` argument.
    """
    # Baseline: one request awaited on its own.
    cpu_time, gpu_time = await predict(1024, n)
    single_request_time = round(cpu_time + gpu_time, 1)

    # Four identical requests scheduled together on the event loop.
    began = time.time()
    await asyncio.gather(*(predict(1024, n) for _ in range(4)))
    total_time = round(time.time() - began, 1)

    # Concurrent wall time as a percentage of the sequential estimate.
    sequential_estimate = 4 * single_request_time
    gain = round(total_time / sequential_estimate * 100)

    report = (
        f"Treated one request of size {n} in {cpu_time + gpu_time}\n "
        f"Treated 4 requests of size {n} in {total_time} seconds, instead "
        f"of {sequential_estimate}, ({gain}% of sequential operations)"
    )
    print(report)
if __name__ == "__main__":
    # Run the comparison for a small exponent, then a large one; each
    # asyncio.run call drives its own event loop to completion.
    for power in (32, 512):
        asyncio.run(main(power))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment