Skip to content

Instantly share code, notes, and snippets.

@jakirkham
Forked from pentschev/ucx_map_overlap_smooth.py
Created August 16, 2019 10:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jakirkham/97604689c1308c2347b598fe1abc57d6 to your computer and use it in GitHub Desktop.
Save jakirkham/97604689c1308c2347b598fe1abc57d6 to your computer and use it in GitHub Desktop.
UCX map overlap sample
import asyncio
import time
import numpy as np
import cupy
import numba
import dask.array as da
from dask_cuda import DGX, LocalCUDACluster
from dask.distributed import Client, wait
@numba.cuda.jit
def _smooth_gpu(x, out):
    """CUDA kernel: store the 3x3 neighbourhood mean of ``x`` in ``out``.

    Each thread handles the single ``(row, col)`` position it receives from
    the 2-D launch grid. Only interior positions are written: the outermost
    rows and columns of ``out`` are never touched, and threads that land
    outside the array do nothing.
    """
    row, col = numba.cuda.grid(2)
    n_rows, n_cols = x.shape

    # Border cells have no complete 3x3 neighbourhood, and surplus threads
    # from the rounded-up launch grid lie beyond the array; skip both.
    if row < 1 or row >= n_rows - 1 or col < 1 or col >= n_cols - 1:
        return

    # Sum the nine neighbours one stencil row at a time, left to right.
    acc = x[row - 1, col - 1] + x[row - 1, col] + x[row - 1, col + 1]
    acc = acc + x[row, col - 1] + x[row, col] + x[row, col + 1]
    acc = acc + x[row + 1, col - 1] + x[row + 1, col] + x[row + 1, col + 1]
    out[row, col] = acc / 9
def smooth_gpu(x, out):
    """Launch the ``_smooth_gpu`` kernel on ``x``, writing results to ``out``.

    The launch uses 16x16 thread blocks and, along each of the first two
    axes of ``x``, enough blocks to cover every element.
    """
    import math

    block_dim = (16, 16)
    # Round up so a partially filled block at each edge is still launched.
    grid_dim = (
        math.ceil(x.shape[0] / block_dim[0]),
        math.ceil(x.shape[1] / block_dim[1]),
    )
    _smooth_gpu[grid_dim, block_dim](x, out)
def dispatch_smooth_gpu(x):
    """Return a freshly allocated array holding the smoothed copy of ``x``.

    The output is a zero-filled CuPy array with the shape and dtype of
    ``x``; ``smooth_gpu`` then fills it in place. Positions that
    ``smooth_gpu`` does not write keep their initial zero.
    """
    smoothed = cupy.zeros(x.shape, x.dtype)
    smooth_gpu(x, smoothed)
    return smoothed
async def f():
    """Time a ``map_overlap`` smoothing pass on an asynchronous DGX cluster.

    Starts a ``DGX`` cluster and a ``Client`` in asynchronous mode, persists
    an 80000x80000 random Dask array (10000x10000 chunks) generated through
    ``cupy.random.RandomState``, applies ``dispatch_smooth_gpu`` to it via
    ``map_overlap`` with a depth of 1, and prints the elapsed time of that
    step.
    """
    # Alternative cluster kept from the original sample:
    #     LocalCUDACluster(asynchronous=True)
    async with DGX(asynchronous=True, silence_logs=True) as cluster:
        # The client is only needed as the active context; its handle is
        # never used directly.
        async with Client(cluster, asynchronous=True):
            # Create a simple random array and wait until it is fully
            # materialised, so its generation is excluded from the timing.
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            x = rs.random((80000, 80000), chunks=(10000, 10000)).persist()
            await wait(x)

            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments.
            start = time.perf_counter()
            y = x.map_overlap(dispatch_smooth_gpu, depth=1)
            await y.persist()
            print("Time:", time.perf_counter() - start)
if __name__ == '__main__':
    # asyncio.run creates a fresh event loop, runs f() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated and no longer creates one on recent Python.
    asyncio.run(f())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment