
@pentschev
Last active August 16, 2019 10:06
UCX map overlap sample
import asyncio
import time
import numpy as np
import cupy
import numba
import dask.array as da
from dask_cuda import DGX, LocalCUDACluster
from dask.distributed import Client, wait
@numba.cuda.jit
def _smooth_gpu(x, out):
    # 3x3 box filter: each interior cell becomes the mean of its 9-cell neighborhood
    i, j = numba.cuda.grid(2)
    n, m = x.shape
    if 1 <= i < n - 1 and 1 <= j < m - 1:
        out[i, j] = (x[i - 1, j - 1] + x[i - 1, j] + x[i - 1, j + 1] +
                     x[i    , j - 1] + x[i    , j] + x[i    , j + 1] +
                     x[i + 1, j - 1] + x[i + 1, j] + x[i + 1, j + 1]) / 9
def smooth_gpu(x, out):
    # Launch the stencil kernel over a 2D grid of 16x16 thread blocks
    import math
    threadsperblock = (16, 16)
    blockspergrid_x = math.ceil(x.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(x.shape[1] / threadsperblock[1])
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    _smooth_gpu[blockspergrid, threadsperblock](x, out)
def dispatch_smooth_gpu(x):
    # Called by map_overlap on each (halo-padded) CuPy chunk
    out = cupy.zeros(x.shape, x.dtype)
    smooth_gpu(x, out)
    return out
async def f():
    # async with LocalCUDACluster(asynchronous=True) as cluster:
    async with DGX(asynchronous=True, silence_logs=True) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            # Create a simple random array
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            x = rs.random((80000, 80000), chunks=(10000, 10000)).persist()
            await wait(x)
            t = time.time()
            y = x.map_overlap(dispatch_smooth_gpu, depth=1).persist()
            await wait(y)
            print("Time:", time.time() - t)
if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(f())
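For reference, below is a minimal CPU-only sketch of the same map_overlap pattern, assuming a plain NumPy stencil in place of the Numba CUDA kernel and a much smaller, illustrative array size; it shows how depth=1 gives each chunk the one-cell halo the 3x3 stencil needs across chunk boundaries.

# Hypothetical CPU-only illustration (not part of the original gist):
# map_overlap(depth=1) hands each chunk a one-cell halo from its neighbors,
# so the 3x3 stencil also sees correct values at chunk boundaries.
import numpy as np
import dask.array as da

def smooth_cpu(x):
    # Same 3x3 box filter as _smooth_gpu, expressed with NumPy slicing
    out = np.zeros_like(x)
    out[1:-1, 1:-1] = (x[:-2, :-2] + x[:-2, 1:-1] + x[:-2, 2:] +
                       x[1:-1, :-2] + x[1:-1, 1:-1] + x[1:-1, 2:] +
                       x[2:, :-2] + x[2:, 1:-1] + x[2:, 2:]) / 9
    return out

x = da.random.random((4000, 4000), chunks=(1000, 1000))
y = x.map_overlap(smooth_cpu, depth=1)
y.compute()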