Skip to content

Instantly share code, notes, and snippets.

@quasiben
Created April 30, 2020 17:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quasiben/71841f8f1ce7c4fa709d3abf1bdbcb98 to your computer and use it in GitHub Desktop.
Save quasiben/71841f8f1ce7c4fa709d3abf1bdbcb98 to your computer and use it in GitHub Desktop.
import asyncio
import time
import numpy as np
import cupy
import numba
import dask.array as da
from dask_cuda import LocalCUDACluster
from dask.distributed import Client, wait
@numba.cuda.jit
def _smooth_gpu(x, out):
    """CUDA kernel: write the 3x3 neighbourhood mean of ``x`` into ``out``.

    Each thread handles the single element at its 2-D grid position.  Only
    interior positions (those with a full 3x3 neighbourhood inside ``x``) are
    written; border elements of ``out`` are left untouched.
    """
    row, col = numba.cuda.grid(2)
    n_rows, n_cols = x.shape

    # Threads on the border or outside the array have nothing to compute.
    if row < 1 or row >= n_rows - 1 or col < 1 or col >= n_cols - 1:
        return

    # Accumulate row by row, left to right, so the summation order is fixed.
    acc = x[row - 1, col - 1]
    acc += x[row - 1, col]
    acc += x[row - 1, col + 1]
    acc += x[row, col - 1]
    acc += x[row, col]
    acc += x[row, col + 1]
    acc += x[row + 1, col - 1]
    acc += x[row + 1, col]
    acc += x[row + 1, col + 1]

    out[row, col] = acc / 9
def smooth_gpu(x, out):
    """Launch ``_smooth_gpu`` over ``x`` with 16x16 thread blocks.

    The grid is sized by rounding the first two extents of ``x`` up to a whole
    number of blocks, so every element gets a thread.  Results are written
    into ``out``; nothing is returned.
    """
    import math

    block_dim = (16, 16)
    n_rows = x.shape[0]
    n_cols = x.shape[1]
    grid_dim = (
        math.ceil(n_rows / block_dim[0]),
        math.ceil(n_cols / block_dim[1]),
    )
    launch = _smooth_gpu[grid_dim, block_dim]
    launch(x, out)
def dispatch_smooth_gpu(x):
    """Return a smoothed copy of the chunk ``x``.

    Allocates a zero-filled cupy array with the shape and dtype of ``x``,
    lets ``smooth_gpu`` fill it in, and returns it.
    """
    smoothed = cupy.zeros(shape=x.shape, dtype=x.dtype)
    smooth_gpu(x, smoothed)
    return smoothed
async def f():
    """Time a 3x3 smoothing stencil over a cupy-backed dask array.

    Starts an asynchronous ``LocalCUDACluster`` configured for UCX, builds an
    80000 x 80000 random array in 10000 x 10000 chunks using a cupy random
    state, waits for it to be persisted, then measures how long
    ``map_overlap(dispatch_smooth_gpu, depth=1)`` takes to persist and prints
    the elapsed seconds.
    """
    protocol = "ucx"
    # The network interface is machine specific: "enp1s0f0" on a DGX-1,
    # "enp134s0f1" on a DGX-2 (the one used here).
    interface = "enp134s0f1"
    enable_tcp_over_ucx = True
    enable_nvlink = True
    enable_infiniband = False
    kwargs = {
        "protocol": protocol,
        "interface": interface,
        "enable_tcp_over_ucx": enable_tcp_over_ucx,
        "enable_nvlink": enable_nvlink,
        "enable_infiniband": enable_infiniband,
        "rmm_pool_size": '24GB',
        "ucx_net_devices": "auto" if enable_infiniband is True else None,
    }
    async with LocalCUDACluster(
        asynchronous=True, silence_logs=True, local_directory="/tmp/bzaitlen", **kwargs
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            # Create a simple random array
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            x = rs.random((80000, 80000), chunks=(10000, 10000)).persist()
            # Make sure the input exists before the clock starts.
            await wait(x)
            # perf_counter is monotonic, so the interval cannot be skewed by
            # wall-clock adjustments during the run.
            t = time.perf_counter()
            y = x.map_overlap(dispatch_smooth_gpu, depth=1)
            await y.persist()
            print("Time:", time.perf_counter() - t)
if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs the benchmark coroutine to
    # completion and closes the loop.  Calling asyncio.get_event_loop() with no
    # running loop is deprecated and fails on recent Python versions.
    asyncio.run(f())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment