Compare the performance of host-registered (pinned) and pageable (unpinned) host memory in PyCUDA.
#!/usr/bin/env python
"""
Compare the performance of host-registered (pinned) and pageable (unpinned) host memory.
"""
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
from time import time
increment_mod = SourceModule("""
// Add 1 to each element of a; one thread per element (single block).
__global__ void increment(double *a, int N)
{
    int idx = threadIdx.x;
    if (idx < N)
        a[idx] = a[idx] + 1;
}
""")
increment = increment_mod.get_function("increment")
N = 20   # matrix dimension; the kernel processes N*N = 400 elements
M = 3    # number of timing trials per memory type
# Time use of pinned (host-registered, device-mapped) host memory:
x = drv.aligned_empty((N, N), dtype=np.float64, order='C')
x = drv.register_host_memory(x, flags=drv.mem_host_register_flags.DEVICEMAP)
x_gpu_ptr = np.intp(x.base.get_device_pointer())
times = np.empty(M)
for i in range(M):
    x[:, :] = np.random.rand(N, N)
    x_orig = x.copy()
    start = time()
    increment(x_gpu_ptr, np.uint32(x.size), block=(512, 1, 1))
    # The launch is asynchronous; synchronize so the timing and the
    # correctness check below see the completed kernel.
    drv.Context.synchronize()
    times[i] = time()-start
    assert np.allclose(x_orig + 1, x)
print("Average kernel execution time with pinned memory: %3.7f" % np.mean(times))
# Time use of pageable host memory:
x = np.empty((N, N), np.float64)
times = np.empty(M)
for i in range(M):
    x[:, :] = np.random.rand(N, N)
    x_orig = x.copy()
    start = time()
    # drv.InOut copies the pageable array to the device before the launch
    # and back afterwards; the copy back blocks until the kernel finishes.
    increment(drv.InOut(x), np.uint32(x.size), block=(512, 1, 1))
    times[i] = time()-start
    assert np.allclose(x_orig + 1, x)
print("Average kernel execution time with pageable memory: %3.7f" % np.mean(times))
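For reference, PyCUDA can also allocate page-locked memory directly with pagelocked_empty rather than registering an existing buffer. The following is a minimal sketch of swapping that into the pinned-memory branch above; it assumes the same N, the same increment kernel, and device-mapped allocation flags:

# Sketch: allocate device-mapped pinned memory directly instead of
# registering an existing host allocation.
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv

N = 20
x = drv.pagelocked_empty((N, N), dtype=np.float64,
                         mem_flags=drv.host_alloc_flags.DEVICEMAP)
x_gpu_ptr = np.intp(x.base.get_device_pointer())
# x_gpu_ptr can then be passed to the kernel exactly as in the
# host-registered case:
#   increment(x_gpu_ptr, np.uint32(x.size), block=(512, 1, 1))
#   drv.Context.synchronize()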