Skip to content

Instantly share code, notes, and snippets.

@lebedov
Created November 8, 2012 02:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lebedov/4036297 to your computer and use it in GitHub Desktop.
Save lebedov/4036297 to your computer and use it in GitHub Desktop.
Compare performance of using preallocated pinned and unpinned host memory
#!/usr/bin/env python
"""
Compare performance of using pinned and unpinned host memory.
"""
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
from time import time
increment_mod = SourceModule("""
__global__ void increment(double *a, int N)
{
int idx = threadIdx.x;
if (idx < N)
a[idx] = a[idx]+1;
}
""")
increment = increment_mod.get_function("increment")
N = 20
M = 3
# Time use of pinned host memory:
x = drv.pagelocked_empty((N, N), np.float64, mem_flags=drv.host_alloc_flags.DEVICEMAP)
x_gpu_ptr = np.intp(x.base.get_device_pointer())
times = np.empty(M)
for i in xrange(M):
x[:, :] = np.random.rand(N, N)
x_orig = x.copy()
start = time()
increment(x_gpu_ptr, np.uint32(x.size), block=(512, 1, 1))
times[i] = time()-start
np.allclose(x_orig + 1, x)
print "Average kernel execution time with pinned memory: %3.7f" % np.mean(times)
# Time use of pageable host memory:
x = np.empty((N, N), np.float64)
times = np.empty(M)
for i in xrange(M):
x[:, :] = np.random.rand(N, N)
x_orig = x.copy()
start = time()
increment(drv.InOut(x), np.uint32(x.size), block=(512, 1, 1))
times[i] = time()-start
np.allclose(x_orig + 1, x)
print "Average kernel execution time with pageable memory: %3.7f" % np.mean(times)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment