Skip to content

Instantly share code, notes, and snippets.

@alexbw
Created June 14, 2011 20:27
Show Gist options
  • Save alexbw/1025785 to your computer and use it in GitHub Desktop.
Save alexbw/1025785 to your computer and use it in GitHub Desktop.
Benchmarking OpenCL
from time import clock
from timeit import default_timer

import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array as cl_array
from pyopencl.elementwise import ElementwiseKernel
from pylab import *
# OpenCL context and the command queue all device work is submitted to.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

do_times = 20  # number of repetitions averaged into each timing measurement

# Scalar coefficients of the benchmarked linear combination a*x + b*y.
a = 5
b = 6

# Elementwise OpenCL kernel computing z[i] = a*x[i] + b*y[i].
lin_comb = ElementwiseKernel(
    ctx,
    "float a, float *x, float b, float *y, float *z",
    "z[i] = a*x[i] + b*y[i]",
    "linear_combination")

# Number of array sizes to benchmark; sizes run 10**0 .. 10**(n_log - 1).
# The original author's GPU (GeForce GT 330M) failed at 10^8 floats.
n_log = 8

# One slot per array size, filled in by the benchmark loop (ms / iteration).
cpu_time = np.empty((n_log,))
gpu_time = np.empty((n_log,))
cpu_time_accessed = np.empty((n_log,))
gpu_time_accessed = np.empty((n_log,))

# Exact integer powers of ten. np.logspace would yield floats, which cannot
# be used directly as array shapes on newer NumPy releases.
array_sizes = 10 ** np.arange(n_log, dtype=np.int64)
# Benchmark a*x + b*y on the CPU (NumPy) and on the GPU (PyOpenCL) for every
# array size, with and without reading one result element back. Each result
# is stored as milliseconds per iteration, averaged over do_times runs.
print("\n\n")
for counter, size in enumerate(array_sizes):
    # Array shapes must be integers; array_sizes may hold floats.
    n = int(round(size))
    print(counter)

    a_cpu = np.ones((n,)).astype('float32')
    b_cpu = np.ones((n,)).astype('float32')
    c_cpu = np.empty_like(a_cpu)

    # NOTE(review): newer PyOpenCL releases appear to take
    # to_device(queue, ary) without the context argument -- confirm against
    # the installed version.
    a_gpu = cl_array.to_device(ctx, queue, a_cpu)
    b_gpu = cl_array.to_device(ctx, queue, b_cpu)
    c_gpu = cl_array.empty_like(a_gpu)

    # CPU, result discarded.
    t_cpu = default_timer()
    for i in range(do_times):
        a*a_cpu+b*b_cpu
    cpu_time[counter] = (default_timer() - t_cpu) * 1000.0 / do_times

    # GPU, result left on the device. Drain the queue before starting and
    # before stopping the timer so the measurement covers kernel execution
    # rather than just the cost of enqueueing the launches.
    queue.finish()
    t_gpu = default_timer()
    for i in range(do_times):
        lin_comb(a, a_gpu, b, b_gpu, c_gpu)
    queue.finish()
    gpu_time[counter] = (default_timer() - t_gpu) * 1000.0 / do_times

    # CPU, result stored and one element read on every iteration.
    t_cpu = default_timer()
    for i in range(do_times):
        c_cpu = a*a_cpu+b*b_cpu
        c_cpu[0]
    cpu_time_accessed[counter] = (default_timer() - t_cpu) * 1000.0 / do_times

    # GPU, result copied back to the host on every iteration. The original
    # indentation was lost; the read-back is assumed to belong inside the
    # loop, mirroring the CPU case above.
    queue.finish()
    t_gpu = default_timer()
    for i in range(do_times):
        lin_comb(a, a_gpu, b, b_gpu, c_gpu)
        c_gpu.get()[0]
    gpu_time_accessed[counter] = (default_timer() - t_gpu) * 1000.0 / do_times
# Plot all four timing curves against array size on log-log axes.
cla()

# (measurements, line format): green = CPU, red = GPU;
# 'x' markers = without retrieval, 'o' markers = with retrieval.
timing_series = (
    (cpu_time, '-gx'),
    (gpu_time, '-rx'),
    (cpu_time_accessed, '-go'),
    (gpu_time_accessed, '-ro'),
)
for measurements, line_format in timing_series:
    loglog(array_sizes, measurements, line_format)

xlabel('array size')
ylabel('ms / iteration')
title('Execution time for a*x[] + b*y[] operation')

# Legend entries are listed in the same order the curves were drawn.
curve_labels = (
    'CPU, w/out retrieval',
    'GPU, w/out retrieval',
    'CPU w/ retrieval',
    'GPU w/ retrieval',
)
legend(curve_labels, loc='best')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment