Created
June 14, 2011 20:27
-
-
Save alexbw/1025785 to your computer and use it in GitHub Desktop.
Benchmarking OpenCL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try:
    from time import clock
except ImportError:  # time.clock was removed in Python 3.8
    from time import perf_counter as clock
from timeit import default_timer

import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array as cl_array
from pylab import *
from pyopencl.elementwise import ElementwiseKernel
# Benchmark of z = a*x + b*y computed with NumPy on the CPU versus a PyOpenCL
# elementwise kernel on the GPU, over array sizes 10**0 .. 10**(n_log - 1).
# Each variant is timed twice: computing only, and computing followed by
# reading one element of the result (which for the GPU forces a device->host
# copy). Results are plotted as milliseconds per iteration on log-log axes.

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

do_times = 20  # how many times do you want to perform the computation?
a = 5
b = 6

# OpenCL kernel evaluating the same linear combination as the NumPy expression.
lin_comb = ElementwiseKernel(
    ctx,
    "float a, float *x, float b, float *y, float *z",
    "z[i] = a*x[i] + b*y[i]",
    "linear_combination")

# Number of array sizes tested; the largest array has 10**(n_log - 1) floats.
# My GPU (GeForce GT 330M) craps out at 10^8 floats.
n_log = 8

cpu_time = np.empty((n_log,))
gpu_time = np.empty((n_log,))
cpu_time_accessed = np.empty((n_log,))
gpu_time_accessed = np.empty((n_log,))

# Integer powers of ten. np.logspace would return floats, which current NumPy
# does not accept as an array shape.
array_sizes = 10 ** np.arange(n_log)

print("\n\n")

for (counter, n) in enumerate(array_sizes):
    print(counter)

    a_cpu = np.ones((n,), dtype=np.float32)
    b_cpu = np.ones((n,), dtype=np.float32)

    a_gpu = cl_array.to_device(queue, a_cpu)
    b_gpu = cl_array.to_device(queue, b_cpu)
    c_gpu = cl_array.empty_like(a_gpu)

    # CPU, result discarded.
    t_cpu = default_timer()
    for _ in range(do_times):
        a*a_cpu + b*b_cpu
    cpu_time[counter] = (default_timer() - t_cpu) / (do_times / 1000.0)

    # GPU, result left on the device. Kernel launches are only enqueued, so
    # the queue is drained before starting and before stopping the timer;
    # otherwise only the launch overhead would be measured.
    queue.finish()
    t_gpu = default_timer()
    for _ in range(do_times):
        lin_comb(a, a_gpu, b, b_gpu, c_gpu)
    queue.finish()
    gpu_time[counter] = (default_timer() - t_gpu) / (do_times / 1000.0)

    # CPU, result stored and one element read.
    t_cpu = default_timer()
    for _ in range(do_times):
        c_cpu = a*a_cpu + b*b_cpu
        c_cpu[0]
    cpu_time_accessed[counter] = (default_timer() - t_cpu) / (do_times / 1000.0)

    # GPU, result copied back to the host each iteration; .get() returns a
    # host array, so it waits for the kernel and no extra finish() is needed.
    queue.finish()
    t_gpu = default_timer()
    for _ in range(do_times):
        lin_comb(a, a_gpu, b, b_gpu, c_gpu)
        c_gpu.get()[0]
    gpu_time_accessed[counter] = (default_timer() - t_gpu) / (do_times / 1000.0)

# Plot all four timing series against array size.
cla()
loglog(array_sizes, cpu_time, '-gx')
loglog(array_sizes, gpu_time, '-rx')
loglog(array_sizes, cpu_time_accessed, '-go')
loglog(array_sizes, gpu_time_accessed, '-ro')
ylabel('ms / iteration')
xlabel('array size')
title('Execution time for a*x[] + b*y[] operation')
legend(('CPU, w/out retrieval', 'GPU, w/out retrieval', 'CPU w/ retrieval', 'GPU w/ retrieval'), loc='best')
# Needed for the figure to appear when run as a plain (non-interactive) script.
show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment