Created
September 11, 2017 18:02
-
-
Save kaushikcfd/1a9460dbfb7ee032963688d5e958e4aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import perf_counter
from time import sleep
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pyopencl as cl
def bandwidth_calculator(n_numbers):
    """Measure the throughput of a trivial "multiply by two" OpenCL kernel.

    A float32 array of ``n_numbers`` random values is copied to the device,
    the kernel is launched a few times as a warm-up, and then a fixed number
    of launches is timed from the host.

    Args:
        n_numbers: Number of float32 elements to process; must be >= 1.

    Returns:
        The measured rate in GB/s (1e9 bytes per second), computed as
        ``nruns * a.nbytes / elapsed_seconds``.
        NOTE: only the bytes of the input array are counted per run, not the
        bytes written to the output buffer.

    Raises:
        ValueError: If ``n_numbers`` is smaller than 1.
    """
    if n_numbers < 1:
        raise ValueError("n_numbers must be a positive integer, got %r" % (n_numbers,))

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    a = np.random.rand(n_numbers).astype(np.float32)

    a_dev = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
    a_twice_dev = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, a.nbytes)

    prg = cl.Program(ctx, """
    __kernel void twice(__global const float *a_dev, __global float *a_twice_dev)
    {
        a_twice_dev[get_global_id(0)] = a_dev[get_global_id(0)] * 2;
    }
    """).build()
    knl = prg.twice

    a_twice = np.empty_like(a)

    # Warm-up launches, so one-time start-up costs are not part of the
    # timed region below.
    for _ in range(3):
        knl(queue, a.shape, None, a_dev, a_twice_dev)

    nruns = 5

    # Drain the queue so the timer starts with no pending work.
    queue.finish()
    # perf_counter is monotonic and high-resolution; time() can step backwards
    # or tick too coarsely for the short runs at small n_numbers.
    gpu_start_time = perf_counter()

    # Enqueue the kernel several times and average over all the runs.
    for _ in range(nruns):
        knl(queue, a.shape, None, a_dev, a_twice_dev)

    # Kernel launches are asynchronous: block until every enqueued launch has
    # completed before stopping the timer.
    queue.finish()
    elapsed = perf_counter() - gpu_start_time

    # Read the result back outside the timed region (not used further).
    cl.enqueue_copy(queue, a_twice, a_twice_dev)

    return (nruns * a.nbytes) / (elapsed * 1e9)
if __name__ == "__main__":
    # Without PYOPENCL_CTX set, pyopencl may prompt for a device on every
    # call, which would interleave with the table printed below.
    print('Please set the environment variable PYOPENCL_CTX, so that the data printed is presentable.')
    sleep(2.0)  # give the user a moment to read the hint

    number_list = []
    bandwidth_list = []

    # Sweep the array size over powers of ten: 1e0 .. 1e8 floats.
    for exponent in range(9):
        n_floats = 10**exponent
        number_list.append(n_floats)

        measured = bandwidth_calculator(n_floats)
        bandwidth_list.append(measured)

        print("Number of floats=1E", exponent,
              "\tBandwidth obtained = ", measured, " GB/s.", sep='')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment