Skip to content

Instantly share code, notes, and snippets.

@GbGp
Created November 19, 2023 13:59
Show Gist options
  • Save GbGp/120d4f15e021ce0917e24470fa534ec1 to your computer and use it in GitHub Desktop.
Simple element-wise sum test in OpenCL
"""Simple element-wise vector addition (C = A + B) test in OpenCL via pyopencl."""
import pyopencl as cl
import numpy as np

# Discover the available OpenCL platforms and pick a GPU on the first one.
platforms = cl.get_platforms()
print(platforms)
gpu_devices = platforms[0].get_devices(cl.device_type.GPU)
if not gpu_devices:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise RuntimeError("No GPU device found on the first OpenCL platform")
device = gpu_devices[0]  # Select the first GPU device
context = cl.Context([device])

# Create a command queue for the selected device
queue = cl.CommandQueue(context)

# Define an OpenCL kernel as a multi-line string
kernel_code = """
__kernel void simple_add(__global const float* A, __global const float* B, __global float* C)
{
int gid = get_global_id(0);
C[gid] = A[gid] + B[gid];
}
"""

# Create a program from the kernel code
program = cl.Program(context, kernel_code).build()

# Create input data (float32 to match the kernel's float* arguments).
a_np = np.random.rand(100).astype(np.float32)
b_np = np.random.rand(100).astype(np.float32)

# Allocate memory for variables on the device ONCE, outside the loop.
# (The original allocated three fresh buffers per iteration — 300,000
# device allocations for a stress loop that only needs to re-run the kernel.)
mf = cl.mem_flags
a_g = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
b_g = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
c_g = cl.Buffer(context, mf.WRITE_ONLY, a_np.nbytes)

kernel = program.simple_add

# Execute the kernel multiple times to stress the system a little bit
for _ in range(100000):
    kernel(queue, a_np.shape, None, a_g, b_g, c_g)
# Ensure every enqueued kernel has actually completed before validating.
queue.finish()

# Read the result back into a Python array (enqueue_copy blocks by default).
c_np = np.empty_like(a_np)
cl.enqueue_copy(queue, c_np, c_g)

# Check the result against the NumPy reference. Raise explicitly rather
# than using `assert`, which is stripped when Python runs with -O.
if not np.array_equal(c_np, a_np + b_np):
    raise AssertionError("OpenCL result does not match NumPy reference")
print(c_np)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment