ddemidov/Makefile

## Makefile
zero: zero.cpp
	g++ -std=c++0x -o zero zero.cpp -lOpenCL

## zero.cpp
#include <iostream>
#include <vector>
#include <string>
#include <stdexcept>

#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>

//---------------------------------------------------------------------------
// Enforces a runtime requirement: does nothing when `cond` holds, otherwise
// throws std::runtime_error whose what() text is `msg`.
void precondition(bool cond, const std::string &msg) {
    if (cond) {
        return;
    }
    throw std::runtime_error(msg);
}

//---------------------------------------------------------------------------
// Returns the first GPU device found on any OpenCL platform.
//
// Platforms are probed in the order reported by cl::Platform::get(). A
// platform whose device query raises cl::Error is skipped and the search
// continues with the next platform.
//
// Throws std::runtime_error when no platform is available or when no
// platform yields a GPU device.
cl::Device get_device() {
    // Get list of OpenCL platforms.
    std::vector<cl::Platform> platform;
    cl::Platform::get(&platform);

    precondition(!platform.empty(), "No OpenCL platforms.");

    // Get first available device.
    for (const auto &p : platform) {
        try {
            std::vector<cl::Device> device;
            p.getDevices(CL_DEVICE_TYPE_GPU, &device);
            if (!device.empty()) return device[0];
        } catch (const cl::Error &) {
            // Best effort: an OpenCL error on this platform (presumably the
            // "no matching device" case) just means we try the next one.
        }
    }

    // Throw directly instead of via precondition(false, ...) so that no
    // code path reaches the end of this non-void function.
    throw std::runtime_error("No compute devices.");
}

//---------------------------------------------------------------------------
// Creates an OpenCL program from `source` in `context` and builds it for
// every device in `device`.
//
// Parameters:
//   context - context the program is created in.
//   device  - devices to build the program for.
//   source  - OpenCL C source text of the program.
//
// Returns the built program.
//
// On a build failure the source text and the build log of each requested
// device are written to std::cerr, and the original cl::Error is rethrown.
cl::Program build_program(
        const cl::Context &context,
        const std::vector<cl::Device> &device,
        const std::string &source
        )
{
    cl::Program program(context,
            cl::Program::Sources(1, std::make_pair(source.c_str(), source.size()))
            );
    try {
        program.build(device);
    } catch (const cl::Error&) {
        std::cerr << source << std::endl;
        // Report the log of every device rather than indexing device[0],
        // which would be out of range for an empty device list.
        for (const auto &d : device) {
            std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(d)
                      << std::endl;
        }
        throw;
    }
    return program;
}

//---------------------------------------------------------------------------
int main() {
    const size_t N = 5;

    try {
        std::vector<cl::Device> device;
        device.push_back(get_device());
        std::cout << device[0].getInfo<CL_DEVICE_NAME>() << std::endl;

        cl::Context context(device);
        cl::CommandQueue queue(context, device[0]);

        cl::Buffer u2(context, CL_MEM_READ_WRITE, N * sizeof(double));

        // Compile OpenCL program for the device.
        cl::Program program = build_program(context, device,
                "#if defined(cl_khr_fp64)\n"
                "#  pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                "#elif defined(cl_amd_fp64)\n"
                "#  pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
                "#endif\n"
                "kernel void vexcl_vector_kernel(\n"
                "	ulong n,\n"
                "	global double * prm_1,\n"
                "	double prm_2\n"
                ")\n"
                "{\n"
                "	size_t chunk_size  = (n + get_global_size(0) - 1) / get_global_size(0);\n"
                "	size_t chunk_start = get_global_id(0) * chunk_size;\n"
                "	size_t chunk_end   = min(n, chunk_start + chunk_size);\n"
                "	for(size_t idx = chunk_start; idx < chunk_end; ++idx) {\n"
                "		prm_1[idx] = prm_2;\n"
                "	}\n"
                "}\n"
                );
        cl::Kernel kernel(program, "vexcl_vector_kernel");

        size_t w_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device[0]);
        std::cout << "max workgroup size = " << w_size << std::endl;
        size_t g_size = 4 * w_size * device[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();

        kernel.setArg(0, N);
        kernel.setArg(1, u2);
        kernel.setArg(2, static_cast<double>(0));

        queue.enqueueNDRangeKernel(kernel, cl::NullRange, g_size, w_size);

        std::vector<double> u2_host(N, 42);

        queue.enqueueReadBuffer(u2, CL_TRUE, 0, N * sizeof(double), u2_host.data());

        std::cout << u2_host[0] << std::endl;
    } catch (const cl::Error &err) {
        std::cerr << err.what() << " " << err.err() << std::endl;
        return 1;
    }
}
	#include <iostream>
	#include <vector>
	#include <string>
	#include <stdexcept>

	#define __CL_ENABLE_EXCEPTIONS
	#include <CL/cl.hpp>

	//---------------------------------------------------------------------------
	// Enforces a runtime requirement: does nothing when `cond` holds, otherwise
	// throws std::runtime_error whose what() text is `msg`.
	void precondition(bool cond, const std::string &msg) {
	    if (cond) {
	        return;
	    }
	    throw std::runtime_error(msg);
	}

	//---------------------------------------------------------------------------
	// Returns the first GPU device found on any OpenCL platform.
	//
	// Platforms are probed in the order reported by cl::Platform::get(). A
	// platform whose device query raises cl::Error is skipped and the search
	// continues with the next platform.
	//
	// Throws std::runtime_error when no platform is available or when no
	// platform yields a GPU device.
	cl::Device get_device() {
	    // Get list of OpenCL platforms.
	    std::vector<cl::Platform> platform;
	    cl::Platform::get(&platform);

	    precondition(!platform.empty(), "No OpenCL platforms.");

	    // Get first available device.
	    for (const auto &p : platform) {
	        try {
	            std::vector<cl::Device> device;
	            p.getDevices(CL_DEVICE_TYPE_GPU, &device);
	            if (!device.empty()) return device[0];
	        } catch (const cl::Error &) {
	            // Best effort: an OpenCL error on this platform (presumably the
	            // "no matching device" case) just means we try the next one.
	        }
	    }

	    // Throw directly instead of via precondition(false, ...) so that no
	    // code path reaches the end of this non-void function.
	    throw std::runtime_error("No compute devices.");
	}

	//---------------------------------------------------------------------------
	// Creates an OpenCL program from `source` in `context` and builds it for
	// every device in `device`.
	//
	// Parameters:
	//   context - context the program is created in.
	//   device  - devices to build the program for.
	//   source  - OpenCL C source text of the program.
	//
	// Returns the built program.
	//
	// On a build failure the source text and the build log of each requested
	// device are written to std::cerr, and the original cl::Error is rethrown.
	cl::Program build_program(
	        const cl::Context &context,
	        const std::vector<cl::Device> &device,
	        const std::string &source
	        )
	{
	    cl::Program program(context,
	            cl::Program::Sources(1, std::make_pair(source.c_str(), source.size()))
	            );
	    try {
	        program.build(device);
	    } catch (const cl::Error&) {
	        std::cerr << source << std::endl;
	        // Report the log of every device rather than indexing device[0],
	        // which would be out of range for an empty device list.
	        for (const auto &d : device) {
	            std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(d)
	                      << std::endl;
	        }
	        throw;
	    }
	    return program;
	}

	//---------------------------------------------------------------------------
	int main() {
	const size_t N = 5;

	try {
	std::vector<cl::Device> device;
	device.push_back(get_device());
	std::cout << device[0].getInfo<CL_DEVICE_NAME>() << std::endl;

	cl::Context context(device);
	cl::CommandQueue queue(context, device[0]);

	cl::Buffer u2(context, CL_MEM_READ_WRITE, N * sizeof(double));

	// Compile OpenCL program for the device.
	cl::Program program = build_program(context, device,
	"#if defined(cl_khr_fp64)\n"
	"# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
	"#elif defined(cl_amd_fp64)\n"
	"# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
	"#endif\n"
	"kernel void vexcl_vector_kernel(\n"
	" ulong n,\n"
	" global double * prm_1,\n"
	" double prm_2\n"
	")\n"
	"{\n"
	" size_t chunk_size = (n + get_global_size(0) - 1) / get_global_size(0);\n"
	" size_t chunk_start = get_global_id(0) * chunk_size;\n"
	" size_t chunk_end = min(n, chunk_start + chunk_size);\n"
	" for(size_t idx = chunk_start; idx < chunk_end; ++idx) {\n"
	" prm_1[idx] = prm_2;\n"
	" }\n"
	"}\n"
	);
	cl::Kernel kernel(program, "vexcl_vector_kernel");

	size_t w_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device[0]);
	std::cout << "max workgroup size = " << w_size << std::endl;
	size_t g_size = 4 * w_size * device[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();

	kernel.setArg(0, N);
	kernel.setArg(1, u2);
	kernel.setArg(2, static_cast<double>(0));

	queue.enqueueNDRangeKernel(kernel, cl::NullRange, g_size, w_size);

	std::vector<double> u2_host(N, 42);

	queue.enqueueReadBuffer(u2, CL_TRUE, 0, N * sizeof(double), u2_host.data());

	std::cout << u2_host[0] << std::endl;
	} catch (const cl::Error &err) {
	std::cerr << err.what() << " " << err.err() << std::endl;
	return 1;
	}
	}