fogleman/main.cpp

## main.cpp
#define CL_SILENCE_DEPRECATION

// #if defined(__APPLE__) || defined(__MACOSX)
// #include <OpenCL/cl.hpp>
// #else
// #include <CL/cl.hpp>
// #endif

#include "cl.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Number of simulation steps that main() enqueues.
constexpr int iterations = 10000;

// Grid dimensions in cells; also the dimensions of the image written by main().
constexpr int w = 1920;
constexpr int h = 1080;

// Weights the kernel applies to a cell, its four edge-adjacent neighbours and
// its four diagonal neighbours when forming the per-field weighted sum.
constexpr float centerWeight = -1.0f;
constexpr float adjacentWeight = 0.2f;
constexpr float diagonalWeight = 0.05f;

// Coefficients of the per-cell update performed by the kernel.
constexpr float feedRate = 0.06f;
constexpr float killRate = 0.062f;
constexpr float diffusionRateA = 8.0f;
constexpr float diffusionRateB = 4.0f;

// Factor by which each step's computed change is scaled before being added.
constexpr float timestep = 0.125f;

// OpenCL C source of the `grayScott` kernel; main() compiles it at runtime.
//
// Each work-item updates the cell whose linear index is get_global_id(0) on a
// row-major grid that is w cells wide and h cells high. Neighbour coordinates
// wrap around at the grid edges. The kernel reads the A/B pair selected by
// bufferIndex (0 -> A0/B0, anything else -> A1/B1) and writes the updated
// values into the other pair, so the host can alternate bufferIndex between
// steps. For each field it forms a weighted sum over the cell and its eight
// neighbours (centerWeight, adjacentWeight, diagonalWeight) and then applies:
//   newA = a + (diffusionRateA * sumA - a*b*b + feedRate * (1 - a)) * timestep
//   newB = b + (diffusionRateB * sumB + a*b*b - (feedRate + killRate) * b) * timestep
//
// The kernel performs no bounds check, so the global work size must not
// exceed w * h.
const std::string kernelSource(R"(

__kernel void grayScott(
    global float *A0,
    global float *B0,
    global float *A1,
    global float *B1,
    const int w,
    const int h,
    const float centerWeight,
    const float adjacentWeight,
    const float diagonalWeight,
    const float feedRate,
    const float killRate,
    const float diffusionRateA,
    const float diffusionRateB,
    const float timestep,
    const int bufferIndex)
{
    const int i = get_global_id(0);
    const int x = i % w;
    const int y = i / w;
    const int xp = x == 0 ? w - 1 : x - 1;
    const int xn = x == w - 1 ? 0 : x + 1;
    const int yp = y == 0 ? h - 1 : y - 1;
    const int yn = y == h - 1 ? 0 : y + 1;

    global float *A = bufferIndex == 0 ? A0 : A1;
    global float *B = bufferIndex == 0 ? B0 : B1;
    global float *newA = bufferIndex == 0 ? A1 : A0;
    global float *newB = bufferIndex == 0 ? B1 : B0;

    const float a = A[i];
    const float b = B[i];

    float dda = 0;
    dda += a * centerWeight;
    dda += A[yp * w + xp] * diagonalWeight;
    dda += A[yp * w + xn] * diagonalWeight;
    dda += A[yn * w + xp] * diagonalWeight;
    dda += A[yn * w + xn] * diagonalWeight;
    dda += A[yp * w + x] * adjacentWeight;
    dda += A[yn * w + x] * adjacentWeight;
    dda += A[y * w + xp] * adjacentWeight;
    dda += A[y * w + xn] * adjacentWeight;

    float ddb = 0;
    ddb += b * centerWeight;
    ddb += B[yp * w + xp] * diagonalWeight;
    ddb += B[yp * w + xn] * diagonalWeight;
    ddb += B[yn * w + xp] * diagonalWeight;
    ddb += B[yn * w + xn] * diagonalWeight;
    ddb += B[yp * w + x] * adjacentWeight;
    ddb += B[yn * w + x] * adjacentWeight;
    ddb += B[y * w + xp] * adjacentWeight;
    ddb += B[y * w + xn] * adjacentWeight;

    const float da =
        diffusionRateA * dda - a * b * b + feedRate * (1 - a);

    const float db =
        diffusionRateB * ddb + a * b * b - (feedRate + killRate) * b;

    newA[i] = a + da * timestep;
    newB[i] = b + db * timestep;
}

)");

/// Writes a grayscale image to `path` as a plain-text (P3) PPM file.
///
/// Samples are read from `data` in row-major order and rescaled linearly so
/// that the smallest sample maps to 0 and the largest to 255; every pixel is
/// emitted with identical red, green and blue components. The observed
/// minimum and maximum are also printed to stdout as "lo, hi".
///
/// @param path   Destination file; it is created or truncated.
/// @param width  Image width in pixels.
/// @param height Image height in pixels.
/// @param data   Sample values, expected to hold width * height entries.
///               Pixels without a corresponding sample are written as 0.
///
/// If the file cannot be opened, a message is written to stderr and nothing
/// else is done.
void SavePPM(
    const std::string &path,
    const int width,
    const int height,
    const std::vector<float> &data)
{
    // One pass yields both extremes; an empty input is treated as range [0, 0].
    float lo = 0;
    float hi = 0;
    if (!data.empty()) {
        const auto extremes = std::minmax_element(data.begin(), data.end());
        lo = *extremes.first;
        hi = *extremes.second;
    }
    std::cout << lo << ", " << hi << std::endl;

    std::ofstream out(path);
    if (!out) {
        std::cerr << "SavePPM: cannot open '" << path << "' for writing\n";
        return;
    }
    out << "P3\n";
    out << width << " " << height << "\n";
    out << 255 << "\n";

    // A constant image has span == 0; dividing by it would produce NaN, and
    // converting NaN to int is undefined behaviour.
    const float span = hi - lo;

    std::size_t index = 0;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Never read past the end if the caller passed too few samples.
            const float v = index < data.size() ? data[index] : lo;
            ++index;
            const float t = span > 0 ? (v - lo) / span : 0;
            // Comparisons with NaN are false, so a NaN sample becomes 0;
            // std::min keeps the result within the declared maximum of 255.
            const int r = t > 0 ? static_cast<int>(std::min(t, 1.0f) * 255) : 0;
            out << r << " " << r << " " << r << "\n";
        }
    }
}

/// Runs the Gray-Scott simulation on an OpenCL GPU and saves the B field.
///
/// Picks the first GPU of the first OpenCL platform, builds `kernelSource`,
/// runs `iterations` steps while alternating between two buffer pairs, then
/// reads the most recently written B buffer back and writes it to "out.ppm".
///
/// @return 0 on success; -1 when no platform or GPU is found, the program
///         fails to build, or an OpenCL command reports an error.
int main() {
    // get platform
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        std::cerr << "no OpenCL platform found\n";
        return -1;
    }
    cl::Platform platform = platforms[0];
    std::cout << platform.getInfo<CL_PLATFORM_NAME>() << std::endl;

    // get device
    std::vector<cl::Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
    if (devices.empty()) {
        std::cerr << "no OpenCL GPU device found on the first platform\n";
        return -1;
    }
    cl::Device device = devices[0];
    std::cout << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    // compile program
    cl::Context context({device});
    cl::Program::Sources sources;
    sources.push_back({kernelSource.c_str(), kernelSource.size()});
    cl::Program program(context, sources);
    if (program.build({device}) != CL_SUCCESS) {
        std::cout
            << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
            << std::endl;
        return -1;
    }
    cl::Kernel kernel(program, "grayScott");

    // create & initialize cpu-side buffers: A starts at 1 everywhere, B is 0
    // except for roughly one cell in ten, which is seeded with 1
    const std::size_t cellCount = static_cast<std::size_t>(w) * h;
    std::vector<float> A(cellCount, 1);
    std::vector<float> B(cellCount, 0);
    for (float &cell : B) {
        if (std::rand() % 10 == 0) {
            cell = 1;
        }
    }

    // make buffers (two A/B pairs so each step reads one pair, writes the other)
    const std::size_t numBytes = sizeof(A.front()) * A.size();
    cl::Buffer bufferA0(context, CL_MEM_READ_WRITE, numBytes);
    cl::Buffer bufferB0(context, CL_MEM_READ_WRITE, numBytes);
    cl::Buffer bufferA1(context, CL_MEM_READ_WRITE, numBytes);
    cl::Buffer bufferB1(context, CL_MEM_READ_WRITE, numBytes);

    // set arguments (argument 14, bufferIndex, is set per step below)
    kernel.setArg(0, bufferA0);
    kernel.setArg(1, bufferB0);
    kernel.setArg(2, bufferA1);
    kernel.setArg(3, bufferB1);
    kernel.setArg(4, w);
    kernel.setArg(5, h);
    kernel.setArg(6, centerWeight);
    kernel.setArg(7, adjacentWeight);
    kernel.setArg(8, diagonalWeight);
    kernel.setArg(9, feedRate);
    kernel.setArg(10, killRate);
    kernel.setArg(11, diffusionRateA);
    kernel.setArg(12, diffusionRateB);
    kernel.setArg(13, timestep);

    // copy initial buffers over
    cl::CommandQueue queue(context, device);
    if (queue.enqueueWriteBuffer(bufferA0, CL_TRUE, 0, numBytes, A.data()) != CL_SUCCESS ||
        queue.enqueueWriteBuffer(bufferB0, CL_TRUE, 0, numBytes, B.data()) != CL_SUCCESS) {
        std::cerr << "failed to upload the initial state\n";
        return -1;
    }

    // run N iterations
    for (int i = 0; i < iterations; i++) {
        kernel.setArg(14, i % 2);
        const auto enqueueStatus = queue.enqueueNDRangeKernel(
            kernel, cl::NullRange, cl::NDRange(A.size()), cl::NullRange);
        if (enqueueStatus != CL_SUCCESS) {
            std::cerr << "kernel enqueue failed at step " << i
                      << " (error " << enqueueStatus << ")\n";
            return -1;
        }
        // Waiting here also surfaces execution errors at the step that caused them.
        const auto finishStatus = queue.finish();
        if (finishStatus != CL_SUCCESS) {
            std::cerr << "kernel execution failed at step " << i
                      << " (error " << finishStatus << ")\n";
            return -1;
        }
    }

    // read out the final B field. Step i reads pair (i % 2) and writes the
    // other one, so the newest data is in pair 0 after an even number of
    // steps and in pair 1 after an odd number. Only B is written to the image,
    // and the blocking read already waits for completion.
    cl::Buffer &latestB = (iterations % 2 == 0) ? bufferB0 : bufferB1;
    const auto readStatus =
        queue.enqueueReadBuffer(latestB, CL_TRUE, 0, numBytes, B.data());
    if (readStatus != CL_SUCCESS) {
        std::cerr << "failed to read back the result (error "
                  << readStatus << ")\n";
        return -1;
    }

    // write image
    SavePPM("out.ppm", w, h, B);

    return 0;
}
	#define CL_SILENCE_DEPRECATION

	// #if defined(__APPLE__) || defined(__MACOSX)
	// #include <OpenCL/cl.hpp>
	// #else
	// #include <CL/cl.hpp>
	// #endif

	#include "cl.hpp"

	#include <fstream>
	#include <iostream>

	// Number of simulation steps that main() enqueues.
	constexpr int iterations = 10000;

	// Grid dimensions in cells; also the dimensions of the image written by main().
	constexpr int w = 1920;
	constexpr int h = 1080;

	// Weights the kernel applies to a cell, its four edge-adjacent neighbours and
	// its four diagonal neighbours when forming the per-field weighted sum.
	constexpr float centerWeight = -1.0f;
	constexpr float adjacentWeight = 0.2f;
	constexpr float diagonalWeight = 0.05f;

	// Coefficients of the per-cell update performed by the kernel.
	constexpr float feedRate = 0.06f;
	constexpr float killRate = 0.062f;
	constexpr float diffusionRateA = 8.0f;
	constexpr float diffusionRateB = 4.0f;

	// Factor by which each step's computed change is scaled before being added.
	constexpr float timestep = 0.125f;

	// OpenCL C source of the `grayScott` kernel; main() compiles it at runtime.
	//
	// Each work-item updates the cell whose linear index is get_global_id(0) on a
	// row-major grid that is w cells wide and h cells high. Neighbour coordinates
	// wrap around at the grid edges. The kernel reads the A/B pair selected by
	// bufferIndex (0 -> A0/B0, anything else -> A1/B1) and writes the updated
	// values into the other pair, so the host can alternate bufferIndex between
	// steps. For each field it forms a weighted sum over the cell and its eight
	// neighbours (centerWeight, adjacentWeight, diagonalWeight) and then applies:
	//   newA = a + (diffusionRateA * sumA - a*b*b + feedRate * (1 - a)) * timestep
	//   newB = b + (diffusionRateB * sumB + a*b*b - (feedRate + killRate) * b) * timestep
	//
	// The kernel performs no bounds check, so the global work size must not
	// exceed w * h.
	const std::string kernelSource(R"(

	__kernel void grayScott(
	global float *A0,
	global float *B0,
	global float *A1,
	global float *B1,
	const int w,
	const int h,
	const float centerWeight,
	const float adjacentWeight,
	const float diagonalWeight,
	const float feedRate,
	const float killRate,
	const float diffusionRateA,
	const float diffusionRateB,
	const float timestep,
	const int bufferIndex)
	{
	const int i = get_global_id(0);
	const int x = i % w;
	const int y = i / w;
	const int xp = x == 0 ? w - 1 : x - 1;
	const int xn = x == w - 1 ? 0 : x + 1;
	const int yp = y == 0 ? h - 1 : y - 1;
	const int yn = y == h - 1 ? 0 : y + 1;

	global float *A = bufferIndex == 0 ? A0 : A1;
	global float *B = bufferIndex == 0 ? B0 : B1;
	global float *newA = bufferIndex == 0 ? A1 : A0;
	global float *newB = bufferIndex == 0 ? B1 : B0;

	const float a = A[i];
	const float b = B[i];

	float dda = 0;
	dda += a * centerWeight;
	dda += A[yp * w + xp] * diagonalWeight;
	dda += A[yp * w + xn] * diagonalWeight;
	dda += A[yn * w + xp] * diagonalWeight;
	dda += A[yn * w + xn] * diagonalWeight;
	dda += A[yp * w + x] * adjacentWeight;
	dda += A[yn * w + x] * adjacentWeight;
	dda += A[y * w + xp] * adjacentWeight;
	dda += A[y * w + xn] * adjacentWeight;

	float ddb = 0;
	ddb += b * centerWeight;
	ddb += B[yp * w + xp] * diagonalWeight;
	ddb += B[yp * w + xn] * diagonalWeight;
	ddb += B[yn * w + xp] * diagonalWeight;
	ddb += B[yn * w + xn] * diagonalWeight;
	ddb += B[yp * w + x] * adjacentWeight;
	ddb += B[yn * w + x] * adjacentWeight;
	ddb += B[y * w + xp] * adjacentWeight;
	ddb += B[y * w + xn] * adjacentWeight;

	const float da =
	diffusionRateA * dda - a * b * b + feedRate * (1 - a);

	const float db =
	diffusionRateB * ddb + a * b * b - (feedRate + killRate) * b;

	newA[i] = a + da * timestep;
	newB[i] = b + db * timestep;
	}

	)");

	/// Writes a grayscale image to `path` as a plain-text (P3) PPM file.
	///
	/// Samples are read from `data` in row-major order and rescaled linearly so
	/// that the smallest sample maps to 0 and the largest to 255; every pixel is
	/// emitted with identical red, green and blue components. The observed
	/// minimum and maximum are also printed to stdout as "lo, hi".
	///
	/// @param path   Destination file; it is created or truncated.
	/// @param width  Image width in pixels.
	/// @param height Image height in pixels.
	/// @param data   Sample values, expected to hold width * height entries.
	///               Pixels without a corresponding sample are written as 0.
	///
	/// If the file cannot be opened, a message is written to stderr and nothing
	/// else is done.
	void SavePPM(
	    const std::string &path,
	    const int width,
	    const int height,
	    const std::vector<float> &data)
	{
	    // One pass yields both extremes; an empty input is treated as range [0, 0].
	    float lo = 0;
	    float hi = 0;
	    if (!data.empty()) {
	        const auto extremes = std::minmax_element(data.begin(), data.end());
	        lo = *extremes.first;
	        hi = *extremes.second;
	    }
	    std::cout << lo << ", " << hi << std::endl;

	    std::ofstream out(path);
	    if (!out) {
	        std::cerr << "SavePPM: cannot open '" << path << "' for writing\n";
	        return;
	    }
	    out << "P3\n";
	    out << width << " " << height << "\n";
	    out << 255 << "\n";

	    // A constant image has span == 0; dividing by it would produce NaN, and
	    // converting NaN to int is undefined behaviour.
	    const float span = hi - lo;

	    std::size_t index = 0;
	    for (int y = 0; y < height; y++) {
	        for (int x = 0; x < width; x++) {
	            // Never read past the end if the caller passed too few samples.
	            const float v = index < data.size() ? data[index] : lo;
	            ++index;
	            const float t = span > 0 ? (v - lo) / span : 0;
	            // Comparisons with NaN are false, so a NaN sample becomes 0;
	            // std::min keeps the result within the declared maximum of 255.
	            const int r = t > 0 ? static_cast<int>(std::min(t, 1.0f) * 255) : 0;
	            out << r << " " << r << " " << r << "\n";
	        }
	    }
	}

	/// Runs the Gray-Scott simulation on an OpenCL GPU and saves the B field.
	///
	/// Picks the first GPU of the first OpenCL platform, builds `kernelSource`,
	/// runs `iterations` steps while alternating between two buffer pairs, then
	/// reads the most recently written B buffer back and writes it to "out.ppm".
	///
	/// @return 0 on success; -1 when no platform or GPU is found, the program
	///         fails to build, or an OpenCL command reports an error.
	int main() {
	    // get platform
	    std::vector<cl::Platform> platforms;
	    cl::Platform::get(&platforms);
	    if (platforms.empty()) {
	        std::cerr << "no OpenCL platform found\n";
	        return -1;
	    }
	    cl::Platform platform = platforms[0];
	    std::cout << platform.getInfo<CL_PLATFORM_NAME>() << std::endl;

	    // get device
	    std::vector<cl::Device> devices;
	    platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
	    if (devices.empty()) {
	        std::cerr << "no OpenCL GPU device found on the first platform\n";
	        return -1;
	    }
	    cl::Device device = devices[0];
	    std::cout << device.getInfo<CL_DEVICE_NAME>() << std::endl;

	    // compile program
	    cl::Context context({device});
	    cl::Program::Sources sources;
	    sources.push_back({kernelSource.c_str(), kernelSource.size()});
	    cl::Program program(context, sources);
	    if (program.build({device}) != CL_SUCCESS) {
	        std::cout
	            << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device)
	            << std::endl;
	        return -1;
	    }
	    cl::Kernel kernel(program, "grayScott");

	    // create & initialize cpu-side buffers: A starts at 1 everywhere, B is 0
	    // except for roughly one cell in ten, which is seeded with 1
	    const std::size_t cellCount = static_cast<std::size_t>(w) * h;
	    std::vector<float> A(cellCount, 1);
	    std::vector<float> B(cellCount, 0);
	    for (float &cell : B) {
	        if (std::rand() % 10 == 0) {
	            cell = 1;
	        }
	    }

	    // make buffers (two A/B pairs so each step reads one pair, writes the other)
	    const std::size_t numBytes = sizeof(A.front()) * A.size();
	    cl::Buffer bufferA0(context, CL_MEM_READ_WRITE, numBytes);
	    cl::Buffer bufferB0(context, CL_MEM_READ_WRITE, numBytes);
	    cl::Buffer bufferA1(context, CL_MEM_READ_WRITE, numBytes);
	    cl::Buffer bufferB1(context, CL_MEM_READ_WRITE, numBytes);

	    // set arguments (argument 14, bufferIndex, is set per step below)
	    kernel.setArg(0, bufferA0);
	    kernel.setArg(1, bufferB0);
	    kernel.setArg(2, bufferA1);
	    kernel.setArg(3, bufferB1);
	    kernel.setArg(4, w);
	    kernel.setArg(5, h);
	    kernel.setArg(6, centerWeight);
	    kernel.setArg(7, adjacentWeight);
	    kernel.setArg(8, diagonalWeight);
	    kernel.setArg(9, feedRate);
	    kernel.setArg(10, killRate);
	    kernel.setArg(11, diffusionRateA);
	    kernel.setArg(12, diffusionRateB);
	    kernel.setArg(13, timestep);

	    // copy initial buffers over
	    cl::CommandQueue queue(context, device);
	    if (queue.enqueueWriteBuffer(bufferA0, CL_TRUE, 0, numBytes, A.data()) != CL_SUCCESS ||
	        queue.enqueueWriteBuffer(bufferB0, CL_TRUE, 0, numBytes, B.data()) != CL_SUCCESS) {
	        std::cerr << "failed to upload the initial state\n";
	        return -1;
	    }

	    // run N iterations
	    for (int i = 0; i < iterations; i++) {
	        kernel.setArg(14, i % 2);
	        const auto enqueueStatus = queue.enqueueNDRangeKernel(
	            kernel, cl::NullRange, cl::NDRange(A.size()), cl::NullRange);
	        if (enqueueStatus != CL_SUCCESS) {
	            std::cerr << "kernel enqueue failed at step " << i
	                      << " (error " << enqueueStatus << ")\n";
	            return -1;
	        }
	        // Waiting here also surfaces execution errors at the step that caused them.
	        const auto finishStatus = queue.finish();
	        if (finishStatus != CL_SUCCESS) {
	            std::cerr << "kernel execution failed at step " << i
	                      << " (error " << finishStatus << ")\n";
	            return -1;
	        }
	    }

	    // read out the final B field. Step i reads pair (i % 2) and writes the
	    // other one, so the newest data is in pair 0 after an even number of
	    // steps and in pair 1 after an odd number. Only B is written to the image,
	    // and the blocking read already waits for completion.
	    cl::Buffer &latestB = (iterations % 2 == 0) ? bufferB0 : bufferB1;
	    const auto readStatus =
	        queue.enqueueReadBuffer(latestB, CL_TRUE, 0, numBytes, B.data());
	    if (readStatus != CL_SUCCESS) {
	        std::cerr << "failed to read back the result (error "
	                  << readStatus << ")\n";
	        return -1;
	    }

	    // write image
	    SavePPM("out.ppm", w, h, B);

	    return 0;
	}