Last active
September 19, 2021 09:45
-
-
Save itzmeanjan/0a13447c634afbc34f64d1f22751011d to your computer and use it in GitHub Desktop.
π Computing Julia Set on CPU + GPGPU, using SYCL DPC++ π
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <CL/sycl.hpp> | |
#include <chrono> | |
#include <iostream> | |
using namespace sycl; | |
namespace ts = std::chrono; | |
constexpr uint N = 512; | |
constexpr uint B = 4; | |
constexpr float R = 2.f; // radius | |
constexpr uint MAX_ITR = 1 << 10; | |
float length(sycl::cl_float2 v) { return sqrtf(v.x() * v.x() + v.y() * v.y()); } | |
sycl::cl_float2 get_complex_value_at_index(uint row, uint col) { | |
float half = (float)N / 2.f; | |
float step = 1.f; | |
float factor = sqrtf(powf(half, 2) + powf(half, 2)); | |
return {((-half + (float)row * step) / factor) * R, | |
((half - (float)col * step) / factor) * R}; | |
} | |
uint quadratic_iteration(sycl::cl_float2 z, sycl::cl_float2 c) { | |
uint itr = 0; | |
for (; itr < MAX_ITR; itr++) { | |
z = {z.x() * z.x() - z.y() * z.y() + c.x(), 2 * z.y() * z.x() + c.y()}; | |
if (length(z) > R) { | |
break; | |
} | |
} | |
return itr; | |
} | |
sycl::cl_float4 colorize(uint itr) { | |
sycl::cl_float3 d = {0.5, 0.5, 0.5}; | |
sycl::cl_float3 e = {0.5, 0.5, 0.5}; | |
sycl::cl_float3 f = {2.0, 1.0, 0.0}; | |
sycl::cl_float3 g = {0.5, 0.2, 0.25}; | |
sycl::cl_float4 pixel_val = { | |
d + e * cos(6.28318f * (f * ((float)itr / (float)(MAX_ITR)) + g)), 1.f}; | |
return pixel_val; | |
} | |
int64_t compute_julia_set_dentrite(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {0.f, 1.f}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_douadys_rabbit(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.123, 0.745}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_san_marco(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.750, 0.f}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_siegel_disk(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.391, -0.587}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_neat_cauliflower(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.7, -0.3}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_galaxies(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.75, -0.2}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_groovy(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.75, 0.15}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
int64_t compute_julia_set_frost(queue &q, void *data) { | |
image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8, | |
range<2>{N, N}}; | |
auto start = ts::steady_clock::now(); | |
q.submit([&](handler &h) { | |
accessor<float4, 2, access::mode::write, access::target::image> acc{img, h}; | |
h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}}, | |
[=](nd_item<2> it) { | |
const uint i = it.get_global_id()[0]; | |
const uint j = it.get_global_id()[1]; | |
const sycl::cl_float2 c = {-0.7f, 0.35}; | |
sycl::cl_float2 z = get_complex_value_at_index(i, j); | |
uint itr = quadratic_iteration(z, c); | |
acc.write(int2(i, j), colorize(itr)); | |
}); | |
}); | |
q.wait(); | |
auto end = ts::steady_clock::now(); | |
return ts::duration_cast<ts::milliseconds>(end - start).count(); | |
} | |
size_t export_to_file(void *data, size_t size, char const *path) { | |
FILE *fp = fopen(path, "wb"); | |
size_t n = fwrite(data, size, 1, fp); | |
fclose(fp); | |
return size * n; | |
} | |
int main(int argc, char **argv) { | |
device d{host_selector{}}; | |
queue q(d, ([](exception_list l) { | |
for (auto ep : l) { | |
try { | |
std::rethrow_exception(ep); | |
} catch (const cl::sycl::exception &e) { | |
std::cout << "Async exception caught:\n" << e.what() << "\n"; | |
throw; | |
} | |
} | |
})); | |
size_t size = N * N * 4; | |
uint8_t *data = (uint8_t *)malloc(size * sizeof(uint8_t)); | |
memset(data, 0, size); // redundant here though | |
auto tm = compute_julia_set_dentrite(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
uint total = export_to_file(data, size, "julia_set.dentrite.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_douadys_rabbit(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.douadys_rabbit.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_san_marco(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.san_marco.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_siegel_disk(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.siegel_disk.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_neat_cauliflower(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.neat_cauliflower.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_galaxies(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.galaxies.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_groovy(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.groovy.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
memset(data, 0, size); // redundant here though | |
tm = compute_julia_set_frost(q, data); | |
std::cout << "computed in " << tm << " ms" << std::endl; | |
total = export_to_file(data, size, "julia_set.frost.raw"); | |
std::cout << "wrote " << total << " bytes" << std::endl; | |
free(data); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generated fractal image looks like π