itzmeanjan/julia_set.cpp

## julia_set.cpp
#include <CL/sycl.hpp>
#include <chrono>
#include <iostream>

using namespace sycl;

namespace ts = std::chrono;

constexpr uint N = 512;
constexpr uint B = 4;
constexpr float R = 2.f; // radius
constexpr uint MAX_ITR = 1 << 10;

float length(sycl::cl_float2 v) { return sqrtf(v.x() * v.x() + v.y() * v.y()); }

sycl::cl_float2 get_complex_value_at_index(uint row, uint col) {
  float half = (float)N / 2.f;
  float step = 1.f;
  float factor = sqrtf(powf(half, 2) + powf(half, 2));

  return {((-half + (float)row * step) / factor) * R,
          ((half - (float)col * step) / factor) * R};
}

uint quadratic_iteration(sycl::cl_float2 z, sycl::cl_float2 c) {
  uint itr = 0;
  for (; itr < MAX_ITR; itr++) {
    z = {z.x() * z.x() - z.y() * z.y() + c.x(), 2 * z.y() * z.x() + c.y()};

    if (length(z) > R) {
      break;
    }
  }
  return itr;
}

sycl::cl_float4 colorize(uint itr) {
  sycl::cl_float3 d = {0.5, 0.5, 0.5};
  sycl::cl_float3 e = {0.5, 0.5, 0.5};
  sycl::cl_float3 f = {2.0, 1.0, 0.0};
  sycl::cl_float3 g = {0.5, 0.2, 0.25};

  sycl::cl_float4 pixel_val = {
      d + e * cos(6.28318f * (f * ((float)itr / (float)(MAX_ITR)) + g)), 1.f};
  return pixel_val;
}

int64_t compute_julia_set_dentrite(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {0.f, 1.f};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_douadys_rabbit(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.123, 0.745};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_san_marco(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.750, 0.f};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_siegel_disk(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.391, -0.587};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_neat_cauliflower(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.7, -0.3};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_galaxies(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.75, -0.2};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_groovy(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.75, 0.15};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

int64_t compute_julia_set_frost(queue &q, void *data) {
  image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
               range<2>{N, N}};

  auto start = ts::steady_clock::now();
  q.submit([&](handler &h) {
    accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

    h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
                   [=](nd_item<2> it) {
                     const uint i = it.get_global_id()[0];
                     const uint j = it.get_global_id()[1];
                     const sycl::cl_float2 c = {-0.7f, 0.35};

                     sycl::cl_float2 z = get_complex_value_at_index(i, j);
                     uint itr = quadratic_iteration(z, c);
                     acc.write(int2(i, j), colorize(itr));
                   });
  });
  q.wait();
  auto end = ts::steady_clock::now();
  return ts::duration_cast<ts::milliseconds>(end - start).count();
}

size_t export_to_file(void *data, size_t size, char const *path) {
  FILE *fp = fopen(path, "wb");
  size_t n = fwrite(data, size, 1, fp);
  fclose(fp);
  return size * n;
}

int main(int argc, char **argv) {
  device d{host_selector{}};
  queue q(d, ([](exception_list l) {
            for (auto ep : l) {
              try {
                std::rethrow_exception(ep);
              } catch (const cl::sycl::exception &e) {
                std::cout << "Async exception caught:\n" << e.what() << "\n";
                throw;
              }
            }
          }));

  size_t size = N * N * 4;
  uint8_t *data = (uint8_t *)malloc(size * sizeof(uint8_t));
  memset(data, 0, size); // redundant here though

  auto tm = compute_julia_set_dentrite(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  uint total = export_to_file(data, size, "julia_set.dentrite.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_douadys_rabbit(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.douadys_rabbit.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_san_marco(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.san_marco.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_siegel_disk(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.siegel_disk.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_neat_cauliflower(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.neat_cauliflower.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_galaxies(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.galaxies.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_groovy(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.groovy.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  memset(data, 0, size); // redundant here though

  tm = compute_julia_set_frost(q, data);
  std::cout << "computed in " << tm << " ms" << std::endl;
  total = export_to_file(data, size, "julia_set.frost.raw");
  std::cout << "wrote " << total << " bytes" << std::endl;

  free(data);
  return 0;
}
	#include <CL/sycl.hpp>
	#include <chrono>
	#include <iostream>

	using namespace sycl;

	namespace ts = std::chrono;

	constexpr uint N = 512;
	constexpr uint B = 4;
	constexpr float R = 2.f; // radius
	constexpr uint MAX_ITR = 1 << 10;

	float length(sycl::cl_float2 v) { return sqrtf(v.x() * v.x() + v.y() * v.y()); }

	sycl::cl_float2 get_complex_value_at_index(uint row, uint col) {
	float half = (float)N / 2.f;
	float step = 1.f;
	float factor = sqrtf(powf(half, 2) + powf(half, 2));

	return {((-half + (float)row * step) / factor) * R,
	((half - (float)col * step) / factor) * R};
	}

	uint quadratic_iteration(sycl::cl_float2 z, sycl::cl_float2 c) {
	uint itr = 0;
	for (; itr < MAX_ITR; itr++) {
	z = {z.x() * z.x() - z.y() * z.y() + c.x(), 2 * z.y() * z.x() + c.y()};

	if (length(z) > R) {
	break;
	}
	}
	return itr;
	}

	sycl::cl_float4 colorize(uint itr) {
	sycl::cl_float3 d = {0.5, 0.5, 0.5};
	sycl::cl_float3 e = {0.5, 0.5, 0.5};
	sycl::cl_float3 f = {2.0, 1.0, 0.0};
	sycl::cl_float3 g = {0.5, 0.2, 0.25};

	sycl::cl_float4 pixel_val = {
	d + e * cos(6.28318f * (f * ((float)itr / (float)(MAX_ITR)) + g)), 1.f};
	return pixel_val;
	}

	int64_t compute_julia_set_dentrite(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {0.f, 1.f};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_douadys_rabbit(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.123, 0.745};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_san_marco(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.750, 0.f};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_siegel_disk(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.391, -0.587};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_neat_cauliflower(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.7, -0.3};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_galaxies(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.75, -0.2};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_groovy(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.75, 0.15};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	int64_t compute_julia_set_frost(queue &q, void *data) {
	image<2> img{data, image_channel_order::rgba, image_channel_type::unorm_int8,
	range<2>{N, N}};

	auto start = ts::steady_clock::now();
	q.submit([&](handler &h) {
	accessor<float4, 2, access::mode::write, access::target::image> acc{img, h};

	h.parallel_for(nd_range<2>{range<2>{N, N}, range<2>{B, 1}},
	[=](nd_item<2> it) {
	const uint i = it.get_global_id()[0];
	const uint j = it.get_global_id()[1];
	const sycl::cl_float2 c = {-0.7f, 0.35};

	sycl::cl_float2 z = get_complex_value_at_index(i, j);
	uint itr = quadratic_iteration(z, c);
	acc.write(int2(i, j), colorize(itr));
	});
	});
	q.wait();
	auto end = ts::steady_clock::now();
	return ts::duration_cast<ts::milliseconds>(end - start).count();
	}

	size_t export_to_file(void data, size_t size, char const path) {
	FILE *fp = fopen(path, "wb");
	size_t n = fwrite(data, size, 1, fp);
	fclose(fp);
	return size * n;
	}

	int main(int argc, char **argv) {
	device d{host_selector{}};
	queue q(d, ([](exception_list l) {
	for (auto ep : l) {
	try {
	std::rethrow_exception(ep);
	} catch (const cl::sycl::exception &e) {
	std::cout << "Async exception caught:\n" << e.what() << "\n";
	throw;
	}
	}
	}));

	size_t size = N * N * 4;
	uint8_t data = (uint8_t )malloc(size * sizeof(uint8_t));
	memset(data, 0, size); // redundant here though

	auto tm = compute_julia_set_dentrite(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	uint total = export_to_file(data, size, "julia_set.dentrite.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_douadys_rabbit(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.douadys_rabbit.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_san_marco(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.san_marco.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_siegel_disk(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.siegel_disk.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_neat_cauliflower(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.neat_cauliflower.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_galaxies(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.galaxies.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_groovy(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.groovy.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	memset(data, 0, size); // redundant here though

	tm = compute_julia_set_frost(q, data);
	std::cout << "computed in " << tm << " ms" << std::endl;
	total = export_to_file(data, size, "julia_set.frost.raw");
	std::cout << "wrote " << total << " bytes" << std::endl;

	free(data);
	return 0;
	}
c	Fractal
-0.391 -0.587j
0.f + 1.0j
-0.123 + 0.745j
-0.7f + 0.35j
-0.75 -0.2j
-0.75 + 0.15j
-0.7 -0.3j
-0.75 + 0.0j