Skip to content

Instantly share code, notes, and snippets.

@al42and
Created November 16, 2021 19:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save al42and/b2eb3bd19c30fdda11f7551294684c13 to your computer and use it in GitHub Desktop.
Save al42and/b2eb3bd19c30fdda11f7551294684c13 to your computer and use it in GitHub Desktop.
#include <CL/sycl.hpp>
#include <iostream>
// Short names for the SYCL access-mode, fence-space and accessor-target
// enumerations used by the accessors and the barrier below.
using mode = sycl::access_mode;
using sycl::access::fence_space;
using sycl::access::target;
// Work-group size: used as the local range of the launch and as the element
// count of the local-memory accessor.
static constexpr int blockSize = 32;
// Number of work-groups per launch; the global range and the output buffer
// both hold numBlocks * blockSize elements.
static constexpr int numBlocks = 8;
template <typename T>
sycl::local_ptr<T>
getLocalPtr(const sycl::accessor<T, 1, mode::read_write, target::local> &acc) {
return acc.get_pointer();
}
template <typename T> cl::sycl::local_ptr<T> getLocalPtr(std::nullptr_t) {
return nullptr;
}
// Builds the kernel functor for one command group.
//
// Requests write access to `buf` through `cgh`; when useLocalMem is true it
// additionally creates a local accessor holding blockSize floats. The
// returned lambda stores one int per work-item into `buf` at the item's
// global linear id.
//
// The stored value is (local id + 1) % blockSize. With useLocalMem each item
// first writes its own local id into local memory, waits on a local-space
// barrier, and then reads the slot at that index; without it the value is
// written directly.
template <bool useLocalMem>
auto myKernel(sycl::handler &cgh, sycl::buffer<int, 1> &buf) {
  auto out = buf.get_access<mode::write>(cgh);

  // The two configurations yield different types (a local accessor or
  // std::nullptr_t), so the choice is made inside an immediately-invoked
  // lambda whose return type is deduced per instantiation.
  auto localAcc = [&]() {
    if constexpr (useLocalMem) {
      return sycl::accessor<float, 1, mode::read_write, target::local>{
          sycl::range<1>{blockSize}, cgh};
    } else {
      return nullptr;
    }
  }();

  return [=](sycl::nd_item<1> itemIdx) {
    // Obtained in both configurations; it is a null pointer when no local
    // accessor was created.
    sycl::local_ptr<float> scratch = getLocalPtr<float>(localAcc);

    const int self = static_cast<int>(itemIdx.get_local_linear_id());
    const int neighbour = (self + 1) % blockSize;

    int result = neighbour;
    if constexpr (useLocalMem) {
      scratch[self] = static_cast<float>(self);
      // All stores to local memory must be visible before any item reads
      // another item's slot.
      itemIdx.barrier(fence_space::local_space);
      result = static_cast<int>(scratch[neighbour]);
    }

    out[itemIdx.get_global_linear_id()] = result;
  };
}
// Kernel name type passed to parallel_for in runKernel; the bool parameter
// gives each runKernel<useLocalMem> instantiation its own kernel name.
// Only declared here, which is all a name tag needs.
template <bool> class Kernel;
// Submits one launch of myKernel<useLocalMem> to `q`, writing into `buffer`,
// and blocks until it completes by calling wait_and_throw() on the event
// returned from submit().
//
// The launch uses a global range of numBlocks * blockSize work-items split
// into work-groups of blockSize.
template <bool useLocalMem>
void runKernel(sycl::queue &q, sycl::buffer<int, 1> &buffer) {
  const sycl::range<1> globalSize{numBlocks * blockSize};
  const sycl::range<1> groupSize{blockSize};
  const sycl::nd_range<1> launchRange{globalSize, groupSize};

  sycl::event completion = q.submit([&](sycl::handler &cgh) {
    cgh.parallel_for<Kernel<useLocalMem>>(
        launchRange, myKernel<useLocalMem>(cgh, buffer));
  });
  completion.wait_and_throw();
}
int main() {
sycl::device dev{sycl::gpu_selector{}};
std::cout << "Device name: " << dev.get_info<sycl::info::device::name>()
<< std::endl;
sycl::queue q{dev};
sycl::buffer<int, 1> buffer(numBlocks * blockSize);
std::cout << "Running kernel with SLM: " << std::endl;
runKernel<true>(q, buffer);
std::cout << " Done" << std::endl;
std::cout << "Running kernel without SLM: " << std::endl;
runKernel<false>(q, buffer);
std::cout << " Done" << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment