Skip to content

Instantly share code, notes, and snippets.

View al42and's full-sized avatar

Andrey Alekseenko al42and

View GitHub Profile
#include <CL/sycl.hpp>
#include <iostream>
#include <stdio.h>
void run_kernel(const sycl::device &syclDevice) {
constexpr int numThreads = 512;
try {
sycl::queue queue = sycl::queue(syclDevice);
sycl::buffer<int, 1> buffer(numThreads);
queue
# Build script for the HipSyclTest executable.
#
# cmake_minimum_required() has to be the first command: it establishes the
# policy settings that project() relies on when it configures the toolchain.
cmake_minimum_required(VERSION 3.0)
project(HipSyclTest)

# These variables are set before find_package(hipsycl) so that they are already
# defined when the package is loaded.
# NOTE(review): presumably consumed by the hipSYCL CMake integration as extra
# compiler arguments (all passes vs. device-only passes) - confirm against the
# hipSYCL version in use.
set(HIPSYCL_SYCLCC_EXTRA_ARGS -DCOMMON_DEFINE=1)
set(HIPSYCL_SYCLCC_EXTRA_ARGS_DEVICE_ONLY -DGPU_DEFINE=1 -ffast-math)

find_package(hipsycl REQUIRED)

add_executable(HipSyclTest main.cpp module_cpu.cpp module_gpu.cpp)

# Only module_gpu.cpp is passed to add_sycl_to_target; main.cpp and
# module_cpu.cpp are listed for the executable but not in SOURCES here.
add_sycl_to_target(TARGET HipSyclTest SOURCES module_gpu.cpp)
@al42and
al42and / 8390.cpp
Created April 5, 2023 14:49
Scheduling a lot of small kernels, CUDA vs oneAPI
#include "nvToolsExt.h"
#include <iostream>
#include <sycl/sycl.hpp>
#include <vector>
template <int N> class Kernel;
constexpr int size = 8;
template <int N> void run_kernel(sycl::queue &queue, int *ptr) {
@al42and
al42and / vkfft-5x5x10.cpp
Created April 25, 2023 14:48
VkFFT example failing with current "develop" branch
#include <iostream>
#include <vector>
#include "vkFFT.h"
const float inputdata[500] = {
-3.5, 6.3, 1.2, 0.3, 1.1, -5.7, 5.8, -1.9, -6.3, -1.4, 7.4, 2.4,
-9.9, -7.2, 5.4, 6.1, -1.9, -7.6, 1.4, -3.5, 0.7, 5.6, -4.2, -1.1,
-4.4, -6.3, -7.2, 4.6, -3.0, -0.9, 7.2, 2.5, -3.6, 6.1, -3.2, -2.1,
6.5, -0.4, -9.0, 2.3, 8.4, 4.0, -5.2, -9.0, 4.7, -3.7, -2.0, -9.5,
@al42and
al42and / kittler.py
Created January 19, 2016 16:07
Kittler-Illingworth Thresholding
import numpy as np
def Kittler(im, out):
"""
The reimplementation of Kittler-Illingworth Thresholding algorithm by Bob Pepin
Works on 8-bit images only
Original Matlab code: https://www.mathworks.com/matlabcentral/fileexchange/45685-kittler-illingworth-thresholding
Paper: Kittler, J. & Illingworth, J. Minimum error thresholding. Pattern Recognit. 19, 41–47 (1986).
"""
h,g = np.histogram(im.ravel(),256,[0,256])
#include <cstdio>
#define FARM_NB_BITS_IN_VEC 128
#include "farm_sve.h"
int main()
{
svfloat32_t a, b, c;
svbool_t m;
for (int i = 0; i < FARM_NB_BITS_IN_VEC / 32; i++)
{
@al42and
al42and / test_scan.cpp
Last active April 18, 2024 16:30
Simple standalone test to see how different versions of a small scan kernel behave
#include <sycl/sycl.hpp>
#if BUILD_ONEDPL
#define ONEDPL_USE_DPCPP_BACKEND 1
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/async>
#include <oneapi/dpl/execution>
#endif
template <int workGroupSize, int nElements_> struct ExclusivePrefixSumGlobal {