Skip to content

Instantly share code, notes, and snippets.

@jrprice
jrprice / oclc.c
Created April 14, 2015 10:39
Simple command-line OpenCL C compiler.
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
cl_int err;
cl_platform_id platform;
cl_device_id device;
@jrprice
jrprice / a.metal
Last active November 23, 2022 19:22
#include <metal_stdlib>
using namespace metal;
template<typename T, size_t N>
struct tint_array {
const constant T& operator[](size_t i) const constant { return elements[i]; }
device T& operator[](size_t i) device { return elements[i]; }
const device T& operator[](size_t i) const device { return elements[i]; }
thread T& operator[](size_t i) thread { return elements[i]; }
@jrprice
jrprice / kernel.metal
Last active August 6, 2020 09:29
Comparison of trivial vector addition in Metal and OpenCL
#include <metal_stdlib>
using namespace metal;
// Per-thread accumulate: the thread at grid position i stores
// c[i] + a[i] + b[i] back into c[i].
//
//   a, b : read-only input buffers bound at buffer indices 0 and 1.
//   c    : read-write buffer bound at buffer index 2; its previous
//          contents take part in the sum.
//   i    : this thread's position in the grid, used directly as the
//          element index (no bounds check is performed here).
kernel void vecadd(const device float *a [[buffer(0)]],
                   const device float *b [[buffer(1)]],
                   device float *c [[buffer(2)]],
                   uint i [[thread_position_in_grid]])
{
  // Keep the original left-to-right association: (c + a) + b.
  const float partial = c[i] + a[i];
  c[i] = partial + b[i];
}
@jrprice
jrprice / opencl-c_reduced.diff
Created August 5, 2018 16:38
Changes generated by new regex for stripping features in clspv
31d30
< unsigned int __ovld __cnfn get_work_dim(void);
1788a1788,1861
> #define as_uchar(x) __builtin_astype((x), uchar)
> #define as_uchar2(x) __builtin_astype((x), uchar2)
> #define as_uchar3(x) __builtin_astype((x), uchar3)
> #define as_uchar4(x) __builtin_astype((x), uchar4)
>
> #define as_short(x) __builtin_astype((x), short)
> #define as_short2(x) __builtin_astype((x), short2)
@jrprice
jrprice / config.h
Created February 21, 2018 15:25
grep HAVE_ config.h for pocl
#define HAVE_FORK
#define HAVE_VFORK
#define HAVE_CLOCK_GETTIME
#define HAVE_FDATASYNC
#define HAVE_MKOSTEMPS
#define HAVE_MKDTEMP
/* #undef HAVE_LTTNG_UST */
/* #undef HAVE_OCL_ICD */
#define HAVE_POSIX_MEMALIGN
/* #undef HAVE_HSA_EXT_AMD_H */
@jrprice
jrprice / backtrace.txt
Created February 21, 2018 12:15
pocl-cuda CGxx crash
#0 0x00002aaab2150231 in ?? () from /usr/lib64/nvidia/libcuda.so.1
#1 0x00002aaab207c38e in ?? () from /usr/lib64/nvidia/libcuda.so.1
#2 0x00002aaab207cb51 in ?? () from /usr/lib64/nvidia/libcuda.so.1
#3 0x00002aaab2194722 in cuModuleGetGlobal_v2 () from /usr/lib64/nvidia/libcuda.so.1
#4 0x00002aaaab3f998a in pocl_cuda_submit_kernel (stream=0x1140300, run=..., device=0x685c10, event=0x1146d80)
at ../../lib/CL/devices/cuda/pocl-cuda.c:916
#5 0x00002aaaab3fab68 in pocl_cuda_submit_node (node=0x1146c90, cq=0x11401a0, locked=0)
at ../../lib/CL/devices/cuda/pocl-cuda.c:1232
#6 0x00002aaaab3fc121 in pocl_cuda_submit_thread (data=0x1140230) at ../../lib/CL/devices/cuda/pocl-cuda.c:1575
#7 0x00002aaab1592dc5 in start_thread () from /lib64/libpthread.so.0
@jrprice
jrprice / strided_buffer_to_image.c
Last active September 1, 2017 03:40
Benchmark for different methods of copying strided data from a buffer to an image using OpenCL
#include <assert.h>
#include <stdio.h>
#include <sys/time.h>
#include <CL/cl.h>
// Platform and device indices to use
#define PLATFORM 0
#define DEVICE 0
@jrprice
jrprice / blocking-memalloc.c
Last active August 8, 2017 16:40
CUDA driver API blocking cuMemAlloc issue
#include <stdio.h>
#include <unistd.h>
#include <cuda.h>
#include <pthread.h>
void check(CUresult err);
int size;
CUdevice device;
CUcontext context;
@jrprice
jrprice / retain-event-deps.patch
Created May 29, 2017 13:06
WIP patch for attempting to fix event dependencies in pocl
diff --git a/lib/CL/devices/common.c b/lib/CL/devices/common.c
index 075474c..a574b14 100644
--- a/lib/CL/devices/common.c
+++ b/lib/CL/devices/common.c
@@ -632,6 +632,7 @@ pocl_broadcast (cl_event brc_event)
if (tmp->event == brc_event)
{
LL_DELETE (target->event->wait_list, tmp);
+ POname(clReleaseEvent) (tmp->event);
pocl_mem_manager_free_event_node (tmp);
@jrprice
jrprice / dmesg.log
Created March 8, 2017 09:08
dmesg for ROCm box
[ 0.000000] microcode: microcode updated early to revision 0x9e, date = 2016-06-22
[ 0.000000] Linux version 4.8.13-100.fc23.x86_64 (mockbuild@bkernel02.phx2.fedoraproject.org) (gcc version 5.3.1 20160406 (Red Hat 5.3.1-6) (GCC) ) #1 SMP Fri Dec 9 14:51:40 UTC 2016
[ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-4.8.13-100.fc23.x86_64 root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap rhgb quiet LANG=en_GB.UTF-8
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x008: 'MPX bounds registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x010: 'MPX CSR'
[ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256
[ 0.000000] x86/fpu: xstate_offset[3]: 832, xstate_sizes[3]: 64