Skip to content

Instantly share code, notes, and snippets.

View msl-compile.mm
@import Metal;
int main(int argc, char *argv[]) {
if (argc != 2 && argc != 8) {
printf("Usage:\n");
printf("\tmsl-compile FILE\n");
printf("\tmsl-compile '' '' '' '' '' '' FILE\n");
exit(1);
}
@jrprice
jrprice / opencl-c_reduced.diff
Created Aug 5, 2018
Changes generated by new regex for stripping features in clspv
View opencl-c_reduced.diff
31d30
< unsigned int __ovld __cnfn get_work_dim(void);
1788a1788,1861
> #define as_uchar(x) __builtin_astype((x), uchar)
> #define as_uchar2(x) __builtin_astype((x), uchar2)
> #define as_uchar3(x) __builtin_astype((x), uchar3)
> #define as_uchar4(x) __builtin_astype((x), uchar4)
>
> #define as_short(x) __builtin_astype((x), short)
> #define as_short2(x) __builtin_astype((x), short2)
@jrprice
jrprice / config.h
Created Feb 21, 2018
grep HAVE_ config.h for pocl
View config.h
#define HAVE_FORK
#define HAVE_VFORK
#define HAVE_CLOCK_GETTIME
#define HAVE_FDATASYNC
#define HAVE_MKOSTEMPS
#define HAVE_MKDTEMP
/* #undef HAVE_LTTNG_UST */
/* #undef HAVE_OCL_ICD */
#define HAVE_POSIX_MEMALIGN
/* #undef HAVE_HSA_EXT_AMD_H */
@jrprice
jrprice / backtrace.txt
Created Feb 21, 2018
pocl-cuda CGxx crash
View backtrace.txt
#0 0x00002aaab2150231 in ?? () from /usr/lib64/nvidia/libcuda.so.1
#1 0x00002aaab207c38e in ?? () from /usr/lib64/nvidia/libcuda.so.1
#2 0x00002aaab207cb51 in ?? () from /usr/lib64/nvidia/libcuda.so.1
#3 0x00002aaab2194722 in cuModuleGetGlobal_v2 () from /usr/lib64/nvidia/libcuda.so.1
#4 0x00002aaaab3f998a in pocl_cuda_submit_kernel (stream=0x1140300, run=..., device=0x685c10, event=0x1146d80)
at ../../lib/CL/devices/cuda/pocl-cuda.c:916
#5 0x00002aaaab3fab68 in pocl_cuda_submit_node (node=0x1146c90, cq=0x11401a0, locked=0)
at ../../lib/CL/devices/cuda/pocl-cuda.c:1232
#6 0x00002aaaab3fc121 in pocl_cuda_submit_thread (data=0x1140230) at ../../lib/CL/devices/cuda/pocl-cuda.c:1575
#7 0x00002aaab1592dc5 in start_thread () from /lib64/libpthread.so.0
@jrprice
jrprice / blocking-memalloc.c
Last active Aug 8, 2017
CUDA driver API blocking cuMemAlloc issue
View blocking-memalloc.c
#include <stdio.h>
#include <unistd.h>
#include <cuda.h>
#include <pthread.h>
void check(CUresult err);
int size;
CUdevice device;
CUcontext context;
@jrprice
jrprice / retain-event-deps.patch
Created May 29, 2017
WIP patch for attempting to fix event dependencies in pocl
View retain-event-deps.patch
diff --git a/lib/CL/devices/common.c b/lib/CL/devices/common.c
index 075474c..a574b14 100644
--- a/lib/CL/devices/common.c
+++ b/lib/CL/devices/common.c
@@ -632,6 +632,7 @@ pocl_broadcast (cl_event brc_event)
if (tmp->event == brc_event)
{
LL_DELETE (target->event->wait_list, tmp);
+ POname(clReleaseEvent) (tmp->event);
pocl_mem_manager_free_event_node (tmp);
@jrprice
jrprice / dmesg.log
Created Mar 8, 2017
dmesg for ROCm box
View dmesg.log
[ 0.000000] microcode: microcode updated early to revision 0x9e, date = 2016-06-22
[ 0.000000] Linux version 4.8.13-100.fc23.x86_64 (mockbuild@bkernel02.phx2.fedoraproject.org) (gcc version 5.3.1 20160406 (Red Hat 5.3.1-6) (GCC) ) #1 SMP Fri Dec 9 14:51:40 UTC 2016
[ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-4.8.13-100.fc23.x86_64 root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap rhgb quiet LANG=en_GB.UTF-8
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x008: 'MPX bounds registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x010: 'MPX CSR'
[ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256
[ 0.000000] x86/fpu: xstate_offset[3]: 832, xstate_sizes[3]: 64
@jrprice
jrprice / OCLPlatform.h
Last active Sep 14, 2016
Simple OpenCL backend for StreamExecutor for testing purposes
View OCLPlatform.h
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <streamexecutor/StreamExecutor.h>
#include <vector>
#define CHECK_OCL(Op, Err) \
@jrprice
jrprice / kernel.metal
Last active Aug 6, 2020
Comparison of trivial vector addition in Metal and OpenCL
View kernel.metal
#include <metal_stdlib>
using namespace metal;
// Element-wise accumulate: each thread handles the single element at its own
// grid position `i`, adding a[i] and b[i] onto the value already held in c[i].
//
//   a, b - read-only inputs bound at buffer indices 0 and 1
//   c    - read/write buffer bound at buffer index 2 (read, then overwritten)
//   i    - this thread's position in the dispatch grid
//
// There is no bounds check in the kernel itself.
// NOTE(review): presumably the host dispatches no more threads than the
// buffers have elements - confirm against the calling code.
kernel void vecadd(const device float *a [[buffer(0)]],
                   const device float *b [[buffer(1)]],
                   device float *c [[buffer(2)]],
                   uint i [[thread_position_in_grid]])
{
  // Keep the original left-to-right evaluation order: (c + a) first, then + b.
  const float partial = c[i] + a[i];
  c[i] = partial + b[i];
}
View pocl-invalid-record.cpp
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <iostream>
using namespace std;
const char *SOURCE = R"RAW(
#define local_barrier() barrier(CLK_LOCAL_MEM_FENCE);
#define WITHIN_KERNEL /* empty */
#define KERNEL __kernel