Skip to content

Instantly share code, notes, and snippets.

@geohot
Last active October 19, 2023 22:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.
Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.
A 1024x1024x1024 matmul with a 2x2x2 core in OpenCL
/*
 * matmul: each work-group produces one 2x2 tile of data0 = data1 x data2.
 *
 * All three buffers are indexed with a hard-coded row stride of 1024 floats,
 * and the reduction runs over 512 steps of two k-values each (k = 0..1023).
 *
 *   data0  output buffer; the tile is written as two float2 stores
 *   data1  left operand, read two rows at a time
 *   data2  right operand, read two columns at a time
 *
 * NOTE(review): indexing uses get_group_id() only, so this presumably expects
 * one work-item per group and 512 groups in each dimension - confirm against
 * the host-side launch parameters.
 */
__kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) {
  const int tile_row = get_group_id(1); /* 512 */
  const int tile_col = get_group_id(0); /* 512 */

  /* Loop-invariant bases: two rows of data1 start at tile_row*2048,
     two columns of data2 start at tile_col*2. */
  const __global float* lhs = data1+(tile_row*2048);
  const __global float* rhs = data2+(tile_col*2);
  const int out_base = (tile_row*2048)+(tile_col*2);

  float2 out_top = (float2)(0.0f,0.0f); /* upper row of the output tile */
  float2 out_bot = (float2)(0.0f,0.0f); /* lower row of the output tile */

  for (int k = 0; k < 512; ++k) {
    /* Two consecutive k-values from each of the two data1 rows. */
    const float2 lhs_top = *((const __global float2*)(lhs+(k*2)));
    const float2 lhs_bot = *((const __global float2*)(lhs+(k*2)+1024));
    /* Two consecutive data2 rows (k-values), two columns wide. */
    const float2 rhs_k0 = *((const __global float2*)(rhs+(k*2048)));
    const float2 rhs_k1 = *((const __global float2*)(rhs+(k*2048)+1024));

    /* Scalar * float2 applies per component, so each accumulator lane
       receives its two multiply-adds in the same order as a scalar form. */
    out_top = (lhs_top.x*rhs_k0)+out_top;
    out_top = (lhs_top.y*rhs_k1)+out_top;
    out_bot = (lhs_bot.x*rhs_k0)+out_bot;
    out_bot = (lhs_bot.y*rhs_k1)+out_bot;
  }

  /* Upper tile row, then the row 1024 floats further on. */
  *((__global float2*)(data0+out_base)) = out_top;
  *((__global float2*)(data0+out_base+1024)) = out_bot;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment