Skip to content

Instantly share code, notes, and snippets.

@angus-g
Created May 25, 2020 01:39
Show Gist options
  • Save angus-g/220dfb89f9e5c62320bc072829c5cd90 to your computer and use it in GitHub Desktop.
Save angus-g/220dfb89f9e5c62320bc072829c5cd90 to your computer and use it in GitHub Desktop.
#version 430
layout(local_size_x = 128, local_size_y = 1) in;
layout(std430, binding = 0) buffer input_data {
float domain[][128];
};
uniform bool transpose;
void main() {
uint idx = gl_LocalInvocationIndex;
ivec2 domain_idx;
if (transpose) {
domain_idx = ivec2(gl_WorkGroupID.x, gl_LocalInvocationIndex);
} else {
domain_idx = ivec2(gl_LocalInvocationIndex, gl_WorkGroupID.y);
}
// minimum element we read is -s_max - 1, where s_max is something
// like 2**(log2(n) - 1) -- so this should be enough room
shared float work_a[128*3]; // superdiag
//shared float work_b[128*3]; // diag
shared float work_c[128*3]; // subdiag
shared float work_d[128*3]; // rhs
int j = int(idx) + 128; // local element index in work arrays
work_a[j - 128];
work_a[j] = (idx < 127) ? -0.25f : 0; // normalised superdiag
work_a[j + 128] = 0;
// work_b[j - 128] = 0;
// work_b[j] = -4;
// work_b[j + 128] = 0;
work_c[j - 128] = 0;
work_c[j] = (idx > 0) ? -0.25f : 0; // normalised subdiag
work_c[j + 128] = 0;
work_d[j - 128] = 0;
work_d[j] = domain[domain_idx.y][domain_idx.x] / (-4.0f);
work_d[j + 128] = 0;
int stride = 1;
for (int i = 0; i < 7; i++) {
// non-normalised:
// float rho = work_c[j] / work_b[j - stride];
// float tau = work_a[j] / work_b[j + stride];
// float db = rho * work_a[j - stride] + tau * work_c[j + stride];
// float dd = rho * work_d[j - stride] + tau * work_d[j + stride];
// float da = tau * ((idx < 127) ? work_a[j + stride] : 0);
// float dc = rho * ((idx > 0) ? work_c[j - stride] : 0);
// normalised diag
// calculate all changes
float rho = work_c[j] * work_a[j - stride];
float tau = work_a[j] * work_c[j + stride];
float r = 1.0f / (1.0f - rho - tau);
float da = r * work_a[j] * work_a[j + stride];
float dc = r * work_c[j] * work_c[j - stride];
float dd = r * (work_d[j] - work_c[j] * work_d[j - stride] - work_a[j] * work_d[j + stride]);
barrier();
//work_b[j] -= db;
//work_d[j] -= dd;
// run all changes simultaneously
work_a[j] = (idx < 127) ? -da : 0;
work_c[j] = (idx > 0) ? -dc : 0;
work_d[j] = dd;
barrier();
stride *= 2;
}
domain[domain_idx.y][domain_idx.x] = work_d[j];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment