Created
May 25, 2020 01:39
-
-
Save angus-g/220dfb89f9e5c62320bc072829c5cd90 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 | |
layout(local_size_x = 128, local_size_y = 1) in; | |
layout(std430, binding = 0) buffer input_data { | |
float domain[][128]; | |
}; | |
uniform bool transpose; | |
void main() { | |
uint idx = gl_LocalInvocationIndex; | |
ivec2 domain_idx; | |
if (transpose) { | |
domain_idx = ivec2(gl_WorkGroupID.x, gl_LocalInvocationIndex); | |
} else { | |
domain_idx = ivec2(gl_LocalInvocationIndex, gl_WorkGroupID.y); | |
} | |
// minimum element we read is -s_max - 1, where s_max is something | |
// like 2**(log2(n) - 1) -- so this should be enough room | |
shared float work_a[128*3]; // superdiag | |
//shared float work_b[128*3]; // diag | |
shared float work_c[128*3]; // subdiag | |
shared float work_d[128*3]; // rhs | |
int j = int(idx) + 128; // local element index in work arrays | |
work_a[j - 128]; | |
work_a[j] = (idx < 127) ? -0.25f : 0; // normalised superdiag | |
work_a[j + 128] = 0; | |
// work_b[j - 128] = 0; | |
// work_b[j] = -4; | |
// work_b[j + 128] = 0; | |
work_c[j - 128] = 0; | |
work_c[j] = (idx > 0) ? -0.25f : 0; // normalised subdiag | |
work_c[j + 128] = 0; | |
work_d[j - 128] = 0; | |
work_d[j] = domain[domain_idx.y][domain_idx.x] / (-4.0f); | |
work_d[j + 128] = 0; | |
int stride = 1; | |
for (int i = 0; i < 7; i++) { | |
// non-normalised: | |
// float rho = work_c[j] / work_b[j - stride]; | |
// float tau = work_a[j] / work_b[j + stride]; | |
// float db = rho * work_a[j - stride] + tau * work_c[j + stride]; | |
// float dd = rho * work_d[j - stride] + tau * work_d[j + stride]; | |
// float da = tau * ((idx < 127) ? work_a[j + stride] : 0); | |
// float dc = rho * ((idx > 0) ? work_c[j - stride] : 0); | |
// normalised diag | |
// calculate all changes | |
float rho = work_c[j] * work_a[j - stride]; | |
float tau = work_a[j] * work_c[j + stride]; | |
float r = 1.0f / (1.0f - rho - tau); | |
float da = r * work_a[j] * work_a[j + stride]; | |
float dc = r * work_c[j] * work_c[j - stride]; | |
float dd = r * (work_d[j] - work_c[j] * work_d[j - stride] - work_a[j] * work_d[j + stride]); | |
barrier(); | |
//work_b[j] -= db; | |
//work_d[j] -= dd; | |
// run all changes simultaneously | |
work_a[j] = (idx < 127) ? -da : 0; | |
work_c[j] = (idx > 0) ? -dc : 0; | |
work_d[j] = dd; | |
barrier(); | |
stride *= 2; | |
} | |
domain[domain_idx.y][domain_idx.x] = work_d[j]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment