Created
August 2, 2011 14:51
-
-
Save uzytkownik/1120346 to your computer and use it in GitHub Desktop.
Testcase of gather loads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/********************************* Primitives ********************************/ | |
// aos(dim, length, remote, idx, iidx) | |
// soa(dim, length, remote, idx, iidx) | |
extern "C" void abort(uniform int code); | |
// transpose(type, data, offset) | |
// trans_no(dim) | |
// local_aos(type, local, dim) | |
// local_soa(type, local, dim) | |
// load_aos(type, local, dim, length, remote, cond, idx) | |
// load_soa(type, local, dim, length, remote, cond, idx) | |
// store_aos(type, local, dim, length, remote, cond, idx) | |
// store_soa(type, name, dim, from, length, cond, idx) | |
// zero(local, dim) | |
// reduce_aos_indir_inc(type, local, dim, length, remote, cond, idx) | |
// reduce_soa_indir_inc(type, local, dim, length, remote, cond, idx) | |
// reduce_global_inc(type, var, idx, inc) | |
/*********************************** Policy **********************************/ | |
// layout | |
// load(type, local, dim, length, remote, idx, continue) | |
// store(type, local, dim, length, remote, idx, continue) | |
// load_store(type, local, dim, length, remote, idx, cond, continue) | |
// reduce_indir_inc(type, local, dim, length, remote, idx, continue) | |
// loop(offset, runlength, macro, args...) | |
/************************************ Code ***********************************/ | |
extern uniform float gam, gm1, cfl, eps, mach, alpha, air_const, qinf[4]; | |
/*
 * save_solv -- generated gather/scatter test kernel (ISPC, 4-wide gang).
 *
 * For every element index e in [offset, offset + runlength) the kernel
 * gathers the four consecutive floats p_q[e * 4 + lane], runs the generated
 * shuffle rounds over the four gathered rows, copies the rows into a second
 * local array, runs the same shuffle rounds again and finally scatters row k
 * to p_qold[e_k * 4 + lane].
 * NOTE(review): the two shuffle passes are presumably meant to be a
 * transpose and its inverse, so that p_qold ends up as a copy of p_q --
 * confirm against the generator's transpose() macro.
 *
 * Parameters:
 *   p_q       source array, read at element * 4 + programIndex
 *   p_qold    destination array, written at element * 4 + programIndex
 *   length    not referenced by this function
 *   offset    first element index to process
 *   runlength number of elements to process
 *
 * The code only works for programCount == 4; any other gang size prints a
 * diagnostic and calls abort(2) the first time a batch is processed.
 */
export void save_solv(uniform float p_q[],
                      uniform float p_qold[],
                      uniform int length,
                      uniform int offset,
                      uniform int runlength)
{
    uniform int i;

    /* ---- full batches: programCount elements per iteration ---- */
    for (i = offset; i + programCount <= offset + runlength; i += programCount)
    {
        int idx = i + programIndex;

        /* Row k holds the four floats of element extract(idx, k), one per lane. */
        float q[4];
        for (uniform int row = 0; row < 4; row++)
            q[row] = p_q[extract(idx, row) * 4 + programIndex];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        /* Two shuffle rounds, each applied to the row pairs (0,1) and (2,3). */
        for (uniform int round = 0; round < 2; round++) {
            int grp    = (1 << round) * (programIndex / (1 << (round + 1)));
            int within = programIndex & ((1 << round) - 1);
            int half   = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                /* Both shuffles read the old pair before either row is overwritten. */
                float lo = shuffle(q[base], q[base + 1], grp + within + half);
                float hi = shuffle(q[base], q[base + 1], programCount + grp + within + half);
                q[base]     = lo;
                q[base + 1] = hi;
            }
        }

        float qold[4];
        for (uniform int row = 0; row < 4; row++)
            qold[row] = q[row];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        /* Same two shuffle rounds, now on the copy. */
        for (uniform int round = 0; round < 2; round++) {
            int grp    = (1 << round) * (programIndex / (1 << (round + 1)));
            int within = programIndex & ((1 << round) - 1);
            int half   = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float lo = shuffle(qold[base], qold[base + 1], grp + within + half);
                float hi = shuffle(qold[base], qold[base + 1], programCount + grp + within + half);
                qold[base]     = lo;
                qold[base + 1] = hi;
            }
        }

        /* Scatter row k back to the slot of element extract(idx, k). */
        for (uniform int row = 0; row < 4; row++)
            p_qold[extract(idx, row) * 4 + programIndex] = qold[row];
    }

    /* ---- tail: fewer than programCount elements left ---- */
    uniform int rem = offset + runlength - i;
    if (rem > 0)
    {
        int idx = i + programIndex;

        /* Only the first `rem` rows are loaded; the remaining rows stay unset. */
        float q[4];
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                q[row] = p_q[extract(idx, row) * 4 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int round = 0; round < 2; round++) {
            int grp    = (1 << round) * (programIndex / (1 << (round + 1)));
            int within = programIndex & ((1 << round) - 1);
            int half   = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float lo = shuffle(q[base], q[base + 1], grp + within + half);
                float hi = shuffle(q[base], q[base + 1], programCount + grp + within + half);
                q[base]     = lo;
                q[base + 1] = hi;
            }
        }

        float qold[4];
        for (uniform int row = 0; row < 4; row++)
            qold[row] = q[row];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int round = 0; round < 2; round++) {
            int grp    = (1 << round) * (programIndex / (1 << (round + 1)));
            int within = programIndex & ((1 << round) - 1);
            int half   = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float lo = shuffle(qold[base], qold[base + 1], grp + within + half);
                float hi = shuffle(qold[base], qold[base + 1], programCount + grp + within + half);
                qold[base]     = lo;
                qold[base + 1] = hi;
            }
        }

        /* Only the rows that were actually loaded are written back. */
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                p_qold[extract(idx, row) * 4 + programIndex] = qold[row];
        }
    }
}
/*
 * adt_calc -- generated indirect gather-load test kernel (ISPC, 4-wide gang).
 *
 * For every element index e in [offset, offset + runlength) the kernel
 *   1. gathers the four ints pcell[e * 4 + lane] into one row of `cell`,
 *   2. runs the generated shuffle rounds over the four `cell` rows,
 *   3. uses lane k of cell[0] as an index into p_x and gathers the two
 *      floats p_x[index * 2 + lane] (lanes 0 and 1 only) into row k of `x1`,
 *   4. runs the same shuffle rounds over the four `x1` rows.
 * Nothing is written to any array argument; the loaded values are discarded.
 *
 * Parameters:
 *   pcell          index map, read at element * 4 + programIndex
 *   p_x            data array, read at index * 2 + programIndex for lanes < 2
 *   p_q, p_adt     not referenced by this function
 *   length         not referenced by this function
 *   offset         first element index to process
 *   runlength      number of elements to process
 *   pcell_colength not referenced by this function
 *
 * The code only works for programCount == 4; any other gang size prints a
 * diagnostic and calls abort(2) the first time a batch is processed.
 *
 * Fix relative to the generated original: `x1` was declared with 2 elements
 * although rows 0..3 are written and shuffled, i.e. x1[2] and x1[3] were
 * out-of-bounds accesses. It is now sized to the 4 rows that are used.
 * NOTE(review): lanes 2 and 3 of each x1 row are never loaded but are still
 * read by the shuffles, so their contents are unspecified -- confirm this is
 * acceptable for the test case.
 */
export void adt_calc(uniform int pcell[],
                     uniform float p_x[],
                     uniform float p_q[],
                     uniform float p_adt[],
                     uniform int length,
                     uniform int offset,
                     uniform int runlength,
                     uniform int pcell_colength)
{
    uniform int i;

    /* ---- full batches: programCount elements per iteration ---- */
    for (i = offset; i + programCount <= offset + runlength; i += programCount)
    {
        int idx = i + programIndex;

        /* Row k holds the four ints of element extract(idx, k), one per lane. */
        int cell[4];
        for (uniform int row = 0; row < 4; row++)
            cell[row] = pcell[extract(idx, row) * 4 + programIndex];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        /* Two shuffle rounds, each applied to the row pairs (0,1) and (2,3). */
        for (uniform int round = 0; round < 2; round++) {
            int t_offset = (1 << round) * (programIndex / (1 << (round + 1)));
            int t_index  = programIndex & ((1 << round) - 1);
            int t_pc     = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                /* Both shuffles read the old pair before either row is overwritten. */
                int tmp1 = shuffle(cell[base], cell[base + 1], t_offset + t_index + t_pc);
                int tmp2 = shuffle(cell[base], cell[base + 1], programCount + t_offset + t_index + t_pc);
                cell[base]     = tmp1;
                cell[base + 1] = tmp2;
            }
        }

        /* Four rows are loaded and shuffled, so the array needs four slots
         * (the original declared only two and overran the array). */
        float x1[4];
        for (uniform int row = 0; row < 4; row++) {
            /* Each index addresses two floats, hence only lanes 0 and 1 load. */
            if (programIndex < 2)
                x1[row] = p_x[extract(cell[0], row) * 2 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int round = 0; round < 2; round++) {
            int t_offset = (1 << round) * (programIndex / (1 << (round + 1)));
            int t_index  = programIndex & ((1 << round) - 1);
            int t_pc     = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float tmp1 = shuffle(x1[base], x1[base + 1], t_offset + t_index + t_pc);
                float tmp2 = shuffle(x1[base], x1[base + 1], programCount + t_offset + t_index + t_pc);
                x1[base]     = tmp1;
                x1[base + 1] = tmp2;
            }
        }
    }

    /* ---- tail: fewer than programCount elements left ---- */
    uniform int rem = offset + runlength - i;
    if (rem > 0)
    {
        int idx = i + programIndex;

        /* Only the first `rem` rows are loaded; the remaining rows stay unset. */
        int cell[4];
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                cell[row] = pcell[extract(idx, row) * 4 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int round = 0; round < 2; round++) {
            int t_offset = (1 << round) * (programIndex / (1 << (round + 1)));
            int t_index  = programIndex & ((1 << round) - 1);
            int t_pc     = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                int tmp1 = shuffle(cell[base], cell[base + 1], t_offset + t_index + t_pc);
                int tmp2 = shuffle(cell[base], cell[base + 1], programCount + t_offset + t_index + t_pc);
                cell[base]     = tmp1;
                cell[base + 1] = tmp2;
            }
        }

        /* Sized to four rows for the same reason as in the full-batch path. */
        float x1[4];
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem) {
                if (programIndex < 2)
                    x1[row] = p_x[extract(cell[0], row) * 2 + programIndex];
            }
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int round = 0; round < 2; round++) {
            int t_offset = (1 << round) * (programIndex / (1 << (round + 1)));
            int t_index  = programIndex & ((1 << round) - 1);
            int t_pc     = programCount * ((programIndex >> round) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float tmp1 = shuffle(x1[base], x1[base + 1], t_offset + t_index + t_pc);
                float tmp2 = shuffle(x1[base], x1[base + 1], programCount + t_offset + t_index + t_pc);
                x1[base]     = tmp1;
                x1[base + 1] = tmp2;
            }
        }
    }
}
export void res_calc(uniform int pedge[], | |
uniform float p_x[], | |
uniform int pecell[], | |
uniform float p_q[], | |
uniform float p_adt[], | |
uniform float p_res[], | |
uniform int length, | |
uniform int offset, | |
uniform int runlength, | |
uniform int pedge_colength, | |
uniform int pecell_colength) | |
{ | |
uniform int i; | |
for(i = offset; i + programCount <= offset + runlength; i += programCount) | |
{ | |
int idx = i + programIndex; | |
int edge[2]; | |
if(programIndex < 2) | |
do { | |
edge[0] = pedge[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
edge[1] = pedge[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
edge[2] = pedge[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
edge[3] = pedge[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 0 + 0] = tmp1; | |
edge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 2 + 0] = tmp1; | |
edge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 0 + 0] = tmp1; | |
edge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 2 + 0] = tmp1; | |
edge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x1[2]; | |
if(programIndex < 2) | |
do { | |
x1[0] = p_x[extract(edge[0], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[1] = p_x[extract(edge[0], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[2] = p_x[extract(edge[0], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[3] = p_x[extract(edge[0], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x2[2]; | |
if(programIndex < 2) | |
do { | |
x2[0] = p_x[extract(edge[1], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[1] = p_x[extract(edge[1], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[2] = p_x[extract(edge[1], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[3] = p_x[extract(edge[1], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
int ecell[2]; | |
if(programIndex < 2) | |
do { | |
ecell[0] = pecell[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
ecell[1] = pecell[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
ecell[2] = pecell[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
ecell[3] = pecell[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 0 + 0] = tmp1; | |
ecell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 2 + 0] = tmp1; | |
ecell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 0 + 0] = tmp1; | |
ecell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 2 + 0] = tmp1; | |
ecell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q1[4]; | |
do { | |
q1[0] = p_q[extract(ecell[0], 0) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[1] = p_q[extract(ecell[0], 1) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[2] = p_q[extract(ecell[0], 2) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[3] = p_q[extract(ecell[0], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q2[4]; | |
do { | |
q2[0] = p_q[extract(ecell[1], 0) * 4 + programIndex]; | |
} while(0); | |
do { | |
q2[1] = p_q[extract(ecell[1], 1) * 4 + programIndex]; | |
} while(0); | |
do { | |
q2[2] = p_q[extract(ecell[1], 2) * 4 + programIndex]; | |
} while(0); | |
do { | |
q2[3] = p_q[extract(ecell[1], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 0 + 0] = tmp1; | |
q2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 2 + 0] = tmp1; | |
q2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 0 + 0] = tmp1; | |
q2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 2 + 0] = tmp1; | |
q2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt1[1]; | |
if(programIndex < 1) | |
do { | |
adt1[0] = p_adt[extract(ecell[0], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[1] = p_adt[extract(ecell[0], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[2] = p_adt[extract(ecell[0], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[3] = p_adt[extract(ecell[0], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt2[1]; | |
if(programIndex < 1) | |
do { | |
adt2[0] = p_adt[extract(ecell[1], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt2[1] = p_adt[extract(ecell[1], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt2[2] = p_adt[extract(ecell[1], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt2[3] = p_adt[extract(ecell[1], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 0 + 0] = tmp1; | |
adt2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 2 + 0] = tmp1; | |
adt2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 0 + 0] = tmp1; | |
adt2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 2 + 0] = tmp1; | |
adt2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float res1[4]; | |
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0; | |
float res2[4]; | |
res2[0] = 0; res2[1] = 0; res2[2] = 0; res2[3] = 0; | |
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f; | |
dx = x1[0] - x2[0]; | |
dy = x1[1] - x2[1]; | |
ri = 1.0 / q1[0]; | |
p1 = 0.4 * (q1[3] - 0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2])); | |
vol1 = ri * (q1[1] * dy - q1[2] * dx); | |
ri = 1.0 / q2[0]; | |
p2 = 0.4 * (q2[3] - 0.5 * ri * (q2[1] * q2[1] + q2[2] * q2[2])); | |
vol2 = ri * (q2[1] * dy - q2[2] * dx); | |
mu = 0.5 * (adt1[0] + adt2[0]) * eps; | |
f = 0.5 * (vol1 * q1[0] + vol2 * q2[0]) + mu * (q1[0] - q2[0]); | |
adt1[0] += f; | |
adt2[0] -= f; | |
f = 0.5 * (vol1 * q1[1] + p1 * dy + vol2 * q2[1] + p2 * dy) + mu * (q1[1] - q2[1]); | |
adt1[1] += f; | |
adt2[1] -= f; | |
f = 0.5 * (vol1 * q1[2] - p1 * dx + vol2 * q2[2] - p2 * dx) + mu * (q1[2] - q2[2]); | |
adt1[2] += f; | |
adt2[2] -= f; | |
f = 0.5 * (vol1 *(q1[3] + p1) + vol2 *(q2[3] + p2) ) + mu * (q1[3] - q2[3]); | |
adt1[3] += f; | |
adt2[3] -= f; | |
for(uniform int j = 0; j < programCount; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(ecell[1], j) * 4 + 0] += extract(res2[0], j); | |
p_res[extract(ecell[1], j) * 4 + 1] += extract(res2[1], j); | |
p_res[extract(ecell[1], j) * 4 + 2] += extract(res2[2], j); | |
p_res[extract(ecell[1], j) * 4 + 3] += extract(res2[3], j); | |
} | |
} | |
for(uniform int j = 0; j < programCount; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(ecell[0], j) * 4 + 0] += extract(res1[0], j); | |
p_res[extract(ecell[0], j) * 4 + 1] += extract(res1[1], j); | |
p_res[extract(ecell[0], j) * 4 + 2] += extract(res1[2], j); | |
p_res[extract(ecell[0], j) * 4 + 3] += extract(res1[3], j); | |
} | |
} | |
} | |
uniform int rem = offset + runlength - i; | |
if(rem > 0) | |
{ | |
int idx = i + programIndex; | |
int edge[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
edge[0] = pedge[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
edge[1] = pedge[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
edge[2] = pedge[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
edge[3] = pedge[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 0 + 0] = tmp1; | |
edge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 2 + 0] = tmp1; | |
edge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 0 + 0] = tmp1; | |
edge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
edge[0 + 2 + 0] = tmp1; | |
edge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x1[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
x1[0] = p_x[extract(edge[0], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
x1[1] = p_x[extract(edge[0], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
x1[2] = p_x[extract(edge[0], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
x1[3] = p_x[extract(edge[0], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x2[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
x2[0] = p_x[extract(edge[1], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
x2[1] = p_x[extract(edge[1], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
x2[2] = p_x[extract(edge[1], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
x2[3] = p_x[extract(edge[1], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
int ecell[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
ecell[0] = pecell[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
ecell[1] = pecell[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
ecell[2] = pecell[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
ecell[3] = pecell[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 0 + 0] = tmp1; | |
ecell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 2 + 0] = tmp1; | |
ecell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 0 + 0] = tmp1; | |
ecell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
ecell[0 + 2 + 0] = tmp1; | |
ecell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q1[4]; | |
if(0 < rem) | |
do { | |
q1[0] = p_q[extract(ecell[0], 0) * 4 + programIndex]; | |
} while(0); | |
if(1 < rem) | |
do { | |
q1[1] = p_q[extract(ecell[0], 1) * 4 + programIndex]; | |
} while(0); | |
if(2 < rem) | |
do { | |
q1[2] = p_q[extract(ecell[0], 2) * 4 + programIndex]; | |
} while(0); | |
if(3 < rem) | |
do { | |
q1[3] = p_q[extract(ecell[0], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q2[4]; | |
if(0 < rem) | |
do { | |
q2[0] = p_q[extract(ecell[1], 0) * 4 + programIndex]; | |
} while(0); | |
if(1 < rem) | |
do { | |
q2[1] = p_q[extract(ecell[1], 1) * 4 + programIndex]; | |
} while(0); | |
if(2 < rem) | |
do { | |
q2[2] = p_q[extract(ecell[1], 2) * 4 + programIndex]; | |
} while(0); | |
if(3 < rem) | |
do { | |
q2[3] = p_q[extract(ecell[1], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 0 + 0] = tmp1; | |
q2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 2 + 0] = tmp1; | |
q2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 0 + 0] = tmp1; | |
q2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q2[0 + 2 + 0] = tmp1; | |
q2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt1[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
adt1[0] = p_adt[extract(ecell[0], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
adt1[1] = p_adt[extract(ecell[0], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
adt1[2] = p_adt[extract(ecell[0], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
adt1[3] = p_adt[extract(ecell[0], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt2[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
adt2[0] = p_adt[extract(ecell[1], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
adt2[1] = p_adt[extract(ecell[1], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
adt2[2] = p_adt[extract(ecell[1], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
adt2[3] = p_adt[extract(ecell[1], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 0 + 0] = tmp1; | |
adt2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 2 + 0] = tmp1; | |
adt2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 0 + 0] = tmp1; | |
adt2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt2[0 + 2 + 0] = tmp1; | |
adt2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float res1[4]; | |
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0; | |
float res2[4]; | |
res2[0] = 0; res2[1] = 0; res2[2] = 0; res2[3] = 0; | |
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f; | |
dx = x1[0] - x2[0]; | |
dy = x1[1] - x2[1]; | |
ri = 1.0 / q1[0]; | |
p1 = 0.4 * (q1[3] - 0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2])); | |
vol1 = ri * (q1[1] * dy - q1[2] * dx); | |
ri = 1.0 / q2[0]; | |
p2 = 0.4 * (q2[3] - 0.5 * ri * (q2[1] * q2[1] + q2[2] * q2[2])); | |
vol2 = ri * (q2[1] * dy - q2[2] * dx); | |
mu = 0.5 * (adt1[0] + adt2[0]) * eps; | |
f = 0.5 * (vol1 * q1[0] + vol2 * q2[0]) + mu * (q1[0] - q2[0]); | |
adt1[0] += f; | |
adt2[0] -= f; | |
f = 0.5 * (vol1 * q1[1] + p1 * dy + vol2 * q2[1] + p2 * dy) + mu * (q1[1] - q2[1]); | |
adt1[1] += f; | |
adt2[1] -= f; | |
f = 0.5 * (vol1 * q1[2] - p1 * dx + vol2 * q2[2] - p2 * dx) + mu * (q1[2] - q2[2]); | |
adt1[2] += f; | |
adt2[2] -= f; | |
f = 0.5 * (vol1 *(q1[3] + p1) + vol2 *(q2[3] + p2) ) + mu * (q1[3] - q2[3]); | |
adt1[3] += f; | |
adt2[3] -= f; | |
for(uniform int j = 0; j < rem; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(ecell[1], j) * 4 + 0] += extract(res2[0], j); | |
p_res[extract(ecell[1], j) * 4 + 1] += extract(res2[1], j); | |
p_res[extract(ecell[1], j) * 4 + 2] += extract(res2[2], j); | |
p_res[extract(ecell[1], j) * 4 + 3] += extract(res2[3], j); | |
} | |
} | |
for(uniform int j = 0; j < rem; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(ecell[0], j) * 4 + 0] += extract(res1[0], j); | |
p_res[extract(ecell[0], j) * 4 + 1] += extract(res1[1], j); | |
p_res[extract(ecell[0], j) * 4 + 2] += extract(res1[2], j); | |
p_res[extract(ecell[0], j) * 4 + 3] += extract(res1[3], j); | |
} | |
} | |
} | |
} | |
export void bres_calc(uniform int pbedge[], | |
uniform float p_x[], | |
uniform int pbecell[], | |
uniform float p_q[], | |
uniform float p_adt[], | |
uniform float p_res[], | |
uniform int p_bound[], | |
uniform int length, | |
uniform int offset, | |
uniform int runlength, | |
uniform int pbegde_colength, | |
uniform int pbecell_colength) | |
{ | |
uniform int i; | |
for(i = offset; i + programCount <= offset + runlength; i += programCount) | |
{ | |
int idx = i + programIndex; | |
int bedge[2]; | |
if(programIndex < 2) | |
do { | |
bedge[0] = pbedge[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
bedge[1] = pbedge[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
bedge[2] = pbedge[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
bedge[3] = pbedge[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 0 + 0] = tmp1; | |
bedge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 2 + 0] = tmp1; | |
bedge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 0 + 0] = tmp1; | |
bedge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 2 + 0] = tmp1; | |
bedge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x1[2]; | |
if(programIndex < 2) | |
do { | |
x1[0] = p_x[extract(bedge[0], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[1] = p_x[extract(bedge[0], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[2] = p_x[extract(bedge[0], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x1[3] = p_x[extract(bedge[0], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x2[2]; | |
if(programIndex < 2) | |
do { | |
x2[0] = p_x[extract(bedge[1], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[1] = p_x[extract(bedge[1], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[2] = p_x[extract(bedge[1], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
do { | |
x2[3] = p_x[extract(bedge[1], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
int becell[1]; | |
if(programIndex < 1) | |
do { | |
becell[0] = pbecell[extract(idx, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
becell[1] = pbecell[extract(idx, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
becell[2] = pbecell[extract(idx, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
becell[3] = pbecell[extract(idx, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 0 + 0] = tmp1; | |
becell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 2 + 0] = tmp1; | |
becell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 0 + 0] = tmp1; | |
becell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 2 + 0] = tmp1; | |
becell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q1[4]; | |
do { | |
q1[0] = p_q[extract(becell[0], 0) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[1] = p_q[extract(becell[0], 1) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[2] = p_q[extract(becell[0], 2) * 4 + programIndex]; | |
} while(0); | |
do { | |
q1[3] = p_q[extract(becell[0], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt1[1]; | |
if(programIndex < 1) | |
do { | |
adt1[0] = p_adt[extract(becell[0], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[1] = p_adt[extract(becell[0], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[2] = p_adt[extract(becell[0], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt1[3] = p_adt[extract(becell[0], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float res1[4]; | |
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0; | |
int bound[1]; | |
if(programIndex < 1) | |
do { | |
bound[0] = p_bound[extract(idx, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
bound[1] = p_bound[extract(idx, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
bound[2] = p_bound[extract(idx, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
bound[3] = p_bound[extract(idx, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 0 + 0] = tmp1; | |
bound[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 2 + 0] = tmp1; | |
bound[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 0 + 0] = tmp1; | |
bound[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 2 + 0] = tmp1; | |
bound[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f; | |
dx = x1[0] - x2[0]; | |
dy = x1[1] - x2[1]; | |
ri = 1.0 / q1[0]; | |
p1 = gm1*(q1[3]-0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2])); | |
if (bound[0] == 1) | |
{ | |
res1[1] += + p1*dy; | |
res1[2] += - p1*dx; | |
} | |
else | |
{ | |
vol1 = ri*(q1[1]*dy - q1[2]*dx); | |
ri = 1.0 / qinf[0]; | |
p2 = gm1*(qinf[3]-0.5 * ri * (qinf[1]*qinf[1]+qinf[2]*qinf[2])); | |
vol2 = ri*(qinf[1]*dy - qinf[2]*dx); | |
mu = (adt1[0]) * eps; | |
f = 0.5 * (vol1 * q1[0] + vol2 * qinf[0] ) + mu*(q1[0]-qinf[0]); | |
res1[0] += f; | |
f = 0.5 * (vol1 * q1[1] + p1*dy + vol2 * qinf[1] + p2*dy) + mu*(q1[1]-qinf[1]); | |
res1[1] += f; | |
f = 0.5 * (vol1 * q1[2] - p1*dx + vol2 * qinf[2] - p2*dx) + mu*(q1[2]-qinf[2]); | |
res1[2] += f; | |
f = 0.5 * (vol1 * (q1[3]+p1) + vol2 * (qinf[3]+p2) ) + mu*(q1[3]-qinf[3]); | |
res1[3] += f; | |
} | |
for(uniform int j = 0; j < programCount; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(becell[0], j) * 4 + 0] += extract(res1[0], j); | |
p_res[extract(becell[0], j) * 4 + 1] += extract(res1[1], j); | |
p_res[extract(becell[0], j) * 4 + 2] += extract(res1[2], j); | |
p_res[extract(becell[0], j) * 4 + 3] += extract(res1[3], j); | |
} | |
} | |
} | |
uniform int rem = offset + runlength - i; | |
if(rem > 0) | |
{ | |
int idx = i + programIndex; | |
int bedge[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
bedge[0] = pbedge[extract(idx, 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
bedge[1] = pbedge[extract(idx, 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
bedge[2] = pbedge[extract(idx, 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
bedge[3] = pbedge[extract(idx, 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 0 + 0] = tmp1; | |
bedge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 2 + 0] = tmp1; | |
bedge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 0 + 0] = tmp1; | |
bedge[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bedge[0 + 2 + 0] = tmp1; | |
bedge[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x1[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
x1[0] = p_x[extract(bedge[0], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
x1[1] = p_x[extract(bedge[0], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
x1[2] = p_x[extract(bedge[0], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
x1[3] = p_x[extract(bedge[0], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 0 + 0] = tmp1; | |
x1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x1[0 + 2 + 0] = tmp1; | |
x1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float x2[2]; | |
if(programIndex < 2) | |
if(0 < rem) | |
do { | |
x2[0] = p_x[extract(bedge[1], 0) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(1 < rem) | |
do { | |
x2[1] = p_x[extract(bedge[1], 1) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(2 < rem) | |
do { | |
x2[2] = p_x[extract(bedge[1], 2) * 2 + programIndex]; | |
} while(0); | |
if(programIndex < 2) | |
if(3 < rem) | |
do { | |
x2[3] = p_x[extract(bedge[1], 3) * 2 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 0 + 0] = tmp1; | |
x2[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
x2[0 + 2 + 0] = tmp1; | |
x2[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
int becell[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
becell[0] = pbecell[extract(idx, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
becell[1] = pbecell[extract(idx, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
becell[2] = pbecell[extract(idx, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
becell[3] = pbecell[extract(idx, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 0 + 0] = tmp1; | |
becell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 2 + 0] = tmp1; | |
becell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 0 + 0] = tmp1; | |
becell[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
becell[0 + 2 + 0] = tmp1; | |
becell[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q1[4]; | |
if(0 < rem) | |
do { | |
q1[0] = p_q[extract(becell[0], 0) * 4 + programIndex]; | |
} while(0); | |
if(1 < rem) | |
do { | |
q1[1] = p_q[extract(becell[0], 1) * 4 + programIndex]; | |
} while(0); | |
if(2 < rem) | |
do { | |
q1[2] = p_q[extract(becell[0], 2) * 4 + programIndex]; | |
} while(0); | |
if(3 < rem) | |
do { | |
q1[3] = p_q[extract(becell[0], 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 0 + 0] = tmp1; | |
q1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q1[0 + 2 + 0] = tmp1; | |
q1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt1[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
adt1[0] = p_adt[extract(becell[0], 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
adt1[1] = p_adt[extract(becell[0], 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
adt1[2] = p_adt[extract(becell[0], 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
adt1[3] = p_adt[extract(becell[0], 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 0 + 0] = tmp1; | |
adt1[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt1[0 + 2 + 0] = tmp1; | |
adt1[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float res1[4]; | |
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0; | |
int bound[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
bound[0] = p_bound[extract(idx, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
bound[1] = p_bound[extract(idx, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
bound[2] = p_bound[extract(idx, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
bound[3] = p_bound[extract(idx, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 0 + 0] = tmp1; | |
bound[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 2 + 0] = tmp1; | |
bound[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 0 + 0] = tmp1; | |
bound[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc); | |
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
bound[0 + 2 + 0] = tmp1; | |
bound[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f; | |
dx = x1[0] - x2[0]; | |
dy = x1[1] - x2[1]; | |
ri = 1.0 / q1[0]; | |
p1 = gm1*(q1[3]-0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2])); | |
if (bound[0] == 1) | |
{ | |
res1[1] += + p1*dy; | |
res1[2] += - p1*dx; | |
} | |
else | |
{ | |
vol1 = ri*(q1[1]*dy - q1[2]*dx); | |
ri = 1.0 / qinf[0]; | |
p2 = gm1*(qinf[3]-0.5 * ri * (qinf[1]*qinf[1]+qinf[2]*qinf[2])); | |
vol2 = ri*(qinf[1]*dy - qinf[2]*dx); | |
mu = (adt1[0]) * eps; | |
f = 0.5 * (vol1 * q1[0] + vol2 * qinf[0] ) + mu*(q1[0]-qinf[0]); | |
res1[0] += f; | |
f = 0.5 * (vol1 * q1[1] + p1*dy + vol2 * qinf[1] + p2*dy) + mu*(q1[1]-qinf[1]); | |
res1[1] += f; | |
f = 0.5 * (vol1 * q1[2] - p1*dx + vol2 * qinf[2] - p2*dx) + mu*(q1[2]-qinf[2]); | |
res1[2] += f; | |
f = 0.5 * (vol1 * (q1[3]+p1) + vol2 * (qinf[3]+p2) ) + mu*(q1[3]-qinf[3]); | |
res1[3] += f; | |
} | |
for(uniform int j = 0; j < rem; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
p_res[extract(becell[0], j) * 4 + 0] += extract(res1[0], j); | |
p_res[extract(becell[0], j) * 4 + 1] += extract(res1[1], j); | |
p_res[extract(becell[0], j) * 4 + 2] += extract(res1[2], j); | |
p_res[extract(becell[0], j) * 4 + 3] += extract(res1[3], j); | |
} | |
} | |
} | |
} | |
export void update (uniform float p_qold[], | |
uniform float p_q[], | |
uniform float p_res[], | |
uniform float p_adt[], | |
uniform float rms[], | |
uniform int length, | |
uniform int offset, | |
uniform int runlength) | |
{ | |
uniform int i; | |
for(i = offset; i + programCount <= offset + runlength; i += programCount) | |
{ | |
int idx = i + programIndex; | |
float qold[4]; | |
do { | |
qold[0] = p_qold[extract(0, 0) * 4 + programIndex]; | |
} while(0); | |
do { | |
qold[1] = p_qold[extract(1, 1) * 4 + programIndex]; | |
} while(0); | |
do { | |
qold[2] = p_qold[extract(2, 2) * 4 + programIndex]; | |
} while(0); | |
do { | |
qold[3] = p_qold[extract(3, 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 0 + 0] = tmp1; | |
qold[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 2 + 0] = tmp1; | |
qold[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 0 + 0] = tmp1; | |
qold[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 2 + 0] = tmp1; | |
qold[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q[4]; | |
float res[4]; | |
do { | |
res[0] = p_res[extract(0, 0) * 4 + programIndex]; | |
} while(0); | |
do { | |
res[1] = p_res[extract(1, 1) * 4 + programIndex]; | |
} while(0); | |
do { | |
res[2] = p_res[extract(2, 2) * 4 + programIndex]; | |
} while(0); | |
do { | |
res[3] = p_res[extract(3, 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 0 + 0] = tmp1; | |
res[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 2 + 0] = tmp1; | |
res[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 0 + 0] = tmp1; | |
res[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 2 + 0] = tmp1; | |
res[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt[1]; | |
if(programIndex < 1) | |
do { | |
adt[0] = p_adt[extract(0, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt[1] = p_adt[extract(1, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt[2] = p_adt[extract(2, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
do { | |
adt[3] = p_adt[extract(3, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adti; float del; | |
adti = 1.0/adt[0]; | |
for(uniform int j = 0; j < 4; j++) | |
{ | |
del = adti * res[j]; | |
q[j] = qold[j] - del; | |
res[j] = 0.0; | |
int rindex = 0; | |
for(uniform int j = 0; j < programCount; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
rms[extract(rindex, j)] += extract(del * del, j); | |
} | |
} | |
} | |
do { | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
; | |
if(programIndex < 1) | |
p_adt[extract(0, 0) * 1 + programIndex] = adt[0]; | |
if(programIndex < 1) | |
p_adt[extract(1, 1) * 1 + programIndex] = adt[1]; | |
if(programIndex < 1) | |
p_adt[extract(2, 2) * 1 + programIndex] = adt[2]; | |
if(programIndex < 1) | |
p_adt[extract(3, 3) * 1 + programIndex] = adt[3]; | |
} while(0); | |
do { | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 0 + 0] = tmp1; | |
q[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 2 + 0] = tmp1; | |
q[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 0 + 0] = tmp1; | |
q[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 2 + 0] = tmp1; | |
q[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
; | |
p_q[extract(0, 0) * 4 + programIndex] = q[0]; | |
p_q[extract(1, 1) * 4 + programIndex] = q[1]; | |
p_q[extract(2, 2) * 4 + programIndex] = q[2]; | |
p_q[extract(3, 3) * 4 + programIndex] = q[3]; | |
} while(0); | |
} | |
uniform int rem = offset + runlength - i; | |
if(rem > 0) | |
{ | |
int idx = i + programIndex; | |
float qold[4]; | |
if(0 < rem) | |
do { | |
qold[0] = p_qold[extract(0, 0) * 4 + programIndex]; | |
} while(0); | |
if(1 < rem) | |
do { | |
qold[1] = p_qold[extract(1, 1) * 4 + programIndex]; | |
} while(0); | |
if(2 < rem) | |
do { | |
qold[2] = p_qold[extract(2, 2) * 4 + programIndex]; | |
} while(0); | |
if(3 < rem) | |
do { | |
qold[3] = p_qold[extract(3, 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 0 + 0] = tmp1; | |
qold[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 2 + 0] = tmp1; | |
qold[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 0 + 0] = tmp1; | |
qold[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
qold[0 + 2 + 0] = tmp1; | |
qold[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float q[4]; | |
float res[4]; | |
if(0 < rem) | |
do { | |
res[0] = p_res[extract(0, 0) * 4 + programIndex]; | |
} while(0); | |
if(1 < rem) | |
do { | |
res[1] = p_res[extract(1, 1) * 4 + programIndex]; | |
} while(0); | |
if(2 < rem) | |
do { | |
res[2] = p_res[extract(2, 2) * 4 + programIndex]; | |
} while(0); | |
if(3 < rem) | |
do { | |
res[3] = p_res[extract(3, 3) * 4 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 0 + 0] = tmp1; | |
res[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 2 + 0] = tmp1; | |
res[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 0 + 0] = tmp1; | |
res[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
res[0 + 2 + 0] = tmp1; | |
res[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adt[1]; | |
if(programIndex < 1) | |
if(0 < rem) | |
do { | |
adt[0] = p_adt[extract(0, 0) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(1 < rem) | |
do { | |
adt[1] = p_adt[extract(1, 1) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(2 < rem) | |
do { | |
adt[2] = p_adt[extract(2, 2) * 1 + programIndex]; | |
} while(0); | |
if(programIndex < 1) | |
if(3 < rem) | |
do { | |
adt[3] = p_adt[extract(3, 3) * 1 + programIndex]; | |
} while(0); | |
;;;; | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
float adti; float del; | |
adti = 1.0/adt[0]; | |
for(uniform int j = 0; j < 4; j++) | |
{ | |
del = adti * res[j]; | |
q[j] = qold[j] - del; | |
res[j] = 0.0; | |
int rindex = 0; | |
for(uniform int j = 0; j < programCount; j++) | |
{ | |
if ((lanemask() & (1 << j)) != 0) | |
{ | |
rms[extract(rindex, j)] += extract(del * del, j); | |
} | |
} | |
} | |
do { | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 0 + 0] = tmp1; | |
adt[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
adt[0 + 2 + 0] = tmp1; | |
adt[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
; | |
if(programIndex < 1) | |
if(0 < rem) | |
p_adt[extract(0, 0) * 1 + programIndex] = adt[0]; | |
if(programIndex < 1) | |
if(1 < rem) | |
p_adt[extract(1, 1) * 1 + programIndex] = adt[1]; | |
if(programIndex < 1) | |
if(2 < rem) | |
p_adt[extract(2, 2) * 1 + programIndex] = adt[2]; | |
if(programIndex < 1) | |
if(3 < rem) | |
p_adt[extract(3, 3) * 1 + programIndex] = adt[3]; | |
} while(0); | |
do { | |
if(programCount != 4) { | |
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount); | |
abort(2); | |
} | |
// #inner(formula, elemId) | |
// #worker(round) | |
do { | |
int r =0; | |
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1))); | |
int t_index = programIndex & ((1 << 0) - 1); | |
int t_pc = programCount * ((programIndex >> 0) & 1); | |
do { | |
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 0 + 0] = tmp1; | |
q[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 2 + 0] = tmp1; | |
q[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
do { | |
int r =1; | |
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1))); | |
int t_index = programIndex & ((1 << 1) - 1); | |
int t_pc = programCount * ((programIndex >> 1) & 1); | |
do { | |
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 0 + 0] = tmp1; | |
q[0 + 0 + 1] = tmp2; | |
} while (0); | |
do { | |
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc); | |
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc); | |
q[0 + 2 + 0] = tmp1; | |
q[0 + 2 + 1] = tmp2; | |
} while (0); | |
} while(0); | |
; | |
if(0 < rem) | |
p_q[extract(0, 0) * 4 + programIndex] = q[0]; | |
if(1 < rem) | |
p_q[extract(1, 1) * 4 + programIndex] = q[1]; | |
if(2 < rem) | |
p_q[extract(2, 2) * 4 + programIndex] = q[2]; | |
if(3 < rem) | |
p_q[extract(3, 3) * 4 + programIndex] = q[3]; | |
} while(0); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment