Skip to content

Instantly share code, notes, and snippets.

@uzytkownik
Created August 2, 2011 14:51
Show Gist options
  • Save uzytkownik/1120346 to your computer and use it in GitHub Desktop.
Save uzytkownik/1120346 to your computer and use it in GitHub Desktop.
Testcase of gather loads
/********************************* Primitives ********************************/
// aos(dim, length, remote, idx, iidx)
// soa(dim, length, remote, idx, iidx)
extern "C" void abort(uniform int code);
// transpose(type, data, offset)
// trans_no(dim)
// local_aos(type, local, dim)
// local_soa(type, local, dim)
// load_aos(type, local, dim, length, remote, cond, idx)
// load_soa(type, local, dim, length, remote, cond, idx)
// store_aos(type, local, dim, length, remote, cond, idx)
// store_soa(type, name, dim, from, length, cond, idx)
// zero(local, dim)
// reduce_aos_indir_inc(type, local, dim, length, remote, cond, idx)
// reduce_soa_indir_inc(type, local, dim, length, remote, cond, idx)
// reduce_global_inc(type, var, idx, inc)
/*********************************** Policy **********************************/
// layout
// load(type, local, dim, length, remote, idx, continue)
// store(type, local, dim, length, remote, idx, continue)
// load_store(type, local, dim, length, remote, idx, cond, continue)
// reduce_indir_inc(type, local, dim, length, remote, idx, continue)
// loop(offset, runlength, macro, args...)
/************************************ Code ***********************************/
// Uniform global constants, declared here and defined elsewhere.
// In the part of this file that was reviewed only `eps` is read (by res_calc);
// NOTE(review): the other names look like flow-solver parameters -- confirm
// their meaning against the host code that defines them.
extern uniform float gam, gm1, cfl, eps, mach, alpha, air_const, qinf[4];
/*
 * save_solv
 *
 * For every element index e in [offset, offset + runlength) this reads the
 * four floats p_q[e * 4 + 0..3], passes them through the two-stage shuffle
 * network twice (once on q, once on the copy qold), and writes the result to
 * p_qold[e * 4 + 0..3].
 * NOTE(review): the shuffle network is presumably the `transpose` primitive
 * named in the file header (AoS <-> SoA in registers) -- confirm.
 *
 * Parameters:
 *   p_q       - source array, read at [e * 4 + programIndex]
 *   p_qold    - destination array, written at the same positions
 *   length    - not referenced in the body
 *   offset    - first element index to process
 *   runlength - number of elements to process
 *
 * The body is written for programCount == 4; any other gang size prints a
 * message and calls abort(2).
 */
export void save_solv(uniform float p_q[],
                      uniform float p_qold[],
                      uniform int length,
                      uniform int offset,
                      uniform int runlength)
{
    uniform int i;

    // Full groups: one row per element, i advances by programCount.
    for (i = offset; i + programCount <= offset + runlength; i += programCount) {
        int idx = i + programIndex;
        float q[4];

        // Row k holds the 4 components of element extract(idx, k), one per lane.
        for (uniform int row = 0; row < 4; row++)
            q[row] = p_q[extract(idx, row) * 4 + programIndex];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        // Two shuffle stages over the row pairs (0,1) and (2,3).
        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                // Both results are taken from the old pair before overwriting it.
                float first = shuffle(q[base], q[base + 1], sel);
                float second = shuffle(q[base], q[base + 1], programCount + sel);
                q[base] = first;
                q[base + 1] = second;
            }
        }

        float qold[4];
        for (uniform int j = 0; j < 4; j++)
            qold[j] = q[j];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        // Same two stages again, this time on the copy.
        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float first = shuffle(qold[base], qold[base + 1], sel);
                float second = shuffle(qold[base], qold[base + 1], programCount + sel);
                qold[base] = first;
                qold[base + 1] = second;
            }
        }

        for (uniform int row = 0; row < 4; row++)
            p_qold[extract(idx, row) * 4 + programIndex] = qold[row];
    }

    // Partial group: fewer than programCount elements are left.
    uniform int rem = offset + runlength - i;
    if (rem > 0) {
        int idx = i + programIndex;
        float q[4];

        // Only rows belonging to existing elements are loaded.
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                q[row] = p_q[extract(idx, row) * 4 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float first = shuffle(q[base], q[base + 1], sel);
                float second = shuffle(q[base], q[base + 1], programCount + sel);
                q[base] = first;
                q[base + 1] = second;
            }
        }

        float qold[4];
        for (uniform int j = 0; j < 4; j++)
            qold[j] = q[j];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float first = shuffle(qold[base], qold[base + 1], sel);
                float second = shuffle(qold[base], qold[base + 1], programCount + sel);
                qold[base] = first;
                qold[base + 1] = second;
            }
        }

        // Only rows belonging to existing elements are written back.
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                p_qold[extract(idx, row) * 4 + programIndex] = qold[row];
        }
    }
}
/*
 * adt_calc
 *
 * For every element index e in [offset, offset + runlength) this reads the
 * four ints pcell[e * 4 + 0..3], runs them through the two-stage shuffle
 * network, and then uses the lanes of cell[0] as indices to read two floats
 * each from p_x (p_x[c * 2 + 0..1]), which are shuffled the same way.
 * Nothing is written to any output array in this function.
 * NOTE(review): the shuffle network is presumably the `transpose` primitive
 * named in the file header (AoS <-> SoA in registers) -- confirm.
 *
 * Parameters:
 *   pcell          - index array, read at [e * 4 + programIndex]
 *   p_x            - float array, read at [c * 2 + programIndex] for lanes 0..1
 *   p_q, p_adt     - not referenced in the body
 *   length         - not referenced in the body
 *   offset         - first element index to process
 *   runlength      - number of elements to process
 *   pcell_colength - not referenced in the body
 *
 * The body is written for programCount == 4; any other gang size prints a
 * message and calls abort(2).
 *
 * Fix: x1 was declared with 2 entries although rows 0..3 are written and the
 * row pairs (0,1) and (2,3) are shuffled, i.e. x1[2] and x1[3] were accessed
 * out of bounds. It now has one row per gathered element (4).
 */
export void adt_calc(uniform int pcell[],
                     uniform float p_x[],
                     uniform float p_q[],
                     uniform float p_adt[],
                     uniform int length,
                     uniform int offset,
                     uniform int runlength,
                     uniform int pcell_colength)
{
    uniform int i;

    // Full groups: one row per element, i advances by programCount.
    for (i = offset; i + programCount <= offset + runlength; i += programCount) {
        int idx = i + programIndex;
        int cell[4];

        // Row k holds the 4 entries of element extract(idx, k), one per lane.
        for (uniform int row = 0; row < 4; row++)
            cell[row] = pcell[extract(idx, row) * 4 + programIndex];

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        // Two shuffle stages over the row pairs (0,1) and (2,3).
        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                // Both results are taken from the old pair before overwriting it.
                int first = shuffle(cell[base], cell[base + 1], sel);
                int second = shuffle(cell[base], cell[base + 1], programCount + sel);
                cell[base] = first;
                cell[base + 1] = second;
            }
        }

        // One row per gathered element; only lanes 0..1 are loaded because
        // each p_x entry has 2 components.
        float x1[4];
        for (uniform int row = 0; row < 4; row++) {
            if (programIndex < 2)
                x1[row] = p_x[extract(cell[0], row) * 2 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float first = shuffle(x1[base], x1[base + 1], sel);
                float second = shuffle(x1[base], x1[base + 1], programCount + sel);
                x1[base] = first;
                x1[base + 1] = second;
            }
        }
    }

    // Partial group: fewer than programCount elements are left.
    uniform int rem = offset + runlength - i;
    if (rem > 0) {
        int idx = i + programIndex;
        int cell[4];

        // Only rows belonging to existing elements are loaded.
        for (uniform int row = 0; row < 4; row++) {
            if (row < rem)
                cell[row] = pcell[extract(idx, row) * 4 + programIndex];
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                int first = shuffle(cell[base], cell[base + 1], sel);
                int second = shuffle(cell[base], cell[base + 1], programCount + sel);
                cell[base] = first;
                cell[base + 1] = second;
            }
        }

        // Sized for all 4 rows even though only the first `rem` are loaded:
        // the shuffle stages below still touch every row.
        float x1[4];
        for (uniform int row = 0; row < 4; row++) {
            if (programIndex < 2) {
                if (row < rem)
                    x1[row] = p_x[extract(cell[0], row) * 2 + programIndex];
            }
        }

        if (programCount != 4) {
            print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
            abort(2);
        }

        for (uniform int stage = 0; stage < 2; stage++) {
            int sel = (1 << stage) * (programIndex / (1 << (stage + 1)))
                    + (programIndex & ((1 << stage) - 1))
                    + programCount * ((programIndex >> stage) & 1);
            for (uniform int base = 0; base < 4; base += 2) {
                float first = shuffle(x1[base], x1[base + 1], sel);
                float second = shuffle(x1[base], x1[base + 1], programCount + sel);
                x1[base] = first;
                x1[base + 1] = second;
            }
        }
    }
}
export void res_calc(uniform int pedge[],
uniform float p_x[],
uniform int pecell[],
uniform float p_q[],
uniform float p_adt[],
uniform float p_res[],
uniform int length,
uniform int offset,
uniform int runlength,
uniform int pedge_colength,
uniform int pecell_colength)
{
uniform int i;
for(i = offset; i + programCount <= offset + runlength; i += programCount)
{
int idx = i + programIndex;
int edge[2];
if(programIndex < 2)
do {
edge[0] = pedge[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
edge[1] = pedge[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
edge[2] = pedge[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
edge[3] = pedge[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 0 + 0] = tmp1;
edge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 2 + 0] = tmp1;
edge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 0 + 0] = tmp1;
edge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 2 + 0] = tmp1;
edge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x1[2];
if(programIndex < 2)
do {
x1[0] = p_x[extract(edge[0], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[1] = p_x[extract(edge[0], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[2] = p_x[extract(edge[0], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[3] = p_x[extract(edge[0], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x2[2];
if(programIndex < 2)
do {
x2[0] = p_x[extract(edge[1], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[1] = p_x[extract(edge[1], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[2] = p_x[extract(edge[1], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[3] = p_x[extract(edge[1], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
int ecell[2];
if(programIndex < 2)
do {
ecell[0] = pecell[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
ecell[1] = pecell[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
ecell[2] = pecell[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
ecell[3] = pecell[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 0 + 0] = tmp1;
ecell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 2 + 0] = tmp1;
ecell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 0 + 0] = tmp1;
ecell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 2 + 0] = tmp1;
ecell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q1[4];
do {
q1[0] = p_q[extract(ecell[0], 0) * 4 + programIndex];
} while(0);
do {
q1[1] = p_q[extract(ecell[0], 1) * 4 + programIndex];
} while(0);
do {
q1[2] = p_q[extract(ecell[0], 2) * 4 + programIndex];
} while(0);
do {
q1[3] = p_q[extract(ecell[0], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q2[4];
do {
q2[0] = p_q[extract(ecell[1], 0) * 4 + programIndex];
} while(0);
do {
q2[1] = p_q[extract(ecell[1], 1) * 4 + programIndex];
} while(0);
do {
q2[2] = p_q[extract(ecell[1], 2) * 4 + programIndex];
} while(0);
do {
q2[3] = p_q[extract(ecell[1], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 0 + 0] = tmp1;
q2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 2 + 0] = tmp1;
q2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 0 + 0] = tmp1;
q2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 2 + 0] = tmp1;
q2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt1[1];
if(programIndex < 1)
do {
adt1[0] = p_adt[extract(ecell[0], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[1] = p_adt[extract(ecell[0], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[2] = p_adt[extract(ecell[0], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[3] = p_adt[extract(ecell[0], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt2[1];
if(programIndex < 1)
do {
adt2[0] = p_adt[extract(ecell[1], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt2[1] = p_adt[extract(ecell[1], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt2[2] = p_adt[extract(ecell[1], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt2[3] = p_adt[extract(ecell[1], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 0 + 0] = tmp1;
adt2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 2 + 0] = tmp1;
adt2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 0 + 0] = tmp1;
adt2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 2 + 0] = tmp1;
adt2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float res1[4];
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0;
float res2[4];
res2[0] = 0; res2[1] = 0; res2[2] = 0; res2[3] = 0;
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f;
dx = x1[0] - x2[0];
dy = x1[1] - x2[1];
ri = 1.0 / q1[0];
p1 = 0.4 * (q1[3] - 0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2]));
vol1 = ri * (q1[1] * dy - q1[2] * dx);
ri = 1.0 / q2[0];
p2 = 0.4 * (q2[3] - 0.5 * ri * (q2[1] * q2[1] + q2[2] * q2[2]));
vol2 = ri * (q2[1] * dy - q2[2] * dx);
mu = 0.5 * (adt1[0] + adt2[0]) * eps;
f = 0.5 * (vol1 * q1[0] + vol2 * q2[0]) + mu * (q1[0] - q2[0]);
adt1[0] += f;
adt2[0] -= f;
f = 0.5 * (vol1 * q1[1] + p1 * dy + vol2 * q2[1] + p2 * dy) + mu * (q1[1] - q2[1]);
adt1[1] += f;
adt2[1] -= f;
f = 0.5 * (vol1 * q1[2] - p1 * dx + vol2 * q2[2] - p2 * dx) + mu * (q1[2] - q2[2]);
adt1[2] += f;
adt2[2] -= f;
f = 0.5 * (vol1 *(q1[3] + p1) + vol2 *(q2[3] + p2) ) + mu * (q1[3] - q2[3]);
adt1[3] += f;
adt2[3] -= f;
for(uniform int j = 0; j < programCount; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(ecell[1], j) * 4 + 0] += extract(res2[0], j);
p_res[extract(ecell[1], j) * 4 + 1] += extract(res2[1], j);
p_res[extract(ecell[1], j) * 4 + 2] += extract(res2[2], j);
p_res[extract(ecell[1], j) * 4 + 3] += extract(res2[3], j);
}
}
for(uniform int j = 0; j < programCount; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(ecell[0], j) * 4 + 0] += extract(res1[0], j);
p_res[extract(ecell[0], j) * 4 + 1] += extract(res1[1], j);
p_res[extract(ecell[0], j) * 4 + 2] += extract(res1[2], j);
p_res[extract(ecell[0], j) * 4 + 3] += extract(res1[3], j);
}
}
}
uniform int rem = offset + runlength - i;
if(rem > 0)
{
int idx = i + programIndex;
int edge[2];
if(programIndex < 2)
if(0 < rem)
do {
edge[0] = pedge[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
edge[1] = pedge[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
edge[2] = pedge[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
edge[3] = pedge[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 0 + 0] = tmp1;
edge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 2 + 0] = tmp1;
edge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 0 + 0], edge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 0 + 0] = tmp1;
edge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(edge[0 + 2 + 0], edge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
edge[0 + 2 + 0] = tmp1;
edge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x1[2];
if(programIndex < 2)
if(0 < rem)
do {
x1[0] = p_x[extract(edge[0], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
x1[1] = p_x[extract(edge[0], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
x1[2] = p_x[extract(edge[0], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
x1[3] = p_x[extract(edge[0], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x2[2];
if(programIndex < 2)
if(0 < rem)
do {
x2[0] = p_x[extract(edge[1], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
x2[1] = p_x[extract(edge[1], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
x2[2] = p_x[extract(edge[1], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
x2[3] = p_x[extract(edge[1], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
int ecell[2];
if(programIndex < 2)
if(0 < rem)
do {
ecell[0] = pecell[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
ecell[1] = pecell[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
ecell[2] = pecell[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
ecell[3] = pecell[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 0 + 0] = tmp1;
ecell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 2 + 0] = tmp1;
ecell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 0 + 0], ecell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 0 + 0] = tmp1;
ecell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(ecell[0 + 2 + 0], ecell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
ecell[0 + 2 + 0] = tmp1;
ecell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q1[4];
if(0 < rem)
do {
q1[0] = p_q[extract(ecell[0], 0) * 4 + programIndex];
} while(0);
if(1 < rem)
do {
q1[1] = p_q[extract(ecell[0], 1) * 4 + programIndex];
} while(0);
if(2 < rem)
do {
q1[2] = p_q[extract(ecell[0], 2) * 4 + programIndex];
} while(0);
if(3 < rem)
do {
q1[3] = p_q[extract(ecell[0], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q2[4];
if(0 < rem)
do {
q2[0] = p_q[extract(ecell[1], 0) * 4 + programIndex];
} while(0);
if(1 < rem)
do {
q2[1] = p_q[extract(ecell[1], 1) * 4 + programIndex];
} while(0);
if(2 < rem)
do {
q2[2] = p_q[extract(ecell[1], 2) * 4 + programIndex];
} while(0);
if(3 < rem)
do {
q2[3] = p_q[extract(ecell[1], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 0 + 0] = tmp1;
q2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 2 + 0] = tmp1;
q2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 0 + 0], q2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 0 + 0] = tmp1;
q2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q2[0 + 2 + 0], q2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q2[0 + 2 + 0] = tmp1;
q2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt1[1];
if(programIndex < 1)
if(0 < rem)
do {
adt1[0] = p_adt[extract(ecell[0], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
adt1[1] = p_adt[extract(ecell[0], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
adt1[2] = p_adt[extract(ecell[0], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
adt1[3] = p_adt[extract(ecell[0], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt2[1];
if(programIndex < 1)
if(0 < rem)
do {
adt2[0] = p_adt[extract(ecell[1], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
adt2[1] = p_adt[extract(ecell[1], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
adt2[2] = p_adt[extract(ecell[1], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
adt2[3] = p_adt[extract(ecell[1], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 0 + 0] = tmp1;
adt2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 2 + 0] = tmp1;
adt2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 0 + 0], adt2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 0 + 0] = tmp1;
adt2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt2[0 + 2 + 0], adt2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt2[0 + 2 + 0] = tmp1;
adt2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float res1[4];
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0;
float res2[4];
res2[0] = 0; res2[1] = 0; res2[2] = 0; res2[3] = 0;
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f;
dx = x1[0] - x2[0];
dy = x1[1] - x2[1];
ri = 1.0 / q1[0];
p1 = 0.4 * (q1[3] - 0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2]));
vol1 = ri * (q1[1] * dy - q1[2] * dx);
ri = 1.0 / q2[0];
p2 = 0.4 * (q2[3] - 0.5 * ri * (q2[1] * q2[1] + q2[2] * q2[2]));
vol2 = ri * (q2[1] * dy - q2[2] * dx);
mu = 0.5 * (adt1[0] + adt2[0]) * eps;
f = 0.5 * (vol1 * q1[0] + vol2 * q2[0]) + mu * (q1[0] - q2[0]);
adt1[0] += f;
adt2[0] -= f;
f = 0.5 * (vol1 * q1[1] + p1 * dy + vol2 * q2[1] + p2 * dy) + mu * (q1[1] - q2[1]);
adt1[1] += f;
adt2[1] -= f;
f = 0.5 * (vol1 * q1[2] - p1 * dx + vol2 * q2[2] - p2 * dx) + mu * (q1[2] - q2[2]);
adt1[2] += f;
adt2[2] -= f;
f = 0.5 * (vol1 *(q1[3] + p1) + vol2 *(q2[3] + p2) ) + mu * (q1[3] - q2[3]);
adt1[3] += f;
adt2[3] -= f;
for(uniform int j = 0; j < rem; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(ecell[1], j) * 4 + 0] += extract(res2[0], j);
p_res[extract(ecell[1], j) * 4 + 1] += extract(res2[1], j);
p_res[extract(ecell[1], j) * 4 + 2] += extract(res2[2], j);
p_res[extract(ecell[1], j) * 4 + 3] += extract(res2[3], j);
}
}
for(uniform int j = 0; j < rem; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(ecell[0], j) * 4 + 0] += extract(res1[0], j);
p_res[extract(ecell[0], j) * 4 + 1] += extract(res1[1], j);
p_res[extract(ecell[0], j) * 4 + 2] += extract(res1[2], j);
p_res[extract(ecell[0], j) * 4 + 3] += extract(res1[3], j);
}
}
}
}
export void bres_calc(uniform int pbedge[],
uniform float p_x[],
uniform int pbecell[],
uniform float p_q[],
uniform float p_adt[],
uniform float p_res[],
uniform int p_bound[],
uniform int length,
uniform int offset,
uniform int runlength,
uniform int pbegde_colength,
uniform int pbecell_colength)
{
uniform int i;
for(i = offset; i + programCount <= offset + runlength; i += programCount)
{
int idx = i + programIndex;
int bedge[2];
if(programIndex < 2)
do {
bedge[0] = pbedge[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
bedge[1] = pbedge[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
bedge[2] = pbedge[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
bedge[3] = pbedge[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 0 + 0] = tmp1;
bedge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 2 + 0] = tmp1;
bedge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 0 + 0] = tmp1;
bedge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 2 + 0] = tmp1;
bedge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x1[2];
if(programIndex < 2)
do {
x1[0] = p_x[extract(bedge[0], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[1] = p_x[extract(bedge[0], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[2] = p_x[extract(bedge[0], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x1[3] = p_x[extract(bedge[0], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x2[2];
if(programIndex < 2)
do {
x2[0] = p_x[extract(bedge[1], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[1] = p_x[extract(bedge[1], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[2] = p_x[extract(bedge[1], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
do {
x2[3] = p_x[extract(bedge[1], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
int becell[1];
if(programIndex < 1)
do {
becell[0] = pbecell[extract(idx, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
becell[1] = pbecell[extract(idx, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
becell[2] = pbecell[extract(idx, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
becell[3] = pbecell[extract(idx, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 0 + 0] = tmp1;
becell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 2 + 0] = tmp1;
becell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 0 + 0] = tmp1;
becell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 2 + 0] = tmp1;
becell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q1[4];
do {
q1[0] = p_q[extract(becell[0], 0) * 4 + programIndex];
} while(0);
do {
q1[1] = p_q[extract(becell[0], 1) * 4 + programIndex];
} while(0);
do {
q1[2] = p_q[extract(becell[0], 2) * 4 + programIndex];
} while(0);
do {
q1[3] = p_q[extract(becell[0], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt1[1];
if(programIndex < 1)
do {
adt1[0] = p_adt[extract(becell[0], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[1] = p_adt[extract(becell[0], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[2] = p_adt[extract(becell[0], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt1[3] = p_adt[extract(becell[0], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float res1[4];
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0;
int bound[1];
if(programIndex < 1)
do {
bound[0] = p_bound[extract(idx, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
bound[1] = p_bound[extract(idx, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
bound[2] = p_bound[extract(idx, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
bound[3] = p_bound[extract(idx, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 0 + 0] = tmp1;
bound[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 2 + 0] = tmp1;
bound[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 0 + 0] = tmp1;
bound[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 2 + 0] = tmp1;
bound[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f;
dx = x1[0] - x2[0];
dy = x1[1] - x2[1];
ri = 1.0 / q1[0];
p1 = gm1*(q1[3]-0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2]));
if (bound[0] == 1)
{
res1[1] += + p1*dy;
res1[2] += - p1*dx;
}
else
{
vol1 = ri*(q1[1]*dy - q1[2]*dx);
ri = 1.0 / qinf[0];
p2 = gm1*(qinf[3]-0.5 * ri * (qinf[1]*qinf[1]+qinf[2]*qinf[2]));
vol2 = ri*(qinf[1]*dy - qinf[2]*dx);
mu = (adt1[0]) * eps;
f = 0.5 * (vol1 * q1[0] + vol2 * qinf[0] ) + mu*(q1[0]-qinf[0]);
res1[0] += f;
f = 0.5 * (vol1 * q1[1] + p1*dy + vol2 * qinf[1] + p2*dy) + mu*(q1[1]-qinf[1]);
res1[1] += f;
f = 0.5 * (vol1 * q1[2] - p1*dx + vol2 * qinf[2] - p2*dx) + mu*(q1[2]-qinf[2]);
res1[2] += f;
f = 0.5 * (vol1 * (q1[3]+p1) + vol2 * (qinf[3]+p2) ) + mu*(q1[3]-qinf[3]);
res1[3] += f;
}
for(uniform int j = 0; j < programCount; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(becell[0], j) * 4 + 0] += extract(res1[0], j);
p_res[extract(becell[0], j) * 4 + 1] += extract(res1[1], j);
p_res[extract(becell[0], j) * 4 + 2] += extract(res1[2], j);
p_res[extract(becell[0], j) * 4 + 3] += extract(res1[3], j);
}
}
}
uniform int rem = offset + runlength - i;
if(rem > 0)
{
int idx = i + programIndex;
int bedge[2];
if(programIndex < 2)
if(0 < rem)
do {
bedge[0] = pbedge[extract(idx, 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
bedge[1] = pbedge[extract(idx, 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
bedge[2] = pbedge[extract(idx, 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
bedge[3] = pbedge[extract(idx, 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 0 + 0] = tmp1;
bedge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 2 + 0] = tmp1;
bedge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 0 + 0], bedge[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 0 + 0] = tmp1;
bedge[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bedge[0 + 2 + 0], bedge[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bedge[0 + 2 + 0] = tmp1;
bedge[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x1[2];
if(programIndex < 2)
if(0 < rem)
do {
x1[0] = p_x[extract(bedge[0], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
x1[1] = p_x[extract(bedge[0], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
x1[2] = p_x[extract(bedge[0], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
x1[3] = p_x[extract(bedge[0], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 0 + 0], x1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 0 + 0] = tmp1;
x1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x1[0 + 2 + 0], x1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x1[0 + 2 + 0] = tmp1;
x1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float x2[2];
if(programIndex < 2)
if(0 < rem)
do {
x2[0] = p_x[extract(bedge[1], 0) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(1 < rem)
do {
x2[1] = p_x[extract(bedge[1], 1) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(2 < rem)
do {
x2[2] = p_x[extract(bedge[1], 2) * 2 + programIndex];
} while(0);
if(programIndex < 2)
if(3 < rem)
do {
x2[3] = p_x[extract(bedge[1], 3) * 2 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 0 + 0], x2[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 0 + 0] = tmp1;
x2[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(x2[0 + 2 + 0], x2[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
x2[0 + 2 + 0] = tmp1;
x2[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
int becell[1];
if(programIndex < 1)
if(0 < rem)
do {
becell[0] = pbecell[extract(idx, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
becell[1] = pbecell[extract(idx, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
becell[2] = pbecell[extract(idx, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
becell[3] = pbecell[extract(idx, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 0 + 0] = tmp1;
becell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 2 + 0] = tmp1;
becell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 0 + 0], becell[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 0 + 0] = tmp1;
becell[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(becell[0 + 2 + 0], becell[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
becell[0 + 2 + 0] = tmp1;
becell[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q1[4];
if(0 < rem)
do {
q1[0] = p_q[extract(becell[0], 0) * 4 + programIndex];
} while(0);
if(1 < rem)
do {
q1[1] = p_q[extract(becell[0], 1) * 4 + programIndex];
} while(0);
if(2 < rem)
do {
q1[2] = p_q[extract(becell[0], 2) * 4 + programIndex];
} while(0);
if(3 < rem)
do {
q1[3] = p_q[extract(becell[0], 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 0 + 0], q1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 0 + 0] = tmp1;
q1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q1[0 + 2 + 0], q1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q1[0 + 2 + 0] = tmp1;
q1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt1[1];
if(programIndex < 1)
if(0 < rem)
do {
adt1[0] = p_adt[extract(becell[0], 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
adt1[1] = p_adt[extract(becell[0], 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
adt1[2] = p_adt[extract(becell[0], 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
adt1[3] = p_adt[extract(becell[0], 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 0 + 0], adt1[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 0 + 0] = tmp1;
adt1[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt1[0 + 2 + 0], adt1[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt1[0 + 2 + 0] = tmp1;
adt1[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float res1[4];
res1[0] = 0; res1[1] = 0; res1[2] = 0; res1[3] = 0;
int bound[1];
if(programIndex < 1)
if(0 < rem)
do {
bound[0] = p_bound[extract(idx, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
bound[1] = p_bound[extract(idx, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
bound[2] = p_bound[extract(idx, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
bound[3] = p_bound[extract(idx, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 0 + 0] = tmp1;
bound[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 2 + 0] = tmp1;
bound[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
int tmp1 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 0 + 0], bound[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 0 + 0] = tmp1;
bound[0 + 0 + 1] = tmp2;
} while (0);
do {
int tmp1 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], t_offset + t_index + t_pc);
int tmp2 = shuffle(bound[0 + 2 + 0], bound[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
bound[0 + 2 + 0] = tmp1;
bound[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float dx; float dy; float mu; float ri; float p1; float vol1; float p2; float vol2; float f;
dx = x1[0] - x2[0];
dy = x1[1] - x2[1];
ri = 1.0 / q1[0];
p1 = gm1*(q1[3]-0.5 * ri * (q1[1] * q1[1] + q1[2] * q1[2]));
if (bound[0] == 1)
{
res1[1] += + p1*dy;
res1[2] += - p1*dx;
}
else
{
vol1 = ri*(q1[1]*dy - q1[2]*dx);
ri = 1.0 / qinf[0];
p2 = gm1*(qinf[3]-0.5 * ri * (qinf[1]*qinf[1]+qinf[2]*qinf[2]));
vol2 = ri*(qinf[1]*dy - qinf[2]*dx);
mu = (adt1[0]) * eps;
f = 0.5 * (vol1 * q1[0] + vol2 * qinf[0] ) + mu*(q1[0]-qinf[0]);
res1[0] += f;
f = 0.5 * (vol1 * q1[1] + p1*dy + vol2 * qinf[1] + p2*dy) + mu*(q1[1]-qinf[1]);
res1[1] += f;
f = 0.5 * (vol1 * q1[2] - p1*dx + vol2 * qinf[2] - p2*dx) + mu*(q1[2]-qinf[2]);
res1[2] += f;
f = 0.5 * (vol1 * (q1[3]+p1) + vol2 * (qinf[3]+p2) ) + mu*(q1[3]-qinf[3]);
res1[3] += f;
}
for(uniform int j = 0; j < rem; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
p_res[extract(becell[0], j) * 4 + 0] += extract(res1[0], j);
p_res[extract(becell[0], j) * 4 + 1] += extract(res1[1], j);
p_res[extract(becell[0], j) * 4 + 2] += extract(res1[2], j);
p_res[extract(becell[0], j) * 4 + 3] += extract(res1[3], j);
}
}
}
}
export void update (uniform float p_qold[],
uniform float p_q[],
uniform float p_res[],
uniform float p_adt[],
uniform float rms[],
uniform int length,
uniform int offset,
uniform int runlength)
{
uniform int i;
for(i = offset; i + programCount <= offset + runlength; i += programCount)
{
int idx = i + programIndex;
float qold[4];
do {
qold[0] = p_qold[extract(0, 0) * 4 + programIndex];
} while(0);
do {
qold[1] = p_qold[extract(1, 1) * 4 + programIndex];
} while(0);
do {
qold[2] = p_qold[extract(2, 2) * 4 + programIndex];
} while(0);
do {
qold[3] = p_qold[extract(3, 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 0 + 0] = tmp1;
qold[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 2 + 0] = tmp1;
qold[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 0 + 0] = tmp1;
qold[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 2 + 0] = tmp1;
qold[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q[4];
float res[4];
do {
res[0] = p_res[extract(0, 0) * 4 + programIndex];
} while(0);
do {
res[1] = p_res[extract(1, 1) * 4 + programIndex];
} while(0);
do {
res[2] = p_res[extract(2, 2) * 4 + programIndex];
} while(0);
do {
res[3] = p_res[extract(3, 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 0 + 0] = tmp1;
res[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 2 + 0] = tmp1;
res[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 0 + 0] = tmp1;
res[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 2 + 0] = tmp1;
res[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt[1];
if(programIndex < 1)
do {
adt[0] = p_adt[extract(0, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt[1] = p_adt[extract(1, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt[2] = p_adt[extract(2, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
do {
adt[3] = p_adt[extract(3, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adti; float del;
adti = 1.0/adt[0];
for(uniform int j = 0; j < 4; j++)
{
del = adti * res[j];
q[j] = qold[j] - del;
res[j] = 0.0;
int rindex = 0;
for(uniform int j = 0; j < programCount; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
rms[extract(rindex, j)] += extract(del * del, j);
}
}
}
do {
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
;
if(programIndex < 1)
p_adt[extract(0, 0) * 1 + programIndex] = adt[0];
if(programIndex < 1)
p_adt[extract(1, 1) * 1 + programIndex] = adt[1];
if(programIndex < 1)
p_adt[extract(2, 2) * 1 + programIndex] = adt[2];
if(programIndex < 1)
p_adt[extract(3, 3) * 1 + programIndex] = adt[3];
} while(0);
do {
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 0 + 0] = tmp1;
q[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 2 + 0] = tmp1;
q[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 0 + 0] = tmp1;
q[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 2 + 0] = tmp1;
q[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
;
p_q[extract(0, 0) * 4 + programIndex] = q[0];
p_q[extract(1, 1) * 4 + programIndex] = q[1];
p_q[extract(2, 2) * 4 + programIndex] = q[2];
p_q[extract(3, 3) * 4 + programIndex] = q[3];
} while(0);
}
uniform int rem = offset + runlength - i;
if(rem > 0)
{
int idx = i + programIndex;
float qold[4];
if(0 < rem)
do {
qold[0] = p_qold[extract(0, 0) * 4 + programIndex];
} while(0);
if(1 < rem)
do {
qold[1] = p_qold[extract(1, 1) * 4 + programIndex];
} while(0);
if(2 < rem)
do {
qold[2] = p_qold[extract(2, 2) * 4 + programIndex];
} while(0);
if(3 < rem)
do {
qold[3] = p_qold[extract(3, 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 0 + 0] = tmp1;
qold[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 2 + 0] = tmp1;
qold[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 0 + 0], qold[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 0 + 0] = tmp1;
qold[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(qold[0 + 2 + 0], qold[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
qold[0 + 2 + 0] = tmp1;
qold[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float q[4];
float res[4];
if(0 < rem)
do {
res[0] = p_res[extract(0, 0) * 4 + programIndex];
} while(0);
if(1 < rem)
do {
res[1] = p_res[extract(1, 1) * 4 + programIndex];
} while(0);
if(2 < rem)
do {
res[2] = p_res[extract(2, 2) * 4 + programIndex];
} while(0);
if(3 < rem)
do {
res[3] = p_res[extract(3, 3) * 4 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 0 + 0] = tmp1;
res[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 2 + 0] = tmp1;
res[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 0 + 0], res[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 0 + 0] = tmp1;
res[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(res[0 + 2 + 0], res[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
res[0 + 2 + 0] = tmp1;
res[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adt[1];
if(programIndex < 1)
if(0 < rem)
do {
adt[0] = p_adt[extract(0, 0) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(1 < rem)
do {
adt[1] = p_adt[extract(1, 1) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(2 < rem)
do {
adt[2] = p_adt[extract(2, 2) * 1 + programIndex];
} while(0);
if(programIndex < 1)
if(3 < rem)
do {
adt[3] = p_adt[extract(3, 3) * 1 + programIndex];
} while(0);
;;;;
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
float adti; float del;
adti = 1.0/adt[0];
for(uniform int j = 0; j < 4; j++)
{
del = adti * res[j];
q[j] = qold[j] - del;
res[j] = 0.0;
int rindex = 0;
for(uniform int j = 0; j < programCount; j++)
{
if ((lanemask() & (1 << j)) != 0)
{
rms[extract(rindex, j)] += extract(del * del, j);
}
}
}
do {
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 0 + 0], adt[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 0 + 0] = tmp1;
adt[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(adt[0 + 2 + 0], adt[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
adt[0 + 2 + 0] = tmp1;
adt[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
;
if(programIndex < 1)
if(0 < rem)
p_adt[extract(0, 0) * 1 + programIndex] = adt[0];
if(programIndex < 1)
if(1 < rem)
p_adt[extract(1, 1) * 1 + programIndex] = adt[1];
if(programIndex < 1)
if(2 < rem)
p_adt[extract(2, 2) * 1 + programIndex] = adt[2];
if(programIndex < 1)
if(3 < rem)
p_adt[extract(3, 3) * 1 + programIndex] = adt[3];
} while(0);
do {
if(programCount != 4) {
print("Mismatch of compile time program counts: % vs. %\n", 4, programCount);
abort(2);
}
// #inner(formula, elemId)
// #worker(round)
do {
int r =0;
int t_offset = (1 << r) * (programIndex / (1 << (0 + 1)));
int t_index = programIndex & ((1 << 0) - 1);
int t_pc = programCount * ((programIndex >> 0) & 1);
do {
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 0 + 0] = tmp1;
q[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 2 + 0] = tmp1;
q[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
do {
int r =1;
int t_offset = (1 << r) * (programIndex / (1 << (1 + 1)));
int t_index = programIndex & ((1 << 1) - 1);
int t_pc = programCount * ((programIndex >> 1) & 1);
do {
float tmp1 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 0 + 0], q[0 + 0 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 0 + 0] = tmp1;
q[0 + 0 + 1] = tmp2;
} while (0);
do {
float tmp1 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], t_offset + t_index + t_pc);
float tmp2 = shuffle(q[0 + 2 + 0], q[0 + 2 + 1], programCount + t_offset + t_index + t_pc);
q[0 + 2 + 0] = tmp1;
q[0 + 2 + 1] = tmp2;
} while (0);
} while(0);
;
if(0 < rem)
p_q[extract(0, 0) * 4 + programIndex] = q[0];
if(1 < rem)
p_q[extract(1, 1) * 4 + programIndex] = q[1];
if(2 < rem)
p_q[extract(2, 2) * 4 + programIndex] = q[2];
if(3 < rem)
p_q[extract(3, 3) * 4 + programIndex] = q[3];
} while(0);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment