Skip to content

Instantly share code, notes, and snippets.

@zoep
Created January 22, 2013 13:18
Show Gist options
  • Save zoep/4594612 to your computer and use it in GitHub Desktop.
Save zoep/4594612 to your computer and use it in GitHub Desktop.
lu_tiled version with nested parallel for
/**** Tiled LU decomposition *****/
void lu(double **a, int range, int B)
{
int k;
double ** l_inv, ** u_inv;
for (k=0;k<range-1;k++) {
/****Compute LU decomposition on upper left tile*****/
lu_kernel(a,k*B,k*B,B,B);
/****Compute inverted L and U matrices of upper left tile*****/
l_inv=get_inv_l(a,k*B,k*B,B,B);
u_inv=get_inv_u(a,k*B,k*B,B,B);
/*****Compute LU decomposition on upper horizontal frame and left vertical frame*****/
tbb::parallel_for(
tbb::blocked_range<size_t>(k+1, range),
[=](const tbb::blocked_range<size_t>& r) {
for ( size_t i = r.begin(); i != r.end(); ++i)
{
mm_lower(l_inv,0,0,a,k*B,i*B,a,k*B,i*B,B,B,B);
mm_upper(a,i*B,k*B,u_inv,0,0,a,i*B,k*B,B,B,B);
}
});
/*****Update trailing blocks*****/
tbb::parallel_for(
tbb::blocked_range<size_t>(k+1, range),
[=](const tbb::blocked_range<size_t>& r) {
for (size_t i = r.begin(); i != r.end(); ++i)
{
tbb::parallel_for(
tbb::blocked_range<size_t>(k+1, range),
[=](const tbb::blocked_range<size_t>& r) {
for (size_t j = r.begin(); j != r.end(); ++j)
{
mm_update(a,i*B,k*B,a,k*B,j*B,a,i*B,j*B,B,B,B);
}
});
}
});
}
/***** Compute LU on final diagonal block *****/
lu_kernel(a,(range-1)*B,(range-1)*B,B,B);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment