Last active
August 29, 2015 14:25
-
-
Save bmyerz/709bf367785ade27eaba to your computer and use it in GitHub Desktop.
coarse parallel running sum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
blockdist = BlockDistribution[P partitions, N elements] | |
global int localsum[P] (blockdist); // for per-partition sums (alternatively represented as partition-private storage) | |
global int A[N] (blockdist); // input | |
global int B[N] (blockdist); // intermediate | |
global int C[N] (blockdist); // result | |
// compute running sum, within each partition | |
// we can only break dependences between groups of iterations with knowledge of how blockdist works | |
forall i in 0..N { | |
p = blockdist(i).blocknum; | |
localsum[p] += A[i]; | |
B[i] = localsum[p]; | |
} | |
// for each partition, compute the running sum up to the end of it | |
forall p in 1..P { | |
localsum[p] = localsum[p-1] + localsum[p] | |
} | |
// update running sum to reflect partition sums | |
forall i in blockdist(c).left .. N { | |
p = blockdist(i).blocknum | |
C[i] = B[i] + localsum[p-1] | |
} | |
////////////////////////////////////////////////// | |
// Suppose the above is a TV-UDF in a SQL query | |
// If there were a filter predicate on C[], then it could be pipelined onto the last loop | |
// update running sum to reflect partition sums | |
forall i in blockdist(c).left .. N { | |
p = blockdist(i).blocknum | |
auto res = B[i] + localsum[p-1] | |
if (predicate(res)) { | |
C.unordered_append(B[i] + localsum[p-1]); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment