public
anonymous / hist.cpp
Created

  • Download Gist
hist.cpp
C++
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
/* Compile with: g++ -o hist -O3 hist.cpp -fopenmp -Wall */
#include <assert.h>

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "omp.h"
// Number of elements in the test vector that main() allocates and fills.
#define SIZE_TEST_VECTOR 400000000
// Number of blocks main() asks computeBlockHistograms to split the vector into;
// each block gets its own histogram.
#define NBLOCKS 8
// Number of histogram buckets per block; bucket (k - 1) counts the elements
// of the block that are evenly divisible by k.
#define NBUCKETS 10
// Shorthand for unsigned int used throughout this file.
typedef unsigned int uint;
// Adds one to histograms[rowStart + d - 1] for every divisor d in
// [1, numBuckets] that divides value evenly.
static void tallyDivisors(uint value, std::vector<uint>& histograms,
                          std::size_t rowStart, uint numBuckets) {
    for (uint divisor = 1; divisor <= numBuckets; divisor++) {
        if (value % divisor == 0)
            histograms[rowStart + (divisor - 1)]++;
    }
}

// Builds one divisibility histogram per block of `array` three times -- once
// serially and twice with different OpenMP loop decompositions -- and checks
// that both parallel results match the serial one.
//
// array      : input values; its size must equal numBlocks * blockSize
//              (enforced with assert).
// numBlocks  : number of consecutive blocks the array is split into.
// numBuckets : buckets per block; bucket (k - 1) counts elements divisible by k.
// blockSize  : number of elements in each block.
//
// Returns true when all three histograms agree. On the first mismatch it
// prints "<variant>: <index>:: <serial> != <parallel>" to std::cout and
// returns false.
bool computeBlockHistograms(std::vector<uint>& array, uint numBlocks, uint numBuckets, uint blockSize) {
    // Widen everything to size_t before multiplying or comparing with
    // array.size(), so products and indices are not truncated to the width
    // of unsigned int.
    const std::size_t blockCount = numBlocks;
    const std::size_t bucketCount = numBuckets;
    const std::size_t blockLength = blockSize;
    const std::size_t totalElements = array.size();
    const std::size_t histogramSize = blockCount * bucketCount;

    std::vector<uint> blockHistogramsPar1(histogramSize, 0);
    std::vector<uint> blockHistogramsPar2(histogramSize, 0);
    std::vector<uint> blockHistogramsSer(histogramSize, 0);

    assert(totalElements == blockCount * blockLength);

    // Nothing to count: all three histograms stay zero and trivially agree.
    // Returning here also keeps a zero blockSize from being used as a divisor
    // or as the OpenMP chunk size below.
    if (totalElements == 0)
        return true;

    // Perform computation serially
    for (std::size_t idx = 0; idx < totalElements; idx++) {
        const std::size_t blockNum = idx / blockLength;
        tallyDivisors(array[idx], blockHistogramsSer, blockNum * bucketCount, numBuckets);
    }

    // Perform computation in parallel, one whole block per loop iteration, so
    // each histogram row is only ever written by the thread that owns the block.
    #pragma omp parallel shared(blockHistogramsPar1)
    {
        #pragma omp for schedule(static)
        for (std::size_t blockNum = 0; blockNum < blockCount; blockNum++) {
            const std::size_t blockStart = blockNum * blockLength;
            for (std::size_t blockSubIdx = 0; blockSubIdx < blockLength; blockSubIdx++)
                tallyDivisors(array[blockStart + blockSubIdx], blockHistogramsPar1,
                              blockNum * bucketCount, numBuckets);
        }
    }

    // Perform computation in parallel over individual elements, using a static
    // schedule whose chunk size equals the block size so that every chunk
    // coincides with exactly one block.
    // NOTE(review): the original used a 32-bit loop index here and was marked
    // as failing with large thread counts. Presumably a runtime that computes
    // chunk starts as thread_id * chunk_size in the index type wraps past 2^32
    // (e.g. 86 * 50,000,000), handing some threads overlapping ranges -- the
    // wider index avoids that; confirm against the compiler in use.
    #pragma omp parallel shared(blockHistogramsPar2)
    {
        #pragma omp for schedule(static, blockLength)
        for (std::size_t idx = 0; idx < totalElements; idx++) {
            const std::size_t blockNum = idx / blockLength;
            tallyDivisors(array[idx], blockHistogramsPar2, blockNum * bucketCount, numBuckets);
        }
    }

    // Compare both parallel results against the serial reference, reporting
    // only the first difference found.
    for (std::size_t i = 0; i < histogramSize; i++) {
        if (blockHistogramsSer[i] != blockHistogramsPar1[i]) {
            std::cout << 1 << ": " << i << ":: " << blockHistogramsSer[i] << " != " << blockHistogramsPar1[i] << std::endl;
            return false;
        }
        if (blockHistogramsSer[i] != blockHistogramsPar2[i]) {
            std::cout << 2 << ": " << i << ":: " << blockHistogramsSer[i] << " != " << blockHistogramsPar2[i] << std::endl;
            return false;
        }
    }
    return true;
}
// Overwrites every element of `array` with successive values from rand(),
// after re-seeding the generator with the fixed seed 0.
// Iterators are used instead of a 32-bit index so the loop also terminates
// for vectors holding more elements than an unsigned int can count.
void initializeRandomly(std::vector<uint>& array) {
    std::srand(0);
    for (std::vector<uint>::iterator it = array.begin(); it != array.end(); ++it)
        *it = static_cast<uint>(std::rand());  // rand() returns int; convert explicitly
}
// Allocates a SIZE_TEST_VECTOR-element vector, fills it via
// initializeRandomly, and runs computeBlockHistograms over NBLOCKS equal
// blocks with NBUCKETS buckets each.
// Prints "Success!" and returns 0 when the check passes; returns 1 when it
// fails, so the exit status reflects the outcome.
int main(void)
{
    std::vector<uint> array(SIZE_TEST_VECTOR, 0);
    initializeRandomly(array);

    // Make the size_t -> uint conversion of the per-block element count explicit.
    const uint blockSize = static_cast<uint>(array.size() / NBLOCKS);
    if (!computeBlockHistograms(array, NBLOCKS, NBUCKETS, blockSize))
        return 1;

    std::cout << "Success!" << std::endl;
    return 0;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.