Skip to content

Instantly share code, notes, and snippets.

@tonykero
Created July 27, 2017 22:41
Show Gist options
  • Save tonykero/9512f2fb7f47d1ee687ae8595b17666e to your computer and use it in GitHub Desktop.
Save tonykero/9512f2fb7f47d1ee687ae8595b17666e to your computer and use it in GitHub Desktop.
parallel_for implementations
#include <chrono> // std::chrono::steady_clock, std::chrono::duration
#include <cmath> // std::ceil
#include <cstddef> // size_t
#include <cstdint> // uint64_t
#include <ctime> // std::clock_t, std::clock()
#include <functional> // std::function
#include <future> // std::future
#include <iostream> // std::cout
#include <mutex> // std::mutex, std::lock_guard
#include <string> // std::cout << std::string
#include <thread> // std::thread, std::async
#include <vector> // std::vector
/// @brief Runs a callable once and prints how long it took to std::cout.
///
/// The printed line has the form `<label>\tdata = <data>\t<elapsed>ms`.
/// `_data` is streamed only after the callable has returned, so it shows
/// whatever value the callable left in it.
///
/// @tparam UserType  Type of the reported value; must be streamable to std::cout.
/// @param _label     Text printed at the start of the line.
/// @param _data      Value printed after the callable has run.
/// @param _function  Callable to time; invoked exactly once.
template <typename UserType>
void benchmark(std::string _label, UserType& _data, std::function<void(void)> _function)
{
    // std::clock() measures processor time used by the process, which may
    // advance faster than wall time when several threads are running. A
    // monotonic wall clock keeps single- and multi-threaded runs comparable.
    const auto start = std::chrono::steady_clock::now();
    _function();
    const auto stop = std::chrono::steady_clock::now();

    const std::chrono::duration<double, std::milli> elapsed = stop - start;
    std::cout << _label << "\t" << "data = " << _data << "\t" << elapsed.count() << "ms" << std::endl;
}
/// @brief Splits the half-open range [begin, end) into contiguous blocks and
///        runs `_function` on every block concurrently.
///
/// One task per non-empty block is launched with std::launch::async. The number
/// of blocks never exceeds std::thread::hardware_concurrency() (a single block
/// is used when that value is reported as 0). The call returns once every task
/// has finished.
///
/// @param begin      First index of the range.
/// @param end        One past the last index; nothing is invoked when `end <= begin`.
/// @param _function  Called as `_function(blockBegin, blockEnd)` with
///                   `begin <= blockBegin < blockEnd <= end`. Calls happen on
///                   several threads at once, so the callable must synchronise
///                   any access to shared state itself.
/// @throws Whatever escaped `_function`; when several blocks throw, the
///         exception of the earliest block is the one rethrown.
void parallel_for(size_t begin, size_t end, std::function<void(uint64_t,uint64_t)> _function)
{
    if (end <= begin)
        return; // empty or inverted range: `end - begin` would wrap around

    const size_t total = end - begin;

    // hardware_concurrency() is only a hint and may be 0 when it is unknown.
    const size_t hardwareThreads = std::thread::hardware_concurrency();
    const size_t maxBlocks = (hardwareThreads == 0) ? 1 : hardwareThreads;

    // Integer ceiling division: going through float loses precision above 2^24
    // elements and would leave the tail of the range unprocessed.
    const size_t blockIters = total / maxBlocks + ((total % maxBlocks != 0) ? 1 : 0);
    const size_t blockCount = total / blockIters + ((total % blockIters != 0) ? 1 : 0);

    std::vector<std::future<void>> futures;
    futures.reserve(blockCount);

    for (size_t blockBegin = begin; blockBegin < end; )
    {
        // Clamp through the remaining length so `blockBegin + blockIters`
        // can never overflow or run past `end`.
        const size_t remaining = end - blockBegin;
        const size_t blockEnd = blockBegin + ((remaining < blockIters) ? remaining : blockIters);

        futures.push_back(std::async(std::launch::async,
            [blockBegin, blockEnd, &_function]() -> void
            {
                _function(blockBegin, blockEnd);
            }));

        blockBegin = blockEnd;
    }

    // get() instead of wait(): wait() would silently drop worker exceptions.
    // If get() throws, the remaining std::async futures still join in their
    // destructors before `_function` goes out of scope.
    for (auto &future : futures)
        future.get();
}
/// Reads an iteration count from standard input and times three ways of summing
/// the integers in [0, iterations): parallel_for, a plain sequential loop, and
/// an OpenMP reduction. Each result is reported through benchmark().
///
/// The sum is kept in an unsigned int, so it wraps modulo 2^32 for large counts;
/// all three variants wrap the same way and therefore stay comparable.
///
/// @return 0 on success, 1 when no iteration count could be read.
int main()
{
    unsigned int iterations = 0;
    if (!(std::cin >> iterations))
    {
        std::cerr << "expected an iteration count (unsigned integer) on standard input" << std::endl;
        return 1;
    }

    unsigned int acc = 0;
    benchmark<unsigned int>("parallel_for", acc,
        [&]()
        {
            // Blocks run on several threads; each one sums privately and the
            // mutex serialises the single write it makes to the shared total.
            std::mutex accMutex;
            parallel_for(0, iterations,
                [&acc, &accMutex](uint64_t blockBegin, uint64_t blockEnd)
                {
                    unsigned int local_acc = 0;
                    for (uint64_t i = blockBegin; i < blockEnd; i++)
                    {
                        local_acc += static_cast<unsigned int>(i);
                    }
                    std::lock_guard<std::mutex> lock(accMutex);
                    acc += local_acc;
                }
            );
        }
    );

    acc = 0;
    benchmark<unsigned int>("sequential_for", acc,
        [&]()
        {
            for (unsigned int i = 0; i < iterations; i++)
            {
                acc += i;
            }
        }
    );

    acc = 0;
    benchmark<unsigned int>("openmp_for", acc,
        [&]()
        {
            unsigned int acc2 = 0;
            // A signed 64-bit index covers every unsigned int count without the
            // signed overflow an `int` index hits above INT_MAX, and stays a
            // signed loop variable as older OpenMP implementations require.
            const long long limit = static_cast<long long>(iterations);
            #pragma omp parallel for reduction(+:acc2)
            for (long long i = 0; i < limit; i++)
            {
                acc2 += static_cast<unsigned int>(i);
            }
            acc = acc2;
        }
    );

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment