Skip to content

Instantly share code, notes, and snippets.

@wangyangkobe
Created December 15, 2016 05:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wangyangkobe/6e6a55f8d60d1c2299d1aab4845bf378 to your computer and use it in GitHub Desktop.
Save wangyangkobe/6e6a55f8d60d1c2299d1aab4845bf378 to your computer and use it in GitHub Desktop.
a parallel version of std::accumulate.
#include <thread>
#include <iostream>
#include <memory>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>
using namespace std;
template<typename Iterator, typename T>
struct accumulate_block
{
void operator() (Iterator first, Iterator last, T& result)
{
result = std::accumulate(first, last, result);
}
};
template<typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T& init)
{
unsigned long const length = std::distance(first, last);
if(!length)
return init;
const unsigned long min_per_thread = 25;
const unsigned long max_threads = (length + min_per_thread - 1) / min_per_thread;
const unsigned long hardware_threads = std::thread::hardware_concurrency();
const unsigned long num_threads = std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);
const unsigned long block_size = length / num_threads;
std::vector<T> results(num_threads);
std::vector<std::thread> threads(num_threads - 1);
Iterator block_start = first;
for(unsigned long i = 0; i < (num_threads - 1); i++)
{
Iterator block_end = block_start;
std::advance(block_end, block_size);
threads[i] = std::thread(accumulate_block<Iterator, T>(), block_start, block_end, std::ref(results[i]));
block_start = block_end;
}
accumulate_block<Iterator,T>()(block_start, last, results[num_threads-1]);
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
return init = std::accumulate(results.begin(), results.end(), init);
}
int main()
{
vector<int> vec(1000000);
for(int i = 0; i < 1000000; i++)
vec[i] = i + 1;
unsigned long result = 0;
cout<<parallel_accumulate(begin(vec), end(vec), result)<<endl;
cout<<result<<endl;
}
@wangyangkobe
Copy link
Author

A simple implementation of a parallel version of std::accumulate.
It divides the work among the threads, with a minimum number of elements per thread in order to avoid the overhead of too many threads.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment