Skip to content

Instantly share code, notes, and snippets.

@TalissonBento
Last active August 29, 2015 14:19
Show Gist options
  • Save TalissonBento/2be6fe35e66a8732244e to your computer and use it in GitHub Desktop.
Save TalissonBento/2be6fe35e66a8732244e to your computer and use it in GitHub Desktop.
Parallel Accumulator
#include <stdio.h>
#include <time.h>

#include <algorithm>
#include <chrono>
#include <functional>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <thread>
#include <utility>
#include <vector>
using namespace std;
// Callable that folds one block of a range into an accumulator.
//
// Invoking it adds every element of [first, last) onto the value already
// stored in `result` (left to right, using operator+) and writes the sum back
// into `result`. An empty block leaves the stored value unchanged.
template<typename Iterator, typename T>
struct Accomulator_Block
{
    void operator()(Iterator first, Iterator last, T& result)
    {
        // Work on a local copy so `result` is written exactly once, at the end.
        T running_total = result;
        for (Iterator cursor = first; cursor != last; ++cursor)
        {
            running_total = running_total + *cursor;
        }
        result = running_total;
    }
};
// Sums the range [first, last) onto `init`, spreading the work over threads.
//
// The range is cut into `num_threads` contiguous blocks, where `num_threads`
// is the smaller of
//   - std::thread::hardware_concurrency() (2 is assumed when it reports 0), and
//   - ceil(length / 25), so that short ranges do not spawn more threads than
//     they have work for.
// All blocks but the last run on their own std::thread; the last block, which
// also absorbs the division remainder, is summed on the calling thread while
// the workers are running.
//
// Requirements:
//   - Iterator must be multi-pass (forward iterator or better): the range is
//     measured first and traversed again by the workers.
//   - T must be default-constructible, because every per-block partial sum
//     starts from T(); T() is therefore expected to be neutral for operator+.
//
// Returns `init` plus the per-block partial sums, folded in block order. For
// types whose addition is not associative (e.g. floating point) the grouping
// differs from a plain left-to-right std::accumulate.
//
// An exception that escapes a worker thread terminates the program, as with
// any std::thread entry point.
template<typename Iterator, typename T>
T Parallel_accomulate(Iterator first, Iterator last, T init)
{
    unsigned long const length = static_cast<unsigned long>(std::distance(first, last));
    if (!length) return init;

    unsigned long const Min_per_Thread = 25;
    // Upper bound on useful threads for short ranges (rounded up).
    unsigned long const Max_threads = (length + Min_per_Thread - 1) / Min_per_Thread;
    unsigned long const hardware_threads = std::thread::hardware_concurrency();
    unsigned long const num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2UL, Max_threads);
    unsigned long const block_size = length / num_threads;

    std::vector<T> th_results(num_threads);
    std::vector<std::thread> threads;
    threads.reserve(num_threads - 1);

    // Joins whatever is still joinable when the scope is left, so an exception
    // thrown while spawning or while summing on this thread cannot destroy a
    // running std::thread (which would call std::terminate).
    struct Thread_joiner
    {
        std::vector<std::thread>& workers;
        ~Thread_joiner()
        {
            for (std::thread& worker : workers)
            {
                if (worker.joinable()) worker.join();
            }
        }
    };
    Thread_joiner joiner{threads};

    // Adds one block onto the partial sum stored in `result`.
    auto const sum_block = [](Iterator block_first, Iterator block_last, T& result)
    {
        result = std::accumulate(block_first, block_last, result);
    };

    Iterator block_end = first;
    for (unsigned long i = 0; i < num_threads - 1; ++i)
    {
        Iterator const block_start = block_end;
        std::advance(block_end, block_size);
        threads.emplace_back(sum_block, block_start, block_end, std::ref(th_results[i]));
    }

    // Sum the tail on this thread *before* joining so it overlaps the workers.
    sum_block(block_end, last, th_results[num_threads - 1]);

    // Every partial sum must be complete before they are combined.
    for (std::thread& worker : threads)
    {
        worker.join();
    }

    return std::accumulate(th_results.begin(), th_results.end(), init);
}
// Benchmark driver: sums 100,000,000 elements once with Parallel_accomulate
// and once with a plain iterator loop, printing each total and the elapsed
// wall-clock time in seconds. Waits for a character on stdin before exiting.
int main(int argc, char* argv[])
{
    // Wall-clock timing. Processor time (clock()) is not a fair measure when
    // comparing a multi-threaded run against a single-threaded one.
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    // hardware_concurrency() returns unsigned int, hence %u.
    printf("Hardware core: %u\n", std::thread::hardware_concurrency());

    std::vector<unsigned long> data(100000000, 10);

    Clock::time_point start = Clock::now();
    int total = Parallel_accomulate(data.begin(), data.end(), 1);
    Seconds elapsed = Clock::now() - start;
    printf("Total: %d in %.3f\n", total, elapsed.count());

    total = 1;
    start = Clock::now();
    // Alternative sequential variants kept for manual comparison:
    // total = accumulate(data.begin(), data.end(), total);// Faster
    /*
    int size = data.size();
    for (int it = 0; it < size; ++it) //Faster
    {
    total += data[it];
    }
    */
    for (auto it = data.begin(); it != data.end(); it++) //Slower
    {
        total += *it;
    }
    elapsed = Clock::now() - start;
    printf("Total: %d in %.3f\n", total, elapsed.count());

    // Wait for input before exiting.
    getc(stdin);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment