TalissonBento/source_code

## source_code
#include <stdio.h>
#include <time.h>

#include <algorithm>
#include <chrono>
#include <functional>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <thread>
#include <utility>
#include <vector>

using namespace std;

/**
 * Function object that folds one block of a range into a running total.
 *
 * Calling it adds every element of [first, last) onto the value currently held
 * in `result` and stores the sum back into `result`.
 *
 * @tparam Iterator  iterator type of the block being summed.
 * @tparam T         type of the running total.
 */
template<typename Iterator, typename T>
struct Accomulator_Block
{
	/**
	 * @param first   start of the block.
	 * @param last    one past the end of the block.
	 * @param result  in/out: starting value on entry, starting value plus the
	 *                block's elements on return.
	 */
	void operator()(Iterator first, Iterator last, T& result)
	{
		// std::accumulate works on its own copy of the seed, so `result` is
		// written exactly once, after the whole block has been summed.
		T block_total = std::accumulate(first, last, result);

		result = std::move(block_total);
	}
};

/**
 * Sums the range [first, last) onto `init`, spreading the work over several threads.
 *
 * The range is cut into `num_threads` contiguous blocks of `length / num_threads`
 * elements each; the final block also takes the remainder. `num_threads - 1` worker
 * threads sum the leading blocks while the calling thread sums the final one, and the
 * per-block totals are then added to `init` in block order.
 *
 * @tparam Iterator  iterator type; the range is measured with std::distance and walked
 *                   with std::advance. Iterators are copied into the worker threads.
 * @tparam T         accumulator type. Every block starts from a value-initialized T, so
 *                   T() must be neutral for `+` for the result to match a sequential sum.
 * @param first      start of the range.
 * @param last       one past the end of the range.
 * @param init       value the block totals are added to.
 * @return `init` plus every element of the range; `init` itself for an empty range.
 *
 * @note An exception thrown by the summation inside a worker thread is not propagated
 *       to the caller (it escapes the thread function).
 */
template<typename Iterator, typename T>
T Parallel_accomulate(Iterator first, Iterator last, T init)
{
	unsigned long const length = static_cast<unsigned long>(std::distance(first, last));

	if (!length) return init;

	unsigned long const Min_per_Thread = 25;

	// Rounded up so that a short range still gets one thread, and never more threads
	// than there are chunks of Min_per_Thread elements.
	unsigned long const Max_threads = (length + Min_per_Thread - 1) / Min_per_Thread;

	// hardware_concurrency() may return 0; fall back to 2 in that case.
	unsigned long const hardware_threads = std::thread::hardware_concurrency();

	unsigned long const num_threads =
		std::min(hardware_threads != 0 ? hardware_threads : 2UL, Max_threads);

	unsigned long const block_size = length / num_threads;

	// One slot per block, each starting at T().
	std::vector<T> th_results(num_threads);

	std::vector<std::thread> threads(num_threads - 1);

	// Joins every started worker on scope exit. Without it, an exception thrown while
	// launching a later thread (or while summing the final block) would destroy
	// joinable std::thread objects, which calls std::terminate.
	struct Join_Guard
	{
		explicit Join_Guard(std::vector<std::thread>& workers_) : workers(workers_) {}

		~Join_Guard()
		{
			for (std::thread& worker : workers)
			{
				if (worker.joinable()) worker.join();
			}
		}

		std::vector<std::thread>& workers;
	};

	Join_Guard const guard(threads);

	Iterator block_first = first;

	for (unsigned long i = 0; i < num_threads - 1; ++i)
	{
		Iterator block_last = block_first;
		std::advance(block_last, block_size);

		// Each worker owns exactly one slot, so no synchronization is needed beyond join().
		T* const slot = &th_results[i];

		threads[i] = std::thread([block_first, block_last, slot]
		{
			*slot = std::accumulate(block_first, block_last, *slot);
		});

		block_first = block_last;
	}

	// The calling thread sums the final block (including the remainder) while the
	// workers are running, instead of idling until they finish.
	T& last_slot = th_results[num_threads - 1];
	last_slot = std::accumulate(block_first, last, last_slot);

	// Wait for the workers before reading their slots.
	for (std::thread& worker : threads)
	{
		worker.join();
	}

	return std::accumulate(th_results.begin(), th_results.end(), init);
}

/**
 * Benchmark driver.
 *
 * Sums 100,000,000 elements of value 10 (starting from 1), first with
 * Parallel_accomulate and then with a plain iterator loop, printing each total
 * together with the elapsed time in seconds. Finally waits for a character on
 * stdin before returning.
 *
 * Timing uses std::chrono::steady_clock (elapsed wall-clock time). clock() is
 * avoided: on POSIX systems it reports processor time summed over all threads
 * of the process, which hides any speed-up of the threaded version.
 */
int main(int argc, char* argv[])
{
	(void)argc;
	(void)argv;

	using Bench_clock = std::chrono::steady_clock;
	using Seconds = std::chrono::duration<double>; // fractional seconds for "%.3f"

	// hardware_concurrency() returns unsigned, hence %u.
	printf("Hardware core: %u\n", std::thread::hardware_concurrency());

	std::vector<unsigned long> data(100000000, 10);

	Bench_clock::time_point started = Bench_clock::now();

	int total = Parallel_accomulate(data.begin(), data.end(), 1);

	Seconds elapsed = Bench_clock::now() - started;
	printf("Total: %d in %.3f\n", total, elapsed.count());

	total = 1;
	started = Bench_clock::now();

	// Alternative serial variants noted by the author as faster than the loop below:
	//
	//	total = accumulate(data.begin(), data.end(), total);
	//
	//	int size = data.size();
	//	for (int it = 0; it < size; ++it)
	//	{
	//		total += data[it];
	//	}

	// Variant under measurement (noted as the slower one); kept as written so the
	// comparison stays the same.
	for (auto it = data.begin(); it != data.end(); it++)
	{
		total += *it;
	}

	elapsed = Bench_clock::now() - started;
	printf("Total: %d in %.3f\n", total, elapsed.count());

	// Keep the console open until a character is read.
	getc(stdin);

	return 0;
}
	#include <stdio.h>
	#include <functional>
	#include <memory>
	#include <thread>
	#include <string>
	#include <vector>
	#include <algorithm>
	#include <utility>
	#include <numeric>
	#include <time.h>

	using namespace std;

	/**
	 * Function object that folds one block of a range into a running total.
	 *
	 * Calling it adds every element of [first, last) onto the value currently held
	 * in `result` and stores the sum back into `result`.
	 *
	 * @tparam Iterator  iterator type of the block being summed.
	 * @tparam T         type of the running total.
	 */
	template<typename Iterator, typename T>
	struct Accomulator_Block
	{
		/**
		 * @param first   start of the block.
		 * @param last    one past the end of the block.
		 * @param result  in/out: starting value on entry, starting value plus the
		 *                block's elements on return.
		 */
		void operator()(Iterator first, Iterator last, T& result)
		{
			// std::accumulate works on its own copy of the seed, so `result` is
			// written exactly once, after the whole block has been summed.
			T block_total = std::accumulate(first, last, result);

			result = std::move(block_total);
		}
	};

	/**
	 * Sums the range [first, last) onto `init`, spreading the work over several threads.
	 *
	 * The range is cut into `num_threads` contiguous blocks of `length / num_threads`
	 * elements each; the final block also takes the remainder. `num_threads - 1` worker
	 * threads sum the leading blocks while the calling thread sums the final one, and the
	 * per-block totals are then added to `init` in block order.
	 *
	 * @tparam Iterator  iterator type; the range is measured with std::distance and walked
	 *                   with std::advance. Iterators are copied into the worker threads.
	 * @tparam T         accumulator type. Every block starts from a value-initialized T, so
	 *                   T() must be neutral for `+` for the result to match a sequential sum.
	 * @param first      start of the range.
	 * @param last       one past the end of the range.
	 * @param init       value the block totals are added to.
	 * @return `init` plus every element of the range; `init` itself for an empty range.
	 *
	 * @note An exception thrown by the summation inside a worker thread is not propagated
	 *       to the caller (it escapes the thread function).
	 */
	template<typename Iterator, typename T>
	T Parallel_accomulate(Iterator first, Iterator last, T init)
	{
		unsigned long const length = static_cast<unsigned long>(std::distance(first, last));

		if (!length) return init;

		unsigned long const Min_per_Thread = 25;

		// Rounded up so that a short range still gets one thread, and never more threads
		// than there are chunks of Min_per_Thread elements.
		unsigned long const Max_threads = (length + Min_per_Thread - 1) / Min_per_Thread;

		// hardware_concurrency() may return 0; fall back to 2 in that case.
		unsigned long const hardware_threads = std::thread::hardware_concurrency();

		unsigned long const num_threads =
			std::min(hardware_threads != 0 ? hardware_threads : 2UL, Max_threads);

		unsigned long const block_size = length / num_threads;

		// One slot per block, each starting at T().
		std::vector<T> th_results(num_threads);

		std::vector<std::thread> threads(num_threads - 1);

		// Joins every started worker on scope exit. Without it, an exception thrown while
		// launching a later thread (or while summing the final block) would destroy
		// joinable std::thread objects, which calls std::terminate.
		struct Join_Guard
		{
			explicit Join_Guard(std::vector<std::thread>& workers_) : workers(workers_) {}

			~Join_Guard()
			{
				for (std::thread& worker : workers)
				{
					if (worker.joinable()) worker.join();
				}
			}

			std::vector<std::thread>& workers;
		};

		Join_Guard const guard(threads);

		Iterator block_first = first;

		for (unsigned long i = 0; i < num_threads - 1; ++i)
		{
			Iterator block_last = block_first;
			std::advance(block_last, block_size);

			// Each worker owns exactly one slot, so no synchronization is needed beyond join().
			T* const slot = &th_results[i];

			threads[i] = std::thread([block_first, block_last, slot]
			{
				*slot = std::accumulate(block_first, block_last, *slot);
			});

			block_first = block_last;
		}

		// The calling thread sums the final block (including the remainder) while the
		// workers are running, instead of idling until they finish.
		T& last_slot = th_results[num_threads - 1];
		last_slot = std::accumulate(block_first, last, last_slot);

		// Wait for the workers before reading their slots.
		for (std::thread& worker : threads)
		{
			worker.join();
		}

		return std::accumulate(th_results.begin(), th_results.end(), init);
	}

	/**
	 * Benchmark driver.
	 *
	 * Sums 100,000,000 elements of value 10 (starting from 1), first with
	 * Parallel_accomulate and then with a plain iterator loop, printing each total
	 * together with the elapsed time in seconds. Finally waits for a character on
	 * stdin before returning.
	 *
	 * Timing uses std::chrono::steady_clock (elapsed wall-clock time). clock() is
	 * avoided: on POSIX systems it reports processor time summed over all threads
	 * of the process, which hides any speed-up of the threaded version.
	 */
	int main(int argc, char* argv[])
	{
		(void)argc;
		(void)argv;

		using Bench_clock = std::chrono::steady_clock;
		using Seconds = std::chrono::duration<double>; // fractional seconds for "%.3f"

		// hardware_concurrency() returns unsigned, hence %u.
		printf("Hardware core: %u\n", std::thread::hardware_concurrency());

		std::vector<unsigned long> data(100000000, 10);

		Bench_clock::time_point started = Bench_clock::now();

		int total = Parallel_accomulate(data.begin(), data.end(), 1);

		Seconds elapsed = Bench_clock::now() - started;
		printf("Total: %d in %.3f\n", total, elapsed.count());

		total = 1;
		started = Bench_clock::now();

		// Alternative serial variants noted by the author as faster than the loop below:
		//
		//	total = accumulate(data.begin(), data.end(), total);
		//
		//	int size = data.size();
		//	for (int it = 0; it < size; ++it)
		//	{
		//		total += data[it];
		//	}

		// Variant under measurement (noted as the slower one); kept as written so the
		// comparison stays the same.
		for (auto it = data.begin(); it != data.end(); it++)
		{
			total += *it;
		}

		elapsed = Bench_clock::now() - started;
		printf("Total: %d in %.3f\n", total, elapsed.count());

		// Keep the console open until a character is read.
		getc(stdin);

		return 0;
	}