gchatelet/concurrency.cpp

## concurrency.cpp
#include <algorithm>
#include <chrono>
#include <future>
#include <thread>
#include <vector>

using namespace std;
using namespace std::chrono;

/// Scoped timer: prints a label when constructed and the elapsed time, in
/// whole milliseconds, when destroyed.
struct StopWatch {
  /// Records the start time, then prints `message` followed by a newline.
  StopWatch(const char* message) :
      start(std::chrono::steady_clock::now()) {
    printf("%s\n", message);
  }

  /// Prints the time elapsed since construction as "<N>ms".
  ~StopWatch() {
    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
        std::chrono::steady_clock::now() - start);
    // milliseconds::rep is only specified as a signed integer of at least
    // 45 bits, so widen it explicitly rather than assuming it is `long`.
    printf("%lldms\n", static_cast<long long>(elapsed.count()));
  }

private:
  // steady_clock is monotonic, so the measured interval cannot be skewed by
  // system clock adjustments.
  std::chrono::steady_clock::time_point start;
};

/// Adds log(sqrt(current * current)) to `result`.
///
/// @param current  input byte; the square is computed in int arithmetic.
/// @param result   accumulator, updated in place.
void processElement(char current, double &result) {
  const auto squared = current * current;
  const auto root = sqrt(squared);
  result = result + log(root);
}

/// Passes every byte in [begin, end) to processElement, accumulating straight
/// into *result.
///
/// *result is never reset here, so the caller must initialise it first.
void processRange(const char* const begin, const char* const end, double *result) {
  const char* cursor = begin;
  while (cursor != end) {
    processElement(*cursor, *result);
    ++cursor;
  }
}

/// Passes every byte in [begin, end) to processElement, accumulating into a
/// local variable and storing the total to *result once at the end.
///
/// Unlike processRange, *result is overwritten rather than added to.
void processRangeWithTemporary(const char* const begin, const char* const end, double *result) {
  // Must start at zero: processElement adds to its accumulator, and reading
  // an uninitialised double is undefined behaviour.
  double tmp = 0;
  for (auto itr = begin; itr != end; ++itr)
    processElement(*itr, tmp);
  *result = tmp;
}

/// Times the same per-byte computation over a 512MB buffer three ways:
/// on one thread, on one thread per hardware thread with each thread adding
/// into its own slot of a shared array, and on one thread per hardware thread
/// with each thread accumulating locally and storing its slot once.
///
/// @throws std::runtime_error when the buffer size is not an exact multiple
///         of the worker count.
int main() {
  const std::vector<char> buffer(512 * 1024 * 1024); // 512MB
  // hardware_concurrency() may return 0 when the value cannot be determined;
  // fall back to a single worker instead of dividing by zero below.
  const auto detected = std::thread::hardware_concurrency();
  const auto processors = detected == 0 ? 1u : detected;
  const auto slice = buffer.size() / processors;
  if (processors * slice != buffer.size())
    throw std::runtime_error("buffer size must be a multiple of processors");

  // size() returns size_t, which is printed with %zu.
  printf("Now processing %zu bytes\n", buffer.size());

  {
    StopWatch stopwatch("Singlethread");
    double result = 0;
    processRange(buffer.data(), buffer.data() + buffer.size(), &result);
  }

  {
    StopWatch stopwatch("Multithread accessing colocated elements");
    // processRange adds into its output slot, so every slot must start at
    // zero. A vector replaces the non-standard variable-length array while
    // keeping the slots adjacent in memory.
    std::vector<double> results(processors, 0.0);
    auto output = results.data();
    auto begin = buffer.data();

    std::vector<std::thread> threads;
    threads.reserve(processors);
    // Each lambda copies the current begin/output, so every thread gets its
    // own slice of the buffer and its own result slot.
    for (auto i = 0u; i < processors; ++i, begin += slice, ++output)
      threads.emplace_back([=]() {processRange(begin, begin + slice, output);});

    for (auto& worker : threads)
      worker.join();
  }

  {
    StopWatch stopwatch("Multithread accessing temporary elements");
    std::vector<double> results(processors, 0.0);
    auto output = results.data();
    auto begin = buffer.data();

    std::vector<std::thread> threads;
    threads.reserve(processors);
    for (auto i = 0u; i < processors; ++i, begin += slice, ++output)
      threads.emplace_back([=]() {processRangeWithTemporary(begin, begin + slice, output);});

    for (auto& worker : threads)
      worker.join();
  }

  return EXIT_SUCCESS;
}
	#include <algorithm>
	#include <chrono>
	#include <future>
	#include <thread>
	#include <vector>

	using namespace std;
	using namespace std::chrono;

	/// Scoped timer: prints a label when constructed and the elapsed time, in
	/// whole milliseconds, when destroyed.
	struct StopWatch {
	  /// Records the start time, then prints `message` followed by a newline.
	  StopWatch(const char* message) :
	      start(std::chrono::steady_clock::now()) {
	    printf("%s\n", message);
	  }

	  /// Prints the time elapsed since construction as "<N>ms".
	  ~StopWatch() {
	    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
	        std::chrono::steady_clock::now() - start);
	    // milliseconds::rep is only specified as a signed integer of at least
	    // 45 bits, so widen it explicitly rather than assuming it is `long`.
	    printf("%lldms\n", static_cast<long long>(elapsed.count()));
	  }

	private:
	  // steady_clock is monotonic, so the measured interval cannot be skewed by
	  // system clock adjustments.
	  std::chrono::steady_clock::time_point start;
	};

	/// Adds log(sqrt(current * current)) to `result`.
	///
	/// @param current  input byte; the square is computed in int arithmetic.
	/// @param result   accumulator, updated in place.
	void processElement(char current, double &result) {
	  const auto squared = current * current;
	  const auto root = sqrt(squared);
	  result = result + log(root);
	}

	/// Passes every byte in [begin, end) to processElement, accumulating
	/// straight into *result.
	///
	/// *result is never reset here, so the caller must initialise it first.
	void processRange(const char* const begin, const char* const end, double *result) {
	  for (auto itr = begin; itr != end; ++itr)
	    // processElement takes (char, double&): dereference both pointers.
	    processElement(*itr, *result);
	}

	/// Passes every byte in [begin, end) to processElement, accumulating into a
	/// local variable and storing the total to *result once at the end.
	///
	/// Unlike processRange, *result is overwritten rather than added to.
	void processRangeWithTemporary(const char* const begin, const char* const end, double *result) {
	  // Must start at zero: processElement adds to its accumulator, and reading
	  // an uninitialised double is undefined behaviour.
	  double tmp = 0;
	  for (auto itr = begin; itr != end; ++itr)
	    processElement(*itr, tmp);
	  *result = tmp;
	}

	/// Times the same per-byte computation over a 512MB buffer three ways:
	/// on one thread, on one thread per hardware thread with each thread adding
	/// into its own slot of a shared array, and on one thread per hardware
	/// thread with each thread accumulating locally and storing its slot once.
	///
	/// @throws std::runtime_error when the buffer size is not an exact multiple
	///         of the worker count.
	int main() {
	  const std::vector<char> buffer(512 * 1024 * 1024); // 512MB
	  // hardware_concurrency() may return 0 when the value cannot be determined;
	  // fall back to a single worker instead of dividing by zero below.
	  const auto detected = std::thread::hardware_concurrency();
	  const auto processors = detected == 0 ? 1u : detected;
	  const auto slice = buffer.size() / processors;
	  if (processors * slice != buffer.size())
	    throw std::runtime_error("buffer size must be a multiple of processors");

	  // size() returns size_t, which is printed with %zu.
	  printf("Now processing %zu bytes\n", buffer.size());

	  {
	    StopWatch stopwatch("Singlethread");
	    double result = 0;
	    processRange(buffer.data(), buffer.data() + buffer.size(), &result);
	  }

	  {
	    StopWatch stopwatch("Multithread accessing colocated elements");
	    // processRange adds into its output slot, so every slot must start at
	    // zero. A vector replaces the non-standard variable-length array while
	    // keeping the slots adjacent in memory.
	    std::vector<double> results(processors, 0.0);
	    auto output = results.data();
	    auto begin = buffer.data();

	    std::vector<std::thread> threads;
	    threads.reserve(processors);
	    // Each lambda copies the current begin/output, so every thread gets its
	    // own slice of the buffer and its own result slot.
	    for (auto i = 0u; i < processors; ++i, begin += slice, ++output)
	      threads.emplace_back([=]() {processRange(begin, begin + slice, output);});

	    for (auto& worker : threads)
	      worker.join();
	  }

	  {
	    StopWatch stopwatch("Multithread accessing temporary elements");
	    std::vector<double> results(processors, 0.0);
	    auto output = results.data();
	    auto begin = buffer.data();

	    std::vector<std::thread> threads;
	    threads.reserve(processors);
	    for (auto i = 0u; i < processors; ++i, begin += slice, ++output)
	      threads.emplace_back([=]() {processRangeWithTemporary(begin, begin + slice, output);});

	    for (auto& worker : threads)
	      worker.join();
	  }

	  return EXIT_SUCCESS;
	}