goldshtn/primes-parallelization.cpp

## primes-parallelization.cpp
#include <chrono>
#include <thread>
#include <iostream>
#include <cmath>
#include <vector>
#include <functional>
#include <algorithm>

template <typename Fn>
double avg_time_ms(Fn&& fn, unsigned int repetitions) {
	double total = 0.0;
	for (unsigned int i = 0; i < repetitions; ++i) {
		auto start = std::chrono::steady_clock::now();
		fn();
		total += std::chrono::duration_cast<std::chrono::milliseconds>(
			std::chrono::steady_clock::now() - start).count() / 1000.0;
	}
	return total / repetitions;
}

bool is_prime(unsigned int n) {
	if (n == 2) return true;
	if (n % 2 == 0) return false;
#ifdef NAIVE_PRIMES
	unsigned int root = n;
#else
	unsigned int root = static_cast<unsigned int>(
		std::sqrt(static_cast<double>(n)));
#endif
	for (unsigned int x = 3; x <= root; x += 2) {
		if (n % x == 0) return false;
	}
	return true;
}

unsigned int count_primes(unsigned int begin, unsigned int end) {
	unsigned int count = 0;
	for (unsigned int n = begin; n < end; ++n) {
		if (is_prime(n)) ++count;
	}
	return count;
}

volatile unsigned int dont_optimize;

void parallelize_count(unsigned int nthreads, unsigned int begin, unsigned int end) {
	std::vector<std::thread> threads;
	unsigned int chunk_size = (end - begin) / nthreads;
	for (unsigned int i = 0; i < nthreads; ++i) {
		unsigned int chunk_begin = chunk_size*i;
		unsigned int chunk_end = chunk_begin + chunk_size;
		threads.emplace_back([=]() {
			dont_optimize = count_primes(chunk_begin, chunk_end);
		});
	}
	std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
}

int main() {
	unsigned int repetitions = 3;
#ifdef NAIVE_PRIMES
	unsigned int begin = 2, end = 100002;
#else
	unsigned int begin = 2, end = 8000002;
#endif
	std::cout << "#\ttime(s)" << std::endl;
	for (unsigned int nthreads = 1; nthreads <= 64; ++nthreads) {
		std::cout << nthreads << "\t" << avg_time_ms([=]() {
			parallelize_count(nthreads, begin, end);
		}, repetitions) 	<< std::endl;
	}
	return 0;
}
	#include <chrono>
	#include <thread>
	#include <iostream>
	#include <cmath>
	#include <vector>
	#include <functional>
	#include <algorithm>

	template <typename Fn>
	double avg_time_ms(Fn&& fn, unsigned int repetitions) {
	double total = 0.0;
	for (unsigned int i = 0; i < repetitions; ++i) {
	auto start = std::chrono::steady_clock::now();
	fn();
	total += std::chrono::duration_cast<std::chrono::milliseconds>(
	std::chrono::steady_clock::now() - start).count() / 1000.0;
	}
	return total / repetitions;
	}

	bool is_prime(unsigned int n) {
	if (n == 2) return true;
	if (n % 2 == 0) return false;
	#ifdef NAIVE_PRIMES
	unsigned int root = n;
	#else
	unsigned int root = static_cast<unsigned int>(
	std::sqrt(static_cast<double>(n)));
	#endif
	for (unsigned int x = 3; x <= root; x += 2) {
	if (n % x == 0) return false;
	}
	return true;
	}

	unsigned int count_primes(unsigned int begin, unsigned int end) {
	unsigned int count = 0;
	for (unsigned int n = begin; n < end; ++n) {
	if (is_prime(n)) ++count;
	}
	return count;
	}

	volatile unsigned int dont_optimize;

	void parallelize_count(unsigned int nthreads, unsigned int begin, unsigned int end) {
	std::vector<std::thread> threads;
	unsigned int chunk_size = (end - begin) / nthreads;
	for (unsigned int i = 0; i < nthreads; ++i) {
	unsigned int chunk_begin = chunk_size*i;
	unsigned int chunk_end = chunk_begin + chunk_size;
	threads.emplace_back([=]() {
	dont_optimize = count_primes(chunk_begin, chunk_end);
	});
	}
	std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
	}

	int main() {
	unsigned int repetitions = 3;
	#ifdef NAIVE_PRIMES
	unsigned int begin = 2, end = 100002;
	#else
	unsigned int begin = 2, end = 8000002;
	#endif
	std::cout << "#\ttime(s)" << std::endl;
	for (unsigned int nthreads = 1; nthreads <= 64; ++nthreads) {
	std::cout << nthreads << "\t" << avg_time_ms([=]() {
	parallelize_count(nthreads, begin, end);
	}, repetitions) << std::endl;
	}
	return 0;
	}