Skip to content

Instantly share code, notes, and snippets.

@yc0
Last active September 11, 2019 08:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yc0/a94a057ee75ef7279be32ae699014a97 to your computer and use it in GitHub Desktop.
Save yc0/a94a057ee75ef7279be32ae699014a97 to your computer and use it in GitHub Desktop.
Modern C++ Concurrency in Depth
/**
MacOS : clang++ -Wall -std=c++17 parallel_accumulate.cpp -O3 -o out
----------------------------
num of cores : (8) w/o task switching
1166029 (ns)
the anwser : 45000497
2564850 (ns)
the anwser : 45000497
----------------------------
**/
#include <time.h>

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <system_error>
#include <thread>
#include <vector>
// Minimum number of elements per worker: parallel_accumulate divides the input
// size by this value to cap how many threads it is allowed to use.
#define MIN_BLOCK_SIZE 1000
/**
 * Adds the sum of the elements in [start, end) to `ref`.
 *
 * The partial sum is accumulated in type T (seeded with a value-initialized
 * T) rather than in `int`, so floating-point values are not truncated and
 * wide integer sums are not narrowed before being added to `ref`.
 *
 * @param start  first element of the range
 * @param end    one past the last element of the range
 * @param ref    running total; updated in place
 */
template <typename iterator, typename T>
void accumulate(iterator start, iterator end, T &ref)
{
    ref += std::accumulate(start, end, T{});
}
/**
 * Sums the elements of [start, end) across several threads and returns
 * `ref` plus that sum.
 *
 * The range is split into equally sized blocks (the last block also takes
 * the remainder). The number of blocks is the smaller of
 * `size / MIN_BLOCK_SIZE` and `std::thread::hardware_concurrency()`, but never
 * less than one, so short ranges and platforms that report 0 hardware threads
 * are summed on the calling thread instead of dividing by zero.
 *
 * Partial sums are accumulated in T (not `int`), so floating-point and wide
 * integer element types keep their precision.
 *
 * @param start  first element of the range
 * @param end    one past the last element of the range
 * @param ref    initial value of the sum; read but not modified
 * @return       `ref` + sum of all elements; `ref` itself for an empty range
 *
 * @note Adding two T values must not throw: an exception escaping a worker
 *       thread terminates the program.
 */
template <typename iterator, typename T>
T parallel_accumulate(iterator start, iterator end, T &ref)
{
    using diff_t = typename std::iterator_traits<iterator>::difference_type;

    const diff_t sz = std::distance(start, end);
    if (sz <= 0) {
        return ref;
    }

    const diff_t allowed_by_elements = sz / MIN_BLOCK_SIZE;
    // hardware_concurrency() may legitimately return 0 ("unknown").
    const diff_t allowed_by_hardware =
        static_cast<diff_t>(std::thread::hardware_concurrency());
    const diff_t num_threads =
        std::max<diff_t>(1, std::min(allowed_by_elements, allowed_by_hardware));
    const diff_t block_size = sz / num_threads;

    std::vector<T> partial(static_cast<std::size_t>(num_threads), T{});
    std::vector<std::thread> workers;
    // Reserve up front so emplace_back never reallocates running threads.
    workers.reserve(static_cast<std::size_t>(num_threads - 1));

    // The first num_threads-1 blocks go to worker threads.
    for (diff_t i = 0; i + 1 < num_threads; ++i) {
        iterator last = start;
        std::advance(last, block_size);
        T *slot = &partial[static_cast<std::size_t>(i)];
        try {
            workers.emplace_back(
                [start, last, slot] { *slot = std::accumulate(start, last, T{}); });
        } catch (const std::system_error &) {
            // The thread could not be started: sum this block right here so
            // the already-running workers can still be joined normally.
            *slot = std::accumulate(start, last, T{});
        }
        start = last;
    }

    // The calling thread handles the final block, including any remainder.
    partial.back() = std::accumulate(start, end, T{});

    for (std::thread &worker : workers) {
        worker.join();
    }

    return std::accumulate(partial.begin(), partial.end(), ref);
}
/**
 * Benchmark driver: fills a buffer with 10,000,000 random digits (0-9), then
 * sums it once with parallel_accumulate and once with std::accumulate,
 * printing the elapsed time in nanoseconds and the result of each run.
 */
int main() {
    std::srand(static_cast<unsigned>(time(nullptr)));

    const int sz = 10000000;
    // The vector owns the buffer, so it is released on every exit path.
    std::vector<int> data(sz);
    for (int &value : data)
        value = std::rand() % 10;

    std::cout << "num of cores : (" << std::thread::hardware_concurrency()
              << ") w/o task switching" << std::endl;

    int *const first = data.data();
    int *const last = first + data.size();
    int ans = 0,
        ref = 0;

    // Run 1: multi-threaded sum.
    auto start = std::chrono::high_resolution_clock::now();
    ans = parallel_accumulate<int*, int>(first, last, ref);
    auto finish = std::chrono::high_resolution_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(finish-start).count() << " (ns)\n";
    std::cout << "the anwser : " << ans << std::endl;

    // Run 2: single-threaded baseline over the same data.
    ref = 0;
    ans = 0;
    start = std::chrono::high_resolution_clock::now();
    ans = std::accumulate(first, last, ref);
    finish = std::chrono::high_resolution_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(finish-start).count() << " (ns)\n";
    std::cout << "the anwser : " << ans << std::endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment