public
Last active

parallel_find using Intel TBB

  • Download Gist
parallel_find.cpp
C++
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
#include <boost/range/irange.hpp>
#include <boost/range/algorithm.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/median.hpp>
 
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/task.h>
#include <tbb/atomic.h>
#include <tbb/tick_count.h>
 
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <limits>
#include <vector>
 
// Note: all thresholds (serial cutoff / grain size) were picked arbitrarily and are not tuned.
 
// Recursive TBB task that searches [begin, end) for `value`.
//
// Ranges shorter than the serial cutoff are scanned with std::find. Longer
// ranges are split in half and each half is handed to a child task; the parent
// blocks until both children finish and then forwards whichever child reported
// a match (the left half is preferred when both did).
//
// Result protocol: `result_iter` refers to a slot owned by whoever created the
// task. The creator pre-loads it with this task's `end`; the task overwrites it
// only when a match is found, so "slot == end" means "not found".
//
// Early exit: `done_ptr` is shared by every task of one search. A task that
// starts after the flag has been raised returns immediately. Because of that,
// the iterator returned for a range containing several matches is *a* match,
// not necessarily the first one.
template<typename Iter, typename T>
struct find_task : tbb::task {
  const T& value;               // value being searched for; must outlive the task
  tbb::atomic<bool>& done_ptr;  // shared "a match was found" flag (a reference, despite the name)
  Iter begin;                   // start of this task's sub-range
  Iter end;                     // one past the end of this task's sub-range
  Iter& result_iter;            // creator-owned slot that receives the match position

  find_task(const T& search_val, tbb::atomic<bool>& done_flag, Iter b, Iter e, Iter& f_ptr)
    : value(search_val), done_ptr(done_flag), begin(b), end(e), result_iter(f_ptr) {}

  tbb::task* execute() override {
    // Somebody else already found the value: leave the slot at its
    // "not found" sentinel and do no work.
    if(done_ptr) {
      return nullptr;
    }

    const auto length = std::distance(begin, end);
    if(length < 10000) {
      // Small enough to scan serially.
      result_iter = std::find(begin, end, value);
      if(result_iter != end) {
        // Raise the flag right here, at the point of discovery, so tasks that
        // have not started yet can bail out without waiting for this task's
        // ancestors to join their children.
        done_ptr = true;
      }
    }
    else {
      Iter middle = std::next(begin, length / 2);

      // Each child's slot starts at that child's own `end`.
      Iter left_result_iter = middle;
      Iter right_result_iter = end;

      find_task<Iter, T>& left
        = *new(allocate_child())
            find_task<Iter, T>(value, done_ptr, begin, middle, left_result_iter);

      find_task<Iter, T>& right
        = *new(allocate_child())
            find_task<Iter, T>(value, done_ptr, middle, end, right_result_iter);

      // Two children plus one for the blocking wait below.
      set_ref_count(3);
      spawn(right);
      spawn_and_wait_for_all(left);

      // The slots are locals of this call; they are safe to read because the
      // wait above returns only after both children have completed. Any match
      // seen here originated in a leaf, which has already raised the flag.
      if(left_result_iter != middle) {
        result_iter = left_result_iter;
      }
      else if(right_result_iter != end) {
        result_iter = right_result_iter;
      }
    }
    return nullptr;
  }
};
 
// version 1 using task constructs
template<typename Iter, typename T>
Iter parallel_find(Iter begin, Iter end, const T& value) {
tbb::atomic<bool> done;
done = false;
Iter found_ptr(end);
 
find_task<Iter, T>& root
= *new(tbb::task::allocate_root())
find_task<Iter, T>(value, done, begin, end, found_ptr);
 
tbb::task::spawn_root_and_wait(root);
return found_ptr;
}
 
template<typename Iter, typename T>
struct parallel_find2_helper {
tbb::task_group_context& context;
tbb::atomic<std::size_t>& index;
const T& value;
Iter begin;
 
parallel_find2_helper(tbb::task_group_context& c, tbb::atomic<std::size_t>& i, const T& t, Iter b)
: context(c), index(i), value(t), begin(b) {}
 
void operator()(tbb::blocked_range<Iter> range) const {
Iter iter = std::find(range.begin(), range.end(), value);
if(iter != range.end()) {
index = std::distance(begin, iter);
context.cancel_group_execution();
}
}
};
 
// version 2 using parallel_for with task_group_context
template<typename Iter, typename T>
Iter parallel_find2(Iter begin, Iter end, const T& value) {
tbb::task_group_context context;
tbb::atomic<std::size_t> index;
index = std::numeric_limits<std::size_t>::max();
 
parallel_find2_helper<Iter, T> worker(context, index, value, begin);
 
tbb::parallel_for(tbb::blocked_range<Iter>(begin, end, 10000), worker, context);
 
std::size_t result_index = index.load();
return result_index == std::numeric_limits<std::size_t>::max() ? end : std::next(begin, result_index);
}
 
int main()
{
namespace ba = boost::accumulators;
typedef std::vector<int> container;
typedef container::iterator iter;
typedef ba::accumulator_set<double, ba::stats<ba::tag::mean, ba::tag::median> > ba_acc_set;
 
container vec(100000000, 0);
boost::copy(boost::irange<int>(0, vec.size()), vec.begin());
 
const int runs = 5;
const int search_val = vec.size() / 2 ;
 
std::cout << "version 1" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
std::cout << "version 2" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find2(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
 
std::cout << "serial" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = std::find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
 
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.