Last active

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

parallel_find using Intel TBB

View parallel_find.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
#include <boost/range/irange.hpp>
#include <boost/range/algorithm.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/median.hpp>

#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/task.h>
#include <tbb/atomic.h>
#include <tbb/tick_count.h>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <limits>
#include <vector>
 
// Note: all thresholds (serial cutoff, grain size) were chosen arbitrarily.
 
/// Divide-and-conquer TBB task that searches [begin, end) for `value`.
///
/// Short ranges are scanned serially with std::find; longer ranges are split
/// in half and handed to two child tasks. A shared atomic flag lets tasks that
/// have not started yet skip their range once some task has found a match, so
/// with duplicate values the reported match is not necessarily the first one.
///
/// The result is written through `result_iter`, a slot owned by whoever
/// created the task. The creator must pre-load that slot with this task's
/// `end`, which is the "not found" value: a task that bails out early leaves
/// the slot untouched.
template<typename Iter, typename T>
struct find_task : tbb::task {
    const T& value;               // value being searched for (not owned)
    tbb::atomic<bool>& done_ptr;  // shared "match found" flag (a reference, despite the name)
    Iter begin;                   // first element of this task's range
    Iter end;                     // one past the last element of this task's range
    Iter& result_iter;            // creator-owned slot that receives the result

    find_task(const T& search_val, tbb::atomic<bool>& done_flag, Iter b, Iter e, Iter& f_ptr)
        : value(search_val), done_ptr(done_flag), begin(b), end(e), result_iter(f_ptr) {}

    /// Runs the search for this task's range. Always returns nullptr (no
    /// continuation task is handed back to the scheduler).
    tbb::task* execute() override {
        // Some other task already found the value: skip this range and leave
        // the result slot at its "not found" initial value.
        if (done_ptr) {
            return nullptr;
        }

        typedef typename std::iterator_traits<Iter>::difference_type diff_t;
        const diff_t serial_cutoff = 10000;  // arbitrary threshold
        const diff_t length = std::distance(begin, end);

        if (length < serial_cutoff) {
            result_iter = std::find(begin, end, value);
            // Raise the flag right here. Previously it was only raised by a
            // parent after both of its children had finished, so the sibling
            // subtree was always scanned completely.
            if (result_iter != end) {
                done_ptr = true;
            }
            return nullptr;
        }

        Iter middle = std::next(begin, length / 2);

        // Each child reports into its own slot, pre-loaded with that child's
        // end iterator so "slot == child's end" means "not found".
        Iter left_result_iter = middle;
        Iter right_result_iter = end;

        find_task<Iter, T>& left
            = *new(allocate_child())
                find_task<Iter, T>(value, done_ptr, begin, middle, left_result_iter);

        find_task<Iter, T>& right
            = *new(allocate_child())
                find_task<Iter, T>(value, done_ptr, middle, end, right_result_iter);

        // Two children plus one for the blocking wait below.
        set_ref_count(3);
        spawn(right);
        spawn_and_wait_for_all(left);

        // Prefer the left half so the earlier of the two hits wins. The done
        // flag needs no update here: a non-sentinel slot means a leaf below
        // has already set it.
        if (left_result_iter != middle) {
            result_iter = left_result_iter;
        } else if (right_result_iter != end) {
            result_iter = right_result_iter;
        }
        return nullptr;
    }
};
 
// version 1 using task constructs
template<typename Iter, typename T>
Iter parallel_find(Iter begin, Iter end, const T& value) {
tbb::atomic<bool> done;
done = false;
Iter found_ptr(end);
 
find_task<Iter, T>& root
= *new(tbb::task::allocate_root())
find_task<Iter, T>(value, done, begin, end, found_ptr);
 
tbb::task::spawn_root_and_wait(root);
return found_ptr;
}
 
template<typename Iter, typename T>
struct parallel_find2_helper {
tbb::task_group_context& context;
tbb::atomic<std::size_t>& index;
const T& value;
Iter begin;
 
parallel_find2_helper(tbb::task_group_context& c, tbb::atomic<std::size_t>& i, const T& t, Iter b)
: context(c), index(i), value(t), begin(b) {}
 
void operator()(tbb::blocked_range<Iter> range) const {
Iter iter = std::find(range.begin(), range.end(), value);
if(iter != range.end()) {
index = std::distance(begin, iter);
context.cancel_group_execution();
}
}
};
 
// version 2 using parallel_for with task_group_context
template<typename Iter, typename T>
Iter parallel_find2(Iter begin, Iter end, const T& value) {
tbb::task_group_context context;
tbb::atomic<std::size_t> index;
index = std::numeric_limits<std::size_t>::max();
 
parallel_find2_helper<Iter, T> worker(context, index, value, begin);
 
tbb::parallel_for(tbb::blocked_range<Iter>(begin, end, 10000), worker, context);
 
std::size_t result_index = index.load();
return result_index == std::numeric_limits<std::size_t>::max() ? end : std::next(begin, result_index);
}
 
int main()
{
namespace ba = boost::accumulators;
typedef std::vector<int> container;
typedef container::iterator iter;
typedef ba::accumulator_set<double, ba::stats<ba::tag::mean, ba::tag::median> > ba_acc_set;
 
container vec(100000000, 0);
boost::copy(boost::irange<int>(0, vec.size()), vec.begin());
 
const int runs = 5;
const int search_val = vec.size() / 2 ;
 
std::cout << "version 1" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
std::cout << "version 2" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = parallel_find2(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
 
std::cout << "serial" << std::endl;
 
{
ba_acc_set accu;
for(int i = 0; i != runs; ++i) {
tbb::tick_count t0 = tbb::tick_count::now();
iter it = std::find(vec.begin(), vec.end(), search_val);
tbb::tick_count t1 = tbb::tick_count::now();
accu((t1-t0).seconds());
std::cout << (it != vec.end() ? *it : vec.size() + 1) << std::endl;
}
std::cout << "mean " << ba::mean(accu) << std::endl;
std::cout << "median " << ba::median(accu) << std::endl;
}
 
}
 
 
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.