Skip to content

Instantly share code, notes, and snippets.

@g14a
Created August 23, 2018 16:41
Show Gist options
  • Save g14a/258d55bfe10b3d886a65eb13550be7e1 to your computer and use it in GitHub Desktop.
Save g14a/258d55bfe10b3d886a65eb13550be7e1 to your computer and use it in GitHub Desktop.
// Chris Nauroth
// Intel Threading Challenge 2011
// Prime Sums
// This solution uses Intel Threading Building Blocks to set up a parallel
// pipeline consisting of 3 filters:
//
// 1. PrimeFunctor runs in serial and uses an implementation of the sieve of
// Eratosthenes to emit prime numbers.
//
// 2. PerfectPowerFunctor runs in parallel and receives the primes discovered by
// PrimeFunctor, checks the sums of each subset of primes in range up to that
// prime against possible perfect powers, and emits a vector of structures
// describing any perfect powers that it found.
//
// 3. OutputFunctor runs in serial and writes results to the output file.
//
// PrimeFunctor and PerfectPowerFunctor have some shared state in the form of a
// concurrent_vector containing primes that have been discovered. When
// PerfectPowerFunctor receives a particular prime, it is guaranteed that the
// shared concurrent_vector already contains all primes less than that prime.
// PerfectPowerFunctor uses this concurrent_vector to calculate the sums.
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <tbb/concurrent_vector.h>
#include <tbb/pipeline.h>
#include <tbb/tick_count.h>
using namespace std;
using namespace tbb;
static concurrent_vector<size_t> primes;
class PrimeFunctor {
public:
PrimeFunctor(const size_t rangeStart, const size_t rangeEnd):
_candidates(vector<bool>(rangeEnd + 1, true)), _rangeStart(rangeStart), _rangeEnd(rangeEnd) {
this->_candidates[0] = false;
this->_candidates[1] = false;
this->_candidates[2] = true;
primes.push_back(2);
for (size_t i = 2 * 2; i <= this->_rangeEnd; i += 2) {
this->_candidates[i] = false;
}
this->_i = 3;
}
size_t operator()(flow_control& fc) const {
size_t nextPrime = 0;
size_t i = this->_i;
for (; 0 == nextPrime && i <= this->_rangeEnd; ++i) {
if (this->_candidates[i]) {
for (size_t j = i * i; j <= this->_rangeEnd; j += i) {
this->_candidates[j] = false;
}
nextPrime = i;
}
}
this->_i = i;
size_t nextIndex = 0;
if (0 == nextPrime) {
fc.stop();
}
else {
primes.push_back(nextPrime);
nextIndex = primes.size() - 1;
}
return nextIndex;
}
private:
mutable vector<bool> _candidates;
const size_t _rangeStart, _rangeEnd;
mutable size_t _i;
};
struct PerfectPower {
size_t start, end, sum, base, power;
};
class PerfectPowerFunctor {
public:
PerfectPowerFunctor(const size_t maxPower, const size_t rangeStart):
_maxPower(maxPower), _rangeStart(rangeStart) {
}
vector<PerfectPower> operator()(const size_t index) const {
vector<PerfectPower> perfectPowers;
size_t sum = primes[index];
for (size_t j = index - 1; primes[j] >= this->_rangeStart; --j) {
sum += primes[j];
for (size_t base = 2, product = base * base;
product <= sum;
++base, product = base * base) {
for (size_t power = 2;
power <= this->_maxPower && product <= sum;
++power, product *= base) {
if (product == sum) {
PerfectPower perfectPower;
perfectPower.start = primes[j];
perfectPower.end = primes[index];
perfectPower.sum = sum;
perfectPower.base = base;
perfectPower.power = power;
perfectPowers.push_back(perfectPower);
}
}
}
if (0 == j) break;
}
return perfectPowers;
}
private:
const size_t _maxPower;
const size_t _rangeStart;
};
class OutputFunctor {
public:
OutputFunctor(ostream& out):_out(out) {
}
void operator()(const vector<PerfectPower> perfectPowers) const {
for (vector<PerfectPower>::const_iterator i = perfectPowers.begin();
i != perfectPowers.end(); ++i) {
this->_out << "sum(" << i->start << ":" << i->end
<< ") = " << i->sum << " = " << i->base
<< "**" << i->power << endl;
}
}
private:
ostream& _out;
};
int main(int argc, char** argv) {
tick_count begin = tick_count::now();
if (argc < 5) {
cerr << "Must specify range start, range end, max power, and output "
"file name." << endl;
return 1;
}
unsigned int rangeStart, rangeEnd, maxPower, ntoken;
istringstream(argv[1]) >> rangeStart;
istringstream(argv[2]) >> rangeEnd;
istringstream(argv[3]) >> maxPower;
ofstream out(argv[4]);
// ntoken is the maximum number of "tokens" that can flow through the
// parallel pipeline concurrently. Basically, it's the maximum degree of
// concurrency. It would be nice to auto-tune this based on discovering the
// number of CPUs available at runtime. (i.e. check /proc/cpuinfo) For
// now, this defaults to 100. Experimentally, that seems to be a good
// number for running on the Intel Manycore Testing Lab, where the judging
// happens. You can pass an extra argument to override the default and test
// other values.
if (argc > 5)
istringstream(argv[5]) >> ntoken;
else
ntoken = 100;
filter_t<void, size_t> f1(filter::serial_in_order,
PrimeFunctor(rangeStart, rangeEnd));
filter_t<size_t, vector<PerfectPower> > f2(filter::parallel,
PerfectPowerFunctor(maxPower, rangeStart));
filter_t<vector<PerfectPower>, void> f3(filter::serial_in_order,
OutputFunctor(out));
parallel_pipeline(ntoken, f1 & f2 & f3);
cout << (tick_count::now() - begin).seconds() << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment