Skip to content

Instantly share code, notes, and snippets.

@Centrinia
Created November 3, 2015 01:03
Show Gist options
  • Save Centrinia/0c337963bc19542c699f to your computer and use it in GitHub Desktop.
Save Centrinia/0c337963bc19542c699f to your computer and use it in GitHub Desktop.
Sieve of Eratosthenes With Wheel
/* bitset.h */
template <typename T>
inline constexpr T ceildiv(const T a, const T b) {
return (a + b - 1) / b;
}
/* Rotate the bits of x to the right by I bits. */
template <typename T>
inline T rotate_right(const T & x, const int & I)
{
const int N = sizeof(T) * 8;
const int reduced_I = I % N;
if (reduced_I > 0) {
const int J = (N - reduced_I) % N;
const T low = x >> reduced_I;
const T high = x << J;
const T y = low | high;
return y;
} else {
return x;
}
}
/* Rotate the bits of x to the left by I bits. */
template <typename T>
inline T rotate_left(const T & x, const int & I) {
const int N = sizeof(T) * 8;
const int reduced_I = I % N;
const int J = (N - reduced_I) % N;
return rotate_right<T>(x,J);
}
/* Have the static value of X rotated to the right by I bits. */
template <typename T,int I,T X>
struct RotateRight {
static const int N = sizeof(T) * 8;
static const int reduced_I = I % N;
static const int J = (N-reduced_I) % N;
static const T value = (X >> reduced_I) | (X << J);
};
template <typename T,T X>
struct RotateRight<T,0,X> {
static const T value = X;
};
/* Have the static value of X rotated to the left by I bits. */
template <typename T,int I,T X>
struct RotateLeft {
static const int N = sizeof(T) * 8;
static const int reduced_I = I % N;
static const int J = (N-reduced_I) % N;
static const T value = (X << reduced_I) | (X >> J);
};
template <typename T,T X>
struct RotateLeft<T,0,X> {
static const T value = X;
};
/* Reverse the I bit sized blocks of the N bit sized word. */
template <typename T, int I,int N, int S, bool CONTINUE>
struct BitReverse {
static const T unity = 1;
static const T unrotated_low_bitmask = static_cast<T>(-1) / ((unity << I) + unity);
/* This mask corresponds to the lower bit-blocks. */
static const T low_bitmask = RotateLeft<T,S,unrotated_low_bitmask>::value;
/* This mask corresponds to the upper bit-blocks. */
static const T high_bitmask = ~low_bitmask;
static const T inner(const T & x) {
/* Extract the lower bit-blocks and swap them with the upper bit-blocks by rotating
them to the left by twice the size of a bit-block. */
const T low = rotate_left<T>(x & low_bitmask,I*2);
const T rotated_high = x & high_bitmask;
const T next = low | rotated_high;
/* The size of the next bit-blocks are twice the size of the current bit-blocks. */
const int next_I = I*2;
/* The accumulated shift increases by the size of a bit-block. */
const int next_S = (S+I)%N;
const bool next_CONTINUE = next_I < N;
return BitReverse<T,next_I,N,next_S,next_CONTINUE>::inner(next);
}
};
template <typename T,int I,int N,int S>
struct BitReverse<T,I,N,S,false> {
static const T inner(const T & x) {
/* Undo the rotations. */
return rotate_right<T>(x,S);
}
};
template <typename T>
static const T bit_reverse(const T & x) {
const int N = sizeof(T) * 8;
return BitReverse<T,1,N,0,true>::inner(x);
}
/* Count the number of set bits in an N bit sized word. */
template <typename T, int I,int N, bool CONTINUE>
struct PopulationCount {
static const T unity = 1;
static const T low_bitmask = static_cast<T>(-1) / ((unity << I) + unity);
static const int inner(const T & x) {
/* Extract the lower bit-blocks. */
const T low = x & low_bitmask;
/* Extract the upper bit-blocks and shift them to coincide with the lower bit-blocks. */
const T high = (x >> I) & low_bitmask;
/* Add the bit-blocks. */
const T next = low+high;
/* The size of the next bit-blocks are twice the size of the current bit-blocks. */
const int next_I = I*2;
const bool next_CONTINUE = next_I < N;
return PopulationCount<T,next_I,N,next_CONTINUE>::inner(next);
}
};
template <typename T,int I,int N>
struct PopulationCount<T,I,N,false> {
static const int inner(const T & x) {
/* The input contains the population count. */
return static_cast<int>(x);
}
}
;
template <typename T>
static const int population_count(const T & x) {
const int N = sizeof(T) * 8;
return PopulationCount<T,1,N,true>::inner(x);
}
template <int N>
class BitSet {
public:
static const int SIZE = N;
inline BitSet() {
for(int i = 0; i < WORD_COUNT; i++) {
m_bits[i] = 0;
}
}
inline BitSet(const BitSet<N>& b) {
for(int i = 0; i < WORD_COUNT; i++) {
m_bits[i] = b.m_bits[i];
}
}
inline bool test(int index) const {
return (m_bits[index / WORD_WIDTH] & (word_t(1) << (index % WORD_WIDTH))) != word_t(0);
}
inline BitSet<N> operator ~() const {
BitSet<N> c;
for(int i = 0; i < WORD_COUNT; i++) {
c.m_bits[i] = ~m_bits[i];
}
if(N % WORD_WIDTH > 0) {
c.m_bits[WORD_COUNT - 1] &= (word_t(1) << (N % WORD_WIDTH)) - word_t(1);
}
return c;
}
inline BitSet<N> operator &(const BitSet<N>& b) const {
BitSet<N> c = *this;
for(int i = 0; i < WORD_COUNT; i++) {
c.m_bits[i] &= b.m_bits[i];
}
return c;
}
inline BitSet<N> operator |(const BitSet<N>& b) const {
BitSet<N> c = *this;
for(int i = 0; i < WORD_COUNT; i++) {
c.m_bits[i] |= b.m_bits[i];
}
return c;
}
inline BitSet<N>& operator |=(const BitSet<N>& b) {
for(int i = 0; i < WORD_COUNT; i++) {
m_bits[i] |= b.m_bits[i];
}
return *this;
}
inline BitSet<N>& operator &=(const BitSet<N>& b) {
for(int i = 0; i < WORD_COUNT; i++) {
m_bits[i] &= b.m_bits[i];
}
return *this;
}
inline int find_item() const {
for(int i = 0; i < WORD_COUNT; i++) {
if(m_bits[i] != 0) {
const word_t word = m_bits[i];
return i * WORD_WIDTH + population_count((word - 1)^word) - 1;
}
}
return N;
}
inline void clear(const int index) {
if(0 <= index && index < N) {
m_bits[index / WORD_WIDTH] &= ~(word_t(1) << (index % WORD_WIDTH));
}
}
inline void set(const int index) {
if(0 <= index && index < N) {
m_bits[index / WORD_WIDTH] |= word_t(1) << (index % WORD_WIDTH);
}
}
inline int count_bits() const {
int count = 0;
for(int i = 0; i < WORD_COUNT; i++) {
count += population_count(m_bits[i]);
}
return count;
}
protected:
typedef unsigned long word_t;
static constexpr int WORD_WIDTH = sizeof(word_t) * 8;
static constexpr int WORD_COUNT = ceildiv(N, WORD_WIDTH);
word_t m_bits[WORD_COUNT];
};
exc@lambda ~/src/examples/wheel $ time ./a.out
memory usage: 380954904 bytes
pi(10000000000) = 455052511
real 1m11.314s
user 1m11.040s
sys 0m0.328s
/* wheel_sieve.cc */
#include <vector>
#include <iostream>
#include "bitset.h"
template <typename T>
inline void swap(T& a, T& b) {
T c = a;
a = b;
b = c;
}
template <typename T>
T gcd(T a, T b) {
if(a < b) {
swap(a, b);
}
while(b != 0) {
T c = a % b;
a = b;
b = c;
}
return a;
}
template <typename T>
bool is_composite(T n) {
for(T m = 2; m * m <= n; m++) {
if(n % m == 0) {
return true;
}
}
return false;
}
template <typename T>
std::pair<T, std::vector<T> > make_wheel(T B) {
T N = 1;
for(T p = 2; p < B; p++) {
if(!is_composite(p)) {
N *= p;
}
}
std::vector<T> classes;
for(T m = 1; m < N; m++) {
if(gcd(m, N) == 1) {
classes.push_back(m);
}
}
return std::pair<T, std::vector<T> >(N, classes);
}
template <typename T>
std::ostream& operator <<(std::ostream& out, const std::vector<T>& as) {
out << "[";
bool first = true;
/*for(const auto & a : as)*/
for(int i = 0; i < as.size(); i++) {
const T& a = as[i];
if(!first) {
out << ", ";
}
out << a;
first = false;
}
out << "]";
return out;
}
template <typename T>
T sieve(const T B) {
#if 0
typedef BitSet<1 * 2 * 4 * 6 * 10> bits_t;
const T B1 = 12;
#elif 1
typedef BitSet<1 * 2 * 4 * 6> bits_t;
const T B1 = 10;
#else
typedef BitSet<1 * 2 * 4> bits_t;
const T B1 = 6;
#endif
std::pair<T, std::vector<T> > wheel = make_wheel(B1);
int* indexes = new int[wheel.first];
for(int i = 0; i < wheel.first; i++) {
indexes[i] = -1;
}
for(int i = 0; i < wheel.second.size(); i++) {
indexes[wheel.second[i]] = i;
}
int wheels_count = ceildiv(B, wheel.first);
std::cerr << "memory usage: " << ((sizeof(bits_t) * wheels_count) + ((sizeof(int) + sizeof(bits_t))*wheel.first)) << " bytes" << std::endl;
bits_t* wheels = new bits_t[wheels_count];
bits_t* mask = new bits_t[wheel.first];
#if defined(USE_EXCLUDE)
for(int i = 0; i < wheels_count; i++) {
wheels[i] = ~bits_t();
}
wheels[0].clear(indexes[1]);
#else
wheels[0].set(indexes[1]);
#endif
for(int i = 0; i < wheel.first; i++) {
if(is_composite(i)) {
if(indexes[i] >= 0) {
#if defined(USE_EXCLUDE)
wheels[0].clear(indexes[i]);
#else
wheels[0].set(indexes[i]);
#endif
}
}
}
bits_t wheel0 = wheels[0];
for(int i = 0; i < wheels_count; i++) {
const T nk = i * wheel.first;
if(nk * nk > B) {
break;
}
for(auto c : wheel.second) {
const T p = nk + c;
if(p == 1) {
continue;
}
/* Use masks. */
if(p <= wheel.first) {
for(int j = 0; j < p; j++) {
#if defined(USE_EXCLUDE)
mask[j] = ~bits_t();
#else
mask[j] = bits_t();
#endif
}
for(auto b : wheel.second) {
int kpb = p * b;
int index = indexes[kpb % wheel.first];
#if defined(USE_EXCLUDE)
mask[kpb / wheel.first].clear(index);
#else
mask[kpb / wheel.first].set(index);
#endif
}
for(int j = 0; j < wheels_count; j += p) {
for(int k = 0; k < p; k++) {
if(j + k >= wheels_count) {
break;
}
#if defined(USE_EXCLUDE)
wheels[j + k] &= mask[k];
#else
wheels[j + k] |= mask[k];
#endif
}
}
} else {
bits_t s = wheels[p / wheel.first];
for(auto b : wheel.second) {
T kpb = p * b;
int index = indexes[kpb % wheel.first];
if(index < wheel.second.size()) {
T index0 = kpb / wheel.first;
#if defined(USE_EXCLUDE)
bits_t mask0 = ~bits_t();
mask0.clear(index);
#else
bits_t mask0;
mask0.set(index);
#endif
for(int l = index0; l < wheels_count; l += p) {
#if defined(USE_EXCLUDE)
wheels[l] &= mask0;
#else
wheels[l] |= mask0;
#endif
}
}
}
#if defined(USE_EXCLUDE)
bits_t m = bits_t();
m.set(indexes[p % wheel.first]);
wheels[p / wheel.first] |= s & m;
#else
bits_t m = ~bits_t();
m.clear(indexes[p % wheel.first]);
wheels[p / wheel.first] &= s | m;
#endif
}
}
}
wheels[0] = wheel0;
for(int i = B % wheel.first; i < wheel.first; i++) {
#if defined(USE_EXCLUDE)
wheels[B / wheel.first].clear(indexes[i]);
#else
wheels[B / wheel.first].set(indexes[i]);
#endif
}
{
long count = 0;
for(T p = 2; p < B1; p++) {
if(!is_composite(p)) {
count++;
}
}
for(T k = 0; k < wheels_count; k++) {
#if defined(USE_EXCLUDE)
count += wheels[k].count_bits();
/*if(count >=203277289) {
std::cerr << k*wheel.first << std::endl;
return count;
}*/
#else
count -= wheels[k].count_bits();
#endif
}
#if !defined(USE_EXCLUDE)
count += long(wheels_count) * bits_t::SIZE;
#endif
return count;
}
delete [] indexes;
delete [] mask;
delete [] wheels;
}
int main() {
const unsigned long int B = 10000000000;
std::cerr << "pi(" << B << ") = " << sieve(B) << std::endl;
// pi(10000000000) = 455,052,511
// pi(1000000000) = 50,847,534
// pi(100000000) = 5,761,455
// pi(1000000) = 78,498
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment