Created
November 3, 2015 01:03
-
-
Save Centrinia/0c337963bc19542c699f to your computer and use it in GitHub Desktop.
Sieve of Eratosthenes With Wheel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* bitset.h */ | |
template <typename T> | |
inline constexpr T ceildiv(const T a, const T b) { | |
return (a + b - 1) / b; | |
} | |
/* Rotate the bits of x to the right by I bits. */ | |
template <typename T> | |
inline T rotate_right(const T & x, const int & I) | |
{ | |
const int N = sizeof(T) * 8; | |
const int reduced_I = I % N; | |
if (reduced_I > 0) { | |
const int J = (N - reduced_I) % N; | |
const T low = x >> reduced_I; | |
const T high = x << J; | |
const T y = low | high; | |
return y; | |
} else { | |
return x; | |
} | |
} | |
/* Rotate the bits of x to the left by I bits. */ | |
template <typename T> | |
inline T rotate_left(const T & x, const int & I) { | |
const int N = sizeof(T) * 8; | |
const int reduced_I = I % N; | |
const int J = (N - reduced_I) % N; | |
return rotate_right<T>(x,J); | |
} | |
/* Have the static value of X rotated to the right by I bits. */ | |
template <typename T,int I,T X> | |
struct RotateRight { | |
static const int N = sizeof(T) * 8; | |
static const int reduced_I = I % N; | |
static const int J = (N-reduced_I) % N; | |
static const T value = (X >> reduced_I) | (X << J); | |
}; | |
template <typename T,T X> | |
struct RotateRight<T,0,X> { | |
static const T value = X; | |
}; | |
/* Have the static value of X rotated to the left by I bits. */ | |
template <typename T,int I,T X> | |
struct RotateLeft { | |
static const int N = sizeof(T) * 8; | |
static const int reduced_I = I % N; | |
static const int J = (N-reduced_I) % N; | |
static const T value = (X << reduced_I) | (X >> J); | |
}; | |
template <typename T,T X> | |
struct RotateLeft<T,0,X> { | |
static const T value = X; | |
}; | |
/* Reverse the I bit sized blocks of the N bit sized word. */ | |
template <typename T, int I,int N, int S, bool CONTINUE> | |
struct BitReverse { | |
static const T unity = 1; | |
static const T unrotated_low_bitmask = static_cast<T>(-1) / ((unity << I) + unity); | |
/* This mask corresponds to the lower bit-blocks. */ | |
static const T low_bitmask = RotateLeft<T,S,unrotated_low_bitmask>::value; | |
/* This mask corresponds to the upper bit-blocks. */ | |
static const T high_bitmask = ~low_bitmask; | |
static const T inner(const T & x) { | |
/* Extract the lower bit-blocks and swap them with the upper bit-blocks by rotating | |
them to the left by twice the size of a bit-block. */ | |
const T low = rotate_left<T>(x & low_bitmask,I*2); | |
const T rotated_high = x & high_bitmask; | |
const T next = low | rotated_high; | |
/* The size of the next bit-blocks are twice the size of the current bit-blocks. */ | |
const int next_I = I*2; | |
/* The accumulated shift increases by the size of a bit-block. */ | |
const int next_S = (S+I)%N; | |
const bool next_CONTINUE = next_I < N; | |
return BitReverse<T,next_I,N,next_S,next_CONTINUE>::inner(next); | |
} | |
}; | |
template <typename T,int I,int N,int S> | |
struct BitReverse<T,I,N,S,false> { | |
static const T inner(const T & x) { | |
/* Undo the rotations. */ | |
return rotate_right<T>(x,S); | |
} | |
}; | |
template <typename T> | |
static const T bit_reverse(const T & x) { | |
const int N = sizeof(T) * 8; | |
return BitReverse<T,1,N,0,true>::inner(x); | |
} | |
/* Count the number of set bits in an N bit sized word. */ | |
template <typename T, int I,int N, bool CONTINUE> | |
struct PopulationCount { | |
static const T unity = 1; | |
static const T low_bitmask = static_cast<T>(-1) / ((unity << I) + unity); | |
static const int inner(const T & x) { | |
/* Extract the lower bit-blocks. */ | |
const T low = x & low_bitmask; | |
/* Extract the upper bit-blocks and shift them to coincide with the lower bit-blocks. */ | |
const T high = (x >> I) & low_bitmask; | |
/* Add the bit-blocks. */ | |
const T next = low+high; | |
/* The size of the next bit-blocks are twice the size of the current bit-blocks. */ | |
const int next_I = I*2; | |
const bool next_CONTINUE = next_I < N; | |
return PopulationCount<T,next_I,N,next_CONTINUE>::inner(next); | |
} | |
}; | |
template <typename T,int I,int N> | |
struct PopulationCount<T,I,N,false> { | |
static const int inner(const T & x) { | |
/* The input contains the population count. */ | |
return static_cast<int>(x); | |
} | |
} | |
; | |
template <typename T> | |
static const int population_count(const T & x) { | |
const int N = sizeof(T) * 8; | |
return PopulationCount<T,1,N,true>::inner(x); | |
} | |
template <int N> | |
class BitSet { | |
public: | |
static const int SIZE = N; | |
inline BitSet() { | |
for(int i = 0; i < WORD_COUNT; i++) { | |
m_bits[i] = 0; | |
} | |
} | |
inline BitSet(const BitSet<N>& b) { | |
for(int i = 0; i < WORD_COUNT; i++) { | |
m_bits[i] = b.m_bits[i]; | |
} | |
} | |
inline bool test(int index) const { | |
return (m_bits[index / WORD_WIDTH] & (word_t(1) << (index % WORD_WIDTH))) != word_t(0); | |
} | |
inline BitSet<N> operator ~() const { | |
BitSet<N> c; | |
for(int i = 0; i < WORD_COUNT; i++) { | |
c.m_bits[i] = ~m_bits[i]; | |
} | |
if(N % WORD_WIDTH > 0) { | |
c.m_bits[WORD_COUNT - 1] &= (word_t(1) << (N % WORD_WIDTH)) - word_t(1); | |
} | |
return c; | |
} | |
inline BitSet<N> operator &(const BitSet<N>& b) const { | |
BitSet<N> c = *this; | |
for(int i = 0; i < WORD_COUNT; i++) { | |
c.m_bits[i] &= b.m_bits[i]; | |
} | |
return c; | |
} | |
inline BitSet<N> operator |(const BitSet<N>& b) const { | |
BitSet<N> c = *this; | |
for(int i = 0; i < WORD_COUNT; i++) { | |
c.m_bits[i] |= b.m_bits[i]; | |
} | |
return c; | |
} | |
inline BitSet<N>& operator |=(const BitSet<N>& b) { | |
for(int i = 0; i < WORD_COUNT; i++) { | |
m_bits[i] |= b.m_bits[i]; | |
} | |
return *this; | |
} | |
inline BitSet<N>& operator &=(const BitSet<N>& b) { | |
for(int i = 0; i < WORD_COUNT; i++) { | |
m_bits[i] &= b.m_bits[i]; | |
} | |
return *this; | |
} | |
inline int find_item() const { | |
for(int i = 0; i < WORD_COUNT; i++) { | |
if(m_bits[i] != 0) { | |
const word_t word = m_bits[i]; | |
return i * WORD_WIDTH + population_count((word - 1)^word) - 1; | |
} | |
} | |
return N; | |
} | |
inline void clear(const int index) { | |
if(0 <= index && index < N) { | |
m_bits[index / WORD_WIDTH] &= ~(word_t(1) << (index % WORD_WIDTH)); | |
} | |
} | |
inline void set(const int index) { | |
if(0 <= index && index < N) { | |
m_bits[index / WORD_WIDTH] |= word_t(1) << (index % WORD_WIDTH); | |
} | |
} | |
inline int count_bits() const { | |
int count = 0; | |
for(int i = 0; i < WORD_COUNT; i++) { | |
count += population_count(m_bits[i]); | |
} | |
return count; | |
} | |
protected: | |
typedef unsigned long word_t; | |
static constexpr int WORD_WIDTH = sizeof(word_t) * 8; | |
static constexpr int WORD_COUNT = ceildiv(N, WORD_WIDTH); | |
word_t m_bits[WORD_COUNT]; | |
}; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exc@lambda ~/src/examples/wheel $ time ./a.out | |
memory usage: 380954904 bytes | |
pi(10000000000) = 455052511 | |
real 1m11.314s | |
user 1m11.040s | |
sys 0m0.328s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* wheel_sieve.cc */ | |
#include <vector> | |
#include <iostream> | |
#include "bitset.h" | |
template <typename T> | |
inline void swap(T& a, T& b) { | |
T c = a; | |
a = b; | |
b = c; | |
} | |
template <typename T> | |
T gcd(T a, T b) { | |
if(a < b) { | |
swap(a, b); | |
} | |
while(b != 0) { | |
T c = a % b; | |
a = b; | |
b = c; | |
} | |
return a; | |
} | |
template <typename T> | |
bool is_composite(T n) { | |
for(T m = 2; m * m <= n; m++) { | |
if(n % m == 0) { | |
return true; | |
} | |
} | |
return false; | |
} | |
template <typename T> | |
std::pair<T, std::vector<T> > make_wheel(T B) { | |
T N = 1; | |
for(T p = 2; p < B; p++) { | |
if(!is_composite(p)) { | |
N *= p; | |
} | |
} | |
std::vector<T> classes; | |
for(T m = 1; m < N; m++) { | |
if(gcd(m, N) == 1) { | |
classes.push_back(m); | |
} | |
} | |
return std::pair<T, std::vector<T> >(N, classes); | |
} | |
template <typename T> | |
std::ostream& operator <<(std::ostream& out, const std::vector<T>& as) { | |
out << "["; | |
bool first = true; | |
/*for(const auto & a : as)*/ | |
for(int i = 0; i < as.size(); i++) { | |
const T& a = as[i]; | |
if(!first) { | |
out << ", "; | |
} | |
out << a; | |
first = false; | |
} | |
out << "]"; | |
return out; | |
} | |
template <typename T> | |
T sieve(const T B) { | |
#if 0 | |
typedef BitSet<1 * 2 * 4 * 6 * 10> bits_t; | |
const T B1 = 12; | |
#elif 1 | |
typedef BitSet<1 * 2 * 4 * 6> bits_t; | |
const T B1 = 10; | |
#else | |
typedef BitSet<1 * 2 * 4> bits_t; | |
const T B1 = 6; | |
#endif | |
std::pair<T, std::vector<T> > wheel = make_wheel(B1); | |
int* indexes = new int[wheel.first]; | |
for(int i = 0; i < wheel.first; i++) { | |
indexes[i] = -1; | |
} | |
for(int i = 0; i < wheel.second.size(); i++) { | |
indexes[wheel.second[i]] = i; | |
} | |
int wheels_count = ceildiv(B, wheel.first); | |
std::cerr << "memory usage: " << ((sizeof(bits_t) * wheels_count) + ((sizeof(int) + sizeof(bits_t))*wheel.first)) << " bytes" << std::endl; | |
bits_t* wheels = new bits_t[wheels_count]; | |
bits_t* mask = new bits_t[wheel.first]; | |
#if defined(USE_EXCLUDE) | |
for(int i = 0; i < wheels_count; i++) { | |
wheels[i] = ~bits_t(); | |
} | |
wheels[0].clear(indexes[1]); | |
#else | |
wheels[0].set(indexes[1]); | |
#endif | |
for(int i = 0; i < wheel.first; i++) { | |
if(is_composite(i)) { | |
if(indexes[i] >= 0) { | |
#if defined(USE_EXCLUDE) | |
wheels[0].clear(indexes[i]); | |
#else | |
wheels[0].set(indexes[i]); | |
#endif | |
} | |
} | |
} | |
bits_t wheel0 = wheels[0]; | |
for(int i = 0; i < wheels_count; i++) { | |
const T nk = i * wheel.first; | |
if(nk * nk > B) { | |
break; | |
} | |
for(auto c : wheel.second) { | |
const T p = nk + c; | |
if(p == 1) { | |
continue; | |
} | |
/* Use masks. */ | |
if(p <= wheel.first) { | |
for(int j = 0; j < p; j++) { | |
#if defined(USE_EXCLUDE) | |
mask[j] = ~bits_t(); | |
#else | |
mask[j] = bits_t(); | |
#endif | |
} | |
for(auto b : wheel.second) { | |
int kpb = p * b; | |
int index = indexes[kpb % wheel.first]; | |
#if defined(USE_EXCLUDE) | |
mask[kpb / wheel.first].clear(index); | |
#else | |
mask[kpb / wheel.first].set(index); | |
#endif | |
} | |
for(int j = 0; j < wheels_count; j += p) { | |
for(int k = 0; k < p; k++) { | |
if(j + k >= wheels_count) { | |
break; | |
} | |
#if defined(USE_EXCLUDE) | |
wheels[j + k] &= mask[k]; | |
#else | |
wheels[j + k] |= mask[k]; | |
#endif | |
} | |
} | |
} else { | |
bits_t s = wheels[p / wheel.first]; | |
for(auto b : wheel.second) { | |
T kpb = p * b; | |
int index = indexes[kpb % wheel.first]; | |
if(index < wheel.second.size()) { | |
T index0 = kpb / wheel.first; | |
#if defined(USE_EXCLUDE) | |
bits_t mask0 = ~bits_t(); | |
mask0.clear(index); | |
#else | |
bits_t mask0; | |
mask0.set(index); | |
#endif | |
for(int l = index0; l < wheels_count; l += p) { | |
#if defined(USE_EXCLUDE) | |
wheels[l] &= mask0; | |
#else | |
wheels[l] |= mask0; | |
#endif | |
} | |
} | |
} | |
#if defined(USE_EXCLUDE) | |
bits_t m = bits_t(); | |
m.set(indexes[p % wheel.first]); | |
wheels[p / wheel.first] |= s & m; | |
#else | |
bits_t m = ~bits_t(); | |
m.clear(indexes[p % wheel.first]); | |
wheels[p / wheel.first] &= s | m; | |
#endif | |
} | |
} | |
} | |
wheels[0] = wheel0; | |
for(int i = B % wheel.first; i < wheel.first; i++) { | |
#if defined(USE_EXCLUDE) | |
wheels[B / wheel.first].clear(indexes[i]); | |
#else | |
wheels[B / wheel.first].set(indexes[i]); | |
#endif | |
} | |
{ | |
long count = 0; | |
for(T p = 2; p < B1; p++) { | |
if(!is_composite(p)) { | |
count++; | |
} | |
} | |
for(T k = 0; k < wheels_count; k++) { | |
#if defined(USE_EXCLUDE) | |
count += wheels[k].count_bits(); | |
/*if(count >=203277289) { | |
std::cerr << k*wheel.first << std::endl; | |
return count; | |
}*/ | |
#else | |
count -= wheels[k].count_bits(); | |
#endif | |
} | |
#if !defined(USE_EXCLUDE) | |
count += long(wheels_count) * bits_t::SIZE; | |
#endif | |
return count; | |
} | |
delete [] indexes; | |
delete [] mask; | |
delete [] wheels; | |
} | |
int main() { | |
const unsigned long int B = 10000000000; | |
std::cerr << "pi(" << B << ") = " << sieve(B) << std::endl; | |
// pi(10000000000) = 455,052,511 | |
// pi(1000000000) = 50,847,534 | |
// pi(100000000) = 5,761,455 | |
// pi(1000000) = 78,498 | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment