Skip to content

Instantly share code, notes, and snippets.

@jakobkogler
Created July 1, 2021 19:59
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jakobkogler/e6359ea9ced24fe304f1a8af3c9bee0e to your computer and use it in GitHub Desktop.
Save jakobkogler/e6359ea9ced24fe304f1a8af3c9bee0e to your computer and use it in GitHub Desktop.
Benchmark prime sieve speed between vector<char> and vector<bool>
>> g++ -std=c++20 -Ofast benchmark_prime_sieve.cpp && ./a.out
N = 30000000:
norm: bool: 106 ms
norm: char: 235 ms
norm: bool is 2.217 times FASTER
seg: bool: 147 ms
seg: char: 38 ms
seg: bool is 3.868 times SLOWER
N = 60000000:
norm: bool: 300 ms
norm: char: 527 ms
norm: bool is 1.757 times FASTER
seg: bool: 311 ms
seg: char: 87 ms
seg: bool is 3.575 times SLOWER
N = 90000000:
norm: bool: 498 ms
norm: char: 811 ms
norm: bool is 1.629 times FASTER
seg: bool: 488 ms
seg: char: 143 ms
seg: bool is 3.413 times SLOWER
N = 200000000:
norm: bool: 1305 ms
norm: char: 1987 ms
norm: bool is 1.523 times FASTER
seg: bool: 1164 ms
seg: char: 380 ms
seg: bool is 3.063 times SLOWER
N = 500000000:
norm: bool: 3731 ms
norm: char: 5210 ms
norm: bool is 1.396 times FASTER
seg: bool: 3268 ms
seg: char: 1214 ms
seg: bool is 2.692 times SLOWER
N = 1000000000:
norm: bool: 7868 ms
norm: char: 11066 ms
norm: bool is 1.406 times FASTER
seg: bool: 7273 ms
seg: char: 3039 ms
seg: bool is 2.393 times SLOWER
#include "bits/stdc++.h"
using namespace std;
template <typename T>
int count_primes_seg(int n) {
const int S = 10'000;
vector<int> primes;
int nsqrt = sqrt(n);
vector<T> is_prime(nsqrt + 1, true);
for (int i = 2; i <= nsqrt; i++) {
if (is_prime[i]) {
primes.push_back(i);
for (int j = i * i; j <= nsqrt; j += i)
is_prime[j] = false;
}
}
int result = 0;
vector<T> block(S);
for (int k = 0; k * S <= n; k++) {
fill(block.begin(), block.end(), true);
int start = k * S;
for (int p : primes) {
int start_idx = (start + p - 1) / p;
int j = max(start_idx, p) * p - start;
for (; j < S; j += p)
block[j] = false;
}
if (k == 0)
block[0] = block[1] = false;
for (int i = 0; i < S && start + i <= n; i++) {
if (block[i])
result++;
}
}
return result;
}
template <typename T>
int count_primes(int n) {
vector<T> is_prime(n+1, true);
is_prime[0] = is_prime[1] = false;
for (int i = 2; i * i <= n; i++) {
if (is_prime[i]) {
for (int j = i * i; j <= n; j += i)
is_prime[j] = false;
}
}
int cnt = 0;
for (int i = 2; i <= n; i++) {
if (is_prime[i])
cnt++;
}
return cnt;
/* return is_prime[n]; */
}
template <typename T>
long long benchmark(int n, string s)
{
auto t1 = std::chrono::high_resolution_clock::now();
int res = count_primes<T>(n);
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
cout << " norm: " << s << ": " << duration.count() << " ms" << endl;
return duration.count();
}
template <typename T>
long long benchmark_seg(int n, string s)
{
auto t1 = std::chrono::high_resolution_clock::now();
int res = count_primes_seg<T>(n);
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
cout << " seg: " << s << ": " << duration.count() << " ms" << endl;
return duration.count();
}
int main() {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);
std::cout << std::fixed << std::setprecision(3);
for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) {
cout << "N = " << n << ": " << endl;
{
long long a = benchmark<bool>(n, "bool");
long long b = benchmark<char>(n, "char");
cout << " norm: bool is ";
if (a < b) {
cout << ((double)b / a) << " times FASTER" << endl;
} else {
cout << ((double)a / b) << " times SLOWER" << endl;
}
}
{
long long a = benchmark_seg<bool>(n, "bool");
long long b = benchmark_seg<char>(n, "char");
cout << " seg: bool is ";
if (a < b) {
cout << ((double)b / a) << " times FASTER" << endl;
} else {
cout << ((double)a / b) << " times SLOWER" << endl;
}
}
cout << endl;
}
}
@jxu
Copy link

jxu commented Feb 16, 2023

Is there an explanation for primesieve stating "The segment size must not be smaller than the square root of the sieving limit otherwise the run-time complexity of the algorithm deteriorates." I'm not sure why. Should that be mentioned in the article?

@jakobkogler
Copy link
Author

@jxu My guess is, that if the segment is small, then most of the prime numbers up to the square root, will not have a single multiple in the range. So you do a lot of operations - figuring out if there is a multiple in the range - just for nothing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment