Created
July 1, 2021 19:59
-
-
Save jakobkogler/e6359ea9ced24fe304f1a8af3c9bee0e to your computer and use it in GitHub Desktop.
Benchmark prime sieve speed between vector<char> and vector<bool>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>> g++ -std=c++20 -Ofast benchmark_prime_sieve.cpp && ./a.out | |
N = 30000000: | |
norm: bool: 106 ms | |
norm: char: 235 ms | |
norm: bool is 2.217 times FASTER | |
seg: bool: 147 ms | |
seg: char: 38 ms | |
seg: bool is 3.868 times SLOWER | |
N = 60000000: | |
norm: bool: 300 ms | |
norm: char: 527 ms | |
norm: bool is 1.757 times FASTER | |
seg: bool: 311 ms | |
seg: char: 87 ms | |
seg: bool is 3.575 times SLOWER | |
N = 90000000: | |
norm: bool: 498 ms | |
norm: char: 811 ms | |
norm: bool is 1.629 times FASTER | |
seg: bool: 488 ms | |
seg: char: 143 ms | |
seg: bool is 3.413 times SLOWER | |
N = 200000000: | |
norm: bool: 1305 ms | |
norm: char: 1987 ms | |
norm: bool is 1.523 times FASTER | |
seg: bool: 1164 ms | |
seg: char: 380 ms | |
seg: bool is 3.063 times SLOWER | |
N = 500000000: | |
norm: bool: 3731 ms | |
norm: char: 5210 ms | |
norm: bool is 1.396 times FASTER | |
seg: bool: 3268 ms | |
seg: char: 1214 ms | |
seg: bool is 2.692 times SLOWER | |
N = 1000000000: | |
norm: bool: 7868 ms | |
norm: char: 11066 ms | |
norm: bool is 1.406 times FASTER | |
seg: bool: 7273 ms | |
seg: char: 3039 ms | |
seg: bool is 2.393 times SLOWER |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "bits/stdc++.h" | |
using namespace std; | |
template <typename T> | |
int count_primes_seg(int n) { | |
const int S = 10'000; | |
vector<int> primes; | |
int nsqrt = sqrt(n); | |
vector<T> is_prime(nsqrt + 1, true); | |
for (int i = 2; i <= nsqrt; i++) { | |
if (is_prime[i]) { | |
primes.push_back(i); | |
for (int j = i * i; j <= nsqrt; j += i) | |
is_prime[j] = false; | |
} | |
} | |
int result = 0; | |
vector<T> block(S); | |
for (int k = 0; k * S <= n; k++) { | |
fill(block.begin(), block.end(), true); | |
int start = k * S; | |
for (int p : primes) { | |
int start_idx = (start + p - 1) / p; | |
int j = max(start_idx, p) * p - start; | |
for (; j < S; j += p) | |
block[j] = false; | |
} | |
if (k == 0) | |
block[0] = block[1] = false; | |
for (int i = 0; i < S && start + i <= n; i++) { | |
if (block[i]) | |
result++; | |
} | |
} | |
return result; | |
} | |
template <typename T> | |
int count_primes(int n) { | |
vector<T> is_prime(n+1, true); | |
is_prime[0] = is_prime[1] = false; | |
for (int i = 2; i * i <= n; i++) { | |
if (is_prime[i]) { | |
for (int j = i * i; j <= n; j += i) | |
is_prime[j] = false; | |
} | |
} | |
int cnt = 0; | |
for (int i = 2; i <= n; i++) { | |
if (is_prime[i]) | |
cnt++; | |
} | |
return cnt; | |
/* return is_prime[n]; */ | |
} | |
template <typename T> | |
long long benchmark(int n, string s) | |
{ | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
int res = count_primes<T>(n); | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
cout << " norm: " << s << ": " << duration.count() << " ms" << endl; | |
return duration.count(); | |
} | |
template <typename T> | |
long long benchmark_seg(int n, string s) | |
{ | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
int res = count_primes_seg<T>(n); | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
cout << " seg: " << s << ": " << duration.count() << " ms" << endl; | |
return duration.count(); | |
} | |
int main() { | |
ios_base::sync_with_stdio(false); | |
cin.tie(nullptr); | |
std::cout << std::fixed << std::setprecision(3); | |
for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) { | |
cout << "N = " << n << ": " << endl; | |
{ | |
long long a = benchmark<bool>(n, "bool"); | |
long long b = benchmark<char>(n, "char"); | |
cout << " norm: bool is "; | |
if (a < b) { | |
cout << ((double)b / a) << " times FASTER" << endl; | |
} else { | |
cout << ((double)a / b) << " times SLOWER" << endl; | |
} | |
} | |
{ | |
long long a = benchmark_seg<bool>(n, "bool"); | |
long long b = benchmark_seg<char>(n, "char"); | |
cout << " seg: bool is "; | |
if (a < b) { | |
cout << ((double)b / a) << " times FASTER" << endl; | |
} else { | |
cout << ((double)a / b) << " times SLOWER" << endl; | |
} | |
} | |
cout << endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@jxu My guess is, that if the segment is small, then most of the prime numbers up to the square root, will not have a single multiple in the range. So you do a lot of operations - figuring out if there is a multiple in the range - just for nothing.