Skip to content

Instantly share code, notes, and snippets.

@jakobkogler
Created July 1, 2021 20:36
Show Gist options
  • Save jakobkogler/6e92105c56317f87bd51e9224f4e2787 to your computer and use it in GitHub Desktop.
Save jakobkogler/6e92105c56317f87bd51e9224f4e2787 to your computer and use it in GitHub Desktop.
Benchmark prime sieve speed between vector<char> and vector<bool>
>> g++ -std=c++20 -Ofast a.cpp && ./a.out
N = 30000000:
norm: bool: 157 ms 1857859
norm: char: 257 ms 1857859
norm: bool is 1.637 times FASTER
seg: bool: 177 ms 1857859
seg: char: 57 ms 1857859
seg: bool is 3.105 times SLOWER
seg opt: char: 19 ms 1857859
N = 60000000:
norm: bool: 352 ms 3562115
norm: char: 545 ms 3562115
norm: bool is 1.548 times FASTER
seg: bool: 370 ms 3562115
seg: char: 150 ms 3562115
seg: bool is 2.467 times SLOWER
seg opt: char: 57 ms 3562115
N = 90000000:
norm: bool: 645 ms 5216954
norm: char: 812 ms 5216954
norm: bool is 1.259 times FASTER
seg: bool: 645 ms 5216954
seg: char: 198 ms 5216954
seg: bool is 3.258 times SLOWER
seg opt: char: 68 ms 5216954
N = 200000000:
norm: bool: 1576 ms 11078937
norm: char: 2099 ms 11078937
norm: bool is 1.332 times FASTER
seg: bool: 1378 ms 11078937
seg: char: 507 ms 11078937
seg: bool is 2.718 times SLOWER
seg opt: char: 148 ms 11078937
N = 500000000:
norm: bool: 4388 ms 26355867
norm: char: 6034 ms 26355867
norm: bool is 1.375 times FASTER
seg: bool: 4020 ms 26355867
seg: char: 1617 ms 26355867
seg: bool is 2.486 times SLOWER
seg opt: char: 431 ms 26355867
N = 1000000000:
norm: bool: 10243 ms 50847534
norm: char: 11573 ms 50847534
norm: bool is 1.130 times FASTER
seg: bool: 8229 ms 50847534
seg: char: 3628 ms 50847534
seg: bool is 2.268 times SLOWER
seg opt: char: 810 ms 50847534
#pragma GCC target("avx,avx2,fma")
#pragma GCC optimization("unroll-loops")
#include "bits/stdc++.h"
using namespace std;
template <typename T>
int count_primes_seg_opt(int n) {
const int S = 30000;
int nsqrt = std::round(std::sqrt(n));
std::vector<char> is_prime(nsqrt + 1, true);
std::vector<int> sieve_primes, start_indices;
for (int i = 3; i <= nsqrt; i += 2) {
if (is_prime[i]) {
sieve_primes.push_back(i);
start_indices.push_back((i * i - 1) / 2);
for (int j = i * i; j <= nsqrt; j += 2 * i)
is_prime[j] = false;
}
}
int cnt = 1;
std::vector<char> block(S);
int high = (n - 1) / 2;
for (int low = 0; low <= high; low += S) {
fill(block.begin(), block.end(), true);
for (auto i = 0u; i < sieve_primes.size(); i++) {
int p = sieve_primes[i];
int idx = start_indices[i];
for (; idx < S; idx += p)
block[idx] = false;
start_indices[i] = idx - S;
}
if (low == 0)
block[0] = false;
for (int i = 0; i < S && low + i <= high; i++) {
if (block[i])
cnt++;
}
};
return cnt;
}
template <typename T>
int count_primes_seg(int n) {
const int S = 10000;
vector<int> primes;
int nsqrt = sqrt(n);
vector<T> is_prime(nsqrt + 1, true);
for (int i = 2; i <= nsqrt; i++) {
if (is_prime[i]) {
primes.push_back(i);
for (int j = i * i; j <= nsqrt; j += i)
is_prime[j] = false;
}
}
int result = 0;
vector<T> block(S);
for (int k = 0; k * S <= n; k++) {
fill(block.begin(), block.end(), true);
int start = k * S;
for (int p : primes) {
int start_idx = (start + p - 1) / p;
int j = max(start_idx, p) * p - start;
for (; j < S; j += p)
block[j] = false;
}
if (k == 0)
block[0] = block[1] = false;
for (int i = 0; i < S && start + i <= n; i++) {
if (block[i])
result++;
}
}
return result;
}
template <typename T>
int count_primes(int n) {
vector<T> is_prime(n+1, true);
is_prime[0] = is_prime[1] = false;
for (int i = 2; i * i <= n; i++) {
if (is_prime[i]) {
for (int j = i * i; j <= n; j += i)
is_prime[j] = false;
}
}
int cnt = 0;
for (int i = 2; i <= n; i++) {
if (is_prime[i])
cnt++;
}
return cnt;
}
template <typename T>
long long benchmark(int n, string s)
{
auto t1 = std::chrono::high_resolution_clock::now();
int res = count_primes<T>(n);
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
cout << " norm: " << s << ": " << duration.count() << " ms " << res << endl;
return duration.count();
}
template <typename T>
long long benchmark_seg(int n, string s)
{
auto t1 = std::chrono::high_resolution_clock::now();
int res = count_primes_seg<T>(n);
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
cout << " seg: " << s << ": " << duration.count() << " ms " << res << endl;
return duration.count();
}
template <typename T>
long long benchmark_seg_opt(int n, string s)
{
auto t1 = std::chrono::high_resolution_clock::now();
int res = count_primes_seg_opt<T>(n);
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
cout << " seg opt: " << s << ": " << duration.count() << " ms " << res << endl;
return duration.count();
}
int main() {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);
std::cout << std::fixed << std::setprecision(3);
for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) {
cout << "N = " << n << ": " << endl;
{
long long a = benchmark<bool>(n, "bool");
long long b = benchmark<char>(n, "char");
cout << " norm: bool is ";
if (a < b) {
cout << ((double)b / a) << " times FASTER" << endl;
} else {
cout << ((double)a / b) << " times SLOWER" << endl;
}
}
{
long long a = benchmark_seg<bool>(n, "bool");
long long b = benchmark_seg<char>(n, "char");
cout << " seg: bool is ";
if (a < b) {
cout << ((double)b / a) << " times FASTER" << endl;
} else {
cout << ((double)a / b) << " times SLOWER" << endl;
}
}
long long b = benchmark_seg_opt<char>(n, "char");
cout << endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment