Skip to content

Instantly share code, notes, and snippets.

@jdtournier
Last active April 18, 2019 09:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdtournier/09c2986321fd3532cc3e2a1fdb7496bc to your computer and use it in GitHub Desktop.
Save jdtournier/09c2986321fd3532cc3e2a1fdb7496bc to your computer and use it in GitHub Desktop.
vector<bool> tests
===========================================
g++
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7f4d05b3c010
0.012332 constructor
4.54972 find_true = 268435203
5.87218 count_true = 2
11.6554 rotate_64bits
0.002081 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7f4d03b3b010
7.40615 copy_test
12.6272 swap_ranges
real 0m42.143s
user 0m42.083s
sys 0m0.027s
===========================================
g++ -O2
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7ff6810ae010
0.012357 constructor
0.298897 find_true = 268435203
0.286135 count_true = 2
0.68443 rotate_64bits
0.002139 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7ff67f0ad010
0.737079 copy_test
0.753826 swap_ranges
real 0m2.793s
user 0m2.757s
sys 0m0.033s
===========================================
g++ -O3
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7ff92a23a010
0.012148 constructor
0.2872 find_true = 268435203
0.287541 count_true = 2
0.734427 rotate_64bits
0.002044 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7ff928239010
0.667283 copy_test
0.76056 swap_ranges
real 0m2.769s
user 0m2.742s
sys 0m0.023s
===========================================
clang++
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7fc402c34010
0.012382 constructor
6.00291 find_true = 268435203
6.57037 count_true = 2
12.3009 rotate_64bits
0.00219 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7fc400c33010
7.41892 copy_test
13.1829 swap_ranges
real 0m45.508s
user 0m45.303s
sys 0m0.063s
===========================================
clang++ -O2
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7fd6b0a28010
0.012262 constructor
0.416594 find_true = 268435203
0.423148 count_true = 2
0.838044 rotate_64bits
0.002117 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7fd6aea27010
0.695481 copy_test
0.992936 swap_ranges
real 0m3.399s
user 0m3.375s
sys 0m0.020s
===========================================
clang++ -O3
===========================================
expected best-case memory consumption for 268435456 bits: 33554432 bytes (32 MB)
<- allocating 33554432 bytes (32 MB) at 0x7f7d85c33010
0.012232 constructor
0.414085 find_true = 268435203
0.421944 count_true = 2
0.828335 rotate_64bits
0.002097 fill_true
<- allocating 33554432 bytes (32 MB) at 0x7f7d83c32010
0.689993 copy_test
0.990964 swap_ranges
real 0m3.378s
user 0m3.352s
sys 0m0.023s
#!/bin/bash
function run {
echo "==========================================="
echo "$@"
echo "==========================================="
"$@" vector_bool.cpp -o vector_bool
time ./vector_bool
echo ""
}
run g++
run g++ -O2
run g++ -O3
run clang++
run clang++ -O2
run clang++ -O3
#include <vector>
#include <algorithm>
#include <chrono>
#include <iostream>
// to track memory allocations:
void * operator new(decltype(sizeof(0)) n)
{
void * p = malloc (n);
std::cerr << " <- allocating " << n << " bytes (" << (double(n)/(1<<20)) << " MB) at " << p << "\n";
return p;
}
void operator delete(void * p)
{
//std::cerr << " <- freeing memory at " << p << "\n";
free (p);
}
// for precise timings:
class Timer {
public:
explicit Timer () : from (std::chrono::high_resolution_clock::now()) { }
void start () {
from = std::chrono::high_resolution_clock::now();
}
double elapsed() const {
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - from).count() * 1.0e-6;
}
private:
std::chrono::high_resolution_clock::time_point from;
};
// A bunch of operations to be tested:
typedef std::vector<bool> VB;
#define NOINLINE __attribute__((noinline))
NOINLINE int find_true(const VB &c) {
auto pos = std::find(c.begin(), c.end(), true);
if (pos == c.end())
return -1;
return pos - c.begin();
}
NOINLINE int count_true(const VB &c) {
return std::count(c.begin(), c.end(), true);
}
NOINLINE void rotate_64bits(VB &c) {
std::rotate(c.begin(), c.begin()+64, c.end());
}
NOINLINE void fill_true(VB &c) {
std::fill(c.begin(), c.end(), true);
}
NOINLINE void copy_test(const VB &src, VB &dst) {
std::copy(src.begin(), src.end(), dst.begin());
}
NOINLINE void swap_ranges(VB &a, VB &b) {
std::swap_ranges(a.begin(), a.end(), b.begin());
}
NOINLINE VB dummy()
{
VB v(1000 * 1024);
fill_true(v);
return v;
}
int main()
{
using namespace std;
Timer t;
int ret;
const size_t numel = 1<<28;
cerr << "expected best-case memory consumption for " << numel << " bits: " << (numel+7)/8 << " bytes (" << (double((numel+7)/8)/(1<<20)) << " MB)\n\n";
VB v(numel); // optimizes away with libc++!
cout << t.elapsed() << " constructor\n";
v[v.size()-10] = true;
v[v.size()-253] = true;
t.start();
ret = find_true(v);
cout << t.elapsed() << " find_true = " << ret << "\n";
t.start();
ret = count_true(v);
cout << t.elapsed() << " count_true = " << ret << "\n";
t.start();
rotate_64bits(v);
cout << t.elapsed() << " rotate_64bits\n";
t.start();
fill_true(v);
cout << t.elapsed() << " fill_true\n";
VB d (v.size());
t.start();
copy_test(v, d);
cout << t.elapsed() << " copy_test\n";
t.start();
swap_ranges(v, d);
cout << t.elapsed() << " swap_ranges\n";
return 0;
}
@jdtournier
Copy link
Author

jdtournier commented Apr 18, 2019

Inspired from this blog post, with the code lifted (with thanks) from the code sample referenced in Peter Cordes' comment.

@jdtournier
Copy link
Author

Also tested on MSYS2 (Win10 Core i7) - results are essentially identical, for both g++ and clang++

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment