Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <popcntintrin.h>
#include <stdlib.h>
std::vector<int> fill_vector(long size) {
std::vector<int> vec(size);
std::mt19937 rng;
std::uniform_int_distribution<int> dst(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
std::for_each(vec.begin(), vec.end(), [&rng, &dst](int& x) {
x = dst(rng);
});
return std::move(vec);
}
template <typename T>
char slow_cnt(T x) {
char res = 0;
while (x != 0) {
res += x & 1;
x >>= 1;
}
return res;
}
template <typename T>
class population_counter {
std::vector<int> cache_;
public:
population_counter() {
typedef typename std::vector<T>::difference_type dt;
dt min_ = std::numeric_limits<T>::min();
dt max_ = std::numeric_limits<T>::max();
cache_.reserve(max_ - min_ + 1);
std::cout << "cache size is " << max_ - min_ << ", szof=" << sizeof(T) << "\n";
for (dt i = min_; i <= max_; ++i) {
dt idx = i - min_;
cache_.push_back(slow_cnt(i));
}
}
__inline__ inline int operator()(T val) const {
return cache_[val];
}
};
int main(int argc, char** argv) {
long len = 1024;
if (argc == 2) {
len = atoi(argv[1]);
}
auto t1 = std::chrono::high_resolution_clock::now();
auto vec = fill_vector(len * 1024 * 1024); //4G vector
auto t2 = std::chrono::high_resolution_clock::now();
const population_counter<unsigned char> charcnt;
const population_counter<unsigned short int> scnt;
long vsize = vec.size() * sizeof(int);
int* ptr = vec.data();
auto t3 = std::chrono::high_resolution_clock::now();
auto chptr = reinterpret_cast<unsigned char*>(ptr);
long chcnt = 0;
for (long i = 0; i < vsize; ++i) {
chcnt += charcnt(chptr[i]);
}
auto t4 = std::chrono::high_resolution_clock::now();
auto shptr = reinterpret_cast<unsigned short int*>(ptr);
auto shsz = vsize / sizeof(unsigned short int);
long shcnt = 0;
for (long i = 0; i < shsz; ++i) {
shcnt += scnt(shptr[i]);
}
auto t5 = std::chrono::high_resolution_clock::now();
typedef unsigned long long long_tp;
auto lptr = reinterpret_cast<long_tp *>(ptr);
auto lsz = vsize / sizeof(long_tp);
long lcnt = 0;
for (long i = 0; i < lsz; ++i) {
lcnt += _mm_popcnt_u64(lptr[i]);
}
auto t6 = std::chrono::high_resolution_clock::now();
std::cout << "counts are " << chcnt << " " << shcnt << " " << lcnt << "\n";
std::cout <<
(t6 - t5).count() << "\n" <<
(t5 - t4).count() << "\n" <<
(t4 - t3).count() << "\n" <<
(t3 - t2).count() << "\n" <<
(t2 - t1).count() << "\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment