Created
March 10, 2014 04:47
-
-
Save eiennohito/9459622 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <random> | |
#include <chrono> | |
#include <popcntintrin.h> | |
#include <stdlib.h> | |
std::vector<int> fill_vector(long size) { | |
std::vector<int> vec(size); | |
std::mt19937 rng; | |
std::uniform_int_distribution<int> dst(std::numeric_limits<int>::min(), std::numeric_limits<int>::max()); | |
std::for_each(vec.begin(), vec.end(), [&rng, &dst](int& x) { | |
x = dst(rng); | |
}); | |
return std::move(vec); | |
} | |
template <typename T> | |
char slow_cnt(T x) { | |
char res = 0; | |
while (x != 0) { | |
res += x & 1; | |
x >>= 1; | |
} | |
return res; | |
} | |
template <typename T> | |
class population_counter { | |
std::vector<int> cache_; | |
public: | |
population_counter() { | |
typedef typename std::vector<T>::difference_type dt; | |
dt min_ = std::numeric_limits<T>::min(); | |
dt max_ = std::numeric_limits<T>::max(); | |
cache_.reserve(max_ - min_ + 1); | |
std::cout << "cache size is " << max_ - min_ << ", szof=" << sizeof(T) << "\n"; | |
for (dt i = min_; i <= max_; ++i) { | |
dt idx = i - min_; | |
cache_.push_back(slow_cnt(i)); | |
} | |
} | |
__inline__ inline int operator()(T val) const { | |
return cache_[val]; | |
} | |
}; | |
int main(int argc, char** argv) { | |
long len = 1024; | |
if (argc == 2) { | |
len = atoi(argv[1]); | |
} | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
auto vec = fill_vector(len * 1024 * 1024); //4G vector | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
const population_counter<unsigned char> charcnt; | |
const population_counter<unsigned short int> scnt; | |
long vsize = vec.size() * sizeof(int); | |
int* ptr = vec.data(); | |
auto t3 = std::chrono::high_resolution_clock::now(); | |
auto chptr = reinterpret_cast<unsigned char*>(ptr); | |
long chcnt = 0; | |
for (long i = 0; i < vsize; ++i) { | |
chcnt += charcnt(chptr[i]); | |
} | |
auto t4 = std::chrono::high_resolution_clock::now(); | |
auto shptr = reinterpret_cast<unsigned short int*>(ptr); | |
auto shsz = vsize / sizeof(unsigned short int); | |
long shcnt = 0; | |
for (long i = 0; i < shsz; ++i) { | |
shcnt += scnt(shptr[i]); | |
} | |
auto t5 = std::chrono::high_resolution_clock::now(); | |
typedef unsigned long long long_tp; | |
auto lptr = reinterpret_cast<long_tp *>(ptr); | |
auto lsz = vsize / sizeof(long_tp); | |
long lcnt = 0; | |
for (long i = 0; i < lsz; ++i) { | |
lcnt += _mm_popcnt_u64(lptr[i]); | |
} | |
auto t6 = std::chrono::high_resolution_clock::now(); | |
std::cout << "counts are " << chcnt << " " << shcnt << " " << lcnt << "\n"; | |
std::cout << | |
(t6 - t5).count() << "\n" << | |
(t5 - t4).count() << "\n" << | |
(t4 - t3).count() << "\n" << | |
(t3 - t2).count() << "\n" << | |
(t2 - t1).count() << "\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment