Skip to content

Instantly share code, notes, and snippets.

@chris-b1
Created October 19, 2016 01:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chris-b1/59743744458a2e0a628f84a817b14a50 to your computer and use it in GitHub Desktop.
Save chris-b1/59743744458a2e0a628f84a817b14a50 to your computer and use it in GitHub Desktop.
#include <numeric>
#include <vector>
#include <iostream>
#include <string>
#include <random>
#include <chrono>
#include <algorithm>
#include <cstdint>
#include <cmath>
#include <memory>
#include <cassert>
#include <intrin.h>
static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
static inline bool GetBit(const uint8_t* bits, int i) {
return static_cast<bool>(bits[i / 8] & kBitmask[i % 8]);
}
static inline int64_t CeilByte(int64_t size) {
return (size + 7) & ~7;
}
static inline int64_t BytesForBits(int64_t size) {
return CeilByte(size) / 8;
}
static inline void SetBit(uint8_t* bits, int i) {
bits[i / 8] |= kBitmask[i % 8];
}
struct Values {
Values(size_t N, double percent_missing) : mask(new uint8_t[BytesForBits(N)])
{
memset(mask.get(), 0, BytesForBits(N));
std::uniform_real_distribution<double> uniform(0., 1.);
std::uniform_real_distribution<double> values(0., 100000.);
std::default_random_engine eng(42);
for (int i = 0; i < N; ++i) {
if (uniform(eng) < percent_missing) {
// std::cout << "set null " << i << '\n';
SetBit(mask.get(), i);
data.push_back(std::nan(""));
}
else {
data.push_back(uniform(eng));
}
}
}
std::vector<double> data;
std::unique_ptr<uint8_t[]> mask;
};
double sum_with_nan(double *data, size_t size) {
double ans = 0.0;
for (int i = 0; i < size; ++i) {
double val = data[i];
if (val == val) {
ans += val;
}
}
return ans;
}
template <int NumBlocks>
uint64_t popcount(uint64_t* data) {
// don't make it here
}
template <>
uint64_t popcount<1>(uint64_t* data) {
return _mm_popcnt_u64(data[0]);
}
template <>
uint64_t popcount<2>(uint64_t* data) {
return _mm_popcnt_u64(data[0]) + _mm_popcnt_u64(data[1]);
}
template <>
uint64_t popcount<4>(uint64_t* data) {
return (_mm_popcnt_u64(data[0]) + _mm_popcnt_u64(data[1]) +
_mm_popcnt_u64(data[2]) + _mm_popcnt_u64(data[3]));
}
template <int NumBlocks>
double sum_with_bitmask(double *data, uint8_t* mask, size_t size) {
double ans = 0.0;
constexpr size_t block_size = (64 * NumBlocks);
size_t num_blocks = size / block_size;
for (int block = 0; block < num_blocks; ++block) {
uint64_t null_count = popcount<NumBlocks>(reinterpret_cast<uint64_t*>(mask + block * block_size / 8));
// uint64_t null_count = __popcnt64(*reinterpret_cast<int64_t*>(mask + block * 8));
// std::cout << "null_count = " << null_count << '\n';
if (null_count) {
for (int i = 0; i < block_size; ++i) {
if (!GetBit(mask, i + block * block_size)) {
//std::cout << "checking and summing element i = " << i + block * block_size << '\n';
ans += data[i + block * block_size];
}
}
}
else {
for (int i = 0; i < block_size; ++i) {
// std::cout << "summing element i = " << i + block * block_size << '\n';
ans += data[i + block * block_size];
}
}
}
// remainder of elements after complete blocks
for (int j = 0; j < size % block_size; ++j) {
// std::cout << "summing element j = " << j + num_blocks * 64 << '\n';
if (!GetBit(mask, j + num_blocks * block_size)) {
//std::cout << "summing element j = " << j + num_blocks * block_size << '\n';
ans += data[j + num_blocks * block_size];
}
}
return ans;
}
template <int NumBlocks>
void timeit(size_t N, double percent_missing) {
Values v(N, percent_missing);
auto start = std::chrono::system_clock::now();
auto a = sum_with_bitmask<NumBlocks>(v.data.data(), v.mask.get(), v.data.size());
auto end = std::chrono::system_clock::now();
auto bitmask_duration = (float)(std::chrono::duration_cast<std::chrono::milliseconds> (end - start).count());
start = std::chrono::system_clock::now();
auto b = sum_with_nan(v.data.data(), v.data.size());
end = std::chrono::system_clock::now();
auto nan_duration = (float) (std::chrono::duration_cast<std::chrono::milliseconds> (end - start).count());
if (a != b) {
std::cout << "fail!\n";
}
std::cout << "N = " << N << " missing = " << percent_missing << " blocks = " << NumBlocks << '\n';
std::cout << "Bitmask / NAN = " << bitmask_duration / nan_duration << '\n';
}
int main()
{
timeit<1>(100000000, 0.00);
timeit<2>(100000000, 0.00);
timeit<4>(100000000, 0.00);
timeit<1>(100000000, 0.01);
timeit<2>(100000000, 0.01);
timeit<4>(100000000, 0.01);
timeit<1>(100000000, 0.05);
timeit<2>(100000000, 0.05);
timeit<4>(100000000, 0.05);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment