Created
October 1, 2020 11:14
-
-
Save Mytherin/5c1b18ae62d7694f7c8be847dba3f6ef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <chrono> | |
#include <iostream> | |
#include <stdlib.h> | |
#include <bitset> | |
#include <vector> | |
// #include "x86/avx.h" | |
// #include "x86/avx512f.h" | |
// #include "x86/avx512h.h" | |
#include <immintrin.h> | |
// clang++ -std=c++11 test-nulls.cpp -O3 -mavx512f && ./a.out | |
// clang++ -std=c++11 test-nulls.cpp -O3 -DDISABLE_AVX512 && ./a.out | |
/* | |
<< Null Percentage 1% >> | |
Selectivity 100% | |
SelVector 193ms | |
Bitmask (no SIMD) 117ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 197ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 220ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 305ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 385ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 282ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 169ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 125ms | |
Bitmask (no SIMD) 113ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 67ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
<< Null Percentage 10% >> | |
Selectivity 100% | |
SelVector 186ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 196ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 225ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 348ms | |
Bitmask (no SIMD) 117ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 469ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 326ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 180ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 128ms | |
Bitmask (no SIMD) 112ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 65ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
<< Null Percentage 25% >> | |
Selectivity 100% | |
SelVector 178ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 189ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 218ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 341ms | |
Bitmask (no SIMD) 117ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 465ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 324ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 176ms | |
Bitmask (no SIMD) 115ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 124ms | |
Bitmask (no SIMD) 108ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 67ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
<< Null Percentage 50% >> | |
Selectivity 100% | |
SelVector 165ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 176ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 206ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 331ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 458ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 320ms | |
Bitmask (no SIMD) 116ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 166ms | |
Bitmask (no SIMD) 108ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 118ms | |
Bitmask (no SIMD) 103ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 66ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
<< Null Percentage 75% >> | |
Selectivity 100% | |
SelVector 151ms | |
Bitmask (no SIMD) 112ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 161ms | |
Bitmask (no SIMD) 112ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 194ms | |
Bitmask (no SIMD) 113ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 320ms | |
Bitmask (no SIMD) 111ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 450ms | |
Bitmask (no SIMD) 108ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 313ms | |
Bitmask (no SIMD) 104ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 155ms | |
Bitmask (no SIMD) 100ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 109ms | |
Bitmask (no SIMD) 99ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 65ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
<< Null Percentage 99% >> | |
Selectivity 100% | |
SelVector 81ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 100): 112ms | |
Selectivity 99% | |
SelVector 86ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 99): 112ms | |
Selectivity 95% | |
SelVector 99ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 95): 112ms | |
Selectivity 75% | |
SelVector 153ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 75): 112ms | |
Selectivity 50% | |
SelVector 208ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 50): 112ms | |
Selectivity 25% | |
SelVector 145ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 25): 112ms | |
Selectivity 5% | |
SelVector 81ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 5): 112ms | |
Selectivity 1% | |
SelVector 66ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity 1): 112ms | |
Selectivity -1% | |
SelVector 52ms | |
Bitmask (no SIMD) 98ms | |
Bitmask (SIMD) (Selectivity -1): 112ms | |
*/ | |
using namespace std; | |
#define VECTOR_SIZE 1024 | |
#define DATA_SIZE 102400000 | |
#define VECTOR_COUNT (DATA_SIZE / VECTOR_SIZE) | |
int32_t *aligned_alloc(int32_t count) { | |
auto ptr = new uint8_t[count * sizeof(int32_t) + 63]; | |
return (int32_t*)(((uintptr_t(ptr) + 63) / 64) * 64); | |
} | |
struct nullmask_t { | |
union { | |
uint64_t mask_u64[VECTOR_SIZE / 64]; | |
uint32_t mask_u32[VECTOR_SIZE / 32]; | |
uint16_t mask_u16[VECTOR_SIZE / 16]; | |
uint8_t mask_u8[VECTOR_SIZE / 8]; | |
} mask_union; | |
void set_index(size_t i, bool is_null) { | |
uint64_t mask_index = i / 64; | |
uint64_t index_in_mask = i - mask_index * 64; | |
if (is_null) { | |
mask_union.mask_u64[mask_index] |= uint64_t(1) << index_in_mask; | |
} else { | |
mask_union.mask_u64[mask_index] &= ~(uint64_t(1) << index_in_mask); | |
} | |
} | |
bool get_index(size_t i) { | |
uint64_t mask_index = i / 64; | |
uint64_t index_in_mask = i - mask_index * 64; | |
return mask_union.mask_u64[mask_index] & (uint64_t(1) << index_in_mask); | |
} | |
static void or_nullmask(const nullmask_t &a, const nullmask_t &b, nullmask_t &result) { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
result.mask_union.mask_u64[i] = a.mask_union.mask_u64[i] | b.mask_union.mask_u64[i]; | |
} | |
} | |
static void or_nullmask_inplace(const nullmask_t &a, nullmask_t &result) { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
result.mask_union.mask_u64[i] |= a.mask_union.mask_u64[i]; | |
} | |
} | |
static void and_nullmask(const nullmask_t &a, const nullmask_t &b, nullmask_t &result) { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
result.mask_union.mask_u64[i] = a.mask_union.mask_u64[i] & b.mask_union.mask_u64[i]; | |
} | |
} | |
static void and_nullmask_inplace(const nullmask_t &a, nullmask_t &result) { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
result.mask_union.mask_u64[i] &= a.mask_union.mask_u64[i]; | |
} | |
} | |
void reset() { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
mask_union.mask_u64[i] = 0; | |
} | |
} | |
void set() { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
mask_union.mask_u64[i] = ~0; | |
} | |
} | |
void assign(const nullmask_t &other) { | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
mask_union.mask_u64[i] = other.mask_union.mask_u64[i]; | |
} | |
} | |
}; | |
struct Vector { | |
Vector(){} | |
Vector(int32_t *data, nullmask_t *mask) : data(data), mask(mask) {} | |
int32_t *data; | |
nullmask_t *mask; | |
}; | |
size_t sel_vector_lt(Vector &a, int32_t constant, size_t count, uint16_t *result_sel) { | |
size_t result_count = 0; | |
for(uint64_t i = 0; i < count / 64; i++) { | |
auto mask = a.mask->mask_union.mask_u64[i]; | |
if (mask == 0) { | |
// nothing valid: skip | |
continue; | |
} else if (mask == ~uint64_t(0)) { | |
// everything valid: no need to check mask | |
for(size_t j = 0; j < 64; j++) { | |
size_t index = i * 64 + j; | |
result_sel[result_count] = index; | |
result_count += a.data[index] <= constant; | |
} | |
} else { | |
// partially valid: check mask | |
for(uint64_t j = 0; j < 64; j++) { | |
uint64_t index = i * 64 + j; | |
result_sel[result_count] = index; | |
result_count += a.data[index] <= constant && (mask & (uint64_t(1) << j)); | |
} | |
} | |
} | |
return result_count; | |
} | |
void sel_vector_addition(const Vector &a , const Vector &b , size_t count, const uint16_t *sel, Vector &result ) { | |
nullmask_t::and_nullmask(*a.mask, *b.mask, *result.mask); | |
for(size_t i = 0; i < count; i++) { | |
result.data[sel[i]] = a.data[sel[i]] + b.data[sel[i]]; | |
} | |
} | |
void sel_vector_summation(const Vector &a , size_t count, const uint16_t *sel, int64_t &sum) { | |
for(size_t i = 0; i < count; i++) { | |
size_t index = sel[i]; | |
sum += a.data[index] * a.mask->get_index(index); | |
} | |
} | |
int64_t selection_vector_benchmark(Vector *a, Vector *b, Vector *c, int32_t constant) { | |
uint16_t sel_vector[VECTOR_SIZE]; | |
int32_t *intermediate_data = aligned_alloc(VECTOR_SIZE); | |
nullmask_t intermediate_mask; | |
Vector intermediate(intermediate_data, &intermediate_mask); | |
int64_t sum = 0; | |
auto start = chrono::system_clock::now(); | |
for(int i = 0; i < VECTOR_COUNT; i++) { | |
size_t result_count = sel_vector_lt(c[i], constant, VECTOR_SIZE, sel_vector); | |
sel_vector_addition(a[i], b[i], result_count, sel_vector, intermediate); | |
sel_vector_summation(intermediate, result_count, sel_vector, sum); | |
} | |
auto end = chrono::system_clock::now(); | |
auto elapsed = | |
std::chrono::duration_cast<std::chrono::milliseconds>(end - start); | |
std::cout << "SelVector " << elapsed.count() << "ms\n"; | |
return sum; | |
} | |
#ifndef DISABLE_AVX512 | |
void bitmask_lt(const Vector &a , const __m512i constant , size_t count, nullmask_t &sel) { | |
for(size_t i = 0, target = count / 16; i < target; i++) { | |
__m512i a_mm512 = _mm512_load_epi32(a.data + i * 16); | |
sel.mask_union.mask_u16[i] = _mm512_cmple_epi32_mask(a_mm512, constant); | |
} | |
nullmask_t::and_nullmask_inplace(*a.mask, sel); | |
} | |
void bitmask_addition(const Vector &a , const Vector &b , size_t count, const nullmask_t &sel, Vector &result) { | |
nullmask_t addition_mask; | |
nullmask_t::and_nullmask(*a.mask, *b.mask, *result.mask); | |
nullmask_t::and_nullmask(*result.mask, sel, addition_mask); | |
for(size_t i = 0, target = count / 16; i < target; i++) { | |
__m512i a_mm512 = _mm512_load_epi32(a.data + i * 16); | |
__m512i b_mm512 = _mm512_load_epi32(b.data + i * 16); | |
auto mask = addition_mask.mask_union.mask_u16[i]; | |
auto result_mm512 = _mm512_maskz_add_epi32(mask, a_mm512, b_mm512); | |
_mm512_store_epi32(result.data + i * 16, result_mm512); | |
} | |
} | |
void bitmask_summation(const Vector &a , const nullmask_t &sel, size_t count, int64_t &sum) { | |
nullmask_t sum_mask; | |
nullmask_t::and_nullmask(*a.mask, sel, sum_mask); | |
for(size_t i = 0, target = count / 16; i < target; i++) { | |
__m512i a_mm512 = _mm512_load_epi32(a.data + i * 16); | |
sum += _mm512_mask_reduce_add_epi32(sum_mask.mask_union.mask_u16[i], a_mm512); | |
} | |
} | |
int64_t bitmask_benchmark(Vector *a, Vector *b, Vector *c, int32_t constant) { | |
nullmask_t selection_mask; | |
int32_t *constant_i32 = aligned_alloc(VECTOR_SIZE); | |
for(int i = 0; i < VECTOR_SIZE; i++) { | |
constant_i32[i] = constant; | |
} | |
__m512i constant_mm512 = _mm512_load_epi32(constant_i32); | |
int32_t *intermediate_data = aligned_alloc(VECTOR_SIZE); | |
nullmask_t intermediate_mask; | |
Vector intermediate(intermediate_data, &intermediate_mask); | |
int64_t sum = 0; | |
auto start = chrono::system_clock::now(); | |
for(int i = 0; i < VECTOR_COUNT; i++) { | |
bitmask_lt(c[i], constant_mm512, VECTOR_SIZE, selection_mask); | |
bitmask_addition(a[i], b[i], VECTOR_SIZE, selection_mask, intermediate); | |
bitmask_summation(intermediate, selection_mask, VECTOR_SIZE, sum); | |
} | |
auto end = chrono::system_clock::now(); | |
auto elapsed = | |
std::chrono::duration_cast<std::chrono::milliseconds>(end - start); | |
std::cout << "Bitmask (SIMD) (Selectivity " << constant << "): " << elapsed.count() << "ms\n"; | |
return sum; | |
} | |
#endif | |
void bitmask_nosimd_lt(const Vector &a , const int32_t constant, nullmask_t &sel) { | |
for(size_t i = 0, target = VECTOR_SIZE / 64; i < target; i++) { | |
auto mask = a.mask->mask_union.mask_u64[i]; | |
for(size_t j = 0; j < 64; j++) { | |
size_t index = i * 64 + j; | |
mask &= ~(uint64_t(!(a.data[index] <= constant)) << j); | |
} | |
sel.mask_union.mask_u64[i] = mask; | |
} | |
} | |
void bitmask_nosimd_addition(const Vector &a , const Vector &b, const nullmask_t &sel, Vector &result) { | |
nullmask_t::and_nullmask(*a.mask, *b.mask, *result.mask); | |
for(size_t i = 0; i < VECTOR_SIZE; i++) { | |
result.data[i] = a.data[i] + b.data[i]; | |
} | |
} | |
void bitmask_nosimd_sum(const Vector &a, const nullmask_t &sel, int64_t &result) { | |
nullmask_t filter_mask; | |
nullmask_t::and_nullmask(sel, *a.mask, filter_mask); | |
for(size_t i = 0; i < VECTOR_SIZE / 64; i++) { | |
auto mask = filter_mask.mask_union.mask_u64[i]; | |
if (mask == 0) { | |
// nothing valid: skip | |
continue; | |
} else if (mask == ~uint64_t(0)) { | |
// everything valid: no need to check mask | |
for(size_t j = 0; j < 64; j++) { | |
size_t index = i * 64 + j; | |
result += a.data[index]; | |
} | |
} else { | |
// partially valid: check mask | |
for(size_t j = 0; j < 64; j++) { | |
size_t index = i * 64 + j; | |
result += a.data[index] * ((mask & (uint64_t(1) << j)) >> j); | |
} | |
} | |
} | |
} | |
int64_t bitmask_benchmark_nosimd(Vector *a, Vector *b, Vector *c, int32_t constant) { | |
nullmask_t selection_mask; | |
int32_t *intermediate_data = aligned_alloc(VECTOR_SIZE); | |
nullmask_t intermediate_mask; | |
Vector intermediate(intermediate_data, &intermediate_mask); | |
int64_t sum = 0; | |
auto start = chrono::system_clock::now(); | |
for(int i = 0; i < VECTOR_COUNT; i++) { | |
bitmask_nosimd_lt(c[i], constant, selection_mask); | |
bitmask_nosimd_addition(a[i], b[i], selection_mask, intermediate); | |
bitmask_nosimd_sum(intermediate, selection_mask, sum); | |
} | |
auto end = chrono::system_clock::now(); | |
auto elapsed = | |
std::chrono::duration_cast<std::chrono::milliseconds>(end - start); | |
std::cout << "Bitmask (no SIMD) " << elapsed.count() << "ms\n"; | |
return sum; | |
} | |
int64_t hardcoded(Vector *a, Vector *b, Vector *c, int32_t constant) { | |
int64_t sum = 0; | |
auto start = chrono::system_clock::now(); | |
for(int i = 0; i < VECTOR_COUNT; i++) { | |
nullmask_t result_mask; | |
nullmask_t::and_nullmask(*a[i].mask, *b[i].mask, result_mask); | |
nullmask_t::and_nullmask_inplace(*c[i].mask, result_mask); | |
for(size_t j = 0; j < VECTOR_SIZE; j++) { | |
if (result_mask.get_index(j)) { | |
if (c[i].data[j] <= constant) { | |
sum += a[i].data[j] + b[i].data[j]; | |
} | |
} | |
} | |
} | |
auto end = chrono::system_clock::now(); | |
auto elapsed = | |
std::chrono::duration_cast<std::chrono::milliseconds>(end - start); | |
std::cout << "Hardcoded " << elapsed.count() << "ms\n"; | |
return sum; | |
} | |
int main() { | |
int32_t *a_data = aligned_alloc(DATA_SIZE); | |
int32_t *b_data = aligned_alloc(DATA_SIZE); | |
int32_t *c_data = aligned_alloc(DATA_SIZE); | |
nullmask_t *a_nullmask = new nullmask_t[VECTOR_COUNT]; | |
nullmask_t *b_nullmask = new nullmask_t[VECTOR_COUNT]; | |
nullmask_t *c_nullmask = new nullmask_t[VECTOR_COUNT]; | |
Vector *a = new Vector[VECTOR_COUNT]; | |
Vector *b = new Vector[VECTOR_COUNT]; | |
Vector *c = new Vector[VECTOR_COUNT]; | |
for(size_t i = 0; i < DATA_SIZE; i++) { | |
a_data[i] = rand() % 101; | |
b_data[i] = rand() % 101; | |
c_data[i] = rand() % 101; | |
} | |
for(size_t i = 0; i < VECTOR_COUNT; i++) { | |
a[i].data = a_data + i * VECTOR_SIZE; | |
b[i].data = b_data + i * VECTOR_SIZE; | |
c[i].data = c_data + i * VECTOR_SIZE; | |
a[i].mask = a_nullmask + i; | |
b[i].mask = b_nullmask + i; | |
c[i].mask = c_nullmask + i; | |
a[i].mask->set(); | |
b[i].mask->set(); | |
c[i].mask->set(); | |
} | |
// SELECT SUM(a+b) FROM tbl WHERE c <= CONSTANT | |
// CONSTANT determines selectivity (0-100%) | |
vector<int> null_percentages {1, 10, 25, 50, 75, 99}; | |
vector<int> selectivities { 100, 99, 95, 75, 50, 25, 5, 1, -1}; | |
// vector<int> null_percentages {10}; | |
// vector<int> selectivities { 95}; | |
vector<string> names { "Hardcoded", "Selection Vector", "Bitmask (No SIMD)", "Bitmask"}; | |
for(auto null_percentage : null_percentages) { | |
for(size_t i = 0; i < VECTOR_COUNT; i++) { | |
for(size_t j = 0; j < VECTOR_SIZE; j++) { | |
a[i].mask->set_index(j, (rand() % 101) > null_percentage); | |
b[i].mask->set_index(j, (rand() % 101) > null_percentage); | |
c[i].mask->set_index(j, (rand() % 101) > null_percentage); | |
} | |
} | |
std::cout << std::endl << "<< Null Percentage " << null_percentage << "% >>" << std::endl; | |
for(auto selectivity : selectivities) { | |
std::cout << std::endl << "Selectivity " << selectivity << "%" << std::endl; | |
vector<int64_t> results; | |
int64_t hardcoded_res = hardcoded(a, b, c, selectivity); | |
int64_t selvector_res = selection_vector_benchmark(a, b, c, selectivity); | |
int64_t bitmasknosimd_res = bitmask_benchmark_nosimd(a, b, c, selectivity); | |
results.push_back(hardcoded_res); | |
results.push_back(selvector_res); | |
results.push_back(bitmasknosimd_res); | |
#ifndef DISABLE_AVX512 | |
int64_t bitmask_res = bitmask_benchmark(a, b, c, selectivity); | |
results.push_back(bitmask_res); | |
#endif | |
for(size_t j = 0; j < results.size(); j++) { | |
for(size_t k = 0; k < results.size(); k++) { | |
if (results[j] != results[k]) { | |
std::cout << "Discrepency in results between " << names[j] << " and " << names[k] << std::endl; | |
for(size_t l = 0; l < results.size(); l++) { | |
std::cout << names[l] << ": " << results[l] << std::endl; | |
} | |
exit(1); | |
} | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment