Skip to content

Instantly share code, notes, and snippets.

@tomilov
Created May 9, 2024 22:56
Show Gist options
  • Save tomilov/a4186d88227d845a8d6845fe42757aed to your computer and use it in GitHub Desktop.
Save tomilov/a4186d88227d845a8d6845fe42757aed to your computer and use it in GitHub Desktop.
Sort and deduplicate 16 non-zero values using AVX-512F + AVX-512BW
#include <algorithm>
#include <iterator>
#include <cstdint>
#include <numeric>
#include <type_traits>
#include <bitset>
#include <cassert>
#include <random>
#include <iostream>
#include <iomanip>
#include <x86intrin.h>
// ls ssesort.cpp | entr -ncs 'clang++ -march=native -O3 -Wall -Wextra -stdlib=libc++ -std=c++23 -o ssesort ssesort.cpp && ./ssesort'
// Demo: rank-based sort + de-duplication of 16-bit values held in a single
// 512-bit register.
//
// Prints three lines to stdout:
//   1. the raw input lanes,
//   2. the non-zero slots left by the SIMD ranking pass,
//   3. the result of std::sort + std::unique on the same input, so the two
//      answers can be compared by eye.
int main()
{
    using Lane = uint16_t;
    constexpr size_t lane_count = sizeof(__m512i) / sizeof(Lane);

    // Writes every element of [first, last) followed by a space, then a newline.
    const auto print_line = [](const Lane* first, const Lane* last) {
        for (; first != last; ++first) {
            std::cout << *first << " ";
        }
        std::cout << "\n";
    };

    // Build the input: 1..lane_count in random order, then overwrite the second
    // half with a sample drawn from the first half. The sample size equals the
    // size of the first half, so every value kept in the first half is expected
    // to appear again in the second half (i.e. the input contains duplicates).
    alignas(64) Lane lanes[lane_count];
    std::iota(std::begin(lanes), std::end(lanes), Lane{1});
    std::mt19937 rng(std::random_device{}());
    std::shuffle(std::begin(lanes), std::end(lanes), rng);
    Lane* const half = std::begin(lanes) + lane_count / 2;
    const auto tail_len = std::distance(half, std::end(lanes));
    std::sample(std::begin(lanes), half, half, tail_len, rng);

    print_line(std::begin(lanes), std::end(lanes));

    // Ranking pass: for each element, count how many lanes hold a strictly
    // smaller value and use that count as the element's output slot. Equal
    // values get the same count and therefore collapse onto one slot; slots
    // that nobody claims keep their initial zero (inputs start at 1, so zero
    // never collides with real data).
    const __m512i haystack = _mm512_load_si512(lanes);
    alignas(64) Lane ranked[lane_count] = {};
    for (size_t lane = 0; lane < lane_count; ++lane) {
        const Lane key = lanes[lane];
        const __m512i probe = _mm512_set1_epi16(key);
        const __mmask32 below = _mm512_cmplt_epu16_mask(haystack, probe);
        const uint32_t slot = _mm_popcnt_u32(_cvtmask32_u32(below));
        ranked[slot] = key;
    }

    // Disabled experiment, kept verbatim from the original.
    // NOTE(review): looks like an in-register replacement for the std::remove
    // step below; `indices` is not defined anywhere in this file - confirm
    // before reviving.
    //__m512i r = _mm512_load_si512(&result);
    //__mmask32 z = _mm512_cmpneq_epi16_mask(_mm512_setzero_si512(), r);
    //__m512i c = _mm512_maskz_compress_epi16(z, r);
    //_mm512_store_si512(&result, c);
    //values = _mm512_permutexvar_epi16(_mm512_load_si512(&indices), values);

    // Squeeze out the unused (zero) slots and print what is left.
    Lane* const ranked_end = std::remove(std::begin(ranked), std::end(ranked), Lane{0});
    print_line(std::begin(ranked), ranked_end);

    // Reference answer from the standard library.
    std::sort(std::begin(lanes), std::end(lanes));
    Lane* const unique_end = std::unique(std::begin(lanes), std::end(lanes));
    print_line(std::begin(lanes), unique_end);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment