Skip to content

Instantly share code, notes, and snippets.

@Endilll
Created September 7, 2023 18:49
Show Gist options
  • Save Endilll/4919fa68cba9608b59dc12faf64e7c9e to your computer and use it in GitHub Desktop.
Save Endilll/4919fa68cba9608b59dc12faf64e7c9e to your computer and use it in GitHub Desktop.
LLVM GH45566 initial reduction
// size_t is spelled out directly; no #include appears in the visible source.
typedef long unsigned int size_t;
namespace std {
inline namespace __1 {
// Minimal stand-in for integral_constant: wraps a compile-time int and exposes
// it through the static member `value`.
template <int __v> struct __attribute__(()) integral_constant {
  static constexpr int value = __v;
};
// bool_constant<B> reuses the int-based wrapper above, so its `value` is an
// int (1 for true, 0 for false) rather than a bool.
template <bool __b> using bool_constant = integral_constant<__b>;
using true_type = bool_constant<true>;
using false_type = bool_constant<false>;
// Dispatch helper behind the _Or/_And aliases. The primary template is only
// declared; the two explicit specializations below supply the members.
template <bool> struct _MetaBase;
template <> struct _MetaBase<true> {
// Re-dispatches on `_First::value`. The next step receives `_First` in the
// `_Result` position, so the incoming `_Result` is not carried forward.
// NOTE(review): this specialization has no _AndImpl member, although _And
// selects _MetaBase<true> for a non-empty pack. The only use of _And visible
// in this file is _And<> (empty pack).
template <class _Result, class _First, class... _Rest>
using _OrImpl __attribute__(()) =
typename _MetaBase<_First::value != true && sizeof...(_Rest) != 0>::
template _OrImpl<_First, _Rest...>;
};
template <> struct _MetaBase<false> {
// Terminal case: both aliases yield `_Result` and ignore the rest of the pack.
template <class _Result, class...> using _OrImpl __attribute__(()) = _Result;
template <class _Result, class...> using _AndImpl __attribute__(()) = _Result;
};
// _Or<Ts...>: starts the _OrImpl chain with false_type as the initial result.
// An empty pack selects _MetaBase<false> and therefore yields false_type.
template <class... _Rest>
using _Or __attribute__(()) =
typename _MetaBase<sizeof...(_Rest) != 0>::template _OrImpl<false_type,
_Rest...>;
// _And<Ts...>: same dispatch through _AndImpl, seeded with true_type.
template <class... _Rest>
using _And __attribute__(()) =
typename _MetaBase<sizeof...(_Rest) != 0>::template _AndImpl<true_type,
_Rest...>;
// Compile-time type selector. The primary template covers the `true` case and
// names the first type argument.
template <bool _Cond, class _OnTrue, class _OnFalse>
struct __attribute__(()) conditional {
  using type = _OnTrue;
};
// Partial specialization for `false`: names the second type argument.
template <class _OnTrue, class _OnFalse>
struct __attribute__(()) conditional<false, _OnTrue, _OnFalse> {
  using type = _OnFalse;
};
// Shorthand for conditional<...>::type.
template <bool _Cond, class _OnTrue, class _OnFalse>
using conditional_t = typename conditional<_Cond, _OnTrue, _OnFalse>::type;
// SFINAE helper. The primary template is declared but left undefined, so
// enable_if<false, T> has no nested `type`.
template <bool _Cond, class _Tp = void> struct __attribute__(()) enable_if;
// Only the `true` case is defined; it exposes the requested type.
template <class _Tp> struct __attribute__(()) enable_if<true, _Tp> {
  using type = _Tp;
};
// Shorthand for enable_if<...>::type (defaults to void).
template <bool _Cond, class _Tp = void>
using enable_if_t = typename enable_if<_Cond, _Tp>::type;
// is_same: the primary template derives from false_type; the partial
// specialization for two identical types derives from true_type.
template <class _Tp, class _Up>
struct __attribute__(()) is_same : public false_type {};
template <class _Tp>
struct __attribute__(()) is_same<_Tp, _Tp> : public true_type {};
// Variable-template shorthand for is_same<...>::value.
template <class _Tp, class _Up>
inline constexpr bool is_same_v = is_same<_Tp, _Up>::value;
// Trait stubs. Apart from is_integral_v, every *_v in this group ignores its
// template argument and evaluates to std::false_type::value.
template <class _Tp> inline constexpr bool is_const_v = std::false_type ::value;
// Only `unsigned int` is specialized as integral; all other types fall back to
// the false_type primary.
template <class _Tp> struct __libcpp_is_integral : public false_type {};
template <> struct __libcpp_is_integral<unsigned int> : public true_type {};
template <class _Tp>
inline constexpr bool is_integral_v = __libcpp_is_integral<_Tp>::value;
template <class _Tp>
inline constexpr bool is_pointer_v = std::false_type ::value;
template <class _Tp>
inline constexpr bool is_lvalue_reference_v = std::false_type ::value;
template <class _Tp>
inline constexpr bool is_rvalue_reference_v = std::false_type ::value;
// Type-transformation stubs: add_const_t, add_lvalue_reference_t,
// add_rvalue_reference_t, add_pointer_t and make_signed_t all name plain `int`;
// remove_reference_t and decay_t hand back their argument unchanged.
template <class _Tp> using add_const_t = int;
template <class _Tp> using remove_reference_t = _Tp;
template <class _Tp> using add_lvalue_reference_t = int;
template <class _Tp> using add_rvalue_reference_t = int;
// __declval/declval are declared without a definition in the visible source,
// so they are only usable in unevaluated contexts such as decltype.
template <class _Tp> _Tp &&__declval(int);
template <class _Tp> decltype(std::__1::__declval<_Tp>(0)) declval() noexcept;
template <class _Tp> using add_pointer_t = int;
// Always bool_constant<0>::value, i.e. false for every type.
template <class _Tp>
inline constexpr bool is_signed_v = bool_constant<0>::value;
template <class _Tp> using decay_t = _Tp;
template <class _Tp> using make_signed_t = int;
// Single-overload forward: casts the lvalue argument to `_Tp &&`. Because the
// parameter is `_Tp &`, `_Tp` can be deduced at the call site; the Mask
// constructors further down call it as `std::forward(value)` with no explicit
// template argument.
// NOTE(review): with a deduced non-reference `_Tp` the result is an rvalue
// reference, so those calls behave like a move rather than a perfect forward.
template <class _Tp>
inline __attribute__(()) __attribute__(()) constexpr _Tp &&
forward(_Tp &__t) noexcept {
return static_cast<_Tp &&>(__t);
}
// void_t maps any argument list to void.
template <class...> using void_t = void;
// Non-template conjunction: always derives from _And<> (empty pack).
struct conjunction : _And<> {};
// The _Args pack is ignored; the value is always conjunction::value.
template <class... _Args>
inline constexpr bool conjunction_v = conjunction::value;
// disjunction forwards its pack to _Or.
template <class... _Args> struct disjunction : _Or<_Args...> {};
} // namespace __1
} // namespace std
// std::make_signed_t is an alias for int in this file, so ssize_t is int here.
using ssize_t = std::make_signed_t<size_t>;
// Default size argument used by the Array and Mask templates.
constexpr size_t array_default_size = 1;
// All-ones size_t (0 - 1 converted to unsigned); has_size compares against it.
constexpr size_t Dynamic = static_cast<size_t>(-1);
// Forward declarations of the array class templates. Array and Mask default
// their size parameter to array_default_size.
template <typename Derived_> struct ArrayBase;
template <typename Value_, size_t Size_ = array_default_size> struct Array;
template <typename Value_, size_t Size_ = array_default_size> struct Mask;
// Declaration only; test11_load_masked calls it when its check evaluates to
// false.
// NOTE(review): presumably the C library's assertion-failure hook — confirm.
extern void __assert_fail(const char *__assertion, const char *__file,
unsigned int __line, const char *__function) throw()
__attribute__(());
namespace std {
inline namespace __1 {
// Empty tag type carrying a compile-time pack of values of type _Tp.
template <class _Tp, _Tp... _Ip> struct __attribute__(()) integer_sequence {};
template <size_t... _Ip>
using index_sequence = integer_sequence<size_t, _Ip...>;
// __make_integer_seq is not declared anywhere in the visible source.
// NOTE(review): presumably the Clang builtin template — confirm; other
// compilers may not provide it.
template <class _Tp, _Tp _Ep>
using __make_integer_sequence __attribute__(()) =
__make_integer_seq<integer_sequence, _Tp, _Ep>;
template <class _Tp, _Tp _Np>
using make_integer_sequence = __make_integer_sequence<_Tp, _Np>;
template <size_t _Np>
using make_index_sequence = make_integer_sequence<size_t, _Np>;
} // namespace __1
} // namespace std
// Global-namespace enable_if_t: names `int` when B is true, which lets it be
// used as a defaulted non-type template parameter (`enable_if_t<...> = 0`).
template <bool B> using enable_if_t = std::enable_if_t<B, int>;
// Detects whether T* converts to `const B<Ts...> *` for some Ts, i.e. whether
// T is, or derives from, a specialization of the class template B.
template <template <typename...> typename B, typename T>
struct is_base_of_impl {
private:
// Chosen when the pointer converts to a pointer to some B<Ts...>.
template <typename... Ts>
static constexpr std::true_type test(const B<Ts...> *);
// Variadic fallback for every other argument.
static constexpr std::false_type test(...);
public:
// Result type of overload resolution on a T* argument (unevaluated).
using type = decltype(test(std::declval<T *>()));
};
// Detection-idiom scaffold. The primary template is only declared.
template <typename, template <typename...> typename Op, typename... Ts>
struct detector;
// NOTE(review): this specialization keys on std::void_t<> (plain void) and
// never forms Op<Ts...>, so as written it matches every detector<void, ...>
// and is_detected_v is true for any Op/Args. Looks like a reduction artifact.
template <template <typename...> typename Op, typename... Ts>
struct detector<std::void_t<>, Op, Ts...> : std::true_type {};
template <template <typename...> class Op, class... Args>
constexpr bool is_detected_v = detector<void, Op, Args...>::value;
// Shorthand for the result type computed by is_base_of_impl.
template <template <typename...> typename B, typename T>
using is_base_of = typename is_base_of_impl<B, T>::type;
// Scalar classification traits built on the stubbed std traits.
// is_int32 / is_int64: integral (per std::is_integral_v) with size 4 / 8.
template <typename T>
using is_int32 = std::bool_constant<std::is_integral_v<T> && sizeof(T) == 4>;
template <typename T> constexpr bool is_int32_v = is_int32<T>::value;
template <typename T>
using is_int64 = std::bool_constant<std::is_integral_v<T> && sizeof(T) == 8>;
template <typename T> constexpr bool is_int64_v = is_int64<T>::value;
// Exact-type checks against float and double.
template <typename T> constexpr bool is_float_v = std::is_same_v<T, float>;
template <typename T> constexpr bool is_double_v = std::is_same_v<T, double>;
template <typename T>
using is_std_float = std::bool_constant<is_float_v<T> || is_double_v<T>>;
template <typename T> constexpr bool is_std_float_v = is_std_float<T>::value;
template <typename T>
using is_std_int = std::bool_constant<is_int32_v<T> || is_int64_v<T>>;
template <typename T> constexpr bool is_std_int_v = is_std_int<T>::value;
// Union of the integer and floating-point categories above.
template <typename T>
using is_std_type = std::bool_constant<is_std_int_v<T> || is_std_float_v<T>>;
template <typename T> constexpr bool is_std_type_v = is_std_type<T>::value;
// SFINAE alias: names int only when T satisfies is_int32_v.
template <typename T> using enable_if_int32_t = enable_if_t<is_int32_v<T>>;
// Array classification traits.
// has_size<T> is well-formed (void) only when T::Size exists and differs from
// Dynamic; has_size_v routes it through is_detected_v.
template <typename T>
using has_size = std::enable_if_t<std::decay_t<T>::Size != Dynamic>;
template <typename T> constexpr bool has_size_v = is_detected_v<has_size, T>;
// A type counts as an array when it derives from some ArrayBase<...>.
template <typename T> using is_array = is_base_of<ArrayBase, std::decay_t<T>>;
template <typename T> constexpr bool is_array_v = is_array<T>::value;
template <typename T> using enable_if_array_t = enable_if_t<is_array_v<T>>;
template <typename T> using enable_if_not_array_t = enable_if_t<!is_array_v<T>>;
// "Any of Ts is an array", via std::disjunction.
template <typename... Ts>
using is_array_any = std::disjunction<is_array<Ts>...>;
template <typename... Ts>
constexpr bool is_array_any_v = is_array_any<Ts...>::value;
template <typename... Ts>
using enable_if_array_any_t = enable_if_t<is_array_any_v<Ts...>>;
// Static array: an array for which has_size_v also holds.
template <typename T>
using is_static_array = std::bool_constant<is_array_v<T> && has_size_v<T>>;
template <typename T>
constexpr bool is_static_array_v = is_static_array<T>::value;
template <typename T>
using enable_if_static_array_t = enable_if_t<is_static_array_v<T>>;
// packet_<T>::type is the `Value` alias of T's Derived type.
template <typename T> struct packet_ {
using type = typename std::decay_t<T>::Derived::Value;
};
// Nesting depth is fixed at 0 for every type in this file.
template <typename T> constexpr size_t array_depth_v = 0;
// array_size: 1 by default ...
template <typename T, typename = int> struct array_size {
static constexpr size_t value = 1;
};
// ... and Derived::Size when T qualifies as a static array.
template <typename T> struct array_size<T, enable_if_static_array_t<T>> {
static constexpr size_t value = std::decay_t<T>::Derived::Size;
};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
template <typename T> using packet_t = typename packet_<T>::type;
// copy_flags<T>: a chain of conditional_t steps that would add const, pointer
// and reference qualifiers to T. Every condition uses a trait that is stubbed
// to false in this file, so each step picks its second branch and T1..T4 all
// resolve to T.
template <typename T> struct copy_flags {
private:
using R = std::remove_reference_t<int>;
using T1 = std::conditional_t<std::is_const_v<R>, std::add_const_t<T>, T>;
using T2 =
std::conditional_t<std::is_pointer_v<int>, std::add_pointer_t<T1>, T1>;
using T3 = std::conditional_t<std::is_lvalue_reference_v<int>,
std::add_lvalue_reference_t<T2>, T2>;
using T4 = std::conditional_t<std::is_rvalue_reference_v<int>,
std::add_rvalue_reference_t<T3>, T3>;
public:
using type = T4;
};
// The first parameter S is unused; only T reaches copy_flags.
template <typename S, typename T>
using copy_flags_t = typename copy_flags<T>::type;
// mask<T, CopyFlags>: looks up the `MaskType` alias on T's Derived type and
// passes it through copy_flags_t. Both branches of the final conditional_t
// run the result through copy_flags_t or return it directly.
template <typename T, bool CopyFlags> struct mask {
private:
using Mask = copy_flags_t<T, typename std::decay_t<T>::Derived::MaskType>;
public:
using type = std::conditional_t<CopyFlags, copy_flags_t<T, Mask>, Mask>;
};
// Shorthand; CopyFlags defaults to true.
template <typename T, bool CopyFlags = true>
using mask_t = typename mask<T, CopyFlags>::type;
// replace_scalar<T, Value>: rewrites the scalar type underneath T to Value.
// The primary template is only declared; the trailing `int` parameter is the
// SFINAE slot matched by the two partial specializations.
template <typename T, typename Value, bool CopyFlags = true, typename = int>
struct replace_scalar;
template <typename T, typename Value, bool CopyFlags = true>
using replace_scalar_t = typename replace_scalar<T, Value, CopyFlags>::type;
// Non-array T: the result is Value (optionally passed through copy_flags_t).
template <typename T, typename Value, bool CopyFlags>
struct replace_scalar<T, Value, CopyFlags, enable_if_not_array_t<T>> {
using type = std::conditional_t<CopyFlags, copy_flags_t<T, Value>, Value>;
};
// Array T: recurse into packet_t<T>, then rebuild the array type through the
// Derived type's ReplaceValue member template.
template <typename T, typename Value, bool CopyFlags>
struct replace_scalar<T, Value, CopyFlags, enable_if_array_t<T>> {
private:
using Entry = replace_scalar_t<packet_t<T>, Value, CopyFlags>;
using Array = typename std::decay_t<T>::Derived::template ReplaceValue<Entry>;
public:
using type = std::conditional_t<CopyFlags, copy_flags_t<T, Array>, Array>;
};
// Array type with the same layout as T but `unsigned int` elements.
template <typename T, bool CopyFlags = true>
using uint_array_t = replace_scalar_t<T, unsigned int, CopyFlags>;
// 64-byte vector types declared via the __vector_size__ attribute.
// NOTE(review): `__aligned__0` is not the usual `__aligned__(N)` spelling; it
// looks like a reduction artifact and is probably treated as an unknown
// attribute — confirm before relying on any alignment from these typedefs.
typedef float __m512 __attribute__((__vector_size__(64), __aligned__0));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__0));
// Returns an empty-initialized __m512 compound literal.
static __inline __m512 __attribute__(()) _mm512_setzero_ps() {
return __extension__(__m512){};
}
// Masked load: forwards the pointer (cast to a 16-float vector pointer), a
// vector obtained from _mm512_setzero_ps() and the mask __U to
// __builtin_ia32_loadaps512_mask.
// NOTE(review): presumably lanes whose mask bit is clear take their value from
// the zero vector — confirm against the builtin's documentation.
static __inline __m512 __attribute__(())
_mm512_maskz_load_ps(unsigned short __U, void const *__P) {
__attribute__((__vector_size__(16 * sizeof(float)))) float __trans_tmp_1 =
_mm512_setzero_ps();
return __builtin_ia32_loadaps512_mask(
(const __attribute__((__vector_size__(16 * sizeof(float)))) float *)__P,
__trans_tmp_1, __U);
}
// Loads a __m512i by dereferencing the pointer as `__m512i *`.
static __inline __m512i __attribute__(()) _mm512_load_si512(void const *__P) {
return *(__m512i *)__P;
}
// Thin wrapper over __builtin_ia32_pslldi512 with operands (__A, __B).
static __inline__ __m512i __attribute__(())
_mm512_slli_epi32(__m512i __A, int __B) {
return __builtin_ia32_pslldi512(__A, __B);
}
// Thin wrapper over __builtin_ia32_psrldi512 with operands (__A, __B).
static __inline__ __m512i __attribute__(())
_mm512_srli_epi32(__m512i __A, int __B) {
return __builtin_ia32_psrldi512(__A, __B);
}
// Declared only; no definition appears in the visible source.
static __m512i __attribute__(()) _mm512_srai_epi32(__m512i __A, int __B);
// Compile-time feature flags. has_vectorization is true because has_sse42 is
// hard-coded to true.
static constexpr bool has_sse42 = true, has_neon = false,
has_vectorization = has_sse42 || has_neon;
// Returns x.derived(); participates in overload resolution only when T is an
// array type (enable_if_array_t).
template <typename T, enable_if_array_t<T> = 0>
__attribute__(()) inline decltype(auto) eval(const T &x) {
return x.derived();
}
// sl<Imm>(a): for array types dispatches to the derived class's sl_<Imm>().
// NOTE(review): the non-array branch evaluates the comparison `a < Imm`; the
// name suggests a shift was intended — likely a reduction artifact, confirm.
template <size_t Imm, typename T> __attribute__(()) inline auto sl(const T &a) {
if constexpr (!is_array_v<T>)
return a < Imm;
else
return eval(a).template sl_<Imm>();
}
// sr<Imm>(a): for array types dispatches to the derived class's sr_<Imm>().
// NOTE(review): the non-array branch evaluates the comparison `a > Imm`; see
// the matching note on sl.
template <size_t Imm, typename T> __attribute__(()) inline auto sr(const T &a) {
if constexpr (!is_array_v<T>)
return a > Imm;
else
return eval(a).template sr_<Imm>();
}
// Element-wise equality: forwards to eq_ on the derived type of a1, passing
// the derived view of a2.
template <typename T1, typename T2>
__attribute__(()) inline auto eq(const T1 &a1, const T2 &a2) {
return a1.derived().eq_(a2.derived());
}
// Declared only: operator== for operand pairs where at least one is an array.
template <typename T1, typename T2, enable_if_array_any_t<T1, T2> = 0>
__attribute__(()) bool operator==(const T1 &a1, const T2 &a2);
// low(a): returns a.derived().low_(). Enabled only when Array::Size is greater
// than 1 and not equal to -1.
template <typename Array,
enable_if_t<(Array::Size > 1 && Array::Size != -1)> = 0>
auto low(const Array &a) {
return a.derived().low_();
}
// high(a): returns a.derived().high_(), under the same size constraint as low.
template <typename Array,
enable_if_t<(Array::Size > 1 && Array::Size != -1)> = 0>
auto high(const Array &a) {
return a.derived().high_();
}
// arange<Array>(): forwards to Array::arange_(0, Array::Size, 1); available
// only for static arrays.
template <typename Array, enable_if_static_array_t<Array> = 0>
__attribute__(()) inline Array arange() {
return Array::arange_(0, Array::Size, 1);
}
// Unmasked load: forwards to T::load_(mem).
template <typename T> __attribute__(()) inline T load(const void *mem) {
return T::load_(mem);
}
// Masked load: forwards to T::load_(mem, mask), where mask has type mask_t<T>.
template <typename T>
__attribute__(()) inline T load(const void *mem, const mask_t<T> &mask) {
return T::load_(mem, mask);
}
// CRTP root of the array hierarchy: records the most-derived type and
// provides a downcast accessor.
template <typename Derived_> struct ArrayBase {
using Derived = Derived_;
// Downcast to the derived type. The C-style cast targets a non-const
// `Derived &` from a const `this`; the result is bound to a const reference.
__attribute__(()) inline const Derived &derived() const {
return (Derived &)*this;
}
// Element type exposed to packet_t; fixed to int in this file.
using Value = int;
};
// Returns its argument unchanged: OR-ing with 0 is the identity, and the zero
// case is handled explicitly.
static constexpr size_t fill(size_t i) {
  if (i == 0)
    return 0;
  return i | 0;
}
// Size of the "low" part used when splitting an array of i elements:
// 0 for i == 0, otherwise ((i - 1) >> 1) + 1 (e.g. 32 -> 16, 16 -> 8, 1 -> 1).
static constexpr size_t lpow2(size_t i) {
  if (i == 0)
    return 0;
  const size_t halved = fill(i - 1) >> 1;
  return halved + 1;
}
// Common base of all fixed-size arrays. Besides the size constants it defines
// the types of the two halves (Array1/Array2) used by the recursive
// implementation, and the arange_ factory.
template <typename Value_, size_t Size_, bool IsMask_, typename Derived_>
struct StaticArrayBase : ArrayBase<Derived_> {
using Base = ArrayBase<Derived_>;
using typename Base::Derived;
static constexpr size_t Size = Size_;
// Size1 comes from lpow2(Size_); Size2 is the remainder.
static constexpr size_t Size1 = lpow2(Size_);
static constexpr size_t Size2 = Size_ - Size1;
// Half types: Array<> for value arrays, Mask<> when IsMask_ is set.
using Array1 =
std::conditional_t<!IsMask_, Array<Value_, Size1>, Mask<Value_, Size1>>;
using Array2 =
std::conditional_t<!IsMask_, Array<Value_, Size2>, Mask<Value_, Size2>>;
// NOTE(review): `start` and `stop` are accepted but never used; only `step`
// is forwarded to linspace_.
static __attribute__(()) inline Derived arange_(ssize_t start, ssize_t stop,
ssize_t step) {
return linspace_(std::make_index_sequence<Derived::Size>(), step);
}
private:
// Builds Derived from `Is * step + offset` for each index, or a
// default-constructed Derived when there is exactly one index.
// NOTE(review): `offset` is declared without an initializer and read in the
// multi-index branch — an indeterminate value for scalar T. Looks like a
// reduction artifact; confirm before reusing this code.
template <typename T, size_t... Is>
static __attribute__(()) inline auto linspace_(std::index_sequence<Is...>,
T step) {
T offset;
if constexpr (sizeof...(Is) == 1)
return Derived();
else
return Derived(Is * step + offset...);
}
public:
};
// is_native<Size>: false by default. An explicit specialization for size 16
// appears further down in this file.
template <size_t Size> struct is_native {
static constexpr bool value = false;
};
// The Value parameter is ignored; the answer depends on Size alone.
template <typename Value, size_t Size>
constexpr bool is_native_v = is_native<Size>::value;
// Selects the implementation strategy for Array<Value, Size>.
template <typename Value, size_t Size> struct array_config {
// True when is_native reports a native implementation for this size.
static constexpr bool use_native_impl = is_native_v<Value, Size>;
// Recursive split: not native, a standard int/float element type,
// vectorization enabled, and more than three elements.
static constexpr bool use_recursive_impl =
!use_native_impl && is_std_type_v<Value> && has_vectorization && Size > 3;
};
// Primary StaticArrayImpl: declared only. The trailing `int` parameter is the
// SFINAE slot that the partial specializations match via enable_if_t.
template <typename Value_, size_t Size_, bool IsMask_, typename Derived_,
typename = int>
struct StaticArrayImpl;
// Recursive implementation: stores the array as two halves (a1 of Size1
// elements, a2 of Size2 elements) and forwards each operation to both.
// Selected when array_config<Value_, Size_>::use_recursive_impl is true.
template <typename Value_, size_t Size_, bool IsMask_, typename Derived_>
struct StaticArrayImpl<
Value_, Size_, IsMask_, Derived_,
enable_if_t<array_config<Value_, Size_>::use_recursive_impl>>
: StaticArrayBase<Value_, Size_, IsMask_, Derived_> {
using Base = StaticArrayBase<Value_, Size_, IsMask_, Derived_>;
using Base::Size;
using Base::Size1;
using Base::Size2;
using typename Base::Array1;
using typename Base::Array2;
using typename Base::Derived;
using typename Base::Value;
using Ref = const Derived &;
// Per-element constructor (exactly Size arguments). The arguments are cast to
// Value and placed in a local array aligned like Array1; a1 is then assigned
// from load<Array1>(storage).
// NOTE(review): a2 is never assigned in this constructor body.
template <typename... Ts,
enable_if_t<sizeof...(Ts) == Size && std::conjunction_v<>> = 0>
__attribute__(()) inline StaticArrayImpl(Ts... args) {
alignas(alignof(Array1)) Value storage[] = {(Value)args...};
a1 = load<Array1>(storage);
}
// Builds the array from two halves whose sizes match Size1 and Size2.
template <typename T1, typename T2,
enable_if_t<T1::Size == Size1 && T2::Size == Size2> = 0>
__attribute__(()) inline StaticArrayImpl(const T1 &a1, const T2 &a2)
: a1(a1), a2(a2) {}
// Tagged conversion from another static array of the same size.
// NOTE(review): only a1 is initialized (from low(a)); a2 is left to its
// default initialization.
template <typename Value2, size_t Size2, bool IsMask2, typename Derived2,
enable_if_t<Derived2::Size == Size_> = 0>
__attribute__(()) inline StaticArrayImpl(
const StaticArrayBase<Value2, Size2, IsMask2, Derived2> &a, int)
: a1(low(a), int()) {}
// Equality: compares both halves and combines the results into
// mask_t<Derived>.
__attribute__(()) inline auto eq_(Ref a) const {
return mask_t<Derived>(eq(a1, a.a1), eq(a2, a.a2));
}
// sl_/sr_: apply sl<Imm>/sr<Imm> to each half and recombine.
template <size_t Imm> __attribute__(()) inline Derived sl_() const {
return Derived(sl<Imm>(a1), sl<Imm>(a2));
}
template <size_t Imm> __attribute__(()) inline Derived sr_() const {
return Derived(sr<Imm>(a1), sr<Imm>(a2));
}
// Masked load: splits the mask with low()/high(), loads the first half from
// `mem` and the second half from `mem` advanced by sizeof(Array1) bytes.
template <typename Mask>
static __attribute__(()) inline Derived load_(const void *mem,
const Mask &mask) {
auto __trans_tmp_2 = low(mask), __trans_tmp_3 = high(mask);
return Derived(
load<Array1>(mem, __trans_tmp_2),
load<Array2>((unsigned char *)mem + sizeof(Array1), __trans_tmp_3));
}
// Accessors for the two halves, used by the free functions low()/high().
__attribute__(()) inline const Array1 &low_() const { return a1; }
__attribute__(()) inline const Array2 &high_() const { return a2; }
Array1 a1;
Array2 a2;
};
// Base for masks stored as an integer bit field `k`.
template <size_t Size_, typename Derived_>
struct KMaskBase : StaticArrayBase<int, Size_, true, Derived_> {
// Storage type: unsigned short when Size_ > 8, otherwise unsigned char.
using Register =
std::conditional_t<(Size_ > 8), unsigned short, unsigned char>;
using Derived = Derived_;
// Leaves `k` without an explicit initializer.
KMaskBase() {}
// Tagged converting constructor: copies `k` from another mask whose Derived
// type uses the same Register type.
template <typename Array,
enable_if_t<std::is_same_v<Register,
typename Array::Derived::Register>> = 0>
__attribute__(()) inline KMaskBase(const Array &other, int)
: k(other.derived().k) {}
// Factory: default-constructs Derived and assigns `k` from the argument.
template <typename T>
__attribute__(()) inline static Derived from_k(const T &k) {
Derived result;
result.k = k;
return result;
}
Register k;
};
// Size 16 is marked native. is_native is keyed on size only, so this applies
// to every element type.
template <> struct is_native<16> : std::true_type {};
// 16-lane float implementation backed by a single __m512 member.
template <bool IsMask_, typename Derived_>
struct alignas(64) StaticArrayImpl<float, 16, IsMask_, Derived_>
: StaticArrayBase<float, 16, IsMask_, Derived_> {
using Base = StaticArrayBase<float, 16, IsMask_, Derived_>;
using typename Base::Derived;
using Register = __m512;
Register m;
// Wraps an existing vector value.
__attribute__(()) inline StaticArrayImpl(Register value) : m(value) {}
// Masked load: passes mask.k and the pointer (annotated with
// __builtin_assume_aligned(ptr, 4)) to _mm512_maskz_load_ps; the result is
// converted to Derived on return.
template <typename Mask>
static __attribute__(()) inline Derived load_(const void *ptr,
const Mask &mask) {
return _mm512_maskz_load_ps(mask.k, __builtin_assume_aligned(ptr, 4));
}
} __attribute__(());
// 16-lane implementation for element types satisfying is_int32_v, backed by a
// single __m512i member.
template <typename Value_, bool IsMask_, typename Derived_>
struct alignas(64)
StaticArrayImpl<Value_, 16, IsMask_, Derived_, enable_if_int32_t<Value_>>
: StaticArrayBase<Value_, 16, IsMask_, Derived_> {
using Base = StaticArrayBase<Value_, 16, IsMask_, Derived_>;
using typename Base::Derived;
using typename Base::Value;
using Ref = const Derived &;
using Register = __m512i;
Register m;
__attribute__(()) inline StaticArrayImpl(Register value) : m(value) {}
// Value-initializes the register.
__attribute__(()) inline StaticArrayImpl() : m() {}
// Forwards to _mm512_slli_epi32 with the compile-time count k.
template <size_t k> __attribute__(()) inline Derived sl_() const {
return _mm512_slli_epi32(m, k);
}
// Picks _mm512_srai_epi32 or _mm512_srli_epi32 based on std::is_signed_v,
// which is false for every type in this file, so the srli path is taken.
template <size_t k> __attribute__(()) inline Derived sr_() const {
return std::is_signed_v<Value> ? _mm512_srai_epi32(m, k)
: _mm512_srli_epi32(m, k);
}
// Equality: calls __builtin_ia32_cmpd512_mask(m, a.m, 0, 0 - 1) and wraps the
// result with mask_t<Derived>::from_k.
__attribute__(()) inline auto eq_(Ref a) const {
return mask_t<Derived>::from_k(
__builtin_ia32_cmpd512_mask(m, a.m, 0, 0 - 1));
}
// Unmasked load through _mm512_load_si512; the pointer is annotated with
// __builtin_assume_aligned(ptr, 4).
static __attribute__(()) inline Derived load_(const void *ptr) {
return _mm512_load_si512(__builtin_assume_aligned(ptr, 4));
}
} __attribute__(());
// 16-lane mask for float elements: a KMaskBase<16> that inherits the base
// constructors.
template <typename Derived_>
struct StaticArrayImpl<float, 16, true, Derived_, int>
: KMaskBase<16, Derived_> {
using Base = KMaskBase<16, Derived_>;
using Base::Base;
};
// 16-lane mask for int32-like elements: also a KMaskBase<16>, but without
// inheriting constructors.
template <typename Value_, typename Derived_>
struct StaticArrayImpl<Value_, 16, true, Derived_, enable_if_int32_t<Value_>>
: KMaskBase<16, Derived_> {};
// User-facing value array: the CRTP leaf over StaticArrayImpl with
// IsMask_ = false.
template <typename Value_, size_t Size_>
struct Array : StaticArrayImpl<Value_, Size_, false, Array<Value_, Size_>> {
using Base = StaticArrayImpl<Value_, Size_, false, Array>;
// Mask type of matching element type and size; read by mask<>.
using MaskType = Mask<Value_, Size_>;
// Same-size array with a different element type; read by replace_scalar<>.
template <typename T> using ReplaceValue = Array<T, Size_>;
using Base::Base;
};
// User-facing mask: the CRTP leaf over StaticArrayImpl with IsMask_ = true.
template <typename Value_, size_t Size_>
struct Mask : StaticArrayImpl<Value_, Size_, true, Mask<Value_, Size_>> {
using Base = StaticArrayImpl<Value_, Size_, true, Mask>;
Mask() = default;
// Converting constructors: both forward the value plus an `int()` tag to the
// base class.
// NOTE(review): std::forward is called without an explicit template argument;
// this compiles only because this file's forward takes `_Tp &` and deduces
// `_Tp`, and it then yields an rvalue reference.
template <typename T> Mask(T &&value) : Base(std::forward(value), int()) {}
template <typename T>
Mask(T &&value, int) : Base(std::forward(value), int()) {}
// Two-half constructor: enabled when both arguments match the expected depth
// and the sizes Base::Size1 / Base::Size2, and Size2 is non-zero.
template <
typename T1, typename T2, typename T = Mask,
enable_if_t<array_depth_v<T1> == array_depth_v<T> &&
array_size_v<T1> == Base::Size1 &&
array_depth_v<T2> == array_depth_v<T> &&
array_size_v<T2> == Base::Size2 && Base::Size2 != 0> = 0>
Mask(const T1 &a1, const T2 &a2) : Base(a1, a2) {}
};
// Declaration; T defaults to Array<Value, Size>.
template <typename Value, size_t Size, typename T = Array<Value, Size>>
void test11_load_masked();
// Entry point that instantiates the test for 32 floats.
void array_float_32_test11_load_masked() { test11_load_masked<float, 32>(); }
// Masked-load test body. Fills `mem` with 0..Size-1, builds a mask by
// comparing sl<1>(sr<1>(idx)) against idx, performs a masked load and passes
// the result of `== 0` to the assertion.
// The template parameter T is used only for the alignment of `mem`; the array
// types are spelled out as Array<float, 32>.
template <typename Value, size_t Size, typename T> void test11_load_masked() {
alignas(alignof(T)) Value mem[Size];
for (size_t i = 0; i < Size; ++i)
mem[i] = i;
auto idx = arange<uint_array_t<Array<float, 32>>>(),
__trans_tmp_5 = sr<1>(idx);
auto __trans_tmp_4 = sl<1>(__trans_tmp_5);
auto even_mask = mask_t<Array<float, 32>>(eq(__trans_tmp_4, idx));
// Hand-expanded assert: calls __assert_fail when the comparison is false.
static_cast<bool>(load<Array<float, 32>>(mem, even_mask) == 0)
? void()
: __assert_fail("load0 == load_unaligned0",
"/media/lin/LinuxSpace/program/enoki/tests/memory.cpp", 5,
__extension__ __PRETTY_FUNCTION__);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment