Skip to content

Instantly share code, notes, and snippets.

@marzer
Last active July 4, 2019 16:11
Show Gist options
  • Save marzer/77e9aa54209aed2986893bf7b9ebc154 to your computer and use it in GitHub Desktop.
Save marzer/77e9aa54209aed2986893bf7b9ebc154 to your computer and use it in GitHub Desktop.
MSVC inlining bug
//compiled using VS 2019 latest, v141 toolset
//flags: /fp:fast /O2 /permissive- /std:c++latest /utf-8 /volatile:iso /Zc:__cplusplus /bigobj /Zc:inline /Zc:throwingNew /Gm- /Ot /arch:AVX2 /Gd /Oy /Oi /GS- /Qpar-
#include <cstdint>
#include <cstddef>
#include <cmath>
#include <array>
// Inlining attribute placed on the At() accessors and the index helpers below.
// __forceinline is an MSVC-specific keyword.
#define ACH_ALWAYS_INLINE __forceinline //changing this to __declspec(noinline) 'fixes' the issue
// Wrapper around the MSVC __assume intrinsic; not referenced by the code visible in this file.
#define ACH_ASSUME(cond) __assume(cond)
// The code below spells size_t without the std:: qualifier.
using namespace std;
namespace
{
// User-defined literal that yields its operand converted to std::size_t,
// e.g. `2_sz`. Written without whitespace between `""` and the suffix because
// the spaced form `operator"" _sz` is deprecated. The return type is spelled
// out so readers do not have to deduce it from the cast.
constexpr std::size_t operator""_sz(unsigned long long n) noexcept
{
    return static_cast<std::size_t>(n);
}
// Element type used by every vector and matrix type in this file.
using scalar_t = float; //making this volatile 'fixes' the issue
template <size_t DIM>
struct VecBase
{
std::array<scalar_t, DIM> Elements;
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr const scalar_t& __vectorcall At() const noexcept
{
static_assert(
IDX < DIM,
"Element index out of range"
);
return Elements[IDX];
}
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr scalar_t& __vectorcall At() noexcept
{
static_assert(
IDX < DIM,
"Element index out of range"
);
return Elements[IDX];
}
};
// Two-component storage with the elements exposed as named members X and Y.
template <>
struct VecBase<2>
{
    scalar_t X;
    scalar_t Y;

    // Defaulted default constructor.
    constexpr VecBase() noexcept = default;

    // Splat constructor: both components receive the same value.
    explicit constexpr VecBase(scalar_t splat) noexcept
        : X{ splat }, Y{ splat }
    {}

    // Read-only component lookup by compile-time index (0 -> X, 1 -> Y).
    template <size_t IDX>
    ACH_ALWAYS_INLINE
    constexpr const scalar_t& __vectorcall At() const noexcept
    {
        static_assert(IDX < 2, "Element index out of range");

        if constexpr (IDX == 0)
            return X;
        else
            return Y;
    }

    // Mutable component lookup by compile-time index (0 -> X, 1 -> Y).
    template <size_t IDX>
    ACH_ALWAYS_INLINE
    constexpr scalar_t& __vectorcall At() noexcept
    {
        static_assert(IDX < 2, "Element index out of range");

        if constexpr (IDX == 0)
            return X;
        else
            return Y;
    }
};
// Three-component specialization: stores the elements as the named members
// X, Y and Z rather than in a std::array.
template <>
struct VecBase<3>
{
scalar_t X;
scalar_t Y;
scalar_t Z;
// Defaulted; does not assign X, Y or Z itself.
constexpr VecBase() noexcept = default;
// Splat constructor: copies the same value into X, Y and Z.
explicit constexpr VecBase(scalar_t splat) noexcept
: X{ splat },
Y{ splat },
Z{ splat }
{}
// Read-only access by compile-time index: 0 -> X, 1 -> Y, 2 -> Z.
// Any other IDX fails the static_assert.
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr const scalar_t& __vectorcall At() const noexcept
{
static_assert(
IDX < 3,
"Element index out of range"
);
// Only the branch matching IDX is kept; the others are discarded at compile time.
if constexpr (IDX == 2)
return Z;
else if constexpr (IDX == 1)
return Y;
else
return X;
}
// Mutable access by compile-time index: 0 -> X, 1 -> Y, 2 -> Z.
// Any other IDX fails the static_assert.
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr scalar_t& __vectorcall At() noexcept
{
static_assert(
IDX < 3,
"Element index out of range"
);
if constexpr (IDX == 2)
return Z;
else if constexpr (IDX == 1)
return Y;
else
return X;
}
};
// Four-component storage with the elements exposed as named members X, Y, Z and W.
template <>
struct VecBase<4>
{
    scalar_t X;
    scalar_t Y;
    scalar_t Z;
    scalar_t W;

    // Defaulted default constructor.
    constexpr VecBase() noexcept = default;

    // Splat constructor: all four components receive the same value.
    explicit constexpr VecBase(scalar_t splat) noexcept
        : X{ splat }, Y{ splat }, Z{ splat }, W{ splat }
    {}

    // Read-only component lookup by compile-time index (0 -> X, 1 -> Y, 2 -> Z, 3 -> W).
    template <size_t IDX>
    ACH_ALWAYS_INLINE
    constexpr const scalar_t& __vectorcall At() const noexcept
    {
        static_assert(IDX < 4, "Element index out of range");

        if constexpr (IDX == 0)
            return X;
        else if constexpr (IDX == 1)
            return Y;
        else if constexpr (IDX == 2)
            return Z;
        else
            return W;
    }

    // Mutable component lookup by compile-time index (0 -> X, 1 -> Y, 2 -> Z, 3 -> W).
    template <size_t IDX>
    ACH_ALWAYS_INLINE
    constexpr scalar_t& __vectorcall At() noexcept
    {
        static_assert(IDX < 4, "Element index out of range");

        if constexpr (IDX == 0)
            return X;
        else if constexpr (IDX == 1)
            return Y;
        else if constexpr (IDX == 2)
            return Z;
        else
            return W;
    }
};
// DIM-component vector layered on VecBase<DIM>; declares no data members of its own.
// __declspec(empty_bases) is an MSVC-specific class layout attribute.
template <size_t DIM>
struct __declspec(empty_bases) Vec : VecBase<DIM>
{
using base_t = VecBase<DIM>;
// Read-only element access; forwards to the base-class At<IDX>().
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr const scalar_t& At() const noexcept
{
return base_t::template At<IDX>();
}
// Mutable element access; forwards to the base-class At<IDX>().
template <size_t IDX>
ACH_ALWAYS_INLINE
constexpr scalar_t& At() noexcept
{
return base_t::template At<IDX>();
}
constexpr Vec() noexcept = default;
// Converts `splat` to scalar_t and brace-initializes the base with it.
// For the 2/3/4 specializations this selects their splat constructor.
template <typename T>
explicit constexpr Vec(T splat) noexcept
: base_t{ static_cast<scalar_t>(splat) }
{}
};
// ROWS x COLS matrix of scalar_t, stored as COLS column vectors of ROWS elements each.
// COLS defaults to ROWS, so Matrix<N> is an N x N matrix.
template <size_t ROWS, size_t COLS = ROWS>
struct Matrix
{
using column_t = Vec<ROWS>;
using row_t = Vec<COLS>;
// True when the matrix has as many rows as columns.
static constexpr bool Square = ROWS == COLS;
// Column storage: Columns[c] holds the elements of column c.
std::array<column_t, COLS> Columns;
// Read-only reference to the element at row R, column C.
// Both indices are validated at compile time.
template <size_t R, size_t C>
ACH_ALWAYS_INLINE
constexpr const scalar_t& __vectorcall At() const noexcept
{
static_assert(
R < ROWS,
"Row index out of range"
);
static_assert(
C < COLS,
"Column index out of range"
);
return Columns[C].template At<R>();
}
// Mutable reference to the element at row R, column C.
// Both indices are validated at compile time.
template <size_t R, size_t C>
ACH_ALWAYS_INLINE
constexpr scalar_t& __vectorcall At() noexcept
{
static_assert(
R < ROWS,
"Row index out of range"
);
static_assert(
C < COLS,
"Column index out of range"
);
return Columns[C].template At<R>();
}
// Row of a flat element index when elements are numbered column by column.
ACH_ALWAYS_INLINE
static constexpr size_t RowOf_ColMaj(size_t index) noexcept
{
return index % ROWS;
}
// Column of a flat element index when elements are numbered column by column.
ACH_ALWAYS_INLINE
static constexpr size_t ColOf_ColMaj(size_t index) noexcept
{
return index / ROWS;
}
// Row of a flat element index when elements are numbered row by row.
ACH_ALWAYS_INLINE
static constexpr size_t RowOf_RowMaj(size_t index) noexcept
{
return index / COLS;
}
// Column of a flat element index when elements are numbered row by row.
ACH_ALWAYS_INLINE
static constexpr size_t ColOf_RowMaj(size_t index) noexcept
{
return index % COLS;
}
constexpr Matrix() noexcept = default;
// Element-wise constructor taking between 2 and ROWS * COLS values; the
// enable_if condition removes it from overload resolution when more values
// are supplied than the matrix has elements.
// Values are listed in row-major order and each one is converted to scalar_t.
// When fewer than ROWS * COLS values are given, every column is first filled
// with zero so the remaining elements end up as 0.
// Values at flat index 16 and above are not stored by this version: the loop
// that handled them is commented out below.
template <typename T, typename U, typename... V, typename = std::enable_if_t<
((ROWS * COLS) >= (sizeof...(V) + 2_sz))
>
>
constexpr Matrix(T val0, U val1, V... vals) noexcept
{
constexpr auto varArgCount = sizeof...(V);
constexpr auto allArgCount = varArgCount + 2_sz;
if constexpr (ROWS * COLS > allArgCount) //disabling this block 'fixes' the issue
{
// Zero-fill everything; the assignments below then overwrite the supplied elements.
for (size_t c = 0; c < COLS; c++)
Columns[c] = column_t{ 0 };
}
//regular arguments
At<0, 0>() = static_cast<scalar_t>(val0);
At<RowOf_RowMaj(1), ColOf_RowMaj(1)>() = static_cast<scalar_t>(val1);
//variadic arguments
if constexpr (varArgCount > 0)
{
// Expand the pack into an array so each value can be addressed by position.
std::array<scalar_t, varArgCount> v{ { static_cast<scalar_t>(vals)... } };
//handle arguments 2-15 explicitly
// v[i] is the argument at flat row-major index i + 2; lines whose condition
// is false are discarded at compile time.
if constexpr (varArgCount > 0 ) At<RowOf_RowMaj(2 ), ColOf_RowMaj(2 )>() = v[0 ];
if constexpr (varArgCount > 1 ) At<RowOf_RowMaj(3 ), ColOf_RowMaj(3 )>() = v[1 ];
if constexpr (varArgCount > 2 ) At<RowOf_RowMaj(4 ), ColOf_RowMaj(4 )>() = v[2 ];
if constexpr (varArgCount > 3 ) At<RowOf_RowMaj(5 ), ColOf_RowMaj(5 )>() = v[3 ];
if constexpr (varArgCount > 4 ) At<RowOf_RowMaj(6 ), ColOf_RowMaj(6 )>() = v[4 ];
if constexpr (varArgCount > 5 ) At<RowOf_RowMaj(7 ), ColOf_RowMaj(7 )>() = v[5 ];
if constexpr (varArgCount > 6 ) At<RowOf_RowMaj(8 ), ColOf_RowMaj(8 )>() = v[6 ];
if constexpr (varArgCount > 7 ) At<RowOf_RowMaj(9 ), ColOf_RowMaj(9 )>() = v[7 ];
if constexpr (varArgCount > 8 ) At<RowOf_RowMaj(10), ColOf_RowMaj(10)>() = v[8 ];
if constexpr (varArgCount > 9 ) At<RowOf_RowMaj(11), ColOf_RowMaj(11)>() = v[9 ];
if constexpr (varArgCount > 10) At<RowOf_RowMaj(12), ColOf_RowMaj(12)>() = v[10];
if constexpr (varArgCount > 11) At<RowOf_RowMaj(13), ColOf_RowMaj(13)>() = v[11];
if constexpr (varArgCount > 12) At<RowOf_RowMaj(14), ColOf_RowMaj(14)>() = v[12];
if constexpr (varArgCount > 13) At<RowOf_RowMaj(15), ColOf_RowMaj(15)>() = v[13];
//handle argument 16 and onward by looping
if constexpr (varArgCount > 14)
{
/*
//Vec's operator[] omitted for brevity, but this branch was this:
__pragma(omp simd)
for (size_t i = 14, arg = 16; i < varArgCount; i++, arg++)
Columns[ColOf_RowMaj(arg)][RowOf_RowMaj(arg)] = v[i];
*/
}
}
}
};
}
// Repro driver: builds a 3x3 matrix from six row-major values (fewer than nine,
// so the constructor zero-fills first) and returns the element at row 0,
// column 2, truncated to int, as the process exit code.
int main()
{
Matrix<3> mat{ 5.3, 7.1, 6.0, 9, 100, 4 };
// mat is not initialized properly here in release builds;
// it should contain the following:
// 5.3f 7.1f 6.0f
// 9.0f 100.0f 4.0f
// 0.0f 0.0f 0.0f
//
// With the expected contents above, the value returned below is 6.
return static_cast<int>(mat.At<0,2>());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment