Last active
July 4, 2019 16:11
-
-
Save marzer/77e9aa54209aed2986893bf7b9ebc154 to your computer and use it in GitHub Desktop.
MSVC inlining bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//compiled using VS 2019 latest, v141 toolset
//flags: /fp:fast /O2 /permissive- /std:c++latest /utf-8 /volatile:iso /Zc:__cplusplus /bigobj /Zc:inline /Zc:throwingNew /Gm- /Ot /arch:AVX2 /Gd /Oy /Oi /GS- /Qpar-
#include <cstdint> | |
#include <cstddef> | |
#include <cmath> | |
#include <array> | |
#define ACH_ALWAYS_INLINE __forceinline //changing this to __declspec(noinline) 'fixes' the issue | |
#define ACH_ASSUME(cond) __assume(cond) | |
using namespace std; | |
namespace | |
{ | |
/// User-defined literal that returns its integer operand converted to
/// std::size_t, e.g. `2_sz`.
///
/// Spelled without whitespace between `""` and the suffix: the spaced form
/// (`operator"" _sz`) is deprecated.
constexpr auto operator""_sz(unsigned long long n) noexcept
{
    return static_cast<std::size_t>(n);
}
// Element type used by every vector and matrix in this file.
// Making this volatile 'fixes' the issue.
using scalar_t = float;
template <size_t DIM> | |
struct VecBase | |
{ | |
std::array<scalar_t, DIM> Elements; | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& __vectorcall At() const noexcept | |
{ | |
static_assert( | |
IDX < DIM, | |
"Element index out of range" | |
); | |
return Elements[IDX]; | |
} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& __vectorcall At() noexcept | |
{ | |
static_assert( | |
IDX < DIM, | |
"Element index out of range" | |
); | |
return Elements[IDX]; | |
} | |
}; | |
template <> | |
struct VecBase<2> | |
{ | |
scalar_t X; | |
scalar_t Y; | |
constexpr VecBase() noexcept = default; | |
explicit constexpr VecBase(scalar_t splat) noexcept | |
: X{ splat }, | |
Y{ splat } | |
{} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& __vectorcall At() const noexcept | |
{ | |
static_assert( | |
IDX < 2, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& __vectorcall At() noexcept | |
{ | |
static_assert( | |
IDX < 2, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
}; | |
template <> | |
struct VecBase<3> | |
{ | |
scalar_t X; | |
scalar_t Y; | |
scalar_t Z; | |
constexpr VecBase() noexcept = default; | |
explicit constexpr VecBase(scalar_t splat) noexcept | |
: X{ splat }, | |
Y{ splat }, | |
Z{ splat } | |
{} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& __vectorcall At() const noexcept | |
{ | |
static_assert( | |
IDX < 3, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 2) | |
return Z; | |
else if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& __vectorcall At() noexcept | |
{ | |
static_assert( | |
IDX < 3, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 2) | |
return Z; | |
else if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
}; | |
template <> | |
struct VecBase<4> | |
{ | |
scalar_t X; | |
scalar_t Y; | |
scalar_t Z; | |
scalar_t W; | |
constexpr VecBase() noexcept = default; | |
explicit constexpr VecBase(scalar_t splat) noexcept | |
: X{ splat }, | |
Y{ splat }, | |
Z{ splat }, | |
W{ splat } | |
{} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& __vectorcall At() const noexcept | |
{ | |
static_assert( | |
IDX < 4, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 3) | |
return W; | |
else if constexpr (IDX == 2) | |
return Z; | |
else if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& __vectorcall At() noexcept | |
{ | |
static_assert( | |
IDX < 4, | |
"Element index out of range" | |
); | |
if constexpr (IDX == 3) | |
return W; | |
else if constexpr (IDX == 2) | |
return Z; | |
else if constexpr (IDX == 1) | |
return Y; | |
else | |
return X; | |
} | |
}; | |
template <size_t DIM> | |
struct __declspec(empty_bases) Vec : VecBase<DIM> | |
{ | |
using base_t = VecBase<DIM>; | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& At() const noexcept | |
{ | |
return base_t::template At<IDX>(); | |
} | |
template <size_t IDX> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& At() noexcept | |
{ | |
return base_t::template At<IDX>(); | |
} | |
constexpr Vec() noexcept = default; | |
template <typename T> | |
explicit constexpr Vec(T splat) noexcept | |
: base_t{ static_cast<scalar_t>(splat) } | |
{} | |
}; | |
template <size_t ROWS, size_t COLS = ROWS> | |
struct Matrix | |
{ | |
using column_t = Vec<ROWS>; | |
using row_t = Vec<COLS>; | |
static constexpr bool Square = ROWS == COLS; | |
std::array<column_t, COLS> Columns; | |
template <size_t R, size_t C> | |
ACH_ALWAYS_INLINE | |
constexpr const scalar_t& __vectorcall At() const noexcept | |
{ | |
static_assert( | |
R < ROWS, | |
"Row index out of range" | |
); | |
static_assert( | |
C < COLS, | |
"Column index out of range" | |
); | |
return Columns[C].template At<R>(); | |
} | |
template <size_t R, size_t C> | |
ACH_ALWAYS_INLINE | |
constexpr scalar_t& __vectorcall At() noexcept | |
{ | |
static_assert( | |
R < ROWS, | |
"Row index out of range" | |
); | |
static_assert( | |
C < COLS, | |
"Column index out of range" | |
); | |
return Columns[C].template At<R>(); | |
} | |
ACH_ALWAYS_INLINE | |
static constexpr size_t RowOf_ColMaj(size_t index) noexcept | |
{ | |
return index % ROWS; | |
} | |
ACH_ALWAYS_INLINE | |
static constexpr size_t ColOf_ColMaj(size_t index) noexcept | |
{ | |
return index / ROWS; | |
} | |
ACH_ALWAYS_INLINE | |
static constexpr size_t RowOf_RowMaj(size_t index) noexcept | |
{ | |
return index / COLS; | |
} | |
ACH_ALWAYS_INLINE | |
static constexpr size_t ColOf_RowMaj(size_t index) noexcept | |
{ | |
return index % COLS; | |
} | |
constexpr Matrix() noexcept = default; | |
template <typename T, typename U, typename... V, typename = std::enable_if_t< | |
((ROWS * COLS) >= (sizeof...(V) + 2_sz)) | |
> | |
> | |
constexpr Matrix(T val0, U val1, V... vals) noexcept | |
{ | |
constexpr auto varArgCount = sizeof...(V); | |
constexpr auto allArgCount = varArgCount + 2_sz; | |
if constexpr (ROWS * COLS > allArgCount) //disabling this block 'fixes' the issue | |
{ | |
for (size_t c = 0; c < COLS; c++) | |
Columns[c] = column_t{ 0 }; | |
} | |
//regular arguments | |
At<0, 0>() = static_cast<scalar_t>(val0); | |
At<RowOf_RowMaj(1), ColOf_RowMaj(1)>() = static_cast<scalar_t>(val1); | |
//variadic arguments | |
if constexpr (varArgCount > 0) | |
{ | |
std::array<scalar_t, varArgCount> v{ { static_cast<scalar_t>(vals)... } }; | |
//handle arguments 2-15 explicitly | |
if constexpr (varArgCount > 0 ) At<RowOf_RowMaj(2 ), ColOf_RowMaj(2 )>() = v[0 ]; | |
if constexpr (varArgCount > 1 ) At<RowOf_RowMaj(3 ), ColOf_RowMaj(3 )>() = v[1 ]; | |
if constexpr (varArgCount > 2 ) At<RowOf_RowMaj(4 ), ColOf_RowMaj(4 )>() = v[2 ]; | |
if constexpr (varArgCount > 3 ) At<RowOf_RowMaj(5 ), ColOf_RowMaj(5 )>() = v[3 ]; | |
if constexpr (varArgCount > 4 ) At<RowOf_RowMaj(6 ), ColOf_RowMaj(6 )>() = v[4 ]; | |
if constexpr (varArgCount > 5 ) At<RowOf_RowMaj(7 ), ColOf_RowMaj(7 )>() = v[5 ]; | |
if constexpr (varArgCount > 6 ) At<RowOf_RowMaj(8 ), ColOf_RowMaj(8 )>() = v[6 ]; | |
if constexpr (varArgCount > 7 ) At<RowOf_RowMaj(9 ), ColOf_RowMaj(9 )>() = v[7 ]; | |
if constexpr (varArgCount > 8 ) At<RowOf_RowMaj(10), ColOf_RowMaj(10)>() = v[8 ]; | |
if constexpr (varArgCount > 9 ) At<RowOf_RowMaj(11), ColOf_RowMaj(11)>() = v[9 ]; | |
if constexpr (varArgCount > 10) At<RowOf_RowMaj(12), ColOf_RowMaj(12)>() = v[10]; | |
if constexpr (varArgCount > 11) At<RowOf_RowMaj(13), ColOf_RowMaj(13)>() = v[11]; | |
if constexpr (varArgCount > 12) At<RowOf_RowMaj(14), ColOf_RowMaj(14)>() = v[12]; | |
if constexpr (varArgCount > 13) At<RowOf_RowMaj(15), ColOf_RowMaj(15)>() = v[13]; | |
//handle argument 16 and onward by looping | |
if constexpr (varArgCount > 14) | |
{ | |
/* | |
//Vec's operator[] omitted for brevity, but this branch was this: | |
__pragma(omp simd) | |
for (size_t i = 14, arg = 16; i < varArgCount; i++, arg++) | |
Columns[ColOf_RowMaj(arg)][RowOf_RowMaj(arg)] = v[i]; | |
*/ | |
} | |
} | |
} | |
}; | |
} | |
int main() | |
{ | |
Matrix<3> mat{ 5.3, 7.1, 6.0, 9, 100, 4 }; | |
// mat is not initialized properly here in release builds; | |
// it should contain the following: | |
// 5.3f 7.1f 6.0f | |
// 9.0f 100.0f 4.0f | |
// 0.0f 0.0f 0.0f | |
// | |
return static_cast<int>(mat.At<0,2>()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment