Created
September 6, 2022 18:21
-
-
Save rygorous/5c815b74e8428fe9ab1e75495c59a9ce to your computer and use it in GitHub Desktop.
FP state scope guard
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scope guard to set up FP state as desired and reset it on exit | |
struct FPStateScope | |
{ | |
U32 saved_state; | |
FPStateScope(); | |
~FPStateScope(); | |
}; | |
// ... | |
#if defined(__RADSSE2__) | |
FPStateScope::FPStateScope()
    : saved_state(_mm_getcsr()) // remember the incoming MXCSR for the destructor
{
    // The FP environment we expect while this guard is alive:
    //  - no exception flags set,
    //  - all exceptions masked (suppressed),
    //  - round to nearest,
    //  - flush-to-zero and denormals-are-zero both off.
    const unsigned int wanted_csr =
        _MM_MASK_MASK           // all exceptions masked
        | _MM_ROUND_NEAREST
        | _MM_FLUSH_ZERO_OFF;

    _mm_setcsr(wanted_csr);
}
FPStateScope::~FPStateScope()
{
    // Write the value held in saved_state back into the control/status register.
    const unsigned int previous_csr = saved_state;
    _mm_setcsr(previous_csr);
}
#elif defined(__RADARM__) && defined(__RAD64__) | |
#ifdef _MSC_VER | |
// Reads the current FPCR value via the MSVC status-register intrinsic.
static U32 read_fpcr()
{
    // System register reads/writes go through 64-bit GPRs, but the
    // architectural FPCR is only 32 bits wide, so narrowing the result
    // loses nothing.
    return static_cast<U32>(_ReadStatusReg(ARM64_FPCR));
}
// Writes `state` to FPCR via the MSVC status-register intrinsic.
static void write_fpcr(U32 state)
{
    // The intrinsic takes a 64-bit value; widen the 32-bit state explicitly.
    const long long wide_state = static_cast<long long>(state);
    _WriteStatusReg(ARM64_FPCR, wide_state);
}
#elif defined(__clang__) || defined(__GNUC__) | |
// Reads the current FPCR value using inline assembly (GCC/Clang).
static U32 read_fpcr()
{
    // MRS moves the system register into a 64-bit GPR even though the
    // architectural FPCR is only 32 bits: read wide, then narrow.
    U64 fpcr_bits;
    __asm__ volatile("mrs %0, fpcr" : "=r"(fpcr_bits));
    return static_cast<U32>(fpcr_bits);
}
// Writes `state` to FPCR using inline assembly (GCC/Clang).
static void write_fpcr(U32 state)
{
    // MSR takes its source from a 64-bit GPR, so widen the 32-bit value first.
    const U64 wide_state = static_cast<U64>(state);
    __asm__ volatile("msr fpcr, %0" : : "r"(wide_state));
}
#else | |
#error compiler? | |
#endif | |
FPStateScope::FPStateScope()
    : saved_state(read_fpcr()) // capture the incoming FPCR before changing it
{
    // An FPCR with every bit clear is the IEEE-compliant mode.
    const U32 ieee_compliant_fpcr = 0;
    write_fpcr(ieee_compliant_fpcr);
}
FPStateScope::~FPStateScope()
{
    // Write the value held in saved_state back into FPCR.
    const U32 previous_fpcr = saved_state;
    write_fpcr(previous_fpcr);
}
#else // neither SSE2 nor ARM64 | |
FPStateScope::FPStateScope()
{
    // Neither SSE2 nor ARM64: no FP state is read or modified here, so just
    // give the member a defined value.
    saved_state = 0;
}
FPStateScope::~FPStateScope()
{
    // Neither SSE2 nor ARM64: no FP state is touched in this configuration,
    // so the destructor has nothing to restore.
}
#endif | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment