Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created September 6, 2022 18:21
Show Gist options
  • Save rygorous/5c815b74e8428fe9ab1e75495c59a9ce to your computer and use it in GitHub Desktop.
Save rygorous/5c815b74e8428fe9ab1e75495c59a9ce to your computer and use it in GitHub Desktop.
FP state scope guard
// Scope guard that sets up the floating-point control state as desired on
// construction and resets it to the previous state on destruction.
//
// Usage: declare one as a local at the top of the region that needs the
// known FP state; the previous state comes back when the local goes out of
// scope.
struct FPStateScope
{
    // Control-register value captured by the constructor and written back by
    // the destructor (set to 0 on targets that have no implementation).
    U32 saved_state;

    FPStateScope();
    ~FPStateScope();

    // Non-copyable: a copy would run the restoring destructor a second time
    // with the same saved value, which could undo the setup of an enclosing
    // guard that is still alive.
    FPStateScope(const FPStateScope &) = delete;
    FPStateScope &operator=(const FPStateScope &) = delete;
};
// ...
#if defined(__RADSSE2__)
// SSE2 path: remember the caller's MXCSR value, then switch to the FP state
// this code expects.
FPStateScope::FPStateScope()
    : saved_state(_mm_getcsr())
{
    // Expected FP state: no exception flags set, all exceptions masked
    // (suppressed), round to nearest, and both flush-to-zero and
    // denormals-are-zero turned off.
    const unsigned int expected_csr =
        _MM_MASK_MASK /* all exceptions masked */
        | _MM_ROUND_NEAREST
        | _MM_FLUSH_ZERO_OFF;
    _mm_setcsr(expected_csr);
}
// SSE2 path: write back the MXCSR value that the constructor captured.
FPStateScope::~FPStateScope()
{
    const unsigned int csr_on_entry = saved_state;
    _mm_setcsr(csr_on_entry);
}
#elif defined(__RADARM__) && defined(__RAD64__)
#ifdef _MSC_VER
// Reads the current FPCR value via the MSVC status-register intrinsic.
static U32 read_fpcr()
{
    // System register reads/writes go through 64-bit GPRs, but the
    // architectural FPCR is only 32 bits wide, so keep just the low 32 bits.
    return static_cast<U32>(_ReadStatusReg(ARM64_FPCR));
}
// Writes `state` to FPCR via the MSVC status-register intrinsic.
static void write_fpcr(U32 state)
{
    // The intrinsic takes a 64-bit value; widen the 32-bit state explicitly.
    const __int64 fpcr_value = state;
    _WriteStatusReg(ARM64_FPCR, fpcr_value);
}
#elif defined(__clang__) || defined(__GNUC__)
// Reads the current FPCR value with an inline `mrs` instruction.
static U32 read_fpcr()
{
    // System register reads/writes go through 64-bit GPRs, but the
    // architectural FPCR is only 32 bits wide: read into a 64-bit temporary
    // and return its low 32 bits.
    U64 fpcr_bits;
    __asm__ volatile("mrs %0, fpcr" : "=r"(fpcr_bits));
    return static_cast<U32>(fpcr_bits);
}
// Writes `state` to FPCR with an inline `msr` instruction.
static void write_fpcr(U32 state)
{
    // `msr` takes its source from a 64-bit GPR, so widen the value first.
    const U64 widened_state = state;
    __asm__ volatile("msr fpcr, %0" : : "r"(widened_state));
}
#else
#error compiler?
#endif
// ARM64 path: remember the caller's FPCR value, then switch to the FP state
// this code expects.
FPStateScope::FPStateScope()
    : saved_state(read_fpcr())
{
    // IEEE compliant mode in FPCR is simply every bit cleared.
    const U32 ieee_compliant_fpcr = 0;
    write_fpcr(ieee_compliant_fpcr);
}
// ARM64 path: write back the FPCR value that the constructor captured.
FPStateScope::~FPStateScope()
{
    const U32 fpcr_on_entry = saved_state;
    write_fpcr(fpcr_on_entry);
}
#else // neither SSE2 nor ARM64
// Fallback path: no FP control state is read or changed here; the saved
// value is just given a defined content.
FPStateScope::FPStateScope()
{
    saved_state = 0;
}
// Fallback path: the constructor changed nothing, so nothing is restored.
FPStateScope::~FPStateScope() {}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment