Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
SSE round/floor (untested)
// Rounds each lane of x to an integral value (still stored as float) using
// the "add and subtract 2^23" trick: floats with magnitude in [2^23, 2^24)
// have no fraction bits, so adding 2^23 with x's sign forces the FPU to round
// the fraction away, and subtracting it again leaves the rounded value.
//
// The sign is merged into the magic constant with one orps (instead of
// stripping and restoring it with two xorps).
//
// Lanes with |x| >= 2^23 are already integral. Pushing them through the
// add/subtract could round them to a *different* integer (e.g. 8388609 would
// come back as 8388608), so the bias is masked to zero for those lanes and
// they pass through unchanged. NaN lanes fail the compare and also pass
// through unchanged.
//
// NOTE: rounding follows the current SSE rounding mode. Under the default
// round-to-nearest-even mode, ties go to the even integer, so 0.5f -> 0.0f,
// 1.5f -> 2.0f and 2.5f -> 2.0f.
__m128 round_to_int(__m128 x)
{
    const __m128 sign_mask = _mm_set1_ps(-0.0f);     // only the sign bit set
    const __m128 two23 = _mm_set1_ps(8388608.0f);    // 2^23

    __m128 sign = _mm_and_ps(x, sign_mask);
    __m128 xabs = _mm_andnot_ps(sign_mask, x);       // |x|
    __m128 magic = _mm_or_ps(sign, two23);           // copysign(2^23, x)

    // Only bias lanes that still have fraction bits to lose.
    __m128 smallenough = _mm_cmplt_ps(xabs, two23);
    __m128 bias = _mm_and_ps(smallenough, magic);    // 0.0f for large/NaN lanes

    __m128 round1 = _mm_add_ps(x, bias);
    __m128 round2 = _mm_sub_ps(round1, bias);
    return round2;
}
__m128 floor(__m128 x)
__m128 y = round_to_int(x);
__m128 cmp = _mm_cmplt_ps(x, y); // x < round(x)?
__m128 fudge = _mm_and_ps(cmp, _mm_set1_ps(1.0f)); // 1.0f if x < round(x)
__m128 result = _mm_sub_ps(y, fudge);
// This variant of "round" should work, though.
// But I think it's more work than going through ints.
// (Still completely untested, mind.)
// Rounds each lane of x to an integral value (still stored as float).
//
// Works on |x|: adds and then subtracts 2^23 so the FPU rounds the fraction
// bits away, and finally restores the original sign bit with an xor. Lanes
// with |x| >= 2^23 (already integral) and NaN lanes fail the compare, get a
// bias of 0.0f and therefore pass through unchanged.
//
// Because the sign bit is re-applied at the end, small negative inputs that
// round to zero come back as -0.0f.
//
// NOTE: rounding follows the current SSE rounding mode. Under the default
// round-to-nearest-even mode, ties go to the even integer (0.5f -> 0.0f).
__m128 round_to_int_fixed(__m128 x)
{
    const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    const __m128 magic = _mm_set1_ps(8388608.0f);    // 2^23

    __m128 xabs = _mm_and_ps(x, abs_mask);           // |x|
    __m128 sign = _mm_xor_ps(x, xabs);               // just x's sign bit

    // Only bias lanes that still have fraction bits to lose.
    __m128 smallenough = _mm_cmplt_ps(xabs, magic);
    __m128 bias = _mm_and_ps(smallenough, magic);

    __m128 round1 = _mm_add_ps(xabs, bias);
    __m128 round2 = _mm_sub_ps(round1, bias);
    __m128 round3 = _mm_xor_ps(round2, sign);        // put the sign back
    return round3;
}

This comment has been minimized.

Copy link

@sopyer sopyer commented Dec 15, 2015

round_to_int(0.5f) and round_to_int_fixed(0.5f) return 0.0f - it seems this code has issues. Tested in MSVC 2013.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment