SSE round/floor (untested)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Round each lane of x to the nearest integer value, keeping the result as
// float. Based on the add/subtract-2^23 trick also used in
// https://github.com/ispc/ispc/blob/master/builtins/target-sse2-common.ll#L153
//
// How it works: for 0 <= a < 2^23 the sum a + 2^23 lies in [2^23, 2^24),
// where consecutive floats are exactly 1 apart, so the addition itself rounds
// a to an integer; subtracting 2^23 again is exact.
//
// Ties follow the current SSE rounding mode. Under the default
// round-to-nearest-even mode 0.5f -> 0.0f, 1.5f -> 2.0f, 2.5f -> 2.0f.
//
// Lanes with |x| >= 2^23 are already integral and are returned unchanged
// (adding 2^23 to them would lose the low bit, e.g. 8388609.0f would come
// back as 8388608.0f). NaN and infinity lanes also pass through, because the
// less-than compare is false for them and the bias becomes zero.
//
// The sign of x is preserved, so values in (-0.5, 0) and -0.0f yield -0.0f.
//
// NOTE: relies on strict IEEE float arithmetic; do not build with options
// that allow (x + c) - c to be simplified (e.g. -ffast-math).
__m128 round_to_int(__m128 x)
{
    const __m128 sign_mask = _mm_set1_ps(-0.0f);      // only the sign bit set
    const __m128 magic     = _mm_set1_ps(8388608.0f); // 2^23

    __m128 sign = _mm_and_ps(x, sign_mask);     // sign bit of each lane
    __m128 xabs = _mm_andnot_ps(sign_mask, x);  // |x|

    // Bias is 2^23 where |x| < 2^23 and 0.0f elsewhere (large, inf, NaN).
    __m128 smallenough = _mm_cmplt_ps(xabs, magic);
    __m128 bias        = _mm_and_ps(smallenough, magic);

    // The add performs the rounding; the subtract undoes the bias exactly.
    __m128 rounded = _mm_sub_ps(_mm_add_ps(xabs, bias), bias);

    // Put the original sign back (also turns +0.0f into -0.0f for x < 0).
    return _mm_or_ps(rounded, sign);
}
// Per-lane floor: the largest integer value (as float) that is <= x.
//
// Strategy: round |x| to the nearest integer with the add/subtract-2^23
// trick, restore the sign, then subtract 1.0f in every lane where rounding
// went up (x < rounded value).
//
// Lanes with |x| >= 2^23 are already integral and pass through unchanged;
// NaN and infinity lanes pass through as well, since both compares are false
// for them. The sign of zero is preserved (floor(-0.0f) == -0.0f).
//
// NOTE(review): this name collides with floor() from <math.h>. It is kept to
// avoid breaking existing callers, but a translation unit cannot include
// <math.h> alongside this definition.
__m128 floor(__m128 x)
{
    const __m128 sign_mask = _mm_set1_ps(-0.0f);      // only the sign bit set
    const __m128 magic     = _mm_set1_ps(8388608.0f); // 2^23

    __m128 sign = _mm_and_ps(x, sign_mask);
    __m128 xabs = _mm_andnot_ps(sign_mask, x);        // |x|

    // Only bias lanes small enough for x + 2^23 to land in [2^23, 2^24),
    // where the float spacing is exactly 1.
    __m128 smallenough = _mm_cmplt_ps(xabs, magic);
    __m128 bias        = _mm_and_ps(smallenough, magic);
    __m128 rounded     = _mm_sub_ps(_mm_add_ps(xabs, bias), bias);
    __m128 y           = _mm_or_ps(rounded, sign);    // round-to-nearest of x

    __m128 cmp   = _mm_cmplt_ps(x, y);                // x < round(x)?
    __m128 fudge = _mm_and_ps(cmp, _mm_set1_ps(1.0f)); // 1.0f if x < round(x)
    return _mm_sub_ps(y, fudge);
}
// Round each lane of x to the nearest integer value (result stays float),
// including inputs whose magnitude is 2^23 or larger.
//
// The magnitude is rounded by adding and then subtracting 2^23; that offset
// is masked to zero for lanes with |x| >= 2^23 (and for NaN, where the
// compare is false), so those lanes come back unchanged. The sign bit is
// stripped first and XORed back at the end.
//
// Ties follow the current rounding mode; under the default
// round-to-nearest-even mode 0.5f rounds to 0.0f.
//
// This is probably more work than converting through integers, and it has
// not been exhaustively tested.
__m128 round_to_int_fixed(__m128 x)
{
    const __m128 two_pow_23 = _mm_set1_ps(8388608.0f); // 2^23
    const __m128 abs_mask   = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));

    // Split the input into magnitude and sign bit.
    __m128 magnitude = _mm_and_ps(abs_mask, x);
    __m128 sign_bit  = _mm_andnot_ps(abs_mask, x);

    // 2^23 where the magnitude is below 2^23, otherwise 0.0f.
    __m128 offset = _mm_and_ps(two_pow_23, _mm_cmplt_ps(magnitude, two_pow_23));

    // Adding the offset forces rounding to an integer; subtracting removes it.
    __m128 rounded = _mm_sub_ps(_mm_add_ps(magnitude, offset), offset);

    return _mm_xor_ps(sign_bit, rounded);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
round_to_int(0.5f) and round_to_int_fixed(0.5f) return 0.0f - it seems this code has issues. Tested in MSVC 2013.