Created
December 10, 2020 18:40
-
-
Save Triang3l/e8d8256d808cf7f889971219aa771003 to your computer and use it in GitHub Desktop.
Xenia — exact unsigned to float for 0x80000000–0xFFFFFFFF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct VECTOR_CONVERT_I2F | |
: Sequence<VECTOR_CONVERT_I2F, | |
I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> { | |
static void Emit(X64Emitter& e, const EmitArgType& i) { | |
// flags = ARITHMETIC_UNSIGNED | |
if (i.instr->flags & ARITHMETIC_UNSIGNED) { | |
#if 1 | |
// 0.5 ULP precision rounding to nearest even (the only rounding mode on | |
// AltiVec) for negatives. | |
// TODO(Triang3l): Ignore the current rounding mode for positives as well | |
// (and ideally throughout all the AltiVec instructions). | |
// Handle values from 0x80000000 first (that would be negative as signed | |
// integers). Round them to the nearest even to the mantissa of 2.0f ^ 31, | |
// overflowing to 2.0f ^ 32 near the end of the range. | |
// uint32_t(2.0f ^ 31 + ULP) == 0b10000000000000000000000100000000u. | |
// Therefore, for even ULP: | |
// ...0001111111 -> ...00 | |
// ...0010000000 -> ...00 | |
// ...0010000001 -> ...01 | |
// For odd ULP: | |
// ...0101111111 -> ...01 | |
// ...0110000000 -> ...10 | |
// ...0110000001 -> ...10 | |
// Add 0b01111111 + ((src >> 8) & 1) to the 32-bit integer before | |
// truncating - however, near UINT32_MAX, it will overflow, in this case, | |
// 1 should be added to the exponent (the number should become 2.0f ^ 32). | |
// xmm0 = (src >> 8) & 1 | |
e.vpslld(e.xmm0, i.src1, 31 - 8); | |
e.vpsrld(e.xmm0, e.xmm0, 31); | |
// xmm0 = src + ((src >> 8) & 1) | |
e.vpaddd(e.xmm0, e.xmm0, i.src1); | |
// xmm0 = src + ((src >> 8) & 1) + 0b1111111 | |
e.vpaddd(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMInt127)); | |
// xmm1 = UINT_MAX if not overflowed, 0 if overflowed. | |
e.vpsrad(e.xmm1, e.xmm0, 31); | |
// xmm0 = rounded number, biased exponent 1 if not overflowed or 0 if | |
// overflowed. | |
e.vpsrld(e.xmm0, e.xmm0, 8); | |
// xmm1 = -2 << 23 if not overflowed, 0 if overflowed. | |
e.vpslld(e.xmm1, e.xmm1, 24); | |
// xmm0 = 2^33 and mantissa if not overflowed, 2^32 if overflowed. | |
e.vpaddd(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMM2To32)); | |
// xmm0 = unsigned integer as float if 0x80000000 or above. | |
e.vpaddd(e.xmm0, e.xmm0, e.xmm1); | |
// xmm1 = for the positive case, signed integer converted to float. | |
e.vcvtdq2ps(e.xmm1, i.src1); | |
// Merge the two ways depending on whether the number is >= 0x80000000. | |
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1); | |
#else | |
// xmm0 = mask of positive values | |
e.vpcmpgtd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMFFFF)); | |
// scale any values >= (unsigned)INT_MIN back to [0, INT_MAX] | |
e.vpsubd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMSignMaskI32)); | |
e.vblendvps(e.xmm1, e.xmm1, i.src1, e.xmm0); | |
// xmm1 = [0, INT_MAX] | |
e.vcvtdq2ps(i.dest, e.xmm1); | |
// scale values back above [INT_MIN, UINT_MAX] | |
e.vpandn(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS)); | |
e.vaddps(i.dest, i.dest, e.xmm0); | |
#endif | |
} else { | |
e.vcvtdq2ps(i.dest, i.src1); | |
} | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment