Skip to content

Instantly share code, notes, and snippets.

@Triang3l
Created December 10, 2020 18:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Triang3l/e8d8256d808cf7f889971219aa771003 to your computer and use it in GitHub Desktop.
Save Triang3l/e8d8256d808cf7f889971219aa771003 to your computer and use it in GitHub Desktop.
Xenia — exact unsigned to float for 0x80000000–0xFFFFFFFF
struct VECTOR_CONVERT_I2F
    : Sequence<VECTOR_CONVERT_I2F,
               I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> {
  // Emits x64 code converting each 32-bit integer lane of src1 to a float in
  // dest. When the instruction carries ARITHMETIC_UNSIGNED the lanes are
  // treated as unsigned; otherwise a plain signed conversion is emitted.
  // Uses xmm0 and xmm1 of the emitter as scratch registers.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    const auto& src = i.src1;
    const auto& dst = i.dest;
    const auto& tmp0 = e.xmm0;
    const auto& tmp1 = e.xmm1;

    // Signed input: a single packed conversion is all that is needed.
    if (!(i.instr->flags & ARITHMETIC_UNSIGNED)) {
      e.vcvtdq2ps(dst, src);
      return;
    }

#if 1
    // Unsigned input. Lanes below 0x80000000 go through the signed
    // conversion; lanes at or above 0x80000000 (negative when read as signed)
    // have their float bit pattern assembled by hand so that they are rounded
    // to the nearest even with 0.5 ULP precision (the only rounding mode on
    // AltiVec).
    // TODO(Triang3l): Ignore the current rounding mode for positives as well
    // (and ideally throughout all the AltiVec instructions).
    //
    // For magnitudes in [2^31, 2^32) one float ULP equals 1 << 8:
    //   uint32_t(2.0f ^ 31 + ULP) == 0b10000000000000000000000100000000u
    // so integer bit 8 becomes the mantissa LSB and bits 7:0 are rounded
    // away. Adding 0b01111111 + bit8 before truncating rounds half to even:
    //   mantissa LSB 0: ...0001111111 -> ...00
    //                   ...0010000000 -> ...00
    //                   ...0010000001 -> ...01
    //   mantissa LSB 1: ...0101111111 -> ...01
    //                   ...0110000000 -> ...10
    //                   ...0110000001 -> ...10
    // Close to UINT32_MAX that addition wraps around 32 bits; such lanes must
    // come out as exactly 2.0f ^ 32.
    constexpr int kMantissaLsbBit = 8;

    // tmp0 = (src >> 8) & 1: move bit 8 up to bit 31, then down to bit 0.
    e.vpslld(tmp0, src, 31 - kMantissaLsbBit);
    e.vpsrld(tmp0, tmp0, 31);
    // tmp0 = src + ((src >> 8) & 1) + 0b1111111 (the rounding increment).
    e.vpaddd(tmp0, tmp0, src);
    e.vpaddd(tmp0, tmp0, e.GetXmmConstPtr(XMMInt127));
    // tmp1 = all ones where bit 31 survived the addition (no wrap), zero
    // where the addition wrapped.
    e.vpsrad(tmp1, tmp0, 31);
    // tmp0 = rounded mantissa with bit 23 set (biased exponent 1) where there
    // was no wrap, 0 where the addition wrapped.
    e.vpsrld(tmp0, tmp0, kMantissaLsbBit);
    // tmp1 = -2 << 23 (two exponent steps down) where there was no wrap, 0
    // where the addition wrapped.
    e.vpslld(tmp1, tmp1, 24);
    // tmp0 = 2^33 exponent plus mantissa where there was no wrap, exactly
    // 2^32 where the addition wrapped.
    e.vpaddd(tmp0, tmp0, e.GetXmmConstPtr(XMM2To32));
    // tmp0 = final float bits for lanes >= 0x80000000: the exponent drops to
    // 2^31 where there was no wrap and stays at 2^32 otherwise.
    e.vpaddd(tmp0, tmp0, tmp1);
    // tmp1 = signed conversion, valid for lanes below 0x80000000.
    e.vcvtdq2ps(tmp1, src);
    // Select per lane by the top bit of src: set -> tmp0, clear -> tmp1.
    e.vblendvps(dst, tmp1, tmp0, src);
#else
    // Disabled alternative: shift large lanes into the signed range, convert,
    // then add the offset back as a float.
    // tmp0 = mask of lanes that compare greater than XMMFFFF as signed
    // integers (the positive values).
    e.vpcmpgtd(tmp0, src, e.GetXmmConstPtr(XMMFFFF));
    // tmp1 = src with values >= (unsigned)INT_MIN scaled back into
    // [0, INT_MAX]; lanes flagged in tmp0 keep their original value.
    e.vpsubd(tmp1, src, e.GetXmmConstPtr(XMMSignMaskI32));
    e.vblendvps(tmp1, tmp1, src, tmp0);
    // dst = float conversion of the [0, INT_MAX] values.
    e.vcvtdq2ps(dst, tmp1);
    // Add XMMPosIntMinPS back onto the lanes that were scaled down (those not
    // flagged in tmp0) to return them to [INT_MIN, UINT_MAX].
    e.vpandn(tmp0, tmp0, e.GetXmmConstPtr(XMMPosIntMinPS));
    e.vaddps(dst, dst, tmp0);
#endif
  }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment