castano/half_to_float_test.cpp

## half_to_float_test.cpp
#include <intrin.h> // __rdtsc
#include <stdio.h>

typedef unsigned int uint;
typedef unsigned short uint16;
typedef unsigned long long uint64;

// ISPC functions:
extern "C" void half_to_float_test(const uint16 * vin, float * vout, int count);
extern "C" void half_to_float_test_fast(const uint16 * vin, float * vout, int count);
extern "C" void half_to_float_test_ryg(const uint16 * vin, float * vout, int count);
extern "C" void half_to_float_test_fast_ryg(const uint16 * vin, float * vout, int count);

// Bit-level view of a 32-bit float: the same storage can be read as a raw
// integer (u), as a float (f), or through the three bit-fields below.
// NOTE(review): the bit-field view lines up with u only if the compiler
// allocates bit-fields starting at the least significant bit -- confirm for
// every compiler this is built with.
union FP32
{
    uint u;     // raw bit pattern
    float f;    // the same bits interpreted as a float
    struct
    {
        uint Mantissa : 23;     // fraction bits
        uint Exponent : 8;      // biased exponent (bias 127 is applied by the conversion code)
        uint Sign : 1;          // sign bit
    };
};

// Bit-level view of a half-precision value: raw 16-bit pattern (u) or the
// sign / exponent / mantissa bit-fields.
// NOTE(review): the bit-fields are declared with a 32-bit base type, so the
// union may be larger than u; only the 16 declared bits are used here. As
// with FP32, the overlay assumes bit-fields are allocated from the least
// significant bit -- confirm per compiler.
union FP16
{
    unsigned short u;   // raw bit pattern
    struct
    {
        uint Mantissa : 10;     // fraction bits
        uint Exponent : 5;      // biased exponent (bias 15 is applied by the conversion code)
        uint Sign : 1;          // sign bit
    };
};

// Reference half -> float conversion, performed field by field.
// Covers signed zero, denormals (renormalized into a normal float), Inf/NaN
// and normalized numbers. Returns the result as an FP32 so callers can
// compare raw bits.
static FP32 half_to_float_full(FP16 h)
{
    FP32 result = { 0 };

    const uint half_exp = h.Exponent;
    const uint half_man = h.Mantissa;

    // The sign bit carries over unchanged for every class of input.
    result.Sign = h.Sign;

    if (half_exp == 0)
    {
        // Signed zero: everything except the sign stays zero.
        if (half_man == 0)
            return result;

        // Denormal: shift the mantissa left until the implicit leading one
        // reaches bit 10, counting the shifts beyond the first.
        uint m = half_man << 1;
        int extra_shifts = 0;
        while ((m & 0x400) == 0)
        {
            m <<= 1;
            extra_shifts++;
        }

        result.Mantissa = (m & 0x3ff) << 13;
        result.Exponent = 127 - 15 - extra_shifts;
        return result;
    }

    // Normal numbers and Inf/NaN share the mantissa placement. For NaNs the
    // payload simply moves to the top of the wider mantissa.
    result.Mantissa = half_man << 13;

    if (half_exp == 0x1f)
        result.Exponent = 255;                      // Inf/NaN
    else
        result.Exponent = 127 - 15 + half_exp;      // rebias the exponent

    return result;
}

// Converts four half-precision values to four floats using SSE2 only.
//
// h holds one half in the low 16 bits of each 32-bit lane; the upper 16 bits
// of each lane are masked off and ignored. The result holds the four
// converted floats in the same lane order.
//
// Constants are built with _mm_set1_epi32 rather than aligned static tables
// reinterpreted through pointer casts, so the routine needs no
// compiler-specific alignment attribute.
static __m128 half_to_float4_SSE2(__m128i h)
{
    const __m128i exp_mask = _mm_set1_epi32(0x7c00 << 13);  // half exponent field, already at float position

    // Split into sign and exponent+mantissa, and move each to its float position.
    const __m128i sign_bits = _mm_slli_epi32(_mm_and_si128(h, _mm_set1_epi32(0x8000)), 16);
    const __m128i exp_mant  = _mm_slli_epi32(_mm_and_si128(h, _mm_set1_epi32(0x7fff)), 13);

    // Classify each lane by its exponent field.
    const __m128i exp_only  = _mm_and_si128(exp_mant, exp_mask);
    const __m128i is_infnan = _mm_cmpeq_epi32(exp_only, exp_mask);
    const __m128i is_denorm = _mm_cmpeq_epi32(exp_only, _mm_setzero_si128());

    // Rebias the exponent by (127 - 15); Inf/NaN lanes receive a second
    // adjustment so that their exponent field becomes all ones.
    __m128i rebiased = _mm_add_epi32(exp_mant, _mm_set1_epi32((127 - 15) << 23));
    rebiased = _mm_add_epi32(rebiased, _mm_and_si128(is_infnan, _mm_set1_epi32((128 - 16) << 23)));

    // Zero/denormal lanes: bump the exponent by one and subtract the magic
    // constant as a float; the floating-point subtract renormalizes the value.
    const __m128i bumped = _mm_add_epi32(rebiased, _mm_set1_epi32(1 << 23));
    const __m128  renorm = _mm_sub_ps(_mm_castsi128_ps(bumped),
                                      _mm_castsi128_ps(_mm_set1_epi32(113 << 23)));

    // Per lane, select the renormalized value or the plain rebiased one.
    const __m128 denorm_mask = _mm_castsi128_ps(is_denorm);
    const __m128 magnitude   = _mm_or_ps(_mm_and_ps(renorm, denorm_mask),
                                         _mm_andnot_ps(denorm_mask, _mm_castsi128_ps(rebiased)));

    // Finally merge the sign back in.
    return _mm_or_ps(magnitude, _mm_castsi128_ps(sign_bits));
}

void half_to_float_test_sse2(const uint16 * vin, float * vout, int count) {
    __m128i zero = _mm_setzero_si128();

    for (int i = 0; i < count; i += 8)
    {
        __m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
        __m128i a = _mm_unpacklo_epi16(in, zero);
        __m128i b = _mm_unpackhi_epi16(in, zero);

        __m128 outa = half_to_float4_SSE2(a);
        _mm_storeu_ps((float *)(vout + i), outa);

        __m128 outb = half_to_float4_SSE2(b);
        _mm_storeu_ps((float *)(vout + i + 4), outb);
    }
}


int test_results(const uint16 * vin, const float * vout, int count) {
    int error_count = 0;
    for (int i = 0; i < count; i++)
    {
        FP16 in;
        in.u = vin[i];
        FP32 full = half_to_float_full(in);

        FP32 out;
        out.f = vout[i];

        if (full.u != out.u) error_count++;
    }
    return error_count;
}


int main(void) {

    uint16 vin[0x10000];
    float vout[0x10000];

    // Init input.
    for (int i = 0; i < 0x10000; i++)
    {
        vin[i] = i;
    }

    // Run tests.
    uint64 start, end;

    half_to_float_test(vin, vout, 0x10000);
    int error_count = test_results(vin, vout, 0x10000);

    start = __rdtsc();
    for (int i = 0; i < 64; i++) half_to_float_test(vin, vout, 0x10000);
    end = __rdtsc();
    printf("half_to_float:          %.3f (%d)\n", double(end-start) / (1000*1000*4), error_count);

    half_to_float_test_fast(vin, vout, 0x10000);
    error_count = test_results(vin, vout, 0x10000);

    start = __rdtsc();
    for (int i = 0; i < 64; i++) half_to_float_test_fast(vin, vout, 0x10000);
    end = __rdtsc();
    printf("half_to_float_fast:     %.3f (%d)\n", double(end-start) / (1000*1000*4), error_count);

    half_to_float_test_ryg(vin, vout, 0x10000);
    error_count = test_results(vin, vout, 0x10000);

    start = __rdtsc();
    for (int i = 0; i < 64; i++) half_to_float_test_ryg(vin, vout, 0x10000);
    end = __rdtsc();
    printf("half_to_float_ryg:      %.3f (%d)\n", double(end-start) / (1000*1000*4), error_count);

    half_to_float_test_fast_ryg(vin, vout, 0x10000);
    error_count = test_results(vin, vout, 0x10000);

    start = __rdtsc();
    for (int i = 0; i < 64; i++) half_to_float_test_fast_ryg(vin, vout, 0x10000);
    end = __rdtsc();
    printf("half_to_float_fast_ryg: %.3f (%d)\n", double(end-start) / (1000*1000*4), error_count);

    half_to_float_test_sse2(vin, vout, 0x10000);
    error_count = test_results(vin, vout, 0x10000);

    start = __rdtsc();
    for (int i = 0; i < 64; i++) half_to_float_test_sse2(vin, vout, 0x10000);
    end = __rdtsc();
    printf("half_to_float_sse2:     %.3f (%d)\n", double(end-start) / (1000*1000*4), error_count);

    return 0;
}

## half_to_float_test.ispc

// Half -> float conversion that treats every input class explicitly:
// normal numbers are rebiased with an integer add, Inf/NaN get a second
// exponent adjustment, and zero/denormals are renormalized with one
// floating-point subtract.
inline float half_to_float_ryg(unsigned int16 h)
{
    if (__have_native_half) {
        // Native half support is reported: defer to the builtin conversion.
        return __half_to_float_varying(h);
    }
    else {
        // Separate the sign bit from the 15 exponent+mantissa bits.
        unsigned int32 sign16 = h & (int32)0x8000u;
        unsigned int32 expmant16 = h & (int32)0x7fffu;

        // Move both pieces to their positions in a 32-bit float.
        unsigned int32 sign32 = ((unsigned int32) sign16) << 16;
        unsigned int32 bits = ((unsigned int32) expmant16) << 13;

        // Exponent field on its own; the mask equals (0x7c00 << 13).
        unsigned int32 exp_field = bits & 0xF800000;

        // Rebias the exponent: (127 - 15) << 23.
        bits = bits + 0x38000000;

        if (exp_field == 0)
        {
            // Zero/denormal: one more exponent step, then let the float
            // subtract renormalize the value.
            bits = bits + 0x800000;
            bits = intbits(floatbits(bits) - floatbits(0x38800000));
        }
        else if (exp_field == 0xF800000)
        {
            // Inf/NaN: extra adjustment pushes the exponent to all ones.
            bits = bits + 0x38000000;
        }

        return floatbits(sign32 | bits);
    }
}

// Fast half -> float conversion: shifts the bits into place and rebiases the
// exponent, with no special handling for zero, denormal, Inf or NaN inputs
// (those receive the same rebias as normal numbers).
inline float half_to_float_fast_ryg(unsigned int16 h)
{
    if (__have_native_half) {
        // Native half support is reported: defer to the builtin conversion.
        return __half_to_float_varying(h);
    }
    else {
        // Separate the sign bit from the 15 exponent+mantissa bits.
        unsigned int32 sign16 = h & (int32)0x8000u;
        unsigned int32 expmant16 = h & (int32)0x7fffu;

        // Move both pieces to their positions in a 32-bit float.
        unsigned int32 sign32 = ((unsigned int32) sign16) << 16;
        unsigned int32 bits = ((unsigned int32) expmant16) << 13;

        // Rebias the exponent: (127 - 15) << 23.
        bits = bits + 0x38000000;

        return floatbits(sign32 | bits);
    }
}


// Exported entry point: converts count halves from vin into vout with
// half_to_float (not defined in this file).
export void half_to_float_test(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
{
    foreach (idx = 0 ... count)
    {
        vout[idx] = half_to_float(vin[idx]);
    }
}

// Exported entry point: converts count halves from vin into vout with
// half_to_float_fast (not defined in this file).
export void half_to_float_test_fast(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
{
    foreach (idx = 0 ... count)
    {
        vout[idx] = half_to_float_fast(vin[idx]);
    }
}

// Exported entry point: converts count halves from vin into vout with
// half_to_float_ryg.
export void half_to_float_test_ryg(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
{
    foreach (idx = 0 ... count)
    {
        vout[idx] = half_to_float_ryg(vin[idx]);
    }
}

// Exported entry point: converts count halves from vin into vout with
// half_to_float_fast_ryg.
export void half_to_float_test_fast_ryg(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
{
    foreach (idx = 0 ... count)
    {
        vout[idx] = half_to_float_fast_ryg(vin[idx]);
    }
}

## results.txt
Reference:

half_to_float_sse2:     2.306

ISPC targeting SSE2:

    half_to_float:          6.668
    half_to_float_fast:     2.554
    half_to_float_ryg:      4.390
    half_to_float_fast_ryg: 2.005

ISPC targeting SSE4:

    half_to_float:          4.776
    half_to_float_fast:     1.939
    half_to_float_ryg:      3.121
    half_to_float_fast_ryg: 1.448

ISPC targeting AVX:

    half_to_float:          4.368
    half_to_float_fast:     1.857
    half_to_float_ryg:      2.791
    half_to_float_fast_ryg: 1.275
	#include <intrin.h> // __rdtsc
	#include <stdio.h>

	typedef unsigned int uint;
	typedef unsigned short uint16;
	typedef unsigned long long uint64;

	// ISPC functions:
	extern "C" void half_to_float_test(const uint16 * vin, float * vout, int count);
	extern "C" void half_to_float_test_fast(const uint16 * vin, float * vout, int count);
	extern "C" void half_to_float_test_ryg(const uint16 * vin, float * vout, int count);
	extern "C" void half_to_float_test_fast_ryg(const uint16 * vin, float * vout, int count);

	// Bit-level view of a 32-bit float: the same storage can be read as a raw
	// integer (u), as a float (f), or through the three bit-fields below.
	// NOTE(review): the bit-field view lines up with u only if the compiler
	// allocates bit-fields starting at the least significant bit -- confirm.
	union FP32
	{
	uint u; // raw bit pattern
	float f; // the same bits interpreted as a float
	struct
	{
	uint Mantissa : 23; // fraction bits
	uint Exponent : 8; // biased exponent (bias 127 is applied by the conversion code)
	uint Sign : 1; // sign bit
	};
	};

	// Bit-level view of a half-precision value: raw 16-bit pattern (u) or the
	// sign / exponent / mantissa bit-fields.
	// NOTE(review): the bit-fields use a 32-bit base type, so the union may be
	// larger than u; the overlay also assumes bit-fields are allocated from the
	// least significant bit -- confirm per compiler.
	union FP16
	{
	unsigned short u; // raw bit pattern
	struct
	{
	uint Mantissa : 10; // fraction bits
	uint Exponent : 5; // biased exponent (bias 15 is applied by the conversion code)
	uint Sign : 1; // sign bit
	};
	};

	// Reference half -> float conversion, performed field by field.
	// Covers signed zero, denormals (renormalized into a normal float), Inf/NaN
	// and normalized numbers. Returns the result as an FP32 so callers can
	// compare raw bits.
	static FP32 half_to_float_full(FP16 h)
	{
	    FP32 result = { 0 };

	    const uint half_exp = h.Exponent;
	    const uint half_man = h.Mantissa;

	    // The sign bit carries over unchanged for every class of input.
	    result.Sign = h.Sign;

	    if (half_exp == 0)
	    {
	        // Signed zero: everything except the sign stays zero.
	        if (half_man == 0)
	            return result;

	        // Denormal: shift the mantissa left until the implicit leading one
	        // reaches bit 10, counting the shifts beyond the first.
	        uint m = half_man << 1;
	        int extra_shifts = 0;
	        while ((m & 0x400) == 0)
	        {
	            m <<= 1;
	            extra_shifts++;
	        }

	        result.Mantissa = (m & 0x3ff) << 13;
	        result.Exponent = 127 - 15 - extra_shifts;
	        return result;
	    }

	    // Normal numbers and Inf/NaN share the mantissa placement. For NaNs the
	    // payload simply moves to the top of the wider mantissa.
	    result.Mantissa = half_man << 13;

	    if (half_exp == 0x1f)
	        result.Exponent = 255;                      // Inf/NaN
	    else
	        result.Exponent = 127 - 15 + half_exp;      // rebias the exponent

	    return result;
	}

	// Converts four half-precision values to four floats using SSE2 only.
	//
	// h holds one half in the low 16 bits of each 32-bit lane; the upper 16 bits
	// of each lane are masked off and ignored. The result holds the four
	// converted floats in the same lane order.
	//
	// Constants are built with _mm_set1_epi32, so no pointer casts from integer
	// tables to vector types are needed.
	static __m128 half_to_float4_SSE2(__m128i h)
	{
	    const __m128i exp_mask = _mm_set1_epi32(0x7c00 << 13);  // half exponent field, already at float position

	    // Split into sign and exponent+mantissa, and move each to its float position.
	    const __m128i sign_bits = _mm_slli_epi32(_mm_and_si128(h, _mm_set1_epi32(0x8000)), 16);
	    const __m128i exp_mant  = _mm_slli_epi32(_mm_and_si128(h, _mm_set1_epi32(0x7fff)), 13);

	    // Classify each lane by its exponent field.
	    const __m128i exp_only  = _mm_and_si128(exp_mant, exp_mask);
	    const __m128i is_infnan = _mm_cmpeq_epi32(exp_only, exp_mask);
	    const __m128i is_denorm = _mm_cmpeq_epi32(exp_only, _mm_setzero_si128());

	    // Rebias the exponent by (127 - 15); Inf/NaN lanes receive a second
	    // adjustment so that their exponent field becomes all ones.
	    __m128i rebiased = _mm_add_epi32(exp_mant, _mm_set1_epi32((127 - 15) << 23));
	    rebiased = _mm_add_epi32(rebiased, _mm_and_si128(is_infnan, _mm_set1_epi32((128 - 16) << 23)));

	    // Zero/denormal lanes: bump the exponent by one and subtract the magic
	    // constant as a float; the floating-point subtract renormalizes the value.
	    const __m128i bumped = _mm_add_epi32(rebiased, _mm_set1_epi32(1 << 23));
	    const __m128  renorm = _mm_sub_ps(_mm_castsi128_ps(bumped),
	                                      _mm_castsi128_ps(_mm_set1_epi32(113 << 23)));

	    // Per lane, select the renormalized value or the plain rebiased one.
	    const __m128 denorm_mask = _mm_castsi128_ps(is_denorm);
	    const __m128 magnitude   = _mm_or_ps(_mm_and_ps(renorm, denorm_mask),
	                                         _mm_andnot_ps(denorm_mask, _mm_castsi128_ps(rebiased)));

	    // Finally merge the sign back in.
	    return _mm_or_ps(magnitude, _mm_castsi128_ps(sign_bits));
	}

	void half_to_float_test_sse2(const uint16 * vin, float * vout, int count) {
	__m128i zero = _mm_setzero_si128();

	for (int i = 0; i < count; i += 8)
	{
	__m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
	__m128i a = _mm_unpacklo_epi16(in, zero);
	__m128i b = _mm_unpackhi_epi16(in, zero);

	__m128 outa = half_to_float4_SSE2(a);
	_mm_storeu_ps((float *)(vout + i), outa);

	__m128 outb = half_to_float4_SSE2(b);
	_mm_storeu_ps((float *)(vout + i + 4), outb);
	}
	}


	int test_results(const uint16 * vin, const float * vout, int count) {
	int error_count = 0;
	for (int i = 0; i < count; i++)
	{
	FP16 in;
	in.u = vin[i];
	FP32 full = half_to_float_full(in);

	FP32 out;
	out.f = vout[i];

	if (full.u != out.u) error_count++;
	}
	return error_count;
	}




	int main(void) {

	uint16 vin[0x10000];
	float vout[0x10000];

	// Init input.
	for (int i = 0; i < 0x10000; i++)
	{
	vin[i] = i;
	}

	// Run tests.
	uint64 start, end;

	half_to_float_test(vin, vout, 0x10000);
	int error_count = test_results(vin, vout, 0x10000);

	start = __rdtsc();
	for (int i = 0; i < 64; i++) half_to_float_test(vin, vout, 0x10000);
	end = __rdtsc();
	printf("half_to_float: %.3f (%d)\n", double(end-start) / (100010004), error_count);

	half_to_float_test_fast(vin, vout, 0x10000);
	error_count = test_results(vin, vout, 0x10000);

	start = __rdtsc();
	for (int i = 0; i < 64; i++) half_to_float_test_fast(vin, vout, 0x10000);
	end = __rdtsc();
	printf("half_to_float_fast: %.3f (%d)\n", double(end-start) / (100010004), error_count);

	half_to_float_test_ryg(vin, vout, 0x10000);
	error_count = test_results(vin, vout, 0x10000);

	start = __rdtsc();
	for (int i = 0; i < 64; i++) half_to_float_test_ryg(vin, vout, 0x10000);
	end = __rdtsc();
	printf("half_to_float_ryg: %.3f (%d)\n", double(end-start) / (100010004), error_count);

	half_to_float_test_fast_ryg(vin, vout, 0x10000);
	error_count = test_results(vin, vout, 0x10000);

	start = __rdtsc();
	for (int i = 0; i < 64; i++) half_to_float_test_fast_ryg(vin, vout, 0x10000);
	end = __rdtsc();
	printf("half_to_float_fast_ryg: %.3f (%d)\n", double(end-start) / (100010004), error_count);

	half_to_float_test_sse2(vin, vout, 0x10000);
	error_count = test_results(vin, vout, 0x10000);

	start = __rdtsc();
	for (int i = 0; i < 64; i++) half_to_float_test_sse2(vin, vout, 0x10000);
	end = __rdtsc();
	printf("half_to_float_sse2: %.3f (%d)\n", double(end-start) / (100010004), error_count);

	return 0;
	}

	// Half -> float conversion that treats every input class explicitly:
	// normal numbers are rebiased with an integer add, Inf/NaN get a second
	// exponent adjustment, and zero/denormals are renormalized with one
	// floating-point subtract.
	inline float half_to_float_ryg(unsigned int16 h)
	{
	    if (__have_native_half) {
	        // Native half support is reported: defer to the builtin conversion.
	        return __half_to_float_varying(h);
	    }
	    else {
	        unsigned int32 hs = h & (int32)0x8000u;     // Pick off sign bit
	        unsigned int32 hem = h & (int32)0x7fffu;    // Pick off exponent-mantissa bits

	        unsigned int32 xs = ((unsigned int32) hs) << 16;
	        unsigned int32 xem = ((unsigned int32) hem) << 13;
	        unsigned int32 xe = xem & 0xF800000;    // & (0x7c00 << 13)

	        xem += 0x38000000;              // (127 - 15) << 23

	        // handle exponent special cases
	        if (xe == 0xF800000)            // Inf/NaN?
	        {
	            xem += 0x38000000;          // extra exp adjust
	        }
	        else if (xe == 0)               // Zero/Denormal?
	        {
	            xem += 0x800000;            // extra exp adjust
	            xem = intbits(floatbits(xem) - floatbits(0x38800000));   // renormalize
	        }

	        // Plain bitwise OR of sign and magnitude (no escaping backslash).
	        return floatbits(xs | xem);
	    }
	}

	// Fast half -> float conversion: shifts the bits into place and rebiases the
	// exponent, with no special handling for zero, denormal, Inf or NaN inputs
	// (those receive the same rebias as normal numbers).
	inline float half_to_float_fast_ryg(unsigned int16 h)
	{
	    if (__have_native_half) {
	        // Native half support is reported: defer to the builtin conversion.
	        return __half_to_float_varying(h);
	    }
	    else {
	        unsigned int32 hs = h & (int32)0x8000u;     // Pick off sign bit
	        unsigned int32 hem = h & (int32)0x7fffu;    // Pick off exponent-mantissa bits

	        unsigned int32 xs = ((unsigned int32) hs) << 16;
	        unsigned int32 xem = ((unsigned int32) hem) << 13;

	        xem += 0x38000000;      // (127 - 15) << 23

	        // Plain bitwise OR of sign and magnitude (no escaping backslash).
	        return floatbits(xs | xem);
	    }
	}


	// Exported entry point: converts count halves from vin into vout with
	// half_to_float (not defined in this file).
	export void half_to_float_test(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
	{
	    foreach (idx = 0 ... count)
	    {
	        vout[idx] = half_to_float(vin[idx]);
	    }
	}

	// Exported entry point: converts count halves from vin into vout with
	// half_to_float_fast (not defined in this file).
	export void half_to_float_test_fast(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
	{
	    foreach (idx = 0 ... count)
	    {
	        vout[idx] = half_to_float_fast(vin[idx]);
	    }
	}

	// Exported entry point: converts count halves from vin into vout with
	// half_to_float_ryg.
	export void half_to_float_test_ryg(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
	{
	    foreach (idx = 0 ... count)
	    {
	        vout[idx] = half_to_float_ryg(vin[idx]);
	    }
	}

	// Exported entry point: converts count halves from vin into vout with
	// half_to_float_fast_ryg.
	export void half_to_float_test_fast_ryg(uniform const unsigned int16 vin[], uniform float vout[], uniform int count)
	{
	    foreach (idx = 0 ... count)
	    {
	        vout[idx] = half_to_float_fast_ryg(vin[idx]);
	    }
	}
	Reference:

	half_to_float_sse2: 2.306

	ISPC targeting SSE2:

	half_to_float: 6.668
	half_to_float_fast: 2.554
	half_to_float_ryg: 4.390
	half_to_float_fast_ryg: 2.005

	ISPC targeting SSE4:

	half_to_float: 4.776
	half_to_float_fast: 1.939
	half_to_float_ryg: 3.121
	half_to_float_fast_ryg: 1.448

	ISPC targeting AVX:

	half_to_float: 4.368
	half_to_float_fast: 1.857
	half_to_float_ryg: 2.791
	half_to_float_fast_ryg: 1.275