Created
September 21, 2017 12:46
-
-
Save rossy/67be6a23bfd3b2375286e05dadefb42c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gcc -O2 -Wall -std=c99 -mf16c float16.c -o float16 | |
#include <stdbool.h> | |
#include <inttypes.h> | |
#include <immintrin.h> | |
#include <stdio.h> | |
#include <math.h> | |
static char *format_float32_buf(float f, char buf[static 35]) | |
{ | |
union { | |
float f; | |
uint32_t i; | |
} u = { .f = f }; | |
char *p = buf; | |
for (int i = 0; i < 32; i++) { | |
*p++ = ((u.i >> (31 - i)) & 1) ? '1' : '0'; | |
if (i == 0 || i == 8) | |
*p++ = ' '; | |
} | |
*p++ = '\0'; | |
return buf; | |
} | |
#define format_float32(f) format_float32_buf(f, (char[35]) { 0 }) | |
static char *format_float16_buf(uint16_t f, char buf[static 19]) | |
{ | |
char *p = buf; | |
for (int i = 0; i < 16; i++) { | |
*p++ = ((f >> (15 - i)) & 1) ? '1' : '0'; | |
if (i == 0 || i == 5) | |
*p++ = ' '; | |
} | |
*p++ = '\0'; | |
return buf; | |
} | |
#define format_float16(f) format_float16_buf(f, (char[19]) { 0 }) | |
static uint16_t float32_to_16_hw(float f) | |
{ | |
return _cvtss_sh(f, 0); | |
} | |
static uint16_t float32_to_16_sw(float f) | |
{ | |
uint16_t sign = signbit(f) ? 0x8000 : 0x0000; | |
switch (fpclassify(f)) { | |
case FP_INFINITE: | |
// Handle +/-Inf as a special case | |
return 0x7c00 | sign; | |
case FP_NAN: | |
// Return a NaN with the non-signalling bit set | |
return 0x7e00 | sign; | |
case FP_SUBNORMAL: | |
case FP_ZERO: | |
// Zeros need to be special-cased too. Handle subnormals as well, since | |
// float32 subnormals are so small that they will always round to zero. | |
return 0x0000 | sign; | |
} | |
// Extract the exponent and a normalized mantissa, with value [0.5; 1) | |
int exp; | |
float norm_mant = fabsf(frexpf(f, &exp)); | |
// The normalized mantissa will be multiplied by 2^mant_exp. In a normal | |
// float, this will be 2^11, for 10 bits of mantissa and the implied | |
// leading '1' bit. In a subnormal float, mant_exp will take a lower value. | |
int mant_exp = 11; | |
// Handle subnormals | |
if (exp < -13) { | |
mant_exp = exp + 24; | |
exp = -13; | |
} | |
// If the exponent is this high, clamp to +/-Inf to prevent overflow | |
if (exp > 16) | |
return 0x7c00 | sign; | |
// Get the final mantissa. This uses the current float rounding mode. | |
uint16_t mant = lrintf(ldexpf(norm_mant, mant_exp)); | |
// Note: exp_biased is lower than the final exponent for a float16, since | |
// we rely on the mantissa to overflow into the exponent to handle | |
// rounding. For normal floats, the implied leading '1' bit will also | |
// overflow into the exponent, so the real exponent bias is 14. | |
uint16_t exp_biased = exp + 13; | |
return (mant + (exp_biased << 10)) | sign; | |
} | |
static bool float16_isnan(uint16_t f) | |
{ | |
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) != 0x0000; | |
} | |
int main(int argc, char **argv) | |
{ | |
union { | |
float f; | |
uint32_t i; | |
} u = { .i = 0 }; | |
int errors = 0; | |
do { | |
uint16_t hf_hw = float32_to_16_hw(u.f); | |
uint16_t hf_sw = float32_to_16_sw(u.f); | |
// Don't try to replicate the exact NaN bit patterns | |
if (float16_isnan(hf_hw) && float16_isnan(hf_sw)) | |
continue; | |
if (hf_hw == hf_sw) | |
continue; | |
printf("hw: %s -> %s\n", format_float32(u.f), format_float16(hf_hw)); | |
printf("sw: %s -> %s\n", format_float32(u.f), format_float16(hf_sw)); | |
if (++errors == 10) | |
return 1; | |
} while (++u.i); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment