Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unknownbrackets/e5bdd06cd8d85712fc51bd7b7707cfd1 to your computer and use it in GitHub Desktop.
Save unknownbrackets/e5bdd06cd8d85712fc51bd7b7707cfd1 to your computer and use it in GitHub Desktop.
Attempt at exponent-aligned fp addition
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
// Returns the raw 32-bit pattern stored in a float.
// The bytes are copied with memcpy rather than read through a casted pointer.
static uint32_t bit_cast(float f) {
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
// Builds a float whose storage holds exactly the given 32-bit pattern.
// The bytes are copied with memcpy rather than read through a casted pointer.
static float bit_cast(uint32_t x) {
    float value;
    memcpy(&value, &x, sizeof(x));
    return value;
}
// Extracts bits 23..30 of x: the 8-bit exponent field, still biased (0..255).
static uint32_t get_uexp(uint32_t x) {
    const uint32_t without_mantissa = x >> 23;
    return without_mantissa & 0xFF;
}
// Extracts the low 23 bits of x and sets bit 23 on top of them, so the
// result always includes the hidden 1 regardless of the exponent field.
static int32_t get_mant(uint32_t x) {
    const uint32_t fraction = x & 0x007FFFFF;
    const uint32_t with_hidden_bit = fraction | 0x00800000;
    return with_hidden_bit;
}
// Isolates bit 31 of x. The result is zero when the bit is clear and
// non-zero when it is set, so callers can use it directly as a condition.
static int32_t get_sign(uint32_t x) {
    const uint32_t sign_bit = x & 0x80000000;
    return sign_bit;
}
// Adds four floats by aligning every mantissa to the largest exponent among
// the inputs, summing the aligned mantissas as signed integers, and then
// renormalizing the result once at the end.
//
// Behavior, as implemented here:
//  - Each mantissa carries EXTRA_BITS additional low bits while it is shifted
//    into alignment; anything shifted out below those bits is truncated.
//  - The magnitude of the sum is truncated (not rounded) when the extra bits
//    are dropped.
//  - Inputs with a biased exponent of 0 (zeros and denormals) contribute
//    nothing to the sum.
//  - A result whose biased exponent would be 0 or less is returned as +0.0f.
//  - A result whose biased exponent reaches 255 is returned as an infinity
//    carrying the sign of the sum.
//  - NOTE(review): inputs with a biased exponent of 255 (Inf/NaN) are not
//    special-cased; they are summed like ordinary values. Confirm that this
//    is the intended behavior.
//
// f: the four addends (not modified).
// Returns the float reconstructed from the summed mantissa and exponent.
float add4(float f[4]) {
    static const int EXTRA_BITS = 2;

    uint32_t x[4];
    uint32_t exps[4];
    int32_t mants[4];
    uint32_t max_exp = 0;
    for (int i = 0; i < 4; i++) {
        x[i] = bit_cast(f[i]);
        exps[i] = get_uexp(x[i]);
        if (exps[i] == 0) {
            // get_mant() always adds the hidden 1, which would turn a zero
            // (or denormal) input into a phantom non-zero addend.
            mants[i] = 0;
        } else {
            // Preserve extra bits of precision in the mantissa during the add.
            mants[i] = get_mant(x[i]) << EXTRA_BITS;
        }
        if (exps[i] > max_exp) {
            max_exp = exps[i];
        }
    }

    // Bring every mantissa to max_exp, apply its sign, and accumulate.
    int32_t mant_sum = 0;
    for (int i = 0; i < 4; i++) {
        const uint32_t shift = max_exp - exps[i];
        // Shifting a 32-bit value by 32 or more is undefined, and nothing of
        // the mantissa would survive such a shift anyway.
        int32_t aligned = (shift >= 32) ? 0 : (mants[i] >> shift);
        if (get_sign(x[i])) {
            aligned = -aligned;
        }
        mant_sum += aligned;
    }

    // Split the signed sum into a sign bit and a magnitude.
    uint32_t sign_sum = 0;
    if (mant_sum < 0) {
        sign_sum = 0x80000000u;
        mant_sum = -mant_sum;
    }

    // Chop off the extra bits.
    mant_sum >>= EXTRA_BITS;
    if (mant_sum == 0) {
        // Covers exact cancellation as well as the all-zero/denormal case.
        return 0.0f;
    }

    // Renormalize so the hidden bit sits at bit 23 again. The exponent is
    // tracked as a signed value: cancellation can require more left shifts
    // than max_exp has room for, and an unsigned counter would wrap around
    // and be mistaken for an overflow to infinity.
    int32_t exp_sum = static_cast<int32_t>(max_exp);
    while (mant_sum < 0x00800000) {
        mant_sum <<= 1;
        exp_sum -= 1;
    }
    while (mant_sum >= 0x01000000) {
        mant_sum >>= 1;
        exp_sum += 1;
    }

    if (exp_sum <= 0) {
        // Too small for a normalized float: flush to zero.
        return 0.0f;
    }
    if (exp_sum >= 255) {
        // Too large: exponent 255 with a zero mantissa field.
        exp_sum = 255;
        mant_sum = 0;
    }

    uint32_t y = sign_sum
        | (static_cast<uint32_t>(exp_sum) << 23)
        | (static_cast<uint32_t>(mant_sum) & 0x7FFFFF);
    return bit_cast(y);
}
// Small driver: prints a greeting, then runs add4() on one hand-built input
// and prints the result as a float, as a decimal bit pattern, and as hex.
int main()
{
    std::cout << "Hello World!\n";

    // Alternate input of four ones; enable the line below to try it.
    float a[4] = { 1.0, 1.0, 1.0, 1.0 };
    // std::cout << add4(a) << std::endl;

    // Three copies of the pattern 0x33800000 followed by 0x3F800000.
    const uint32_t tiny_bits = 0x33800000;
    const uint32_t one_bits = 0x3F800000;
    const float tiny = bit_cast(tiny_bits);
    float b[4] = { tiny, tiny, tiny, bit_cast(one_bits) };

    // add4() only reads its input, so one call serves all three printouts.
    const float sum = add4(b);
    const uint32_t sum_bits = bit_cast(sum);

    std::cout << sum << " " << sum_bits << std::endl;
    printf("%08x", sum_bits);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment