Skip to content

Instantly share code, notes, and snippets.

@ramntry
Last active August 9, 2018 05:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ramntry/76abebaf8c0a17dc517cd21d9eea5c4f to your computer and use it in GitHub Desktop.
Save ramntry/76abebaf8c0a17dc517cd21d9eea5c4f to your computer and use it in GitHub Desktop.
IEEE-754 single precision software implementation
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
using namespace std;
/// Software model of an IEEE-754 binary32 ("single precision") value, kept as
/// its three raw bit fields.
///
/// The type is an aggregate and is built with `{sign, exp, mant}`.
/// Only a narrow slice of arithmetic is modelled: negation, and addition of
/// two operands that share the same sign and the same non-zero exponent.
struct single {
  unsigned sign : 1;   ///< sign bit
  unsigned exp : 8;    ///< biased exponent field
  unsigned mant : 23;  ///< fraction bits, without the implicit leading one

  /// Reassembles the 32-bit pattern and reinterprets it as a native float.
  operator float() const {
    static_assert(sizeof(unsigned) == sizeof(float),
                  "single assumes a 32-bit unsigned and a 32-bit float");
    // Bit-fields promote to (signed) int, so widen explicitly before
    // shifting the sign into bit 31.
    const unsigned bits = (static_cast<unsigned>(sign) << 31) |
                          (static_cast<unsigned>(exp) << 23) |
                          static_cast<unsigned>(mant);
    // memcpy is the well-defined way to type-pun; dereferencing a
    // reinterpret_cast'ed pointer violates the strict aliasing rule.
    float result;
    std::memcpy(&result, &bits, sizeof result);
    return result;
  }

  /// True for exponents 1..254 and, by this type's convention, for both
  /// zeros (exp == 0 with an all-zero mantissa).
  bool is_normal() const {
    return (1 <= exp && exp <= 254) || (exp == 0 && mant == 0);
  }
  /// True when the exponent is zero but the mantissa is not.
  bool is_denormal() const { return exp == 0 && mant != 0; }
  /// True when the exponent is all ones and the mantissa is zero.
  bool is_infinity() const { return exp == 255 && mant == 0; }
  /// True when the exponent is all ones and the mantissa is non-zero.
  bool is_nan() const { return exp == 255 && mant != 0; }
  /// NaN whose most significant mantissa bit is set.
  bool is_qnan() const { return is_nan() && (mant >> 22); }
  /// NaN whose most significant mantissa bit is clear.
  bool is_snan() const { return is_nan() && !(mant >> 22); }

  // Named constants. 0x7FFFFF is the all-ones 23-bit mantissa, spelled out
  // so that nothing depends on implicit truncation into the bit-field.
  static single minus_zero() { return {1, 0, 0}; }
  static single plus_zero() { return {0, 0, 0}; }
  static single min_pos_denormal() { return {0, 0, 1}; }
  static single max_denormal() { return {0, 0, 0x7FFFFFu}; }
  static single min_pos_normal() { return {0, 1, 0}; }
  static single one() { return {0, 127, 0}; }
  static single max() { return {0, 254, 0x7FFFFFu}; }
  static single pos_infinity() { return {0, 255, 0}; }
  static single qnan() { return {0, 255, 1 << 22}; }
  static single snan() { return {0, 255, 1}; }

  /// Returns the same value with the sign bit flipped.
  single operator-() const { return {sign ^ 1u, exp, mant}; }

  /// Adds two values that have the same sign and the same non-zero exponent.
  ///
  /// Preconditions (checked with assert): both operands satisfy is_normal(),
  /// their signs match, their exponents match, and the exponent is non-zero.
  /// The result's exponent is one higher; if that reaches 255 the result is
  /// the infinity of the operands' sign. The single bit shifted out of the
  /// mantissa is rounded to nearest, ties to even.
  single operator+(single b) const {
    assert(is_normal() && b.is_normal());
    assert(sign == b.sign);
    assert(exp == b.exp);
    assert(exp);
    const unsigned res_exp = exp + 1u;
    if (res_exp >= 255) {
      return {sign, 255u, 0u};  // overflow: infinity
    }
    // (1.m1 + 1.m2) * 2^e == (1 + (m1 + m2) / 2^24) * 2^(e + 1), so the new
    // mantissa is (m1 + m2) / 2 and exactly one bit is dropped.
    const unsigned mant_sum = static_cast<unsigned>(mant) + b.mant;
    unsigned res_mant = mant_sum >> 1;
    // A dropped 1 bit is always an exact tie; break it towards the even
    // mantissa. This can never carry out of the 23 bits: the largest odd sum
    // is 2^24 - 3, which halves to the even value 2^23 - 2.
    if ((mant_sum & 1u) != 0 && (res_mant & 1u) != 0) {
      ++res_mant;
    }
    return {sign, res_exp, res_mant};
  }
};
ostream &operator<<(ostream &out, single x) {
return out << setw(15)
<< setprecision(numeric_limits<float>::max_digits10)
<< static_cast<float>(x) << " "
<< bitset< 1>(x.sign) << " "
<< bitset< 8>(x.exp) << " "
<< bitset<23>(x.mant);
}
void test_predicates();
int main() {
cout << " " << single::minus_zero() << endl;
cout << " " << single::plus_zero() << endl;
cout << " " << single::min_pos_denormal() << " minimum positive denormalized value" << endl;
cout << " " << single::max_denormal() << " maximum denormalized value" << endl;
cout << " " << single::min_pos_normal() << " minimum positive normalized value" << endl;
cout << " " << single::one() << endl;
cout << " " << single::max() << " maximum value" << endl;
cout << " " << single::pos_infinity() << endl;
cout << " " << single::qnan() << " quiet Not a Number" << endl;
cout << " " << single::snan() << " signalling Not a Number" << endl;
cout << "\n";
test_predicates();
const single one_and_half = {0, 127, 1 << 22};
const single one_and_three_quarters = {0, 127, 3 << 21};
cout << " " << one_and_half << "\n+ " << one_and_three_quarters << "\n= "
<< (one_and_half + one_and_three_quarters) << endl;
cout << "\n";
const single half_max = {0, 253, ~0u};
cout << " " << -half_max << "\n+ " << -half_max << "\n= "
<< (-half_max + -half_max) << endl;
cout << "\n";
const single half_inf = {0, 254, 0 };
const single over_half_inf = {0, 254, 2 };
cout << " " << -half_inf << "\n+ " << -half_inf << "\n= "
<< (-half_inf + -half_inf) << endl;
cout << "\n";
cout << " " << half_inf << "\n+ " << over_half_inf << "\n= "
<< ( half_inf + over_half_inf) << endl;
}
void test_predicates() {
for (single x : {single::minus_zero(),
single::plus_zero(),
single::min_pos_normal(),
single::one(),
single::max()}) {
assert( x.is_normal());
assert(!x.is_denormal());
assert(!x.is_infinity());
assert(!x.is_nan());
assert(!x.is_qnan());
assert(!x.is_snan());
}
for (single x : {single::min_pos_denormal(),
single::max_denormal()}) {
assert(!x.is_normal());
assert( x.is_denormal());
assert(!x.is_infinity());
assert(!x.is_nan());
assert(!x.is_qnan());
assert(!x.is_snan());
}
for (single x : { single::pos_infinity(),
-single::pos_infinity()}) {
assert(!x.is_normal());
assert(!x.is_denormal());
assert( x.is_infinity());
assert(!x.is_nan());
assert(!x.is_qnan());
assert(!x.is_snan());
}
for (single x : { single::qnan(),
-single::qnan()}) {
assert(!x.is_normal());
assert(!x.is_denormal());
assert(!x.is_infinity());
assert( x.is_nan());
assert( x.is_qnan());
assert(!x.is_snan());
}
for (single x : { single::snan(),
-single::snan()}) {
assert(!x.is_normal());
assert(!x.is_denormal());
assert(!x.is_infinity());
assert( x.is_nan());
assert(!x.is_qnan());
assert( x.is_snan());
}
}
@ramntry
Copy link
Author

ramntry commented Aug 9, 2018

               -0    1 00000000 00000000000000000000000
                0    0 00000000 00000000000000000000000
   1.40129846e-45    0 00000000 00000000000000000000001    minimum positive denormalized value
   1.17549421e-38    0 00000000 11111111111111111111111    maximum          denormalized value
   1.17549435e-38    0 00000001 00000000000000000000000    minimum positive   normalized value
                1    0 01111111 00000000000000000000000
   3.40282347e+38    0 11111110 11111111111111111111111    maximum value
              inf    0 11111111 00000000000000000000000
              nan    0 11111111 10000000000000000000000    quiet      Not a Number
              nan    0 11111111 00000000000000000000001    signalling Not a Number

              1.5    0 01111111 10000000000000000000000
+            1.75    0 01111111 11000000000000000000000
=            3.25    0 10000000 10100000000000000000000

  -1.70141173e+38    1 11111101 11111111111111111111111
+ -1.70141173e+38    1 11111101 11111111111111111111111
= -3.40282347e+38    1 11111110 11111111111111111111111

  -1.70141183e+38    1 11111110 00000000000000000000000
+ -1.70141183e+38    1 11111110 00000000000000000000000
=            -inf    1 11111111 00000000000000000000000

   1.70141183e+38    0 11111110 00000000000000000000000
+  1.70141224e+38    0 11111110 00000000000000000000010
=             inf    0 11111111 00000000000000000000000

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment