Created
January 19, 2013 18:39
-
-
Save ldematte/4574214 to your computer and use it in GitHub Desktop.
Floating-point emulation and mathematics in C++, using integers and bitsets. Part of the material for an introductory course on Computer Architecture.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h>
#include <stdio.h>

#include <bitset>
#include <cstddef>
#include <iostream>

using std::bitset;
using std::cout;
using std::endl;
//bitset helpers | |
// Copies `a` into `b` keeping the most significant bits of both aligned.
//
// When `b` is not wider than `a` (the usual "contract" case) the low-order
// bits of `a` that do not fit are dropped: plain truncation, no rounding.
// When `b` is wider than `a`, `a` lands in the top bits of `b` and the
// remaining low bits of `b` are cleared, so every pair of widths is valid.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void Contract(const std::bitset<n>& a, std::bitset<m>& b)
{
    if (n >= m)
    {
        // Number of low-order source bits that do not fit in the target.
        const std::size_t dropped = n - m;
        for (std::size_t i = 0; i < m; ++i)
        {
            b[i] = a[i + dropped];
        }
    }
    else
    {
        // Target is wider: pad the low end with zeros.
        const std::size_t padding = m - n;
        b.reset();
        for (std::size_t i = 0; i < n; ++i)
        {
            b[i + padding] = a[i];
        }
    }
}
// Copies `a` into `b` left-aligned: the most significant bit of `a` becomes
// the most significant bit of `b`, and every bit of `b` not covered by `a` is
// cleared.
//
// If `b` is narrower than `a`, the low-order bits of `a` that do not fit are
// dropped instead of indexing outside the bitsets.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void ExpandLeft(const std::bitset<n>& a, std::bitset<m>& b)
{
    b.reset();
    if (m >= n)
    {
        // Distance between bit 0 of `a` and its position inside `b`.
        const std::size_t offset = m - n;
        for (std::size_t i = 0; i < n; ++i)
        {
            b[i + offset] = a[i];
        }
    }
    else
    {
        // Target is narrower: keep only the top m bits of the source.
        const std::size_t dropped = n - m;
        for (std::size_t i = 0; i < m; ++i)
        {
            b[i] = a[i + dropped];
        }
    }
}
// Copies `a` into `b` right-aligned: bit i of `a` becomes bit i of `b`, and
// every higher bit of `b` is cleared.
//
// If `b` is narrower than `a`, only the low m bits of `a` are copied, so the
// loop never writes past the end of `b`.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void ExpandRight(const std::bitset<n>& a, std::bitset<m>& b)
{
    b.reset();
    const std::size_t count = (n < m) ? n : m;
    for (std::size_t i = 0; i < count; ++i)
    {
        b[i] = a[i];
    }
}
// Stores the binary representation of `num` in `b` (bit 0 = least
// significant bit).
//
// Every bit of `b` is overwritten, so no bits set by an earlier use of the
// same bitset survive. Bits of `num` that do not fit in n bits are dropped.
// Zero and negative values produce an all-zero bitset.
//
// The width is std::size_t because that is the type of std::bitset's own
// size parameter; with an `int` parameter template argument deduction fails
// on conforming compilers such as GCC and Clang.
template<std::size_t n>
void ConvertToBits(std::bitset<n>& b, int num)
{
    const unsigned long magnitude =
        (num > 0) ? static_cast<unsigned long>(num) : 0UL;
    // The bitset constructor keeps the low n bits and zero-fills the rest.
    b = std::bitset<n>(magnitude);
}
// Stores the first n binary digits of the fraction `frac` in `b`, most
// significant digit in the highest bit (bit n-1 has weight 1/2, bit n-2 has
// weight 1/4, ...). Digits beyond the n-th are truncated.
//
// `frac` is expected to be in [0, 1). Every bit of `b` is overwritten, so a
// fraction that terminates early leaves the remaining low bits at zero
// rather than at whatever `b` held before. Values <= 0 yield all zeros.
//
// The width is std::size_t because that is the type of std::bitset's own
// size parameter; with an `int` parameter template argument deduction fails
// on conforming compilers such as GCC and Clang.
template<std::size_t n>
void ConvertToBitsFrac(std::bitset<n>& b, float frac)
{
    b.reset();
    // Classic doubling method: each doubling moves the next binary digit in
    // front of the point, where it can be read off and removed.
    for (std::size_t i = n; i > 0 && frac > 0; --i)
    {
        frac = frac * 2;
        if (frac >= 1.0f)
        {
            frac -= 1.0f;
            b[i - 1] = 1;
        }
    }
}
// Writes one encoded value to std::cout as "<sign> <exponent> <fraction>",
// with the two bit fields printed most significant bit first, followed by a
// newline and a flush.
//
// The arguments are only read, so they are taken by const reference. The
// widths are std::size_t because that is the type of std::bitset's own size
// parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void PrintBits(const int& sign, const std::bitset<n>& exp, const std::bitset<m>& frac)
{
    std::cout << sign << " " << exp << " " << frac << std::endl;
}
template<int n, int m> | |
void ConvertToBits(int& sign, bitset<n>& exp, bitset<m>& frac, float num) | |
{ | |
//sign | |
if (num < 0.0f) | |
sign = 1; | |
else | |
sign = 0; | |
int n_exp = 0; | |
if (num == 0.0f) | |
{ | |
exp = 0; | |
frac = 0; | |
return; | |
} | |
//separate | |
float ipart; | |
float fpart; | |
fpart = modf(num, &ipart); | |
ipart = fabs(ipart); | |
fpart = fabs(fpart); | |
bitset<8> b_ipart(0); | |
ConvertToBits(b_ipart, (int)ipart); | |
bitset<16> b_fpart(0); | |
ConvertToBitsFrac(b_fpart, fpart); | |
//see if shift left or right | |
if (ipart == 0.0f) | |
{ | |
n_exp = -1; | |
int i = b_fpart.size() - 1; | |
while (b_fpart[i] == 0) | |
{ | |
--n_exp; | |
b_fpart <<= 1; | |
} | |
//reached last i | |
b_fpart <<= 1; | |
} | |
else | |
{ | |
//shift right | |
while (b_ipart.to_ulong() > 1) | |
{ | |
++n_exp; | |
b_fpart >>= 1; | |
b_fpart[7] = b_ipart[0]; | |
b_ipart >>= 1; | |
} | |
} | |
int bias = (1 << (exp.size() - 1)) - 1; | |
ConvertToBits(exp, n_exp + bias); | |
Contract(b_fpart, frac); | |
} | |
// Returns the significand 1.frac as a float: the implicit leading one plus
// the fraction bits, where the highest bit of `frac` has weight 1/2, the
// next 1/4, and so on.
//
// The value is accumulated bit by bit, so it works for any width: there is
// no integer shift that could overflow and no to_ulong() call that could
// throw. Every partial sum is exactly representable for fractions of up to
// 23 bits; wider fractions are subject to float rounding.
//
// The width is std::size_t because that is the type of std::bitset's own
// size parameter; with an `int` parameter template argument deduction fails
// on conforming compilers such as GCC and Clang.
template<std::size_t n>
float FracToFloat(const std::bitset<n>& frac)
{
    float value = 1.0f;   // implicit leading one
    float weight = 0.5f;  // weight of the current fraction bit
    for (std::size_t i = n; i > 0; --i)
    {
        if (frac[i - 1])
            value += weight;
        weight *= 0.5f;
    }
    return value;
}
// Decodes a sign / biased exponent / fraction triple back into a float:
//
//     value = (-1)^sign * 1.frac * 2^(exp - bias),  bias = 2^(n-1) - 1
//
// The all-zero pattern (exp == 0 and frac == 0) is the encoding of zero and
// decodes to 0, not to 1.0 * 2^-bias. `sign` is treated as negative only
// when it equals 1.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
float ConvertToFloat(int& sign, std::bitset<n>& exp, std::bitset<m>& frac)
{
    if (exp.none() && frac.none())
        return (sign == 1) ? -0.0f : 0.0f;

    const int bias = (1 << (exp.size() - 1)) - 1;
    int n_exp = static_cast<int>(exp.to_ulong()) - bias;

    // Start from the significand 1.frac and scale it by 2^n_exp one step at
    // a time; at most one of the two loops runs.
    float retval = FracToFloat(frac);
    for (; n_exp > 0; --n_exp)
        retval *= 2.0f;
    for (; n_exp < 0; ++n_exp)
        retval *= 0.5f;

    return (sign == 1) ? -retval : retval;
}
// Multiplies two encoded values: (sc, ec, fc) = (sa, ea, fa) * (sb, eb, fb).
//
// Each operand is (-1)^s * 1.f * 2^(e - bias) with bias = 2^(n-1) - 1; the
// all-zero pattern (e == 0 and f == 0) stands for zero.
//
// Algorithm:
//  - the exponents are added (removing one bias);
//  - the (m+1)-bit significands 1.f are multiplied as integers, giving a
//    (2m+2)-bit product in [1, 4);
//  - if the product is >= 2 (its top bit is set) the exponent is
//    incremented and the leading one is one position higher, so only ONE
//    bit is shifted out to drop it; otherwise two bits are shifted out
//    (the empty top bit and the leading one);
//  - the top m bits that remain are the result fraction (truncated).
//
// Results whose exponent falls below zero are flushed to zero; exponent
// overflow is not detected. The fraction width must be at most 31 bits so
// the product fits in 64 bits. All inputs are read before any output is
// written, so an output may alias an input.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void Multiply(int sa, const std::bitset<n>& ea, const std::bitset<m>& fa,
              int sb, const std::bitset<n>& eb, const std::bitset<m>& fb,
              int& sc, std::bitset<n>& ec, std::bitset<m>& fc)
{
    const bool a_is_zero = ea.none() && fa.none();
    const bool b_is_zero = eb.none() && fb.none();
    const unsigned long exp_a = ea.to_ulong();
    const unsigned long exp_b = eb.to_ulong();
    // Significands with the implicit leading one made explicit at bit m.
    const unsigned long long implicit_one = 1ULL << m;
    const unsigned long long sig_a = implicit_one | fa.to_ulong();
    const unsigned long long sig_b = implicit_one | fb.to_ulong();

    //sign
    sc = sa ^ sb;

    if (a_is_zero || b_is_zero)
    {
        ec.reset();
        fc.reset();
        return;
    }

    const int bias = (1 << (n - 1)) - 1;
    int n_exp = static_cast<int>(exp_a + exp_b) - bias;

    //multiply
    std::bitset<(m + 1) * 2> product(sig_a * sig_b);

    //normalize
    //the binary point sits right above bit 2*m - 1
    const bool carry = product[2 * m + 1];
    if (carry)
    {
        ++n_exp;
    }
    // Shift the implicit leading one out of the top of the bitset...
    product <<= (carry ? 1 : 2);
    // ...and bring the m most significant remaining bits down to bit 0.
    product >>= (m + 2);

    if (n_exp < 0)
    {
        // Underflow: too small for the exponent field.
        ec.reset();
        fc.reset();
        return;
    }
    fc = std::bitset<m>(product.to_ulong());
    ec = std::bitset<n>(static_cast<unsigned long>(n_exp));
}
// Adds two encoded values: (sc, ec, fc) = (sa, ea, fa) + (sb, eb, fb).
//
// Each operand is (-1)^s * 1.f * 2^(e - bias); the all-zero pattern
// (e == 0 and f == 0) stands for zero. A sign argument of 0 means positive,
// anything else negative; `sc` is always written as 0 or 1.
//
// Algorithm (signed-magnitude addition):
//  - zero operands are handled up front;
//  - the operand with the larger magnitude is identified; its exponent and
//    sign are the starting point for the result;
//  - both significands (with the implicit one made explicit) are widened by
//    a few guard bits, and the smaller one is shifted right by the exponent
//    difference so the binary points line up;
//  - the aligned significands are added when the signs agree and subtracted
//    otherwise;
//  - the sum is renormalized to the form 1.xxx, adjusting the exponent, and
//    the top m fraction bits are kept (truncation, no rounding).
//
// This covers all three exponent orderings (greater, smaller, equal) with
// the same code path. Results whose exponent falls below zero are flushed to
// zero; exponent overflow is not detected. The fraction width must be at
// most 30 bits so the intermediate values fit in 64 bits. All inputs are
// read before any output is written, so an output may alias an input.
//
// The widths are std::size_t because that is the type of std::bitset's own
// size parameter; with `int` parameters template argument deduction fails on
// conforming compilers such as GCC and Clang.
template<std::size_t n, std::size_t m>
void Add(int sa, const std::bitset<n>& ea, const std::bitset<m>& fa,
         int sb, const std::bitset<n>& eb, const std::bitset<m>& fb,
         int& sc, std::bitset<n>& ec, std::bitset<m>& fc)
{
    const bool a_negative = (sa != 0);
    const bool b_negative = (sb != 0);
    const bool a_is_zero = ea.none() && fa.none();
    const bool b_is_zero = eb.none() && fb.none();

    if (a_is_zero || b_is_zero)
    {
        if (a_is_zero && b_is_zero)
        {
            // Zero stays zero; it is negative only if both operands are.
            sc = (a_negative && b_negative) ? 1 : 0;
            ec.reset();
            fc.reset();
        }
        else if (a_is_zero)
        {
            sc = b_negative ? 1 : 0;
            ec = eb;
            fc = fb;
        }
        else
        {
            sc = a_negative ? 1 : 0;
            ec = ea;
            fc = fa;
        }
        return;
    }

    // Significands with the implicit leading one made explicit at bit m.
    const unsigned long long implicit_one = 1ULL << m;
    const unsigned long exp_a = ea.to_ulong();
    const unsigned long exp_b = eb.to_ulong();
    const unsigned long long sig_a = implicit_one | fa.to_ulong();
    const unsigned long long sig_b = implicit_one | fb.to_ulong();

    // Pick the operand with the larger magnitude.
    const bool a_is_bigger =
        exp_a > exp_b || (exp_a == exp_b && sig_a >= sig_b);
    const unsigned long exp_hi = a_is_bigger ? exp_a : exp_b;
    const unsigned long exp_lo = a_is_bigger ? exp_b : exp_a;
    const unsigned long long sig_hi = a_is_bigger ? sig_a : sig_b;
    const unsigned long long sig_lo = a_is_bigger ? sig_b : sig_a;
    const bool hi_negative = a_is_bigger ? a_negative : b_negative;
    const bool lo_negative = a_is_bigger ? b_negative : a_negative;

    // Extra low-order bits kept while aligning, so a subtraction does not
    // lose everything that was shifted out of the smaller operand.
    const std::size_t guard = m + 2;
    const unsigned long shift = exp_hi - exp_lo;
    const unsigned long long aligned_hi = sig_hi << guard;
    // A shift by the full width or more would leave nothing (and would be
    // undefined for the built-in operator), so treat it as zero.
    const unsigned long long aligned_lo =
        (shift < m + 1 + guard) ? ((sig_lo << guard) >> shift) : 0ULL;

    unsigned long long total = (hi_negative == lo_negative)
        ? aligned_hi + aligned_lo
        : aligned_hi - aligned_lo;

    if (total == 0)
    {
        // Exact cancellation.
        sc = 0;
        ec.reset();
        fc.reset();
        return;
    }

    //normalize: bring the leading one back to the position of 1.0
    const unsigned long long one_point_zero = implicit_one << guard;
    const unsigned long long two_point_zero = one_point_zero << 1;
    long exp_c = static_cast<long>(exp_hi);
    while (total >= two_point_zero)
    {
        total >>= 1;
        ++exp_c;
    }
    while (total < one_point_zero)
    {
        total <<= 1;
        --exp_c;
    }

    sc = hi_negative ? 1 : 0;
    if (exp_c < 0)
    {
        // Underflow: too small for the exponent field.
        ec.reset();
        fc.reset();
        return;
    }
    ec = std::bitset<n>(static_cast<unsigned long>(exp_c));
    fc = std::bitset<m>(
        static_cast<unsigned long>((total >> guard) & (implicit_one - 1)));
}
int main() | |
{ | |
int sign1 = 0; | |
bitset<5> exp1(0); | |
bitset<6> frac1(0); | |
int sign2 = 0; | |
bitset<5> exp2(0); | |
bitset<6> frac2(0); | |
int sign3 = 0; | |
bitset<5> exp3(0); | |
bitset<6> frac3(0); | |
//float num = -0.012f; | |
//float num1 = 1.0f / 4.0f; | |
//float num2 = 0.5f; | |
float num1 = 0.12; | |
float num2 = 1.34; | |
float res; | |
ConvertToBits(sign1, exp1, frac1, num1); | |
ConvertToBits(sign2, exp2, frac2, num2); | |
PrintBits(sign1, exp1, frac1); | |
PrintBits(sign2, exp2, frac2); | |
res = ConvertToFloat(sign1, exp1, frac1); | |
cout << res << endl; | |
res = ConvertToFloat(sign2, exp2, frac2); | |
cout << res << endl; | |
cout << "+" << endl; | |
Add(sign1, exp1, frac1, | |
sign2, exp2, frac2, sign3, exp3, frac3); | |
PrintBits(sign3, exp3, frac3); | |
res = ConvertToFloat(sign3, exp3, frac3); | |
cout << res << endl; | |
cout << "*" << endl; | |
Multiply(sign1, exp1, frac1, | |
sign2, exp2, frac2, sign3, exp3, frac3); | |
PrintBits(sign3, exp3, frac3); | |
res = ConvertToFloat(sign3, exp3, frac3); | |
cout << res << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment