Last active
December 20, 2017 16:11
-
-
Save giordi91/1388504fadcf94b3f6f42103dfd1f938 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <bitset> | |
#include <iostream> | |
#ifdef MSVC | |
#include <intrin.h> | |
#endif | |
#ifdef CLANG | |
#include <x86intrin.h> | |
#endif | |
#include <cmath> | |
#include <cstdint> | |
inline uint32_t findHighestBit(uint32_t v) { | |
#ifdef MSVC | |
return 31 - __lzcnt(v); | |
#endif | |
#ifdef CLANG | |
return 31 - _lzcnt_u32(v); | |
#endif | |
} | |
/** | |
* Struct that allows us to easily access the different parts of the floating | |
* point | |
*/ | |
union SWFloat { | |
struct { | |
unsigned int mantissa : 23; | |
unsigned int exponent : 8; | |
unsigned int sign : 1; | |
}; | |
float original; | |
}; | |
int normalize32BitMantissaInPlace(int &mantissa) { | |
// NOTE : expected 32 bits mantissa with grs bits at the end | |
// making a copy of the mantissa so we can freely manipulate it | |
int tempMantissa = mantissa; | |
// extracting the sticky bits | |
int sticky = mantissa & 1; | |
// here we find where the highest set bit is, keep in mind that find highest | |
// bit returns the numeration starting from 0, once we have that we need to | |
// know how fare the highest bit is from the position it should be (26) | |
int bit = 26 - findHighestBit(tempMantissa); | |
// need to be really careful here or MSVC won't generate instructions | |
// conditional move, if here i use mantissa rather than tempMantissa it | |
// wont work, clang is fine | |
mantissa = bit > 23 ? 0 : tempMantissa; | |
int returnValue = bit > 23 ? DENORMAL : bit; | |
// at this point instead we have a good value to normalize | |
// we shift left or right based on where the bit is | |
int mantissaLeft = mantissa << bit; | |
int mantissaRight = mantissa >> abs(bit); | |
mantissa = (bit > 0) & (bit < 23) ? mantissaLeft : mantissaRight; | |
mantissa |= sticky; | |
return returnValue; | |
} | |
inline uint32_t insertHiddenOne(SWFloat value) { | |
// casting the 23 bit mantissa to a 32 bit int | |
// and inserting the 1 at the 24th slot | |
uint32_t mantissa = (value.mantissa); | |
return (mantissa |= (1 << 23)); | |
} | |
inline int extendStickyGRSbits(int mantissa) { return mantissa << 3; } | |
inline int64_t extendStickyGRSbits(int64_t mantissa) { return mantissa << 3; } | |
inline int extractGRSbits(int mantissa) { return mantissa & 7u; } | |
inline int roundMantissa(int mantissa) { | |
int grs = extractGRSbits(mantissa); | |
// now that we extracted the values we can shift the mantissa by | |
// the 3 extra bits | |
int cleanedMantissa = mantissa >> 3; | |
// now if the grs is equal to 100 base two, aka 4 base 10, | |
// we need to check if we need to round up, this happens only | |
// if the LSB of the mantissa is one, we extract that with cleanedMantissa &1 | |
int toAdd = (grs == 4u) & (cleanedMantissa & 1) ? 1 : 0; | |
// now we check just if the guard bit is one, and we did not already rounded | |
// up thanks to the LSB we increase the rounding, this is basically computing | |
// different branches of the if statement withouth banches | |
// the first part of the check :((grs & 4u) == 4) | |
// we make sure that the g bit is 1, we do that by making a mask 100 (which is | |
// 4 in decimal, this mean only the 3rd bit will survive if set, the grs& 4u | |
// operation will give us either a 4 or a 0. the second part we check if grs | |
// is !=4 , meaning is not 100, because if so we alrady took care of it above | |
toAdd += (((grs & 4u) == 4) & (grs != 4)) ? 1 : 0; | |
// here we compute the mantissa if we have rounding, we are doing this | |
// explicitelly because otherwise MSVC creates a jump instead clang doesnt | |
// https://godbolt.org/g/StQajo | |
int mantissaIf4u = cleanedMantissa + toAdd; | |
// condtitional move based on guard bit | |
cleanedMantissa = (grs & 4u) ? mantissaIf4u : cleanedMantissa; | |
return cleanedMantissa; | |
} | |
inline uint32_t countMantissaBits(uint32_t mantissa) { | |
while (!(mantissa & 1)) { | |
mantissa = mantissa >> 1; | |
} | |
return findHighestBit(mantissa); | |
} | |
SWFloat inline swFloatDivision(SWFloat a, SWFloat b) { | |
uint32_t amantissa = insertHiddenOne(a); | |
uint32_t bmantissa = insertHiddenOne(b); | |
// computing exponent | |
int aexp = int(a.exponent) - 127; | |
int bexp = int(b.exponent) - 127; | |
int exponent = (aexp - bexp) + 127; | |
uint32_t bmantbit = countMantissaBits(bmantissa) + 1; | |
// generating mask | |
uint32_t mask = ((1 << 24) - 1); | |
uint32_t divmant = (bmantissa & mask) >> (24u - bmantbit); | |
// now we need to extract the first nth bit from the amant | |
int result = 0; | |
uint32_t start = amantissa >> (24 - bmantbit); | |
int startingIndex = 23 - bmantbit ; | |
for (int i = 0; i < (27); ++i, --startingIndex) { | |
uint32_t currHigh = findHighestBit(start) + 1; | |
if (currHigh >= bmantbit) { | |
if (divmant <= start) { | |
// it means we fit | |
result = result << 1; | |
result |= 1; | |
start -= divmant; | |
} else { | |
result = result << 1; | |
} | |
} else { | |
result = result << 1; | |
} | |
//extracting an extra bit to the mantissa adding it to | |
//the working number | |
int newExtracted = (amantissa >> startingIndex) & 1; | |
start = (start << 1) | newExtracted; | |
} | |
int bit = normalize32BitMantissaInPlace(result); | |
exponent -= bit; | |
result = roundMantissa(result); | |
SWFloat res; | |
res.sign = (a.sign ^ b.sign) ? 1 : 0; | |
res.exponent = exponent; | |
res.mantissa = result; | |
return res; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment