|
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>

#include <xmmintrin.h>
|
|
|
using namespace std; |
|
using namespace std::chrono; |
|
|
|
// Sink for the benchmark results: each Count* function stores its running sum
// here and main() prints it, so the computed values are observably used.
double outputval = 0.0;
|
|
|
// sqrtintr - Approximate reciprocal square root using the SSE rsqrt intrinsic.
//
// Despite the name, this returns an approximation of 1 / sqrt(f), not sqrt(f);
// x * sqrtintr(x) therefore approximates sqrt(x).
//
// f:       value to process. It is narrowed to single precision because
//          _mm_rsqrt_ss operates on a float lane.
// returns: the single-precision approximation widened back to double. Intel
//          documents the relative error of the instruction as < 1.5 * 2^-12.
//
// Declared with the standard `inline` keyword (rather than the MSVC-only
// `__forceinline`) so the file also builds with GCC and Clang.
inline double sqrtintr(const double f)
{
    // Explicit narrowing: the intrinsic only accepts a float.
    const __m128 input = _mm_set_ss(static_cast<float>(f));
    const __m128 approx = _mm_rsqrt_ss(input);

    // Extract the low lane; the float -> double widening is exact.
    return _mm_cvtss_f32(approx);
}
|
|
|
// isqrt - 64 bit Unsigned Integer Square Root
//
// Returns the largest integer r such that r * r <= x. The result is built one
// bit at a time, from bit 31 down to bit 0: a bit is kept only if the square
// of the candidate still does not exceed x. The root of a 64-bit value fits in
// 32 bits, so the candidate's square cannot overflow.
uint64_t isqrt(uint64_t x)
{
    uint64_t result = 0;
    uint64_t mask = uint64_t{1} << 31;

    while (mask != 0)
    {
        const uint64_t candidate = result + mask;
        if (candidate * candidate <= x)
        {
            result = candidate;
        }
        mask >>= 1;
    }

    return result;
}
|
|
|
// fisqrt - Fast Inverse Square Root. Accepts and returns 32-bit precision float
//
// Approximates 1 / sqrt(n) with the classic bit-level initial guess (magic
// constant 0x5f3759df) followed by one Newton-Raphson refinement step.
//
// n:       input value; the bit trick is only meaningful for positive,
//          finite floats.
// returns: approximation of 1 / sqrt(n).
//
// The float's bit pattern is copied through std::memcpy into a fixed-width
// 32-bit integer. The previous `*(long *)&y` cast read 8 bytes from a 4-byte
// float wherever `long` is 64-bit, and broke strict aliasing everywhere.
inline float fisqrt(float n)
{
    static_assert(sizeof(float) == sizeof(std::uint32_t),
                  "fisqrt requires a 32-bit float");

    const float threehalfs = 1.5F;
    const float halfN = n * 0.5F;

    // Reinterpret the float's bits as an integer without aliasing violations.
    std::uint32_t bits;
    std::memcpy(&bits, &n, sizeof bits);

    // Initial guess: the subtraction approximately halves and negates the exponent.
    bits = 0x5f3759dfU - (bits >> 1);

    float y;
    std::memcpy(&y, &bits, sizeof y);

    // One Newton-Raphson iteration; repeating this line would trade speed for
    // extra accuracy.
    y = y * (threehalfs - (halfN * y * y));

    return y;
}
|
|
|
// CountIDivisions - Counts integer square roots |
|
|
|
uint64_t CountIDivisions(long long secondsToRun) |
|
{ |
|
const auto tStart = steady_clock::now(); |
|
auto value = 0.0; |
|
auto sum = 0.0; |
|
auto passes = 0UL; |
|
while ((duration_cast<seconds>(steady_clock::now() - tStart)).count() < secondsToRun) |
|
{ |
|
for (int i = 0; i < 1000; i++) |
|
{ |
|
value += ++passes; |
|
sum += 1 / isqrt((uint64_t)value); |
|
} |
|
} |
|
outputval = sum; |
|
return passes; |
|
} |
|
|
|
// CountDivisions - Uses the normal compiler sqrt() function |
|
|
|
uint64_t CountDivisions(long long secondsToRun) |
|
{ |
|
const auto tStart = steady_clock::now(); |
|
auto value = 0.0; |
|
auto sum = 0.0; |
|
auto passes = 0UL; |
|
while ((duration_cast<seconds>(steady_clock::now() - tStart)).count() < secondsToRun) |
|
{ |
|
for (int i = 0; i < 1000; i++) |
|
{ |
|
value += ++passes; |
|
sum += 1 / sqrt(value); |
|
} |
|
} |
|
outputval = sum; |
|
return passes; |
|
} |
|
|
|
// CountIntrDivisions - Use the intrinsics for SSE/SIMD rsqrt |
|
|
|
uint64_t CountIntrDivisions(long long secondsToRun) |
|
{ |
|
const auto tStart = steady_clock::now(); |
|
auto value = 0.0; |
|
auto sum = 0.0; |
|
auto passes = 0UL; |
|
while ((duration_cast<seconds>(steady_clock::now() - tStart)).count() < secondsToRun) |
|
{ |
|
for (int i = 0; i < 1000; i++) |
|
{ |
|
value += ++passes; |
|
sum += sqrtintr(value); |
|
} |
|
} |
|
outputval = sum; |
|
return passes; |
|
} |
|
|
|
// CountFIDivisions - Test the FISR |
|
|
|
uint64_t CountFIDivisions(long long secondsToRun) |
|
{ |
|
const auto tStart = steady_clock::now(); |
|
auto value = 0.0; |
|
auto sum = 0.0; |
|
auto passes = 0UL; |
|
while ((duration_cast<seconds>(steady_clock::now() - tStart)).count() < secondsToRun) |
|
{ |
|
for (int i = 0; i < 1000; i++) |
|
{ |
|
value += ++passes; |
|
sum += fisqrt((float)value); |
|
} |
|
} |
|
outputval = sum; |
|
return passes; |
|
} |
|
|
|
int main(int argc, char *argv[]) |
|
{ |
|
cout << "Sanity checks:" << endl; |
|
cout << "============================================" << endl; |
|
cout << "Sqrt = " << round(sqrt(1000000)) << endl; |
|
cout << "isqrt = " << round(isqrt(1000000)) << endl; |
|
cout << "fisqrt = " << round(1000000 * fisqrt(1000000)) << endl; |
|
cout << "intrsqrt = " << round(1000000 * sqrtintr(1000000)) << endl; |
|
cout << "============================================" << endl; |
|
|
|
auto seconds = 5; |
|
auto cDiv = CountDivisions(seconds) / 1000000; |
|
auto ciDiv = CountIDivisions(seconds) / 1000000; |
|
auto fiDiv = CountFIDivisions(seconds) / 1000000; |
|
auto intDiv = CountIntrDivisions(seconds) / 1000000; |
|
|
|
cout << "Integer Square Root | Passes " << ciDiv << endl; |
|
cout << "Floating point unit | Passes " << cDiv << endl; |
|
cout << "Fast Inverse Square Root | Passes " << fiDiv << endl; |
|
cout << "Intrinsic SqrRoot | Passes " << intDiv << endl; |
|
cout << outputval << endl; // so that the compiler doesn't ignore the variable |
|
} |