Skip to content

Instantly share code, notes, and snippets.

@LebedevRI
Created January 16, 2016 13:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LebedevRI/15f76f2368c984c7eeae to your computer and use it in GitHub Desktop.
Save LebedevRI/15f76f2368c984c7eeae to your computer and use it in GitHub Desktop.
/*
* $ rm a.out; gcc -Wall -lm -O3 -fopenmp main.c && time ./a.out
*/
#include <stdio.h> // for printf
#include <string.h>
#include <math.h>
#include <xmmintrin.h> // for __m128, _mm_mul_ps, _mm_set1_ps, _mm_add_ps
#define CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
#define _DATAPOINTS 1e9
#define STOPS ((size_t)(round(_DATAPOINTS)))
/*
* interpolate values from p1 and p2 into out.
*/
static float interpolate(const float min, // the smaller tuning
const float max, // the larger tuning (can't be == p1)
const size_t i)
{
// stupid linear interpolation.
// to be confirmed.
const double t = (((double)i) / ((double)(STOPS-1)));
return (1.0 - t) * min + t * max;
}
static inline float cbrtf_fast(const float x)
{
union convert
{
float f;
int i;
} data, dataout;
data.f = x;
// approximate cbrtf(x):
dataout.i =
(
(
(int)
(
(
(float)
(
data.i
)
)
/
3.0f
)
)
+
709921077
);
return dataout.f;
}
static inline __m128 cbrtf_sse(const __m128 x)
{
// approximate cbrtf(x):
return _mm_castsi128_ps(
_mm_add_epi32(_mm_cvtps_epi32(_mm_div_ps(_mm_cvtepi32_ps(_mm_castps_si128(x)), _mm_set1_ps(3.0f))),
_mm_set1_epi32(709921077)));
}
int main()
{
const float min = -1.0f;
const float max = 2.0f;
printf("STOPS = %zu\n\n", STOPS);
// go through all x
#ifdef _OPENMP
#pragma omp parallel for simd schedule(static)
#endif
for(size_t i = 0; i < (STOPS); i++)
{
const float x = interpolate(min, max, i);
const __m128 X = _mm_set1_ps(x);
const __m128 c_SSE = cbrtf_sse(X);
float c_float[4];
_mm_store_ps(c_float, c_SSE);
const float c = cbrtf_fast(x);
if(c != c_float[0]) printf("%f: %f != %f, diff = %e\n", x, c, c_float[0], fmax((double)c, (double)c_float[0]) - fmin((double)c, (double)c_float[0]));
//printf("\tref = %f, diff(sse) = %e, diff(plain) = %e\n", cbrtf(x), fmax((double)cbrtf(x), (double)c_float[0]) - fmin((double)cbrtf(x), (double)c_float[0]), fmax((double)cbrtf(x), (double)c) - fmin((double)cbrtf(x), (double)c));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment