Last active
May 29, 2017 08:11
-
-
Save bisqwit/f78a0d0dc62d71ff23680d89cbe01e65 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Mandelbrot fractal portable SIMD calculation core | |
* Copyright (c) 2017 Joel Yliluoma (http://iki.fi/bisqwit/) | |
*/ | |
#include <array> | |
#include <cmath> | |
/* | |
* SIMDMandelCalculator template: | |
* | |
* T is the underlying (real) numeric type, with which the calculations | |
* are peformed. It is ideally float or double. | |
* This type directly controls the maximum zoom depth in the fractal. | |
* Ideally we would use fixed-point, because only the number of bits in the mantissa | |
* matter, and with 64-bit integers we can have almost 63 bits of mantissa, | |
* while with a 64-bit IEEE double we only get 52 bits of mantissa. | |
* However, currently this engine only supports floating pointer numbers. | |
* | |
* N is the number of mandelbrot coordinates to calculate in parallel (vector size). | |
* Ideally N should be a small integer multiple of the largest native vector size for T. | |
* Some options: | |
* MMX SSE SSE2 AVX AVX512 Altivec Neon | |
* ------------ --- --- ---- --- ------ ------- ---- | |
* float16 * * * * * * 8 | |
* float 2 4 4 8 16 4 4 | |
* double 1 1 2 4 8 1 2 | |
* long double 1 1 1 1 1 1 1 | |
* | |
* If you use GCC or Clang, compilation with -march=native | |
* and either -O3, -Ofast or -ftree-vectorize is suggested. | |
* -fopenmp may also help (#pragma omp simd tells the compiler to try harder at vectorizing). | |
* | |
* If do_smooth is enabled, points that are outside the set are given | |
* a smooth (floating point) iteration count rather than a discrete | |
* (integral) iteration count. This incurs a slight performance penalty. | |
* If you use this, you should also set iter_mul to e.g. 4096. | |
* | |
* If do_periodicity_checking is enabled, light-weight periodicity checking | |
* is enabled to improve the performance significantly for points that are | |
* inside the set. It has a slight performance penalty for points that are outside the set. | |
* | |
* If do_logarithmic_iteration_counts is enabled, a log() function is applied | |
* to the returned iteration counts. You should also set iter_mul to e.g. 4096. | |
* | |
* The returned iteration counts (float) are multiplied by iter_mul before | |
* they are converted into integers and returned (fixed-point). | |
*/ | |
template<typename T, unsigned N, | |
bool do_smooth = true /* S */, | |
bool do_periodicity_checking = true /* P */, | |
bool do_logarithmic_iteration_counts = false /* L */, | |
unsigned iter_mul = 1 /* I */> | |
struct SIMDMandelCalculator | |
{ | |
static constexpr T color_scale = 8; | |
static constexpr T escape_radius_squared = do_smooth ? 6*6 : 4; | |
typedef std::array<T, N> dvec; | |
typedef std::array<int, N> bvec; | |
dvec x,y; // The "c", complex coordinate we are iterating | |
dvec r,i; // The "z", temporary complex number | |
dvec dist; // Last recorded squared distance before iteration was stopped | |
// // If do_smooth is false, it continues being updated | |
// // even then, for a moderate improvement in performance. | |
dvec perr,peri; // A recorded sample of "z" used in periodicity checking | |
bvec escaped; // Flag used to stop iterating (indicates whether an escape happened) | |
bvec iter; // The iteration count | |
bvec maxiter; // Maximum iteration count | |
/* Constructor: Initialize calculation for the specified number of maximum iterations, at given coordinates */ | |
SIMDMandelCalculator(unsigned MaxIter, const T* cr, const T* ci) | |
{ | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) | |
{ | |
x[n] = cr[n]; | |
y[n] = ci[n]; | |
r[n] = cr[n]; | |
i[n] = ci[n]; | |
maxiter[n] = MaxIter; | |
iter[n] = MaxIter; | |
perr[n] = 0; | |
peri[n] = 0; | |
dist[n] = 0; | |
escaped[n] = 0; | |
} | |
} | |
/* End(): Returns true if all N coordinates have escaped or MaxIter iterations has been reached */ | |
bool End() const | |
{ | |
typename bvec::value_type n_escaped = 0; | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) n_escaped += escaped[n]; | |
return n_escaped >= typename bvec::value_type(N); | |
} | |
/* Round(): Performs one iteration for all N coordinates */ | |
void Round() | |
{ | |
dvec r2, i2, ri; | |
/* Note: GCC generates slightly better performing code, when this code is | |
* in a single loop than when the statements are each in separate loops. | |
* However, if one of the statements fails to be vectorized, the entire loop | |
* will be de-vectorized. If that happens (inspect the code with gcc -S!), | |
* try splitting the loop into separate consecutive loops. | |
*/ | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) { r2[n] = (r[n] * r[n]); | |
i2[n] = (i[n] * i[n]); | |
if(!do_smooth || !escaped[n]) { dist[n] = r2[n] + i2[n]; } | |
escaped[n] = escaped[n] || !iter[n] || (dist[n] >= escape_radius_squared); | |
iter[n] -= !escaped[n]; | |
ri[n] = (r[n] * i[n]); | |
i[n] = y[n] + (ri[n] * 2); | |
r[n] = x[n] + (r2[n] - i2[n]); } | |
/* Periodicity checking produces a significant speedup for coordinates | |
* that are inside the set (especially if they go into a cycle early on), | |
* but at a cost of a slight performance penalty for coordinates that are not. | |
*/ | |
if(do_periodicity_checking) | |
{ | |
bvec moment; | |
/* When the number of remaining iterations is a power of two, | |
* save the current complex number (r & i). | |
* Otherwise, compare it to the saved number. If they match, we are inside the set, | |
* and can set the iteration count to max immediately. | |
*/ | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) { int tmp = iter[n]; moment[n] = tmp & (tmp-1); } | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) { if(!moment[n]) { perr[n] = r[n]; peri[n] = i[n]; } | |
else if(r[n] == perr[n] && i[n] == peri[n]) { iter[n] = 0; } } | |
} | |
} | |
/* Get(): Get the translated iteration counts for all N coordinates according | |
* to the current progress (call after End()==true, but can be called before) */ | |
std::array<int,N> Get() const | |
{ | |
std::array<int,N> out; | |
if(!do_logarithmic_iteration_counts && iter_mul == 1) | |
{ | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) out[n] = maxiter[n] - iter[n]; | |
return out; | |
} | |
dvec iter_f; | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) iter_f[n] = maxiter[n] - iter[n]; | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) | |
if(do_smooth && iter[n] != 0) | |
iter_f[n] += 1 - std::log2(std::log2(dist[n]) / T(2)); | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) iter_f[n] = (do_logarithmic_iteration_counts ? std::log(iter_f[n]) : iter_f[n]) * color_scale; | |
#pragma omp simd | |
for(unsigned n=0; n<N; ++n) out[n] = iter_f[n] * iter_mul; | |
return out; | |
} | |
}; | |
/* Utility example: Calculate for N coordinates, return iteration counts as array */ | |
template<unsigned N, bool S,bool P=true,bool L=true,unsigned I, typename T> | |
std::array<int,N> Iterate(unsigned MaxIter, const T* x, const T* y) | |
{ | |
SIMDMandelCalculator<T,N, S,P,L,I> calc(MaxIter, x,y); | |
while(!calc.End()) { calc.Round(); } | |
return calc.Get(); | |
} | |
/* Utility example: Calculate for N coordinates passed as std::array, return iteration counts as array */ | |
template<bool S,bool P=true,bool L=true,unsigned I, typename T,std::size_t N> | |
std::array<int,N> Iterate(unsigned MaxIter, const std::array<T,N>& x, const std::array<T,N>& y) | |
{ | |
return Iterate<N, S,P,L,I>(MaxIter, &x[0], &y[0]); | |
} | |
/* Utility example: Calculate for N coordinates where N is not a compile-time constant */ | |
template<bool S,bool P=true,bool L=true,unsigned I, typename T> | |
void Iterate(unsigned MaxIter, const T* x, const T* y, int* out, unsigned N) | |
{ | |
while(N >= 16) | |
{ | |
auto tmp = Iterate<16, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n<16; ++n) out[n] = tmp[n]; | |
x += 16; y += 16; out += 16; N -= 16; | |
} | |
if(N == 8) | |
{ | |
auto tmp = Iterate< 8, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 8; ++n) out[n] = tmp[n]; | |
return; | |
} | |
while(N >= 4) | |
{ | |
auto tmp = Iterate< 4, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 4; ++n) out[n] = tmp[n]; | |
x += 4; y += 4; out += 4; N -= 4; | |
} | |
while(N >= 1) | |
{ | |
auto tmp = Iterate< 1, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 1; ++n) out[n] = tmp[n]; | |
x += 1; y += 1; out += 1; N -= 1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment