Skip to content

Instantly share code, notes, and snippets.

@bisqwit
Last active May 29, 2017 08:11
Show Gist options
  • Save bisqwit/f78a0d0dc62d71ff23680d89cbe01e65 to your computer and use it in GitHub Desktop.
Save bisqwit/f78a0d0dc62d71ff23680d89cbe01e65 to your computer and use it in GitHub Desktop.
/**
* Mandelbrot fractal portable SIMD calculation core
* Copyright (c) 2017 Joel Yliluoma (http://iki.fi/bisqwit/)
*/
#include <array>
#include <cmath>
/*
* SIMDMandelCalculator template:
*
* T is the underlying (real) numeric type, with which the calculations
* are peformed. It is ideally float or double.
* This type directly controls the maximum zoom depth in the fractal.
* Ideally we would use fixed-point, because only the number of bits in the mantissa
* matter, and with 64-bit integers we can have almost 63 bits of mantissa,
* while with a 64-bit IEEE double we only get 52 bits of mantissa.
* However, currently this engine only supports floating pointer numbers.
*
* N is the number of mandelbrot coordinates to calculate in parallel (vector size).
* Ideally N should be a small integer multiple of the largest native vector size for T.
* Some options:
* MMX SSE SSE2 AVX AVX512 Altivec Neon
* ------------ --- --- ---- --- ------ ------- ----
* float16 * * * * * * 8
* float 2 4 4 8 16 4 4
* double 1 1 2 4 8 1 2
* long double 1 1 1 1 1 1 1
*
* If you use GCC or Clang, compilation with -march=native
* and either -O3, -Ofast or -ftree-vectorize is suggested.
* -fopenmp may also help (#pragma omp simd tells the compiler to try harder at vectorizing).
*
* If do_smooth is enabled, points that are outside the set are given
* a smooth (floating point) iteration count rather than a discrete
* (integral) iteration count. This incurs a slight performance penalty.
* If you use this, you should also set iter_mul to e.g. 4096.
*
* If do_periodicity_checking is enabled, light-weight periodicity checking
* is enabled to improve the performance significantly for points that are
* inside the set. It has a slight performance penalty for points that are outside the set.
*
* If do_logarithmic_iteration_counts is enabled, a log() function is applied
* to the returned iteration counts. You should also set iter_mul to e.g. 4096.
*
* The returned iteration counts (float) are multiplied by iter_mul before
* they are converted into integers and returned (fixed-point).
*/
template<typename T, unsigned N,
bool do_smooth = true /* S */,
bool do_periodicity_checking = true /* P */,
bool do_logarithmic_iteration_counts = false /* L */,
unsigned iter_mul = 1 /* I */>
struct SIMDMandelCalculator
{
static constexpr T color_scale = 8;
static constexpr T escape_radius_squared = do_smooth ? 6*6 : 4;
typedef std::array<T, N> dvec;
typedef std::array<int, N> bvec;
dvec x,y; // The "c", complex coordinate we are iterating
dvec r,i; // The "z", temporary complex number
dvec dist; // Last recorded squared distance before iteration was stopped
// // If do_smooth is false, it continues being updated
// // even then, for a moderate improvement in performance.
dvec perr,peri; // A recorded sample of "z" used in periodicity checking
bvec escaped; // Flag used to stop iterating (indicates whether an escape happened)
bvec iter; // The iteration count
bvec maxiter; // Maximum iteration count
/* Constructor: Initialize calculation for the specified number of maximum iterations, at given coordinates */
SIMDMandelCalculator(unsigned MaxIter, const T* cr, const T* ci)
{
#pragma omp simd
for(unsigned n=0; n<N; ++n)
{
x[n] = cr[n];
y[n] = ci[n];
r[n] = cr[n];
i[n] = ci[n];
maxiter[n] = MaxIter;
iter[n] = MaxIter;
perr[n] = 0;
peri[n] = 0;
dist[n] = 0;
escaped[n] = 0;
}
}
/* End(): Returns true if all N coordinates have escaped or MaxIter iterations has been reached */
bool End() const
{
typename bvec::value_type n_escaped = 0;
#pragma omp simd
for(unsigned n=0; n<N; ++n) n_escaped += escaped[n];
return n_escaped >= typename bvec::value_type(N);
}
/* Round(): Performs one iteration for all N coordinates */
void Round()
{
dvec r2, i2, ri;
/* Note: GCC generates slightly better performing code, when this code is
* in a single loop than when the statements are each in separate loops.
* However, if one of the statements fails to be vectorized, the entire loop
* will be de-vectorized. If that happens (inspect the code with gcc -S!),
* try splitting the loop into separate consecutive loops.
*/
#pragma omp simd
for(unsigned n=0; n<N; ++n) { r2[n] = (r[n] * r[n]);
i2[n] = (i[n] * i[n]);
if(!do_smooth || !escaped[n]) { dist[n] = r2[n] + i2[n]; }
escaped[n] = escaped[n] || !iter[n] || (dist[n] >= escape_radius_squared);
iter[n] -= !escaped[n];
ri[n] = (r[n] * i[n]);
i[n] = y[n] + (ri[n] * 2);
r[n] = x[n] + (r2[n] - i2[n]); }
/* Periodicity checking produces a significant speedup for coordinates
* that are inside the set (especially if they go into a cycle early on),
* but at a cost of a slight performance penalty for coordinates that are not.
*/
if(do_periodicity_checking)
{
bvec moment;
/* When the number of remaining iterations is a power of two,
* save the current complex number (r & i).
* Otherwise, compare it to the saved number. If they match, we are inside the set,
* and can set the iteration count to max immediately.
*/
#pragma omp simd
for(unsigned n=0; n<N; ++n) { int tmp = iter[n]; moment[n] = tmp & (tmp-1); }
#pragma omp simd
for(unsigned n=0; n<N; ++n) { if(!moment[n]) { perr[n] = r[n]; peri[n] = i[n]; }
else if(r[n] == perr[n] && i[n] == peri[n]) { iter[n] = 0; } }
}
}
/* Get(): Get the translated iteration counts for all N coordinates according
* to the current progress (call after End()==true, but can be called before) */
std::array<int,N> Get() const
{
std::array<int,N> out;
if(!do_logarithmic_iteration_counts && iter_mul == 1)
{
#pragma omp simd
for(unsigned n=0; n<N; ++n) out[n] = maxiter[n] - iter[n];
return out;
}
dvec iter_f;
#pragma omp simd
for(unsigned n=0; n<N; ++n) iter_f[n] = maxiter[n] - iter[n];
#pragma omp simd
for(unsigned n=0; n<N; ++n)
if(do_smooth && iter[n] != 0)
iter_f[n] += 1 - std::log2(std::log2(dist[n]) / T(2));
#pragma omp simd
for(unsigned n=0; n<N; ++n) iter_f[n] = (do_logarithmic_iteration_counts ? std::log(iter_f[n]) : iter_f[n]) * color_scale;
#pragma omp simd
for(unsigned n=0; n<N; ++n) out[n] = iter_f[n] * iter_mul;
return out;
}
};
/* Utility example: Calculate for N coordinates, return iteration counts as array */
template<unsigned N, bool S,bool P=true,bool L=true,unsigned I, typename T>
std::array<int,N> Iterate(unsigned MaxIter, const T* x, const T* y)
{
SIMDMandelCalculator<T,N, S,P,L,I> calc(MaxIter, x,y);
while(!calc.End()) { calc.Round(); }
return calc.Get();
}
/* Utility example: Calculate for N coordinates passed as std::array, return iteration counts as array */
template<bool S,bool P=true,bool L=true,unsigned I, typename T,std::size_t N>
std::array<int,N> Iterate(unsigned MaxIter, const std::array<T,N>& x, const std::array<T,N>& y)
{
return Iterate<N, S,P,L,I>(MaxIter, &x[0], &y[0]);
}
/* Utility example: Calculate for N coordinates where N is not a compile-time constant */
template<bool S,bool P=true,bool L=true,unsigned I, typename T>
void Iterate(unsigned MaxIter, const T* x, const T* y, int* out, unsigned N)
{
while(N >= 16)
{
auto tmp = Iterate<16, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n<16; ++n) out[n] = tmp[n];
x += 16; y += 16; out += 16; N -= 16;
}
if(N == 8)
{
auto tmp = Iterate< 8, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 8; ++n) out[n] = tmp[n];
return;
}
while(N >= 4)
{
auto tmp = Iterate< 4, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 4; ++n) out[n] = tmp[n];
x += 4; y += 4; out += 4; N -= 4;
}
while(N >= 1)
{
auto tmp = Iterate< 1, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 1; ++n) out[n] = tmp[n];
x += 1; y += 1; out += 1; N -= 1;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment