Skip to content

Instantly share code, notes, and snippets.

@syoyo
Last active March 26, 2016 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syoyo/24d0bf30dd2a9b5b2b69 to your computer and use it in GitHub Desktop.
Save syoyo/24d0bf30dd2a9b5b2b69 to your computer and use it in GitHub Desktop.
Intel C compiler bug or ill-defined C++?
// Writing (&x)[i] in operator[] is safe/correct C++ code or not?
// The following is the reduced code fragment(Thus not work by just copy&paste) which Intel C++ compier(ver 13 and 15) miscompiles(Release build only) the code for the access to real3 object through operator[] inside OpenMP loop.
// clang and gcc are OK to compile&run
typedef float real;
struct real3 {
real3() {}
real3(real xx, real yy, real zz) {
x = xx;
y = yy;
z = zz;
}
explicit real3(real *p) {
x = p[0];
y = p[1];
z = p[2];
}
real operator[](int i) const { return (&x)[i]; }
real &operator[](int i) { return (&x)[i]; }
real x, y, z;
// real pad; // for alignment
};
// -----------------------------
void MortonCodesTetraFloat30(
uint32_t *codes, const float *points, const uint32_t *faces,
const real3 &bmin, const real3 &bmax, int64_t startIdx, int64_t endIdx) {
int kDIV = (1 << 10);
real invx = kDIV / (bmax[0] - bmin[0]);
real invy = kDIV / (bmax[1] - bmin[1]);
real invz = kDIV / (bmax[2] - bmin[2]);
int64_t n = endIdx - startIdx;
float one_fourth = 1.0f / 4.0f;
#ifdef _OPENMP
#pragma omp parallel for if (n > 4096)
#endif
for (int64_t i = startIdx; i < endIdx; i++) {
uint32_t f0 = faces[3 * i + 0];
uint32_t f1 = faces[3 * i + 1];
uint32_t f2 = faces[3 * i + 2];
real3 p0(points[3 * f0 + 0], points[3 * f0 + 1], points[3 * f0 + 2]);
real3 p1(points[3 * f1 + 0], points[3 * f1 + 1], points[3 * f1 + 2]);
real3 p2(points[3 * f2 + 0], points[3 * f2 + 1], points[3 * f2 + 2]);
real3 p_i;
// *** Intel Compiler miscompiles(?) real3::operator[] and give wrong result inside OpenMP loop ***
p_i[0] = one_third * (p0[0] + p1[0] + p2[0]);
p_i[1] = one_third * (p0[1] + p1[1] + p2[1]);
p_i[2] = one_third * (p0[2] + p1[2] + p2[2]);
codes[i] = MortionCode30(p_i, bmin, invx, invy, invz);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment