Last active
April 2, 2020 06:26
-
-
Save dannas/19d30632021237aae68db6bf562d1314 to your computer and use it in GitHub Desktop.
Demonstrate how nested array accesses are lowered by the C compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
// Demonstrate how nested array accesses are lowered by the C | |
// compiler. https://godbolt.org/z/Nj4uYn | |
// | |
// A nested array T a[nrows][ncols]. The expression a[i][j] is lowered as: | |
// | |
// a[i][j] | |
// => | |
// a[i * ncols + j] | |
// => | |
// *(T*)(uintptr_t)a + i * ncols + j * sizeof(T)) | |
// => | |
// index = i * ncols + j | |
// *(T*)((uintptr_t)a + index * sizeof(T)) | |
// => | |
// lea rIndex, [rCols + rRows * ncols] | |
// mov rDest, a[0+rIndex * sizeof(T)] | |
// | |
// The most straight-forward way fo translating the code would be to | |
// use a mul and add, but multiplications are slow (though they're getting | |
// faster by each CPU generation). So instead use lea if possible. | |
// | |
// But lea can only multiply by 2, 2, 4, 8. So use a combination of | |
// shifts, add, sub for other factors. | |
#define F(COL) \ | |
int W ## COL(size_t row, size_t col) { \ | |
extern int a ## COL[][COL]; \ | |
return a ## COL[row][col]; \ | |
} | |
// Factors that lea can handle | |
// lea rDest, [j + i * ncols] | |
F(1) | |
F(2) | |
F(4) | |
F(8) | |
// Close to factors that lea can handle | |
// lea rDest, [j + i * ncols] | |
// add rDest, j | |
F(3) | |
F(5) | |
F(7) | |
F(9) | |
// Powers of two replaces mul with sal | |
// sal i, log2(ncols) | |
// add i, j | |
F(16) | |
F(32) | |
F(64) | |
F(128) | |
// Prime numbers | |
F(11) | |
F(13) | |
F(17) | |
F(19) | |
// Large values | |
F(20) | |
F(50) | |
F(100) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment