Skip to content

Instantly share code, notes, and snippets.

@ssvb
Created September 14, 2022 03:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ssvb/5c926ed9bc755900fdaac3b71a0f7cfd to your computer and use it in GitHub Desktop.
Save ssvb/5c926ed9bc755900fdaac3b71a0f7cfd to your computer and use it in GitHub Desktop.
D compiler configuration test for competitive programming online judge platforms.
import std.stdio, std.range, std.algorithm, std.compiler, core.bitop;
// Optimizing compilers should do tail call optimization here, so the
// 0xBADF00D magic constant won't be found on the stack if optimizations
// are enabled. Is there a cleaner way to detect optimizations?
bool detect_opt() {
// Returns true when the stack scan in the nested function does not come
// across the magic value, and false when it does.
// NOTE(review): the scan reads memory outside of any declared variable, so
// the outcome depends entirely on the frame layout chosen by the compiler.
//
// Never written to; int elements default to 0, so filler[0] below is false.
// NOTE(review): presumably only here to reserve stack space - confirm.
int[100] filler;
// Calls itself with depth + 1 until depth exceeds 100, passing magic along
// unchanged, and then inspects the memory that follows a fresh local.
bool detect_tail_call_opt(int depth, int magic) {
if (depth > 100) {
// Read the int-sized slots at offsets 20 .. 79 from the address of x and
// report false as soon as one of them holds the magic value.
int x; foreach (i ; 20 .. 80) if (*(&x + i) == magic) return false;
return true;
}
return detect_tail_call_opt(depth + 1, magic);
}
// filler[0] is 0, so the result is whatever the nested call returns.
return filler[0] || detect_tail_call_opt(0, 0xBADF00D);
}
// GDC11 was the first version to start supporting getTargetInfo traits
bool detect_gdc11() {
    // The answer is a manifest constant: on GDC it records whether the
    // getTargetInfo trait is accepted by the compiler, on every other
    // compiler it is simply false.
    version (GNU)
        enum bool has_target_info = __traits(compiles, __traits(getTargetInfo, "cppStd"));
    else
        enum bool has_target_info = false;
    return has_target_info;
}
// GDC inline assembly for RDTSCP instruction (works only on x86 hardware)
ulong rdtscp(ref uint coreid)
{
    version (GNU) {
        // The instruction's outputs are taken from EDX (upper half), EAX
        // (lower half) and ECX (stored into coreid for the caller).
        uint tsc_high, tsc_low;
        asm { "rdtscp" : "=d" (tsc_high), "=a" (tsc_low), "=c" (coreid); }
        // Glue the two 32-bit halves into a single 64-bit counter value.
        immutable ulong upper = cast(ulong) tsc_high << 32;
        return upper + tsc_low;
    } else {
        // Only the GDC inline assembly variant is implemented.
        assert(false);
    }
}
// GDC11 and newer are unable to inline Phobos templates
// without the -flto option, see https://gcc.gnu.org/PR102765
//
// This function compares performance of
//
// iota(1, max_x + 1).map!(x => x * x ^ x).sum;
//
// against
//
// size_t ans = 0;
// for (size_t x = 1; x <= max_x; x++)
// ans += x * x ^ x;
//
// With proper templates inlining, the performance of these
// two code fragments should be roughly the same.
//
// If the iota/map version is at least 2x slower, then we
// assume that templates are not inlined and return false.
//
// The benchmarked code is sandwiched between volatileLoad
// and volatileStore calls to ensure that the compiler does
// not entirely optimize it out.
//
// RDTSCP is used to make this check very fast and accurate
//
// TODO: can we somehow get rid of the deprecation warnings
// without breaking compatibility with GDC 6.3?
bool detect_templates_inlining()
{
    // Returns false only when the iota/map/sum variant was measured to be at
    // least 2x slower than the hand-written loop; returns true otherwise,
    // including when the measurement is inconclusive.
    uint coreid1, coreid2;
    // ulong.max marks "no valid sample recorded yet" for both timings.
    ulong before, after, time1 = ulong.max, time2 = ulong.max;
    size_t repeats = 100, counter = 1000, ans1, ans2;

    // Variant 1: range-based code built from Phobos templates.
    foreach (i ; 0 .. repeats) {
        volatileStore(&before, rdtscp(coreid1));
        size_t max_x = volatileLoad(&counter);
        size_t ans = iota(1, max_x + 1).map!(x => x * x ^ x).sum;
        volatileStore(&ans1, volatileLoad(&ans1) + ans);
        volatileStore(&after, rdtscp(coreid2));
        // A sample only counts when both timestamps report the same core id.
        if (coreid1 == coreid2)
            time1 = min(time1, volatileLoad(&after) - volatileLoad(&before));
    }

    // Variant 2: the equivalent hand-written loop.
    foreach (i ; 0 .. repeats) {
        volatileStore(&before, rdtscp(coreid1));
        size_t max_x = volatileLoad(&counter);
        size_t ans = 0;
        for (size_t x = 1; x <= max_x; x++)
            ans += x * x ^ x;
        volatileStore(&ans2, volatileLoad(&ans2) + ans);
        volatileStore(&after, rdtscp(coreid2));
        if (coreid1 == coreid2)
            time2 = min(time2, volatileLoad(&after) - volatileLoad(&before));
    }

    // Both variants must have computed the same accumulated result.
    if (ans1 != ans2)
        assert(false);

    // Inconclusive measurement: one of the loops never produced a valid
    // sample, or the baseline took zero ticks (which would also make the
    // division below trap). Do not report a problem in that case, matching
    // the result obtained when neither loop produced a sample.
    if (time1 == ulong.max || time2 == ulong.max || time2 == 0)
        return true;

    return time1 / time2 < 2;
}
void main() {
    // Probe the build configuration first, before anything is printed.
    immutable bool optimized = detect_opt();
    version (assert)
        immutable bool asserts_enabled = true;
    else
        immutable bool asserts_enabled = false;

    // Collected diagnostics, printed together at the very end.
    string[] notes;

    version (GNU) {
        writefln("Detected compiler: GDC (frontend v%d.%d)", version_major, version_minor);
        if (!optimized)
            notes ~= "Performance warning: '-O2' or '-O3' option was not used!";
        if (asserts_enabled)
            notes ~= "Performance warning: '-frelease' option was not used!";
        // The inlining benchmark is only run when the GDC11 probe succeeds.
        if (detect_gdc11() && !detect_templates_inlining()) {
            notes ~= "Performance warning: '-flto' or '-fno-weak-templates' option was not used!";
            notes ~= "Note: see https://gcc.gnu.org/PR102765";
        }
    } else version (LDC) {
        immutable string target_cpu = __traits(targetCPU);
        writefln("Detected compiler: LDC (optimizing for %s)", target_cpu);
        if (!optimized)
            notes ~= "Performance warning: '-O' option was not used!";
        if (asserts_enabled)
            notes ~= "Performance warning: '-release' option was not used!";
        if (target_cpu == "pentium4")
            notes ~= "Performance warning: '-mcpu=native' option was not used!";
    } else version (DigitalMars) {
        writeln("Detected compiler: DMD");
        if (!optimized)
            notes ~= "Performance warning: '-O' option was not used!";
        if (asserts_enabled)
            notes ~= "Performance warning: '-release' option was not used!";
        // Reported unconditionally for this compiler.
        notes ~= "Performance warning: DMD generates much slower code than GDC or LDC!";
    } else {
        notes ~= "Unknown compiler";
    }

    if (size_t.sizeof < 8)
        notes ~= "Performance warning: not a 64-bit compiler!";

    // One line per collected message, or a single all-clear line.
    if (!notes.empty) {
        writeln(notes.joiner("\n"));
        return;
    }
    writeln("Everything seems to be properly configured.");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment