Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save wilzbach/2b64e10dec66a3153c51fbd1e6848f72 to your computer and use it in GitHub Desktop.
Save wilzbach/2b64e10dec66a3153c51fbd1e6848f72 to your computer and use it in GitHub Desktop.
std.math vs. core.stdc.math vs. intrinsics
fun: pow
std.math.pow = 13 secs, 743 ms, 902 μs, and 7 hnsecs
core.stdc.pow = 12 secs, 490 ms, 213 μs, and 2 hnsecs
fun: exp
std.math.exp = 6 secs, 905 ms, and 644 μs
core.stdc.exp = 16 secs, 336 ms, 330 μs, and 4 hnsecs
fun: exp2
std.math.exp2 = 3 secs, 338 ms, 447 μs, and 9 hnsecs
core.stdc.exp2 = 5 secs, 244 ms, 528 μs, and 6 hnsecs
fun: sin
std.math.sin = 7 secs, 18 ms, 574 μs, and 5 hnsecs
core.stdc.sin = 18 secs, 82 ms, 553 μs, and 5 hnsecs
fun: cos
std.math.cos = 8 secs, 242 ms, 836 μs, and 2 hnsecs
core.stdc.cos = 18 secs, 69 ms, 296 μs, and 5 hnsecs
fun: log
std.math.log = 4 secs, 798 ms, 570 μs, and 7 hnsecs
core.stdc.log = 16 secs, 755 ms, 19 μs, and 1 hnsec
fun: log2
std.math.log2 = 4 secs, 950 ms, 840 μs, and 9 hnsecs
core.stdc.log2 = 7 secs, 822 ms, 165 μs, and 7 hnsecs
fun: sqrt
std.math.sqrt = 1 sec, 29 ms, 414 μs, and 4 hnsecs
core.stdc.sqrt = 2 secs, 121 ms, 935 μs, and 7 hnsecs
fun: ceil
std.math.ceil = 3 secs, 762 ms, 841 μs, and 8 hnsecs
core.stdc.ceil = 1 sec, 321 ms, 931 μs, and 4 hnsecs
fun: round
std.math.round = 3 secs, 575 ms, and 408 μs
core.stdc.round = 1 sec, 504 ms, 444 μs, and 4 hnsecs
fun: pow
std.math.pow = 15 secs, 914 ms, 102 μs, and 8 hnsecs
core.stdc.pow = 11 secs, 590 ms, 702 μs, and 5 hnsecs
llvm_pow = 13 secs, 570 ms, 439 μs, and 7 hnsecs
fun: exp
std.math.exp = 6 secs, 85 ms, 741 μs, and 7 hnsecs
core.stdc.exp = 16 secs, 267 ms, 997 μs, and 4 hnsecs
llvm_exp = 2 secs, 22 ms, and 876 μs
fun: exp2
std.math.exp2 = 3 secs, 117 ms, 624 μs, and 2 hnsecs
core.stdc.exp2 = 2 secs, 973 ms, and 243 μs
llvm_exp2 = 2 secs, 451 ms, 628 μs, and 9 hnsecs
fun: sin
std.math.sin = 1 sec, 805 ms, 626 μs, and 7 hnsecs
core.stdc.sin = 17 secs, 743 ms, 33 μs, and 5 hnsecs
llvm_sin = 2 secs, 95 ms, and 178 μs
fun: cos
std.math.cos = 2 secs, 820 ms, 684 μs, and 5 hnsecs
core.stdc.cos = 17 secs, 626 ms, 78 μs, and 1 hnsec
llvm_cos = 2 secs, 814 ms, 60 μs, and 5 hnsecs
fun: log
std.math.log = 5 secs, 584 ms, 344 μs, and 5 hnsecs
core.stdc.log = 16 secs, 443 ms, 893 μs, and 3 hnsecs
llvm_log = 2 secs, 13 ms, 291 μs, and 1 hnsec
fun: log2
std.math.log2 = 5 secs, 583 ms, 777 μs, and 7 hnsecs
core.stdc.log2 = 2 secs, 800 ms, 848 μs, and 5 hnsecs
llvm_log2 = 2 secs, 165 ms, 849 μs, and 6 hnsecs
fun: sqrt
std.math.sqrt = 799 ms and 917 μs
core.stdc.sqrt = 864 ms, 834 μs, and 7 hnsecs
llvm_sqrt = 439 ms, 469 μs, and 2 hnsecs
fun: ceil
std.math.ceil = 540 ms and 167 μs
core.stdc.ceil = 971 ms, 533 μs, and 6 hnsecs
llvm_ceil = 562 ms, 490 μs, and 2 hnsecs
fun: round
std.math.round = 3 secs, 52 ms, 567 μs, and 3 hnsecs
core.stdc.round = 958 ms and 217 μs
llvm_round = 590 ms, 742 μs, and 7 hnsecs
// Accumulator that every benchmarked call adds its result into.
// NOTE(review): presumably a __gshared global so the optimizer cannot discard
// the otherwise-unused math calls — confirm against the generated code.
__gshared float r = 0.0;

/**
 * Times ten math functions as provided by `std.math`, `core.stdc.math` and
 * (when compiled with LDC) the `llvm_*` intrinsics from `ldc.intrinsics`,
 * printing one block of timings per function.
 *
 * Each implementation is invoked `iterations` times through
 * `std.datetime.stopwatch.benchmark` (available since Phobos 2.077; it
 * replaces the removed `std.datetime.benchmark`, which returned
 * `TickDuration` values that had to be converted to `Duration`).
 */
void main()
{
    import std.datetime.stopwatch : benchmark;
    import std.meta : AliasSeq;
    import std.stdio : writefln;

    // Calls timed per implementation: 200 million (same value as before,
    // written with conventional digit grouping).
    enum uint iterations = 200_000_000;

    float a = 12.2;
    float b = 5.5;

    // `fun` is a compile-time string, so the loop body is instantiated once
    // per function name and `mixin(fun)` resolves against whichever module
    // the enclosing delegate imports.
    foreach (fun; AliasSeq!("pow", "exp", "exp2", "sin", "cos", "log", "log2",
                            "sqrt", "ceil", "round"))
    {
        auto bench = benchmark!(
            {
                import std.math;
                // Unary call when it compiles; otherwise the function takes
                // two arguments (e.g. pow).
                static if (__traits(compiles, mixin(fun)(a)))
                    r += mixin(fun)(a);
                else
                    r += mixin(fun)(a, b);
            },
            {
                // NOTE(review): the C functions with these names take
                // `double`, so the `float` argument is presumably promoted
                // here while std.math may select a `float` overload —
                // confirm before comparing the two columns directly.
                import core.stdc.math;
                static if (__traits(compiles, mixin(fun)(a)))
                    r += mixin(fun)(a);
                else
                    r += mixin(fun)(a, b);
            },
            {
                // Empty on non-LDC compilers; its timing is then dropped
                // below because `names` has only two entries.
                version (LDC)
                {
                    import ldc.intrinsics;
                    static if (__traits(compiles, mixin("llvm_" ~ fun)(a)))
                        r += mixin("llvm_" ~ fun)(a);
                    else
                        r += mixin("llvm_" ~ fun)(a, b);
                }
            },
        )(iterations);

        string[] names = ["std.math." ~ fun, "core.stdc." ~ fun];
        version (LDC)
            names ~= "llvm_" ~ fun;

        writefln("fun: %s", fun);
        // `elapsed` is already a Duration; the loop variable no longer
        // shadows the global accumulator `r`.
        foreach (j, elapsed; bench)
            if (j < names.length)
                writefln("%-14s = %s", names[j], elapsed);
    }
}
@katastic
Copy link

Hey, what CPU and OS did you run this on?

I'm on Windows and I'm getting significantly different results. When compiled with DMD (but not LDC), many functions — notably sin/cos — are much slower using std.math than core.stdc. DMD simply emits fsin and fcos instructions, whereas LDC appears to call the stdc functions.

DMD
image

LDC
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment