sigmoid-bench (astanin/5270668): benchmark of several sigmoid-like functions in C.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>

/* POSIX math.h already defines M_PI_2 and M_2_SQRTPI; guard them so the
   file also builds under strict -std=c99 where they may be absent,
   without redefinition warnings where they are present. */
#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#endif
#ifndef M_2_SQRTPI
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
#endif
#define M_PI_2_INV (1.0/M_PI_2)
#define ERF_COEF (1.0/M_2_SQRTPI) /* sqrt(pi)/2 */

const int SIZE=100;       /* number of random inputs per pass */
const int CYCLES=10000000; /* passes over the input array */
double benchmark(const char* name, double (*fun)(double)) { | |
clock_t start, stop; | |
double xs[SIZE]; | |
double t_ns; | |
for (int i=0; i<SIZE; i++) { | |
xs[i] = rand(); | |
} | |
start = clock(); | |
for (int repeat=0; repeat<CYCLES; repeat++) { | |
for (int i=0; i<SIZE; i++) { | |
(*fun)(xs[i]); | |
} | |
} | |
stop = clock(); | |
t_ns = (stop-start)*1.0e9/CLOCKS_PER_SEC/CYCLES/SIZE; | |
printf("%-17s %6.1f ns\n", name, t_ns); | |
return t_ns; | |
} | |
double with_atan(double x) { | |
/* normalized atan */ | |
return M_PI_2_INV*atan(M_PI_2*x); | |
} | |
double with_exp(double x) { | |
return 1.0/(1.0 + exp(-x)); | |
} | |
double with_sqrt(double x) { | |
return 1.0/sqrt(1.0 + x*x); | |
} | |
double with_erf(double x) { | |
return erf(ERF_COEF*x); | |
} | |
/* "Fast sigmoid" x/(1+|x|): no transcendentals, output range (-1, 1). */
double with_fabs(double x) {
    return x/(1.0 + fabs(x));
}
int main(int argc, char **argv) { | |
benchmark("atan(pi*x/2)*2/pi", with_atan); | |
benchmark("atan(x)", atan); | |
benchmark("1/(1+exp(-x))", with_exp); | |
benchmark("1/sqrt(1+x^2)", with_sqrt); | |
benchmark("erf(sqrt(pi)*x/2)", with_erf); | |
benchmark("tanh(x)", tanh); | |
benchmark("x/(1+|x|)", with_fabs); | |
} |
On AMD 1090T @ gcc 4.8.2:
% gcc -Wall -O2 -o sigmoid-bench{,.c} -std=c99 -lm && ./sigmoid-bench
atan(pi*x/2)*2/pi 30.5 ns
atan(x) 26.0 ns
1/(1+exp(-x)) 81.6 ns
1/sqrt(1+x^2) 12.7 ns
erf(sqrt(pi)*x/2) 6.6 ns
tanh(x) 5.8 ns
x/(1+|x|) 5.1 ns
On Intel i7-4702MQ Notebook with gcc 4.8.3 20140627:
atan(pi*x/2)*2/pi 16.5 ns
atan(x) 14.9 ns
1/(1+exp(-x)) 12.1 ns
1/sqrt(1+x^2) 10.7 ns
erf(sqrt(pi)*x/2) 3.9 ns
tanh(x) 3.2 ns
x/(1+|x|) 4.4 ns
But note: if I evaluate a real neural net with them, tanh(x) and 1/(1+exp(-x)) are about the same speed, and x/(1+|x|) is more than twice as fast as those two. Trying to investigate why...
Edit:
If I change the line:
(*fun)(xs[i]);
to this:
xs[i] = (*fun)(xs[i]);
I get this:
atan(pi*x/2)*2/pi 9.2 ns
atan(x) 7.9 ns
1/(1+exp(-x)) 19.3 ns
1/sqrt(1+x^2) 10.8 ns
erf(sqrt(pi)*x/2) 6.5 ns
tanh(x) 15.4 ns
x/(1+|x|) 4.4 ns
Which seems closer to what I get with a real neural net...
And if I use:
xs[i] = (*fun)(xs[i]) + 0.5;
I get:
atan(pi*x/2)*2/pi 20.5 ns
atan(x) 19.4 ns
1/(1+exp(-x)) 19.3 ns
1/sqrt(1+x^2) 10.8 ns
erf(sqrt(pi)*x/2) 49.9 ns
tanh(x) 28.1 ns
x/(1+|x|) 2.5 ns
As you see, some of these functions depend very much on the value they have to evaluate...
Test on BashOnWindows!
Intel® Core™ i3-5010U
gcc version 6.3.0 20170519
almost every compiler option was used :-)
atan(pi*x/2)*2/pi 13.0 ns
atan(x) 11.5 ns
1/(1+exp(-x)) 14.0 ns
1/sqrt(1+x^2) 10.6 ns
erf(sqrt(pi)*x/2) 7.0 ns
tanh(x) 5.3 ns
x/(1+|x|) 2.8 ns
Intel(R) Xeon(R) CPU E5-2609 0 @ 2.40GHz, SuSE Linux
gcc Version 6.3.0:
atan(pi*x/2)*2/pi 28.7 ns
atan(x) 26.5 ns
1/(1+exp(-x)) 22.7 ns
1/sqrt(1+x^2) 18.1 ns
erf(sqrt(pi)*x/2) 6.8 ns
tanh(x) 5.7 ns
x/(1+|x|) 9.2 ns
icc version 16.0.3 (Intel's commercial compiler):
atan(pi*x/2)*2/pi 13.5 ns
atan(x) 12.7 ns
1/(1+exp(-x)) 30.3 ns
1/sqrt(1+x^2) 18.1 ns
erf(sqrt(pi)*x/2) 13.4 ns
tanh(x) 8.0 ns
x/(1+|x|) 9.2 ns
Note that the Intel compiler simply elides the function invocations because it detects that the return values are not saved (!). I modified the benchmark() function to save the return values in an array ys:
double benchmark(const char* name, double (*fun)(double)) {
clock_t start, stop;
double xs[SIZE], ys[SIZE]; /* to save the values of the function */
double t_ns;
for (int i=0; i<SIZE; i++) {
xs[i] = rand();
}
start = clock();
for (int repeat=0; repeat<CYCLES; repeat++) {
for (int i=0; i<SIZE; i++) {
ys[i] = (*fun)(xs[i]); /* otherwise the Intel compiler elides the whole thing */
}
}
stop = clock();
t_ns = (stop-start)*1.0e9/CLOCKS_PER_SEC/CYCLES/SIZE;
printf("%-17s %6.1f ns\n", name, t_ns);
return t_ns;
}
TL;DR: tanh(x)
still wins. And different compilers optimize differently :-)
Lenovo E480
intel core i5 8th gen - model name: Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz
$ gcc --version
gcc (Ubuntu 8.3.0-6ubuntu1) 8.3.0
Copyright (C) 2018 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ gcc -Wall -O2 -o sigmoid-bench sigmoid.c -std=c99 -lm && ./sigmoid-bench
atan(pi*x/2)*2/pi 11.5 ns
atan(x) 10.4 ns
1/(1+exp(-x)) 10.3 ns
1/sqrt(1+x^2) 6.3 ns
erf(sqrt(pi)*x/2) 4.6 ns
tanh(x) 3.7 ns
x/(1+|x|) 2.1 ns
These functions don't have the same output ranges (some map to [0, 1], others to [-1, 1]), so I'm not sure what the purpose of this benchmark is.
@miltondts In several applications it's convenient to have a sigmoid function but there is no particular requirement which one should be used. Yes, the function may have to be scaled or shifted according to the application requirements. And indeed, the images of these functions are different, but in some applications the image of the function may be irrelevant or can be accounted for. Exclude or modify those that don't fit your case and choose what's best for you.
On my Core i5-3317U with GCC 4.7.2: