Created
March 13, 2018 17:56
-
-
Save Lewiscowles1986/90191c59c9aedf3d08bf0b129065cccc to your computer and use it in GitHub Desktop.
Benchmarking PC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Integer and float benchmark for Win32 and Win64
// Results are below main(), line 91
#include <stdlib.h> | |
#include <stdio.h> | |
#ifdef _WIN32 | |
#include <sys/timeb.h> | |
#else | |
#include <sys/time.h> | |
#endif | |
#include <time.h> | |
/**
 * Returns the current time in seconds as a double, for measuring elapsed
 * intervals by subtracting two readings.
 *
 * On _WIN32 the value comes from _ftime() (seconds plus milliseconds); on
 * other platforms it comes from gettimeofday() (seconds plus microseconds).
 *
 * If gettimeofday() fails, the error is reported with perror() and 0.0 is
 * returned, so the caller never sees an indeterminate value.
 */
double
mygettime(void) {
#ifdef _WIN32
    struct _timeb tb;
    _ftime(&tb);
    return static_cast<double>(tb.time) + (0.001 * static_cast<double>(tb.millitm));
#else
    // Zero-initialised so that no path can read an indeterminate timeval.
    struct timeval tv = {0, 0};
    if (gettimeofday(&tv, 0) < 0) {
        perror("oops");
        return 0.0;
    }
    return static_cast<double>(tv.tv_sec) + (0.000001 * static_cast<double>(tv.tv_usec));
#endif
}
template< typename Type > | |
void my_test(const char* name) { | |
volatile Type v = 0; | |
// Do not use constants or repeating values | |
// to avoid loop unroll optimizations. | |
// All values >0 to avoid division by 0 | |
Type v0 = (Type)(rand() % 256)/16 + 1; | |
Type v1 = (Type)(rand() % 256)/16 + 1; | |
Type v2 = (Type)(rand() % 256)/16 + 1; | |
Type v3 = (Type)(rand() % 256)/16 + 1; | |
Type v4 = (Type)(rand() % 256)/16 + 1; | |
Type v5 = (Type)(rand() % 256)/16 + 1; | |
Type v6 = (Type)(rand() % 256)/16 + 1; | |
Type v7 = (Type)(rand() % 256)/16 + 1; | |
double t1 = mygettime(); | |
for (size_t i = 0; i < 100000000; ++i) { | |
v += v0; | |
v += v2; | |
v += v4; | |
v += v6; | |
} | |
printf("%s add: %f\n", name, mygettime() - t1); | |
t1 = mygettime(); | |
for (size_t i = 0; i < 100000000; ++i) { | |
v -= v1; | |
v -= v3; | |
v -= v5; | |
v -= v7; | |
} | |
printf("%s sub: %f\n", name, mygettime() - t1); | |
t1 = mygettime(); | |
for (size_t i = 0; i < 100000000; ++i) { | |
v *= v0; | |
v *= v2; | |
v *= v4; | |
v *= v6; | |
} | |
printf("%s mul: %f\n", name, mygettime() - t1); | |
t1 = mygettime(); | |
for (size_t i = 0; i < 100000000; ++i) { | |
v /= v1; | |
v /= v3; | |
v /= v5; | |
v /= v7; | |
} | |
printf("%s div: %f\n", name, mygettime() - t1); | |
} | |
// Entry point: runs the arithmetic benchmark once per type, in the order
// short, int, long, long long, float, double. Each call prints its own
// result lines, prefixed with the label passed in.
int main() {
    // Integer types.
    my_test<short>(" short");
    my_test<int>(" int");
    my_test<long>(" long");
    my_test<long long>("long long");

    // Floating-point types.
    my_test<float>(" float");
    my_test<double>(" double");

    return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also note that repeated division will quickly make `v = 0`, which is the fastest case for `div`/`idiv`. The other operations don't have data-dependent performance, but divide does on most CPUs. Especially for 64-bit, `0` can be 2x faster than large dividends. E.g. Agner Fog lists `idiv r64` latency for Haswell as 39-103 (and throughput 24-81) (http://agner.org/optimize/). It is not such a big effect for 32-bit and smaller, e.g. latency = 22-29 for Haswell `idiv r32`.