Last active
May 20, 2016 06:54
-
-
Save tanakamura/a3728e0b22765de6531e2a070390312e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  clflush 4278.300100[MiB/s]
  clflushopt 44717.389375[MiB/s]
  memset 30407.684046[MiB/s]
*/
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <x86intrin.h>
double | |
sec(void) | |
{ | |
struct timespec ts; | |
clock_gettime(CLOCK_MONOTONIC, &ts); | |
return ts.tv_sec + (ts.tv_nsec / 1000000000.0); | |
} | |
/* Base address of the buffer the benchmark kernels operate on. */
unsigned char *data;

/*
 * Issue one CLFLUSH for each of the first `nline` 64-byte lines of `data`.
 *
 * nline: number of consecutive 64-byte lines to flush, starting at data[0].
 *        Zero is allowed and flushes nothing.
 *
 * The loop index is a size_t so that it matches the type of `nline` and
 * the byte offset (li * 64) is computed in size_t arithmetic; with an int
 * index the multiplication overflows once the buffer reaches 2 GiB.
 *
 * noinline/noclone keep this an out-of-line function.
 */
void __attribute__((noinline,noclone))
clflush(size_t nline)
{
    size_t li;

    for (li = 0; li < nline; li++) {
        _mm_clflush(data + li * 64);
    }

    /*
     * Empty asm with a "memory" clobber: a compiler-level barrier only, it
     * emits no instruction. Spelled __asm__ so it also builds in strict
     * ISO modes (-std=c11), where plain `asm` is not a keyword.
     */
    __asm__ __volatile__ ("" ::: "memory");
}
void __attribute__((noinline,noclone)) | |
clflush_opt(size_t nline) | |
{ | |
int li; | |
for (li=0; li<nline; li++) { | |
_mm_clflushopt(data + li*64); | |
} | |
asm volatile ("" ::: "memory"); | |
} | |
void __attribute__((noinline,noclone)) | |
run_memset(size_t nline) | |
{ | |
int li; | |
memset(data, 0, nline * 64); | |
asm volatile ("" ::: "memory"); | |
} | |
int | |
main(int argc, char **argv) | |
{ | |
double t0, t1, total; | |
size_t size = 128 * 1024*1024; | |
void *p; | |
int li; | |
int nloop = 32; | |
if (argc > 1) { | |
size = atoi(argv[1]) * 1024*1024; | |
} | |
posix_memalign(&p, 4096, size); | |
data = p; | |
clflush(size/64); | |
clflush_opt(size/64); | |
t0 = sec(); | |
for (li=0; li<nloop; li++) { | |
clflush(size/64); | |
} | |
t1 = sec(); | |
total = size * (double)nloop; | |
printf("%20s %f[MiB/s]\n", "clflush", total / ((t1-t0) * 1024*1024)); | |
t0 = sec(); | |
for (li=0; li<nloop; li++) { | |
clflush_opt(size/64); | |
} | |
t1 = sec(); | |
total = size * (double)nloop; | |
printf("%20s %f[MiB/s]\n", "clflushopt", total / ((t1-t0) * 1024*1024)); | |
t0 = sec(); | |
for (li=0; li<nloop; li++) { | |
run_memset(size/64); | |
} | |
t1 = sec(); | |
total = size * (double)nloop; | |
printf("%20s %f[MiB/s]\n", "memset", total / ((t1-t0) * 1024*1024)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment