Last active
February 1, 2024 23:30
-
-
Save simonhf/caaa33ccb87c0bf0775a863c0d6843c2 to your computer and use it in GitHub Desktop.
Experiment with __builtin_prefetch()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Size of the scanned buffer in bytes (1 GiB). It is a power of two, so an
 * index can be wrapped back into range with `& (NUMBYTES - 1)`. */
#define NUMBYTES (1024*1024*1024)

/* Both knobs are normally supplied on the compiler command line
 * (-DBATCH_SIZE=N -DCACHE_LINE_SIZE=N). Fall back to defaults so the file
 * still builds when they are omitted. */
#ifndef BATCH_SIZE
#define BATCH_SIZE 1
#endif
#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* assumed default; pass the real value with -D */
#endif

char bytes[NUMBYTES];                  /* buffer the benchmark reads from */
int batch_size = BATCH_SIZE;           /* number of cursors advanced per pass */
int cache_line_size = CACHE_LINE_SIZE; /* unit used to derive the stride */
/*
 * Return the current time of day, as reported by gettimeofday(), converted
 * to a double holding seconds plus a fractional microsecond part.
 *
 * The structure is zero-initialised so that, should gettimeofday() fail,
 * the function returns 0.0 instead of reading indeterminate values.
 */
double
get_time_in_seconds(void)
{
    struct timeval now = {0, 0};

    gettimeofday(&now, NULL);
    return (double)now.tv_sec + 1.e-6 * (double)now.tv_usec;
}
void main(void) { | |
int i; | |
int j; | |
int p_start[batch_size]; | |
int p[batch_size]; | |
uint64_t c[batch_size]; | |
int incs[batch_size]; | |
int incs_total = 0; | |
setlocale(LC_NUMERIC, ""); | |
#ifdef CACHE_LINE_FRIENDLY | |
int inc = 1 + (cache_line_size * 0); | |
#endif | |
#ifdef CACHE_LINE_FRIENDLY_ISH | |
int inc = 1 + (cache_line_size * 1); | |
#endif | |
#ifdef CACHE_LINE_FRIENDLIER | |
int inc = 1 + (cache_line_size * 8); | |
#endif | |
#ifdef CACHE_LINE_UNFRIENDLY | |
int inc = 1 + (cache_line_size * 8191); | |
#endif | |
#ifdef CACHE_LINE_PREFETCH | |
char prefetch_text[] = "with prefetch"; | |
#else | |
char prefetch_text[] = "without prefetch"; | |
#endif | |
for(i = 0; i < NUMBYTES; i++) { | |
bytes[i] = i & 255; | |
} | |
for(i = 0; i < batch_size; i++) { | |
p_start[i] = ((NUMBYTES / batch_size) * i) + (rand() & 8191); | |
p[i] = p_start[i]; | |
c[i] = 0; | |
incs[i] = 0; | |
} | |
double t1 = get_time_in_seconds(); | |
for(j = 0; j < (500000000 / batch_size); j++) { | |
#ifdef CACHE_LINE_PREFETCH | |
for(i = 0; i < batch_size; i++) { | |
__builtin_prefetch(&bytes[p[i]], 1, 3); // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html | |
} | |
#endif | |
for(i = 0; i < batch_size; i++) { | |
c[i] += bytes[p[i]]; | |
incs[i] ++; | |
incs_total ++; | |
p[i] += inc; | |
p[i] = p[i] & (NUMBYTES - 1); | |
} | |
} | |
double t2 = get_time_in_seconds(); | |
for(i = 0; i < batch_size; i++) { | |
printf("- p[%2u]: grand total sum of all bytes with detected cache line size %u at p_start %'13u with %'11u incs of size %'7u %s: %'lu\n", i, cache_line_size, p_start[i], incs[i], inc, &prefetch_text[0], c[i]); | |
} | |
printf("- %'u incs in %f seconds or %'13.0f incs per second %s using batch_size %3u and inc %'7u\n", incs_total, t2 - t1, incs_total / (t2 - t1), &prefetch_text[0], batch_size, inc); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build and run cache-line-example once for every combination of stride
# variant, prefetch setting and batch size, in that nesting order.

# Cache line size as reported by the kernel for cpu0; read once and reused.
cache_line_size=$(cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size)

for stride in CACHE_LINE_FRIENDLY CACHE_LINE_FRIENDLY_ISH CACHE_LINE_FRIENDLIER CACHE_LINE_UNFRIENDLY; do
    for prefetch in "" "-DCACHE_LINE_PREFETCH"; do
        for batch_size in 1 2 4 8 16 32 64 128 256; do
            # $prefetch is deliberately unquoted: when empty it must expand
            # to no argument at all rather than to an empty string.
            gcc -O3 "-DBATCH_SIZE=$batch_size" $prefetch "-D$stride" "-DCACHE_LINE_SIZE=$cache_line_size" -o cache-line-example cache-line-example.c && ./cache-line-example
        done
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat /proc/cpuinfo | egrep CPU | head -n 1 | |
model name : Intel(R) Xeon(R) CPU E3-1505M v6 @ 3.00GHz | |
$ gcc -v 2>&1 | egrep "gcc version" | |
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9) | |
$ ./cache-line-example.sh 2>&1 | egrep "per second" | |
- 500,000,000 incs in 1.192439 seconds or 419,308,633 incs per second without prefetch using batch_size 1 and inc 1 | |
- 500,000,000 incs in 0.641831 seconds or 779,021,365 incs per second without prefetch using batch_size 2 and inc 1 | |
- 500,000,000 incs in 0.559111 seconds or 894,276,618 incs per second without prefetch using batch_size 4 and inc 1 | |
- 500,000,000 incs in 0.516313 seconds or 968,404,682 incs per second without prefetch using batch_size 8 and inc 1 | |
- 500,000,000 incs in 0.515488 seconds or 969,954,406 incs per second without prefetch using batch_size 16 and inc 1 | |
- 500,000,000 incs in 0.658313 seconds or 759,517,088 incs per second without prefetch using batch_size 32 and inc 1 | |
- 500,000,000 incs in 0.873885 seconds or 572,157,719 incs per second without prefetch using batch_size 64 and inc 1 | |
- 500,000,000 incs in 1.028925 seconds or 485,944,095 incs per second without prefetch using batch_size 128 and inc 1 | |
- 500,000,000 incs in 1.059642 seconds or 471,857,442 incs per second without prefetch using batch_size 256 and inc 1 | |
- 500,000,000 incs in 1.392889 seconds or 358,966,143 incs per second with prefetch using batch_size 1 and inc 1 | |
- 500,000,000 incs in 0.846301 seconds or 590,806,289 incs per second with prefetch using batch_size 2 and inc 1 | |
- 500,000,000 incs in 0.735334 seconds or 679,963,193 incs per second with prefetch using batch_size 4 and inc 1 | |
- 500,000,000 incs in 0.697704 seconds or 716,636,202 incs per second with prefetch using batch_size 8 and inc 1 | |
- 500,000,000 incs in 0.691203 seconds or 723,376,617 incs per second with prefetch using batch_size 16 and inc 1 | |
- 500,000,000 incs in 0.732389 seconds or 682,697,335 incs per second with prefetch using batch_size 32 and inc 1 | |
- 500,000,000 incs in 0.865951 seconds or 577,399,835 incs per second with prefetch using batch_size 64 and inc 1 | |
- 500,000,000 incs in 0.909119 seconds or 549,983,072 incs per second with prefetch using batch_size 128 and inc 1 | |
- 500,000,000 incs in 0.922212 seconds or 542,174,612 incs per second with prefetch using batch_size 256 and inc 1 | |
- 500,000,000 incs in 2.072989 seconds or 241,197,615 incs per second without prefetch using batch_size 1 and inc 65 | |
- 500,000,000 incs in 1.934087 seconds or 258,519,906 incs per second without prefetch using batch_size 2 and inc 65 | |
- 500,000,000 incs in 2.043554 seconds or 244,671,774 incs per second without prefetch using batch_size 4 and inc 65 | |
- 500,000,000 incs in 2.218542 seconds or 225,373,255 incs per second without prefetch using batch_size 8 and inc 65 | |
- 500,000,000 incs in 2.483657 seconds or 201,316,033 incs per second without prefetch using batch_size 16 and inc 65 | |
- 500,000,000 incs in 3.074959 seconds or 162,603,805 incs per second without prefetch using batch_size 32 and inc 65 | |
- 500,000,000 incs in 7.140301 seconds or 70,025,059 incs per second without prefetch using batch_size 64 and inc 65 | |
- 500,000,000 incs in 7.409147 seconds or 67,484,151 incs per second without prefetch using batch_size 128 and inc 65 | |
- 500,000,000 incs in 6.267362 seconds or 79,778,381 incs per second without prefetch using batch_size 256 and inc 65 | |
- 500,000,000 incs in 1.966226 seconds or 254,294,254 incs per second with prefetch using batch_size 1 and inc 65 | |
- 500,000,000 incs in 1.806129 seconds or 276,835,157 incs per second with prefetch using batch_size 2 and inc 65 | |
- 500,000,000 incs in 1.914909 seconds or 261,108,997 incs per second with prefetch using batch_size 4 and inc 65 | |
- 500,000,000 incs in 2.052155 seconds or 243,646,311 incs per second with prefetch using batch_size 8 and inc 65 | |
- 500,000,000 incs in 2.348991 seconds or 212,857,358 incs per second with prefetch using batch_size 16 and inc 65 | |
- 500,000,000 incs in 2.963187 seconds or 168,737,243 incs per second with prefetch using batch_size 32 and inc 65 | |
- 500,000,000 incs in 5.385729 seconds or 92,837,941 incs per second with prefetch using batch_size 64 and inc 65 | |
- 500,000,000 incs in 5.740616 seconds or 87,098,666 incs per second with prefetch using batch_size 128 and inc 65 | |
- 500,000,000 incs in 5.044883 seconds or 99,110,326 incs per second with prefetch using batch_size 256 and inc 65 | |
- 500,000,000 incs in 3.912801 seconds or 127,785,695 incs per second without prefetch using batch_size 1 and inc 513 | |
- 500,000,000 incs in 5.686491 seconds or 87,927,687 incs per second without prefetch using batch_size 2 and inc 513 | |
- 500,000,000 incs in 6.747177 seconds or 74,105,068 incs per second without prefetch using batch_size 4 and inc 513 | |
- 500,000,000 incs in 7.287399 seconds or 68,611,585 incs per second without prefetch using batch_size 8 and inc 513 | |
- 500,000,000 incs in 7.915144 seconds or 63,170,045 incs per second without prefetch using batch_size 16 and inc 513 | |
- 500,000,000 incs in 8.054375 seconds or 62,078,064 incs per second without prefetch using batch_size 32 and inc 513 | |
- 500,000,000 incs in 8.143878 seconds or 61,395,812 incs per second without prefetch using batch_size 64 and inc 513 | |
- 500,000,000 incs in 8.451398 seconds or 59,161,809 incs per second without prefetch using batch_size 128 and inc 513 | |
- 500,000,000 incs in 7.070887 seconds or 70,712,485 incs per second without prefetch using batch_size 256 and inc 513 | |
- 500,000,000 incs in 4.269017 seconds or 117,122,982 incs per second with prefetch using batch_size 1 and inc 513 | |
- 500,000,000 incs in 4.804792 seconds or 104,062,779 incs per second with prefetch using batch_size 2 and inc 513 | |
- 500,000,000 incs in 5.900906 seconds or 84,732,750 incs per second with prefetch using batch_size 4 and inc 513 | |
- 500,000,000 incs in 6.368110 seconds or 78,516,232 incs per second with prefetch using batch_size 8 and inc 513 | |
- 500,000,000 incs in 5.795165 seconds or 86,278,819 incs per second with prefetch using batch_size 16 and inc 513 | |
- 500,000,000 incs in 6.077008 seconds or 82,277,331 incs per second with prefetch using batch_size 32 and inc 513 | |
- 500,000,000 incs in 6.136243 seconds or 81,483,082 incs per second with prefetch using batch_size 64 and inc 513 | |
- 500,000,000 incs in 6.488899 seconds or 77,054,675 incs per second with prefetch using batch_size 128 and inc 513 | |
- 500,000,000 incs in 5.689600 seconds or 87,879,640 incs per second with prefetch using batch_size 256 and inc 513 | |
- 500,000,000 incs in 6.443355 seconds or 77,599,324 incs per second without prefetch using batch_size 1 and inc 524,225 | |
- 500,000,000 incs in 5.911166 seconds or 84,585,678 incs per second without prefetch using batch_size 2 and inc 524,225 | |
- 500,000,000 incs in 6.344528 seconds or 78,808,069 incs per second without prefetch using batch_size 4 and inc 524,225 | |
- 500,000,000 incs in 6.883033 seconds or 72,642,394 incs per second without prefetch using batch_size 8 and inc 524,225 | |
- 500,000,000 incs in 7.105119 seconds or 70,371,798 incs per second without prefetch using batch_size 16 and inc 524,225 | |
- 500,000,000 incs in 7.269122 seconds or 68,784,099 incs per second without prefetch using batch_size 32 and inc 524,225 | |
- 500,000,000 incs in 8.134938 seconds or 61,463,283 incs per second without prefetch using batch_size 64 and inc 524,225 | |
- 500,000,000 incs in 8.326451 seconds or 60,049,593 incs per second without prefetch using batch_size 128 and inc 524,225 | |
- 500,000,000 incs in 6.769783 seconds or 73,857,613 incs per second without prefetch using batch_size 256 and inc 524,225 | |
- 500,000,000 incs in 5.919522 seconds or 84,466,279 incs per second with prefetch using batch_size 1 and inc 524,225 | |
- 500,000,000 incs in 5.298500 seconds or 94,366,329 incs per second with prefetch using batch_size 2 and inc 524,225 | |
- 500,000,000 incs in 5.559668 seconds or 89,933,427 incs per second with prefetch using batch_size 4 and inc 524,225 | |
- 500,000,000 incs in 5.810427 seconds or 86,052,196 incs per second with prefetch using batch_size 8 and inc 524,225 | |
- 500,000,000 incs in 5.176715 seconds or 96,586,346 incs per second with prefetch using batch_size 16 and inc 524,225 | |
- 500,000,000 incs in 5.368901 seconds or 93,128,929 incs per second with prefetch using batch_size 32 and inc 524,225 | |
- 500,000,000 incs in 5.804352 seconds or 86,142,259 incs per second with prefetch using batch_size 64 and inc 524,225 | |
- 500,000,000 incs in 6.189668 seconds or 80,779,775 incs per second with prefetch using batch_size 128 and inc 524,225 | |
- 500,000,000 incs in 5.388826 seconds or 92,784,590 incs per second with prefetch using batch_size 256 and inc 524,225 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment