Skip to content

Instantly share code, notes, and snippets.

@simonhf
Last active February 1, 2024 23:30
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonhf/caaa33ccb87c0bf0775a863c0d6843c2 to your computer and use it in GitHub Desktop.
Save simonhf/caaa33ccb87c0bf0775a863c0d6843c2 to your computer and use it in GitHub Desktop.
Experiment with __builtin_prefetch()
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/time.h>
#include <locale.h>
/* Size of the scanned buffer (1 GiB). Kept a power of two so that indexes
 * can be wrapped with a simple "& (NUMBYTES - 1)" mask. */
#define NUMBYTES (1024*1024*1024)

/* Build-time knobs, normally supplied with -DBATCH_SIZE=... and
 * -DCACHE_LINE_SIZE=... on the compiler command line. The fallbacks below
 * only exist so the file still compiles when a knob is omitted. */
#ifndef BATCH_SIZE
#define BATCH_SIZE 1 /* one cursor: no interleaving of independent reads */
#endif
#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64 /* common x86-64 value; pass the real one for accurate strides */
#endif

/* The buffer that the benchmark walks. */
char bytes[NUMBYTES];
/* Number of independent cursors advanced per outer-loop iteration. */
int batch_size = BATCH_SIZE;
/* Cache line size in bytes, used to derive the access stride. */
int cache_line_size = CACHE_LINE_SIZE;
/*
 * Return the current time of day, as reported by gettimeofday(), expressed
 * in seconds with the microsecond part as the fraction.
 */
double
get_time_in_seconds(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double fractional_seconds = 1.e-6 * (double)now.tv_usec;

    return whole_seconds + fractional_seconds;
}
/*
 * Benchmark driver.
 *
 * Fills the global `bytes` buffer, then advances `batch_size` independent
 * cursors through it with a fixed stride, summing the byte under each cursor.
 * The timed loop performs roughly 500,000,000 reads in total and the
 * achieved reads ("incs") per second are printed.
 *
 * Build-time switches:
 *   CACHE_LINE_FRIENDLY      stride 1
 *   CACHE_LINE_FRIENDLY_ISH  stride 1 + 1 cache line
 *   CACHE_LINE_FRIENDLIER    stride 1 + 8 cache lines
 *   CACHE_LINE_UNFRIENDLY    stride 1 + 8191 cache lines
 *   CACHE_LINE_PREFETCH      issue __builtin_prefetch() for the whole batch
 *                            before reading it
 * If no stride switch is given, stride 1 is used.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when batch_size is not positive.
 */
int main(void) {
    /* A non-positive batch size would create zero-length VLAs and divide by
     * zero in the loop bound below. */
    if (batch_size < 1) {
        fprintf(stderr, "batch_size must be at least 1 (got %d)\n", batch_size);
        return EXIT_FAILURE;
    }

    int p_start[batch_size];  /* initial index of each cursor */
    int p[batch_size];        /* current index of each cursor */
    uint64_t c[batch_size];   /* running byte sum per cursor */
    int incs[batch_size];     /* number of reads per cursor */
    int incs_total = 0;       /* number of reads over all cursors */

    /* Use the user's locale so the ' printf flag can group digits. */
    setlocale(LC_NUMERIC, "");

#if defined(CACHE_LINE_FRIENDLY)
    const int inc = 1 + (cache_line_size * 0);
#elif defined(CACHE_LINE_FRIENDLY_ISH)
    const int inc = 1 + (cache_line_size * 1);
#elif defined(CACHE_LINE_FRIENDLIER)
    const int inc = 1 + (cache_line_size * 8);
#elif defined(CACHE_LINE_UNFRIENDLY)
    const int inc = 1 + (cache_line_size * 8191);
#else
    const int inc = 1; /* no stride selected: behave like CACHE_LINE_FRIENDLY */
#endif

#ifdef CACHE_LINE_PREFETCH
    const char *prefetch_text = "with prefetch";
#else
    const char *prefetch_text = "without prefetch";
#endif

    /* Touch every byte so the whole buffer is populated before timing. */
    for (int i = 0; i < NUMBYTES; i++) {
        bytes[i] = i & 255;
    }

    /* Spread the cursors evenly over the buffer, each with a small random
     * offset; the mask keeps the start inside the buffer for any batch size. */
    for (int i = 0; i < batch_size; i++) {
        p_start[i] = (((NUMBYTES / batch_size) * i) + (rand() & 8191)) & (NUMBYTES - 1);
        p[i] = p_start[i];
        c[i] = 0;
        incs[i] = 0;
    }

    const int rounds = 500000000 / batch_size;

    double t1 = get_time_in_seconds();
    for (int j = 0; j < rounds; j++) {
#ifdef CACHE_LINE_PREFETCH
        for (int i = 0; i < batch_size; i++) {
            /* NOTE(review): rw=1 requests a prefetch for writing although the
             * loop below only reads; left as-is so results stay comparable. */
            __builtin_prefetch(&bytes[p[i]], 1, 3); // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
        }
#endif
        for (int i = 0; i < batch_size; i++) {
            /* Read as unsigned: plain char may be signed, which would add
             * sign-extended negative values into the 64-bit sum. */
            c[i] += (unsigned char)bytes[p[i]];
            incs[i]++;
            incs_total++;
            /* Advance and wrap; NUMBYTES is a power of two. */
            p[i] = (p[i] + inc) & (NUMBYTES - 1);
        }
    }
    double t2 = get_time_in_seconds();

    /* All printed integers are non-negative; the casts make the arguments
     * match their unsigned conversion specifiers exactly. */
    for (int i = 0; i < batch_size; i++) {
        printf("- p[%2u]: grand total sum of all bytes with detected cache line size %u at p_start %'13u with %'11u incs of size %'7u %s: %'llu\n", (unsigned)i, (unsigned)cache_line_size, (unsigned)p_start[i], (unsigned)incs[i], (unsigned)inc, prefetch_text, (unsigned long long)c[i]);
    }
    printf("- %'u incs in %f seconds or %'13.0f incs per second %s using batch_size %3u and inc %'7u\n", (unsigned)incs_total, t2 - t1, incs_total / (t2 - t1), prefetch_text, (unsigned)batch_size, (unsigned)inc);

    return EXIT_SUCCESS;
}
# Build and run the benchmark for every combination of access stride,
# prefetch setting and batch size. The order matches the flat command list:
# for each stride, first all batch sizes without prefetch, then all with it.
line_size=$(cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size)
for stride in FRIENDLY FRIENDLY_ISH FRIENDLIER UNFRIENDLY; do
  for prefetch_flag in "" "-DCACHE_LINE_PREFETCH"; do
    for batch in 1 2 4 8 16 32 64 128 256; do
      # $prefetch_flag is intentionally unquoted so the empty value vanishes.
      gcc -O3 -DBATCH_SIZE=$batch $prefetch_flag -DCACHE_LINE_$stride -DCACHE_LINE_SIZE=$line_size -o cache-line-example cache-line-example.c && ./cache-line-example
    done
  done
done
$ cat /proc/cpuinfo | egrep CPU | head -n 1
model name : Intel(R) Xeon(R) CPU E3-1505M v6 @ 3.00GHz
$ gcc -v 2>&1 | egrep "gcc version"
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9)
$ ./cache-line-example.sh 2>&1 | egrep "per second"
- 500,000,000 incs in 1.192439 seconds or 419,308,633 incs per second without prefetch using batch_size 1 and inc 1
- 500,000,000 incs in 0.641831 seconds or 779,021,365 incs per second without prefetch using batch_size 2 and inc 1
- 500,000,000 incs in 0.559111 seconds or 894,276,618 incs per second without prefetch using batch_size 4 and inc 1
- 500,000,000 incs in 0.516313 seconds or 968,404,682 incs per second without prefetch using batch_size 8 and inc 1
- 500,000,000 incs in 0.515488 seconds or 969,954,406 incs per second without prefetch using batch_size 16 and inc 1
- 500,000,000 incs in 0.658313 seconds or 759,517,088 incs per second without prefetch using batch_size 32 and inc 1
- 500,000,000 incs in 0.873885 seconds or 572,157,719 incs per second without prefetch using batch_size 64 and inc 1
- 500,000,000 incs in 1.028925 seconds or 485,944,095 incs per second without prefetch using batch_size 128 and inc 1
- 500,000,000 incs in 1.059642 seconds or 471,857,442 incs per second without prefetch using batch_size 256 and inc 1
- 500,000,000 incs in 1.392889 seconds or 358,966,143 incs per second with prefetch using batch_size 1 and inc 1
- 500,000,000 incs in 0.846301 seconds or 590,806,289 incs per second with prefetch using batch_size 2 and inc 1
- 500,000,000 incs in 0.735334 seconds or 679,963,193 incs per second with prefetch using batch_size 4 and inc 1
- 500,000,000 incs in 0.697704 seconds or 716,636,202 incs per second with prefetch using batch_size 8 and inc 1
- 500,000,000 incs in 0.691203 seconds or 723,376,617 incs per second with prefetch using batch_size 16 and inc 1
- 500,000,000 incs in 0.732389 seconds or 682,697,335 incs per second with prefetch using batch_size 32 and inc 1
- 500,000,000 incs in 0.865951 seconds or 577,399,835 incs per second with prefetch using batch_size 64 and inc 1
- 500,000,000 incs in 0.909119 seconds or 549,983,072 incs per second with prefetch using batch_size 128 and inc 1
- 500,000,000 incs in 0.922212 seconds or 542,174,612 incs per second with prefetch using batch_size 256 and inc 1
- 500,000,000 incs in 2.072989 seconds or 241,197,615 incs per second without prefetch using batch_size 1 and inc 65
- 500,000,000 incs in 1.934087 seconds or 258,519,906 incs per second without prefetch using batch_size 2 and inc 65
- 500,000,000 incs in 2.043554 seconds or 244,671,774 incs per second without prefetch using batch_size 4 and inc 65
- 500,000,000 incs in 2.218542 seconds or 225,373,255 incs per second without prefetch using batch_size 8 and inc 65
- 500,000,000 incs in 2.483657 seconds or 201,316,033 incs per second without prefetch using batch_size 16 and inc 65
- 500,000,000 incs in 3.074959 seconds or 162,603,805 incs per second without prefetch using batch_size 32 and inc 65
- 500,000,000 incs in 7.140301 seconds or 70,025,059 incs per second without prefetch using batch_size 64 and inc 65
- 500,000,000 incs in 7.409147 seconds or 67,484,151 incs per second without prefetch using batch_size 128 and inc 65
- 500,000,000 incs in 6.267362 seconds or 79,778,381 incs per second without prefetch using batch_size 256 and inc 65
- 500,000,000 incs in 1.966226 seconds or 254,294,254 incs per second with prefetch using batch_size 1 and inc 65
- 500,000,000 incs in 1.806129 seconds or 276,835,157 incs per second with prefetch using batch_size 2 and inc 65
- 500,000,000 incs in 1.914909 seconds or 261,108,997 incs per second with prefetch using batch_size 4 and inc 65
- 500,000,000 incs in 2.052155 seconds or 243,646,311 incs per second with prefetch using batch_size 8 and inc 65
- 500,000,000 incs in 2.348991 seconds or 212,857,358 incs per second with prefetch using batch_size 16 and inc 65
- 500,000,000 incs in 2.963187 seconds or 168,737,243 incs per second with prefetch using batch_size 32 and inc 65
- 500,000,000 incs in 5.385729 seconds or 92,837,941 incs per second with prefetch using batch_size 64 and inc 65
- 500,000,000 incs in 5.740616 seconds or 87,098,666 incs per second with prefetch using batch_size 128 and inc 65
- 500,000,000 incs in 5.044883 seconds or 99,110,326 incs per second with prefetch using batch_size 256 and inc 65
- 500,000,000 incs in 3.912801 seconds or 127,785,695 incs per second without prefetch using batch_size 1 and inc 513
- 500,000,000 incs in 5.686491 seconds or 87,927,687 incs per second without prefetch using batch_size 2 and inc 513
- 500,000,000 incs in 6.747177 seconds or 74,105,068 incs per second without prefetch using batch_size 4 and inc 513
- 500,000,000 incs in 7.287399 seconds or 68,611,585 incs per second without prefetch using batch_size 8 and inc 513
- 500,000,000 incs in 7.915144 seconds or 63,170,045 incs per second without prefetch using batch_size 16 and inc 513
- 500,000,000 incs in 8.054375 seconds or 62,078,064 incs per second without prefetch using batch_size 32 and inc 513
- 500,000,000 incs in 8.143878 seconds or 61,395,812 incs per second without prefetch using batch_size 64 and inc 513
- 500,000,000 incs in 8.451398 seconds or 59,161,809 incs per second without prefetch using batch_size 128 and inc 513
- 500,000,000 incs in 7.070887 seconds or 70,712,485 incs per second without prefetch using batch_size 256 and inc 513
- 500,000,000 incs in 4.269017 seconds or 117,122,982 incs per second with prefetch using batch_size 1 and inc 513
- 500,000,000 incs in 4.804792 seconds or 104,062,779 incs per second with prefetch using batch_size 2 and inc 513
- 500,000,000 incs in 5.900906 seconds or 84,732,750 incs per second with prefetch using batch_size 4 and inc 513
- 500,000,000 incs in 6.368110 seconds or 78,516,232 incs per second with prefetch using batch_size 8 and inc 513
- 500,000,000 incs in 5.795165 seconds or 86,278,819 incs per second with prefetch using batch_size 16 and inc 513
- 500,000,000 incs in 6.077008 seconds or 82,277,331 incs per second with prefetch using batch_size 32 and inc 513
- 500,000,000 incs in 6.136243 seconds or 81,483,082 incs per second with prefetch using batch_size 64 and inc 513
- 500,000,000 incs in 6.488899 seconds or 77,054,675 incs per second with prefetch using batch_size 128 and inc 513
- 500,000,000 incs in 5.689600 seconds or 87,879,640 incs per second with prefetch using batch_size 256 and inc 513
- 500,000,000 incs in 6.443355 seconds or 77,599,324 incs per second without prefetch using batch_size 1 and inc 524,225
- 500,000,000 incs in 5.911166 seconds or 84,585,678 incs per second without prefetch using batch_size 2 and inc 524,225
- 500,000,000 incs in 6.344528 seconds or 78,808,069 incs per second without prefetch using batch_size 4 and inc 524,225
- 500,000,000 incs in 6.883033 seconds or 72,642,394 incs per second without prefetch using batch_size 8 and inc 524,225
- 500,000,000 incs in 7.105119 seconds or 70,371,798 incs per second without prefetch using batch_size 16 and inc 524,225
- 500,000,000 incs in 7.269122 seconds or 68,784,099 incs per second without prefetch using batch_size 32 and inc 524,225
- 500,000,000 incs in 8.134938 seconds or 61,463,283 incs per second without prefetch using batch_size 64 and inc 524,225
- 500,000,000 incs in 8.326451 seconds or 60,049,593 incs per second without prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 6.769783 seconds or 73,857,613 incs per second without prefetch using batch_size 256 and inc 524,225
- 500,000,000 incs in 5.919522 seconds or 84,466,279 incs per second with prefetch using batch_size 1 and inc 524,225
- 500,000,000 incs in 5.298500 seconds or 94,366,329 incs per second with prefetch using batch_size 2 and inc 524,225
- 500,000,000 incs in 5.559668 seconds or 89,933,427 incs per second with prefetch using batch_size 4 and inc 524,225
- 500,000,000 incs in 5.810427 seconds or 86,052,196 incs per second with prefetch using batch_size 8 and inc 524,225
- 500,000,000 incs in 5.176715 seconds or 96,586,346 incs per second with prefetch using batch_size 16 and inc 524,225
- 500,000,000 incs in 5.368901 seconds or 93,128,929 incs per second with prefetch using batch_size 32 and inc 524,225
- 500,000,000 incs in 5.804352 seconds or 86,142,259 incs per second with prefetch using batch_size 64 and inc 524,225
- 500,000,000 incs in 6.189668 seconds or 80,779,775 incs per second with prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 5.388826 seconds or 92,784,590 incs per second with prefetch using batch_size 256 and inc 524,225
@simonhf
Copy link
Author

simonhf commented Nov 24, 2020

Screenshot from 2020-11-23 17-46-12

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment