Skip to content

Instantly share code, notes, and snippets.

@simonhf
Last active February 1, 2024 23:30
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonhf/caaa33ccb87c0bf0775a863c0d6843c2 to your computer and use it in GitHub Desktop.
Save simonhf/caaa33ccb87c0bf0775a863c0d6843c2 to your computer and use it in GitHub Desktop.
Experiment with __builtin_prefetch()
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/time.h>
#include <locale.h>
/*
 * Size of the buffer the benchmark walks (1 GiB). Must stay a power of two:
 * main() wraps its cursors with "& (NUMBYTES - 1)".
 */
#define NUMBYTES (1024*1024*1024)

/*
 * BATCH_SIZE and CACHE_LINE_SIZE are normally injected with -D on the
 * compiler command line. The fallbacks below only exist so the file still
 * builds when they are omitted; 64 is a guess at a typical cache line size,
 * not a detected value.
 */
#ifndef BATCH_SIZE
#define BATCH_SIZE 1
#endif
#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE 64
#endif

/* Buffer that the benchmark fills and then reads back. */
char bytes[NUMBYTES];
/* Number of independent cursors advanced on every pass of the timed loop. */
int batch_size = BATCH_SIZE;
/* Cache line size in bytes; the stride variants in main() are multiples of it. */
int cache_line_size = CACHE_LINE_SIZE;
/*
 * Returns the current time of day, as reported by gettimeofday(), expressed
 * as a single double: whole seconds plus the microsecond part scaled to
 * seconds.
 */
double
get_time_in_seconds(void)
{
    struct timeval now;
    double whole_seconds;
    double fractional_seconds;

    gettimeofday(&now, NULL);

    whole_seconds = (double)now.tv_sec;
    fractional_seconds = 1.e-6 * (double)now.tv_usec;
    return whole_seconds + fractional_seconds;
}
/*
 * Memory-access benchmark driver.
 *
 * Fills the global bytes[] buffer, then walks it with batch_size independent
 * cursors that each advance by a fixed stride (inc) selected at compile time
 * through one of the CACHE_LINE_* stride macros. When CACHE_LINE_PREFETCH is
 * defined, every pass first issues a __builtin_prefetch() for each cursor
 * before the bytes are read. Afterwards it prints the per-cursor sums and the
 * overall number of increments per second.
 *
 * Returns 0 on success, or 1 if batch_size is not a positive number.
 */
int main(void) {
    int i;
    int j;

    /* The arrays below are sized by batch_size and the timed loop divides by
     * it, so reject non-positive values before either can happen. */
    if (batch_size < 1) {
        fprintf(stderr, "batch_size must be at least 1 (got %d)\n", batch_size);
        return 1;
    }

    int p_start[batch_size];  /* starting offset of each cursor */
    int p[batch_size];        /* current offset of each cursor */
    uint64_t c[batch_size];   /* running sum of the bytes each cursor has read */
    int incs[batch_size];     /* number of steps taken by each cursor */
    int incs_total = 0;

    /* Needed so the ' flag in the printf formats groups digits. */
    setlocale(LC_NUMERIC, "");

    /* Stride between consecutive reads of one cursor. Exactly one variant is
     * chosen; if several macros are defined the first match wins, and if none
     * is defined the sequential walk is used. */
#if defined(CACHE_LINE_FRIENDLY)
    int inc = 1 + (cache_line_size * 0);
#elif defined(CACHE_LINE_FRIENDLY_ISH)
    int inc = 1 + (cache_line_size * 1);
#elif defined(CACHE_LINE_FRIENDLIER)
    int inc = 1 + (cache_line_size * 8);
#elif defined(CACHE_LINE_UNFRIENDLY)
    int inc = 1 + (cache_line_size * 8191);
#else
    int inc = 1;
#endif

#ifdef CACHE_LINE_PREFETCH
    const char *prefetch_text = "with prefetch";
#else
    const char *prefetch_text = "without prefetch";
#endif

    /* Touch every byte so the whole buffer is populated before timing starts. */
    for (i = 0; i < NUMBYTES; i++) {
        bytes[i] = i & 255;
    }

    /* Spread the cursors evenly over the buffer, each with a small random offset. */
    for (i = 0; i < batch_size; i++) {
        p_start[i] = ((NUMBYTES / batch_size) * i) + (rand() & 8191);
        p[i] = p_start[i];
        c[i] = 0;
        incs[i] = 0;
    }

    double t1 = get_time_in_seconds();
    for (j = 0; j < (500000000 / batch_size); j++) {
#ifdef CACHE_LINE_PREFETCH
        for (i = 0; i < batch_size; i++) {
            /* rw = 0: the loop below only reads bytes[], so ask for a read
             * prefetch; locality 3 keeps the line in all cache levels.
             * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html */
            __builtin_prefetch(&bytes[p[i]], 0, 3);
        }
#endif
        for (i = 0; i < batch_size; i++) {
            /* Read the byte as unsigned so values 128..255 are not
             * sign-extended on platforms where plain char is signed. */
            c[i] += (unsigned char)bytes[p[i]];
            incs[i]++;
            incs_total++;
            p[i] += inc;
            p[i] = p[i] & (NUMBYTES - 1);  /* wrap around; NUMBYTES is a power of two */
        }
    }
    double t2 = get_time_in_seconds();

    for (i = 0; i < batch_size; i++) {
        printf("- p[%2d]: grand total sum of all bytes with detected cache line size %d at p_start %'13d with %'11d incs of size %'7d %s: %'llu\n", i, cache_line_size, p_start[i], incs[i], inc, prefetch_text, (unsigned long long)c[i]);
    }
    printf("- %'d incs in %f seconds or %'13.0f incs per second %s using batch_size %3d and inc %'7d\n", incs_total, t2 - t1, incs_total / (t2 - t1), prefetch_text, batch_size, inc);

    return 0;
}
# Build and run the benchmark for every stride mode, first without and then
# with prefetching, across batch sizes 1..256. The loops issue the same
# compile-and-run commands, in the same order, as listing each one by hand.

# The cache line size does not change between builds, so read it once.
cache_line_size=$(cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size)

for stride_mode in CACHE_LINE_FRIENDLY CACHE_LINE_FRIENDLY_ISH CACHE_LINE_FRIENDLIER CACHE_LINE_UNFRIENDLY; do
    # An empty prefetch_flag selects the "without prefetch" build.
    for prefetch_flag in "" "-DCACHE_LINE_PREFETCH"; do
        for batch_size in 1 2 4 8 16 32 64 128 256; do
            # $prefetch_flag is deliberately unquoted so the empty value adds no argument.
            gcc -O3 -DBATCH_SIZE=$batch_size $prefetch_flag -D$stride_mode -DCACHE_LINE_SIZE="$cache_line_size" -o cache-line-example cache-line-example.c && ./cache-line-example
        done
    done
done
$ cat /proc/cpuinfo | egrep CPU | head -n 1
model name : Intel(R) Xeon(R) CPU E3-1505M v6 @ 3.00GHz
$ gcc -v 2>&1 | egrep "gcc version"
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9)
$ ./cache-line-example.sh 2>&1 | egrep "per second"
- 500,000,000 incs in 1.192439 seconds or 419,308,633 incs per second without prefetch using batch_size 1 and inc 1
- 500,000,000 incs in 0.641831 seconds or 779,021,365 incs per second without prefetch using batch_size 2 and inc 1
- 500,000,000 incs in 0.559111 seconds or 894,276,618 incs per second without prefetch using batch_size 4 and inc 1
- 500,000,000 incs in 0.516313 seconds or 968,404,682 incs per second without prefetch using batch_size 8 and inc 1
- 500,000,000 incs in 0.515488 seconds or 969,954,406 incs per second without prefetch using batch_size 16 and inc 1
- 500,000,000 incs in 0.658313 seconds or 759,517,088 incs per second without prefetch using batch_size 32 and inc 1
- 500,000,000 incs in 0.873885 seconds or 572,157,719 incs per second without prefetch using batch_size 64 and inc 1
- 500,000,000 incs in 1.028925 seconds or 485,944,095 incs per second without prefetch using batch_size 128 and inc 1
- 500,000,000 incs in 1.059642 seconds or 471,857,442 incs per second without prefetch using batch_size 256 and inc 1
- 500,000,000 incs in 1.392889 seconds or 358,966,143 incs per second with prefetch using batch_size 1 and inc 1
- 500,000,000 incs in 0.846301 seconds or 590,806,289 incs per second with prefetch using batch_size 2 and inc 1
- 500,000,000 incs in 0.735334 seconds or 679,963,193 incs per second with prefetch using batch_size 4 and inc 1
- 500,000,000 incs in 0.697704 seconds or 716,636,202 incs per second with prefetch using batch_size 8 and inc 1
- 500,000,000 incs in 0.691203 seconds or 723,376,617 incs per second with prefetch using batch_size 16 and inc 1
- 500,000,000 incs in 0.732389 seconds or 682,697,335 incs per second with prefetch using batch_size 32 and inc 1
- 500,000,000 incs in 0.865951 seconds or 577,399,835 incs per second with prefetch using batch_size 64 and inc 1
- 500,000,000 incs in 0.909119 seconds or 549,983,072 incs per second with prefetch using batch_size 128 and inc 1
- 500,000,000 incs in 0.922212 seconds or 542,174,612 incs per second with prefetch using batch_size 256 and inc 1
- 500,000,000 incs in 2.072989 seconds or 241,197,615 incs per second without prefetch using batch_size 1 and inc 65
- 500,000,000 incs in 1.934087 seconds or 258,519,906 incs per second without prefetch using batch_size 2 and inc 65
- 500,000,000 incs in 2.043554 seconds or 244,671,774 incs per second without prefetch using batch_size 4 and inc 65
- 500,000,000 incs in 2.218542 seconds or 225,373,255 incs per second without prefetch using batch_size 8 and inc 65
- 500,000,000 incs in 2.483657 seconds or 201,316,033 incs per second without prefetch using batch_size 16 and inc 65
- 500,000,000 incs in 3.074959 seconds or 162,603,805 incs per second without prefetch using batch_size 32 and inc 65
- 500,000,000 incs in 7.140301 seconds or 70,025,059 incs per second without prefetch using batch_size 64 and inc 65
- 500,000,000 incs in 7.409147 seconds or 67,484,151 incs per second without prefetch using batch_size 128 and inc 65
- 500,000,000 incs in 6.267362 seconds or 79,778,381 incs per second without prefetch using batch_size 256 and inc 65
- 500,000,000 incs in 1.966226 seconds or 254,294,254 incs per second with prefetch using batch_size 1 and inc 65
- 500,000,000 incs in 1.806129 seconds or 276,835,157 incs per second with prefetch using batch_size 2 and inc 65
- 500,000,000 incs in 1.914909 seconds or 261,108,997 incs per second with prefetch using batch_size 4 and inc 65
- 500,000,000 incs in 2.052155 seconds or 243,646,311 incs per second with prefetch using batch_size 8 and inc 65
- 500,000,000 incs in 2.348991 seconds or 212,857,358 incs per second with prefetch using batch_size 16 and inc 65
- 500,000,000 incs in 2.963187 seconds or 168,737,243 incs per second with prefetch using batch_size 32 and inc 65
- 500,000,000 incs in 5.385729 seconds or 92,837,941 incs per second with prefetch using batch_size 64 and inc 65
- 500,000,000 incs in 5.740616 seconds or 87,098,666 incs per second with prefetch using batch_size 128 and inc 65
- 500,000,000 incs in 5.044883 seconds or 99,110,326 incs per second with prefetch using batch_size 256 and inc 65
- 500,000,000 incs in 3.912801 seconds or 127,785,695 incs per second without prefetch using batch_size 1 and inc 513
- 500,000,000 incs in 5.686491 seconds or 87,927,687 incs per second without prefetch using batch_size 2 and inc 513
- 500,000,000 incs in 6.747177 seconds or 74,105,068 incs per second without prefetch using batch_size 4 and inc 513
- 500,000,000 incs in 7.287399 seconds or 68,611,585 incs per second without prefetch using batch_size 8 and inc 513
- 500,000,000 incs in 7.915144 seconds or 63,170,045 incs per second without prefetch using batch_size 16 and inc 513
- 500,000,000 incs in 8.054375 seconds or 62,078,064 incs per second without prefetch using batch_size 32 and inc 513
- 500,000,000 incs in 8.143878 seconds or 61,395,812 incs per second without prefetch using batch_size 64 and inc 513
- 500,000,000 incs in 8.451398 seconds or 59,161,809 incs per second without prefetch using batch_size 128 and inc 513
- 500,000,000 incs in 7.070887 seconds or 70,712,485 incs per second without prefetch using batch_size 256 and inc 513
- 500,000,000 incs in 4.269017 seconds or 117,122,982 incs per second with prefetch using batch_size 1 and inc 513
- 500,000,000 incs in 4.804792 seconds or 104,062,779 incs per second with prefetch using batch_size 2 and inc 513
- 500,000,000 incs in 5.900906 seconds or 84,732,750 incs per second with prefetch using batch_size 4 and inc 513
- 500,000,000 incs in 6.368110 seconds or 78,516,232 incs per second with prefetch using batch_size 8 and inc 513
- 500,000,000 incs in 5.795165 seconds or 86,278,819 incs per second with prefetch using batch_size 16 and inc 513
- 500,000,000 incs in 6.077008 seconds or 82,277,331 incs per second with prefetch using batch_size 32 and inc 513
- 500,000,000 incs in 6.136243 seconds or 81,483,082 incs per second with prefetch using batch_size 64 and inc 513
- 500,000,000 incs in 6.488899 seconds or 77,054,675 incs per second with prefetch using batch_size 128 and inc 513
- 500,000,000 incs in 5.689600 seconds or 87,879,640 incs per second with prefetch using batch_size 256 and inc 513
- 500,000,000 incs in 6.443355 seconds or 77,599,324 incs per second without prefetch using batch_size 1 and inc 524,225
- 500,000,000 incs in 5.911166 seconds or 84,585,678 incs per second without prefetch using batch_size 2 and inc 524,225
- 500,000,000 incs in 6.344528 seconds or 78,808,069 incs per second without prefetch using batch_size 4 and inc 524,225
- 500,000,000 incs in 6.883033 seconds or 72,642,394 incs per second without prefetch using batch_size 8 and inc 524,225
- 500,000,000 incs in 7.105119 seconds or 70,371,798 incs per second without prefetch using batch_size 16 and inc 524,225
- 500,000,000 incs in 7.269122 seconds or 68,784,099 incs per second without prefetch using batch_size 32 and inc 524,225
- 500,000,000 incs in 8.134938 seconds or 61,463,283 incs per second without prefetch using batch_size 64 and inc 524,225
- 500,000,000 incs in 8.326451 seconds or 60,049,593 incs per second without prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 6.769783 seconds or 73,857,613 incs per second without prefetch using batch_size 256 and inc 524,225
- 500,000,000 incs in 5.919522 seconds or 84,466,279 incs per second with prefetch using batch_size 1 and inc 524,225
- 500,000,000 incs in 5.298500 seconds or 94,366,329 incs per second with prefetch using batch_size 2 and inc 524,225
- 500,000,000 incs in 5.559668 seconds or 89,933,427 incs per second with prefetch using batch_size 4 and inc 524,225
- 500,000,000 incs in 5.810427 seconds or 86,052,196 incs per second with prefetch using batch_size 8 and inc 524,225
- 500,000,000 incs in 5.176715 seconds or 96,586,346 incs per second with prefetch using batch_size 16 and inc 524,225
- 500,000,000 incs in 5.368901 seconds or 93,128,929 incs per second with prefetch using batch_size 32 and inc 524,225
- 500,000,000 incs in 5.804352 seconds or 86,142,259 incs per second with prefetch using batch_size 64 and inc 524,225
- 500,000,000 incs in 6.189668 seconds or 80,779,775 incs per second with prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 5.388826 seconds or 92,784,590 incs per second with prefetch using batch_size 256 and inc 524,225
@simonhf
Copy link
Author

simonhf commented Nov 24, 2020

$ cat /proc/cpuinfo | egrep CPU | head -n 1
model name      : Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz

$ gcc -v 2>&1 | egrep "gcc version"
gcc version 9.2.1 20191008 (Ubuntu 9.2.1-9ubuntu2)

$ ./cache-line-example.sh 2>&1 | egrep "per second"
- 500,000,000 incs in  2.021004 seconds or   247,401,791 incs per second without prefetch using batch_size   1 and inc       1
- 500,000,000 incs in  1.095475 seconds or   456,423,030 incs per second without prefetch using batch_size   2 and inc       1
- 500,000,000 incs in  0.718603 seconds or   695,794,349 incs per second without prefetch using batch_size   4 and inc       1
- 500,000,000 incs in  0.566984 seconds or   881,859,196 incs per second without prefetch using batch_size   8 and inc       1
- 500,000,000 incs in  0.522834 seconds or   956,326,368 incs per second without prefetch using batch_size  16 and inc       1
- 500,000,000 incs in  0.515499 seconds or   969,934,219 incs per second without prefetch using batch_size  32 and inc       1
- 500,000,000 incs in  0.622472 seconds or   803,248,920 incs per second without prefetch using batch_size  64 and inc       1
- 500,000,000 incs in  0.830954 seconds or   601,718,144 incs per second without prefetch using batch_size 128 and inc       1
- 500,000,000 incs in  0.820290 seconds or   609,540,640 incs per second without prefetch using batch_size 256 and inc       1
- 500,000,000 incs in  1.280117 seconds or   390,589,287 incs per second with    prefetch using batch_size   1 and inc       1
- 500,000,000 incs in  1.031492 seconds or   484,734,736 incs per second with    prefetch using batch_size   2 and inc       1
- 500,000,000 incs in  0.712676 seconds or   701,581,036 incs per second with    prefetch using batch_size   4 and inc       1
- 500,000,000 incs in  0.625220 seconds or   799,718,727 incs per second with    prefetch using batch_size   8 and inc       1
- 500,000,000 incs in  0.627607 seconds or   796,677,068 incs per second with    prefetch using batch_size  16 and inc       1
- 500,000,000 incs in  0.659080 seconds or   758,633,213 incs per second with    prefetch using batch_size  32 and inc       1
- 500,000,000 incs in  0.787064 seconds or   635,272,293 incs per second with    prefetch using batch_size  64 and inc       1
- 500,000,000 incs in  0.853518 seconds or   585,810,721 incs per second with    prefetch using batch_size 128 and inc       1
- 500,000,000 incs in  0.810272 seconds or   617,076,751 incs per second with    prefetch using batch_size 256 and inc       1
- 500,000,000 incs in  4.006759 seconds or   124,789,140 incs per second without prefetch using batch_size   1 and inc      65
- 500,000,000 incs in  2.958549 seconds or   169,001,763 incs per second without prefetch using batch_size   2 and inc      65
- 500,000,000 incs in  2.793843 seconds or   178,964,958 incs per second without prefetch using batch_size   4 and inc      65
- 500,000,000 incs in  2.502812 seconds or   199,775,281 incs per second without prefetch using batch_size   8 and inc      65
- 500,000,000 incs in  2.746686 seconds or   182,037,555 incs per second without prefetch using batch_size  16 and inc      65
- 500,000,000 incs in  3.311481 seconds or   150,989,844 incs per second without prefetch using batch_size  32 and inc      65
- 500,000,000 incs in  5.884480 seconds or    84,969,275 incs per second without prefetch using batch_size  64 and inc      65
- 500,000,000 incs in  5.875749 seconds or    85,095,536 incs per second without prefetch using batch_size 128 and inc      65
- 500,000,000 incs in  6.285364 seconds or    79,549,889 incs per second without prefetch using batch_size 256 and inc      65
- 500,000,000 incs in  3.178410 seconds or   157,311,357 incs per second with    prefetch using batch_size   1 and inc      65
- 500,000,000 incs in  2.755040 seconds or   181,485,557 incs per second with    prefetch using batch_size   2 and inc      65
- 500,000,000 incs in  2.313558 seconds or   216,117,352 incs per second with    prefetch using batch_size   4 and inc      65
- 500,000,000 incs in  2.276980 seconds or   219,589,089 incs per second with    prefetch using batch_size   8 and inc      65
- 500,000,000 incs in  2.184209 seconds or   228,915,811 incs per second with    prefetch using batch_size  16 and inc      65
- 500,000,000 incs in  2.667770 seconds or   187,422,460 incs per second with    prefetch using batch_size  32 and inc      65
- 500,000,000 incs in  4.201359 seconds or   119,009,117 incs per second with    prefetch using batch_size  64 and inc      65
- 500,000,000 incs in  4.399194 seconds or   113,657,184 incs per second with    prefetch using batch_size 128 and inc      65
- 500,000,000 incs in  4.893813 seconds or   102,169,819 incs per second with    prefetch using batch_size 256 and inc      65
- 500,000,000 incs in 10.928870 seconds or    45,750,384 incs per second without prefetch using batch_size   1 and inc     513
- 500,000,000 incs in  9.753981 seconds or    51,261,122 incs per second without prefetch using batch_size   2 and inc     513
- 500,000,000 incs in  8.099245 seconds or    61,734,151 incs per second without prefetch using batch_size   4 and inc     513
- 500,000,000 incs in 12.516688 seconds or    39,946,670 incs per second without prefetch using batch_size   8 and inc     513
- 500,000,000 incs in 10.533810 seconds or    47,466,207 incs per second without prefetch using batch_size  16 and inc     513
- 500,000,000 incs in  9.723798 seconds or    51,420,237 incs per second without prefetch using batch_size  32 and inc     513
- 500,000,000 incs in 10.918548 seconds or    45,793,635 incs per second without prefetch using batch_size  64 and inc     513
- 500,000,000 incs in 10.353710 seconds or    48,291,869 incs per second without prefetch using batch_size 128 and inc     513
- 500,000,000 incs in 11.144872 seconds or    44,863,683 incs per second without prefetch using batch_size 256 and inc     513
- 500,000,000 incs in 11.974536 seconds or    41,755,272 incs per second with    prefetch using batch_size   1 and inc     513
- 500,000,000 incs in  9.235986 seconds or    54,136,072 incs per second with    prefetch using batch_size   2 and inc     513
- 500,000,000 incs in  7.878429 seconds or    63,464,428 incs per second with    prefetch using batch_size   4 and inc     513
- 500,000,000 incs in  8.091980 seconds or    61,789,575 incs per second with    prefetch using batch_size   8 and inc     513
- 500,000,000 incs in  7.595594 seconds or    65,827,637 incs per second with    prefetch using batch_size  16 and inc     513
- 500,000,000 incs in  6.855203 seconds or    72,937,299 incs per second with    prefetch using batch_size  32 and inc     513
- 500,000,000 incs in  6.203684 seconds or    80,597,270 incs per second with    prefetch using batch_size  64 and inc     513
- 500,000,000 incs in  6.258229 seconds or    79,894,807 incs per second with    prefetch using batch_size 128 and inc     513
- 500,000,000 incs in  6.648074 seconds or    75,209,751 incs per second with    prefetch using batch_size 256 and inc     513
- 500,000,000 incs in 38.619608 seconds or    12,946,791 incs per second without prefetch using batch_size   1 and inc 524,225
- 500,000,000 incs in 37.015244 seconds or    13,507,948 incs per second without prefetch using batch_size   2 and inc 524,225
- 500,000,000 incs in 40.240287 seconds or    12,425,359 incs per second without prefetch using batch_size   4 and inc 524,225
- 500,000,000 incs in 42.370859 seconds or    11,800,563 incs per second without prefetch using batch_size   8 and inc 524,225
- 500,000,000 incs in 41.429849 seconds or    12,068,593 incs per second without prefetch using batch_size  16 and inc 524,225
- 500,000,000 incs in 35.713304 seconds or    14,000,385 incs per second without prefetch using batch_size  32 and inc 524,225
- 500,000,000 incs in 33.193502 seconds or    15,063,189 incs per second without prefetch using batch_size  64 and inc 524,225
- 500,000,000 incs in 36.240045 seconds or    13,796,892 incs per second without prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 30.931728 seconds or    16,164,632 incs per second without prefetch using batch_size 256 and inc 524,225
- 500,000,000 incs in 38.037277 seconds or    13,145,000 incs per second with    prefetch using batch_size   1 and inc 524,225
- 500,000,000 incs in 36.037931 seconds or    13,874,270 incs per second with    prefetch using batch_size   2 and inc 524,225
- 500,000,000 incs in 36.395219 seconds or    13,738,068 incs per second with    prefetch using batch_size   4 and inc 524,225
- 500,000,000 incs in 37.799736 seconds or    13,227,606 incs per second with    prefetch using batch_size   8 and inc 524,225
- 500,000,000 incs in 38.556575 seconds or    12,967,957 incs per second with    prefetch using batch_size  16 and inc 524,225
- 500,000,000 incs in 28.244046 seconds or    17,702,846 incs per second with    prefetch using batch_size  32 and inc 524,225
- 500,000,000 incs in 22.569705 seconds or    22,153,590 incs per second with    prefetch using batch_size  64 and inc 524,225
- 500,000,000 incs in 18.202306 seconds or    27,469,047 incs per second with    prefetch using batch_size 128 and inc 524,225
- 500,000,000 incs in 15.375037 seconds or    32,520,247 incs per second with    prefetch using batch_size 256 and inc 524,225

@simonhf
Copy link
Author

simonhf commented Nov 24, 2020

Screenshot from 2020-11-23 17-46-12

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment