Skip to content

Instantly share code, notes, and snippets.

@pitrou
Created January 20, 2015 09:13
Show Gist options
  • Save pitrou/959b3b5186309e0bc4be to your computer and use it in GitHub Desktop.
Save pitrou/959b3b5186309e0bc4be to your computer and use it in GitHub Desktop.
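// Build:   gcc avx.c -mavx -std=c99 -O2
// Collect: while true; do ./a.out; done > log
//          (each run appends one line of space-separated cycle counts, one per kernel call)
// Analyze: load the log with np.genfromtxt and compare per-column statistics,
//          e.g. min, 10th percentile, median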
#include <stdio.h>
#include <string.h>
#include <x86intrin.h>
#include <avxintrin.h>
#define N 3000
/*
 * Aligned AVX: a[i] += b[i] over N doubles, four elements per iteration.
 *
 * a, b   - arrays holding at least N doubles. The aligned 256-bit load/store
 *          intrinsics are used, so both pointers must be 32-byte aligned.
 *          The loop advances by 4, so if N is not a multiple of 4 the final
 *          iteration touches elements past index N-1.
 * warmup - non-zero: run the kernel and print nothing.
 *          zero: print the elapsed time-stamp-counter ticks followed by a
 *          single space on stdout.
 */
void __attribute__((noinline)) add(double * a, double * b, int warmup)
{
    /* __rdtscp writes an auxiliary value through an unsigned int pointer;
     * the value itself is not used here. */
    unsigned int aux;
    const unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 4) {
        const __m256d sum = _mm256_add_pd(_mm256_load_pd(&a[i]),
                                          _mm256_load_pd(&b[i]));
        _mm256_store_pd(&a[i], sum);
    }

    if (!warmup) {
        /* The tick delta is unsigned long long, hence %llu. */
        printf("%llu ", __rdtscp(&aux) - start);
    }
}
/*
 * Unaligned AVX: a[i] += b[i] over N doubles, four elements per iteration.
 *
 * a, b   - arrays holding at least N doubles. The unaligned 256-bit
 *          load/store intrinsics are used, so the pointers need no particular
 *          vector alignment. The loop advances by 4, so if N is not a
 *          multiple of 4 the final iteration touches elements past index N-1.
 * warmup - non-zero: run the kernel and print nothing.
 *          zero: print the elapsed time-stamp-counter ticks followed by a
 *          single space on stdout.
 */
void __attribute__((noinline)) add2(double * a, double * b, int warmup)
{
    /* __rdtscp writes an auxiliary value through an unsigned int pointer;
     * the value itself is not used here. */
    unsigned int aux;
    const unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 4) {
        const __m256d sum = _mm256_add_pd(_mm256_loadu_pd(&a[i]),
                                          _mm256_loadu_pd(&b[i]));
        _mm256_storeu_pd(&a[i], sum);
    }

    if (!warmup) {
        /* The tick delta is unsigned long long, hence %llu. */
        printf("%llu ", __rdtscp(&aux) - start);
    }
}
/*
 * Aligned SSE2: a[i] += b[i] over N doubles, two elements per iteration.
 *
 * a, b   - arrays holding at least N doubles. The aligned 128-bit load/store
 *          intrinsics are used, so both pointers must be 16-byte aligned.
 *          The loop advances by 2, so if N is odd the final iteration touches
 *          one element past index N-1.
 * warmup - non-zero: run the kernel and print nothing.
 *          zero: print the elapsed time-stamp-counter ticks followed by a
 *          single space on stdout.
 */
void __attribute__((noinline)) add3(double * a, double * b, int warmup)
{
    /* __rdtscp writes an auxiliary value through an unsigned int pointer;
     * the value itself is not used here. */
    unsigned int aux;
    const unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 2) {
        const __m128d sum = _mm_add_pd(_mm_load_pd(&a[i]),
                                       _mm_load_pd(&b[i]));
        _mm_store_pd(&a[i], sum);
    }

    if (!warmup) {
        /* The tick delta is unsigned long long, hence %llu. */
        printf("%llu ", __rdtscp(&aux) - start);
    }
}
/*
 * Scalar baseline: a[i] += b[i] over N doubles, written as a plain C loop
 * with no explicit SIMD intrinsics.
 *
 * a, b   - arrays holding at least N doubles.
 * warmup - non-zero: run the kernel and print nothing.
 *          zero: print the elapsed time-stamp-counter ticks followed by a
 *          single space on stdout.
 */
void __attribute__((noinline)) add9(double * a, double * b, int warmup)
{
    /* __rdtscp writes an auxiliary value through an unsigned int pointer;
     * the value itself is not used here. */
    unsigned int aux;
    const unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i++) {
        a[i] += b[i];
    }

    if (!warmup) {
        /* The tick delta is unsigned long long, hence %llu. */
        printf("%llu ", __rdtscp(&aux) - start);
    }
}
/*
 * Benchmark driver: runs each kernel once as a warm-up (the aligned AVX
 * kernel ten times) and once timed, so a single process prints one line of
 * five tick counts in this order:
 *   aligned AVX, unaligned AVX at +2 doubles, unaligned AVX at +1 double,
 *   aligned SSE2, scalar.
 *
 * Returns 0 on success, 1 if the work buffers cannot be allocated.
 */
int main(int argc, const char *argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /*
     * The kernels process N doubles starting at the pointer they are given,
     * and the unaligned runs start up to 2 doubles into the buffer, so the
     * buffer must hold more than N doubles. Four spare doubles (one full
     * 256-bit vector) keep every access in bounds; sizing the slack in bytes
     * instead of elements would leave the +2 run writing past the end.
     */
    const size_t bytes = (N + 4) * sizeof(double);

    double *a = _mm_malloc(bytes, 32);
    double *b = _mm_malloc(bytes, 32);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "failed to allocate %zu-byte work buffers\n", bytes);
        _mm_free(a);
        _mm_free(b);
        return 1;
    }

    memset(a, 0, bytes);
    memset(b, 0, bytes);

    /* Aligned AVX: ten warm-up passes, then the timed pass. */
    for (int pass = 0; pass < 10; pass++) {
        add(a, b, 1);
    }
    add(a, b, 0);

    /* Unaligned AVX, buffers offset by 2 doubles (16 bytes). */
    add2(a + 2, b + 2, 1);
    add2(a + 2, b + 2, 0);

    /* Unaligned AVX, buffers offset by 1 double (8 bytes). */
    add2(a + 1, b + 1, 1);
    add2(a + 1, b + 1, 0);

    /* Aligned SSE2. */
    add3(a, b, 1);
    add3(a, b, 0);

    /* Scalar baseline. */
    add9(a, b, 1);
    add9(a, b, 0);

    /* Terminate the line of tick counts. */
    puts("");

    /* Memory from _mm_malloc must be released with _mm_free. */
    _mm_free(a);
    _mm_free(b);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment