Skip to content

Instantly share code, notes, and snippets.

@pitrou
Created January 19, 2015 22:36
Show Gist options
  • Save pitrou/892219a7d4c6d37de201 to your computer and use it in GitHub Desktop.
Save pitrou/892219a7d4c6d37de201 to your computer and use it in GitHub Desktop.
// gcc avx.c -mavx -std=c99 -O2
// Collect samples (each run prints one line of five cycle counts):
//   while true; do ./a.out; done > log
// Analyze the log with NumPy (np.genfromtxt), e.g. per-column min, 10th percentile, median.
#include <stdio.h>
#include <string.h>
#include <x86intrin.h>
#include <avxintrin.h>
#define N 5000
/*
 * Aligned AVX: doubles every element of a[0..N-1] in place, four doubles per
 * iteration, and prints the elapsed time-stamp-counter delta followed by a
 * space unless `warmup` is non-zero.
 *
 * a      - buffer of at least N doubles; it is accessed with the aligned
 *          _mm256_load_pd/_mm256_store_pd forms, so it must be 32-byte aligned.
 *          The loop steps by 4 and assumes N is a multiple of 4.
 * warmup - non-zero: run the loop but do not read the counter again or print.
 */
void __attribute__((noinline)) add(double * a, int warmup)
{
    unsigned int aux; /* auxiliary output of __rdtscp (unused); must be unsigned */
    unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 4) {
        __m256d av = _mm256_load_pd(&a[i]);
        av = _mm256_add_pd(av, av);
        _mm256_store_pd(&a[i], av);
    }

    if (!warmup) {
        /* Convert explicitly so the argument type matches the %lld specifier. */
        long long elapsed = (long long)(__rdtscp(&aux) - start);
        printf("%lld ", elapsed);
    }
}
/*
 * Unaligned AVX: doubles every element of a[0..N-1] in place, four doubles per
 * iteration, using the unaligned _mm256_loadu_pd/_mm256_storeu_pd forms, and
 * prints the elapsed time-stamp-counter delta followed by a space unless
 * `warmup` is non-zero.
 *
 * a      - buffer of at least N doubles starting at `a`; no 32-byte alignment
 *          is required. The loop steps by 4 and assumes N is a multiple of 4.
 *          Callers passing an offset pointer must make sure the allocation
 *          still covers N doubles from that offset.
 * warmup - non-zero: run the loop but do not read the counter again or print.
 */
void __attribute__((noinline)) add2(double * a, int warmup)
{
    unsigned int aux; /* auxiliary output of __rdtscp (unused); must be unsigned */
    unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 4) {
        __m256d av = _mm256_loadu_pd(&a[i]);
        av = _mm256_add_pd(av, av);
        _mm256_storeu_pd(&a[i], av);
    }

    if (!warmup) {
        /* Convert explicitly so the argument type matches the %lld specifier. */
        long long elapsed = (long long)(__rdtscp(&aux) - start);
        printf("%lld ", elapsed);
    }
}
/*
 * Aligned SSE2: doubles every element of a[0..N-1] in place, two doubles per
 * iteration, and prints the elapsed time-stamp-counter delta followed by a
 * space unless `warmup` is non-zero.
 *
 * a      - buffer of at least N doubles; it is accessed with the aligned
 *          _mm_load_pd/_mm_store_pd forms, so it must be 16-byte aligned.
 *          The loop steps by 2 and assumes N is even.
 * warmup - non-zero: run the loop but do not read the counter again or print.
 */
void __attribute__((noinline)) add3(double * a, int warmup)
{
    unsigned int aux; /* auxiliary output of __rdtscp (unused); must be unsigned */
    unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 2) {
        __m128d av = _mm_load_pd(&a[i]);
        av = _mm_add_pd(av, av);
        _mm_store_pd(&a[i], av);
    }

    if (!warmup) {
        /* Convert explicitly so the argument type matches the %lld specifier. */
        long long elapsed = (long long)(__rdtscp(&aux) - start);
        printf("%lld ", elapsed);
    }
}
/*
 * Scalar baseline: doubles every element of a[0..N-1] in place with a plain
 * C loop (no explicit intrinsics) and prints the elapsed time-stamp-counter
 * delta followed by a space unless `warmup` is non-zero.
 *
 * a      - buffer of at least N doubles.
 * warmup - non-zero: run the loop but do not read the counter again or print.
 */
void __attribute__((noinline)) add9(double * a, int warmup)
{
    unsigned int aux; /* auxiliary output of __rdtscp (unused); must be unsigned */
    unsigned long long start = __rdtscp(&aux);

    for (size_t i = 0; i < N; i += 1) {
        a[i] = a[i] + a[i];
    }

    if (!warmup) {
        /* Convert explicitly so the argument type matches the %lld specifier. */
        long long elapsed = (long long)(__rdtscp(&aux) - start);
        printf("%lld ", elapsed);
    }
}
/*
 * Benchmark driver. Allocates a zeroed, 32-byte-aligned buffer, warms up and
 * then times each kernel, and prints one line of five space-separated counter
 * deltas in this order: aligned AVX, unaligned AVX at a+2, unaligned AVX at
 * a+1, aligned SSE2, scalar.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main(int argc, const char *argv[])
{
    (void)argc;
    (void)argv;

    /*
     * add2(a + 2, ...) touches a[2] .. a[N + 1], so the buffer needs more than
     * N doubles. Four spare doubles keep every offset used below in bounds
     * (the previous N * 8 + 10 bytes were 6 bytes short for the a + 2 case).
     */
    const size_t nbytes = ((size_t)N + 4) * sizeof(double);
    double *a = _mm_malloc(nbytes, 32);
    if (a == NULL) {
        fprintf(stderr, "_mm_malloc failed\n");
        return 1;
    }
    memset(a, 0, nbytes);

    /* Only the aligned-AVX kernel gets the ten extra warm-up passes. */
    for (int i = 0; i < 10; i++) {
        add(a, 1);
    }
    add(a, 0);

    /* Offset by two doubles: 16-byte aligned, not 32-byte aligned. */
    add2(a + 2, 1);
    add2(a + 2, 0);

    /* Offset by one double: only 8-byte aligned. */
    add2(a + 1, 1);
    add2(a + 1, 0);

    add3(a, 1);
    add3(a, 0);

    add9(a, 1);
    add9(a, 0);

    puts("");

    /* Memory from _mm_malloc must be released with _mm_free, not free. */
    _mm_free(a);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment