Last active
March 22, 2016 15:17
-
-
Save dmikushin/e767c11d712d831e1e1e to your computer and use it in GitHub Desktop.
_mm256_cmp_ps vs _mm256_cmp_pd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This sketch checks whether all elements of an AVX vector of doubles are greater
// than zero, using two implementations: _mm256_cmp_ps on the reinterpreted vector
// versus _mm256_cmp_pd. In an infinite loop we assert that both implementations agree.
//
// gcc -mavx cmp_check.c -o cmp_check -O3 -ffast-math
#include <assert.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <x86intrin.h> | |
/*
 * Randomized equivalence check for two ways of answering
 * "is any of the four doubles in an AVX vector NOT greater than zero?":
 *
 *   1. _mm256_cmp_ps on the vector reinterpreted as packed floats
 *      (what hddm-solver/AVX does);
 *   2. _mm256_cmp_pd on the packed doubles directly.
 *
 * Both comparison masks are reduced with _mm256_movemask_pd and compared
 * against 0xf (all four lanes set).
 *
 * The loop never terminates on its own: each iteration prints the tested
 * values and both results, and assert() aborts on the first disagreement.
 * Command-line arguments are ignored.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    const __m256d zero = _mm256_setzero_pd();
    const double dinvrandmax = 1.0 / RAND_MAX;

    while (1)
    {
        double values[4] __attribute__((aligned(64)));

        // Random values derived from rand(), roughly spanning [-1, 1].
        for (int i = 0; i < 4; i++)
            values[i] = (0.5 - rand() * dinvrandmax) * 2;

        // Overwrite one random element with exactly 0.0 or 1.0.
        // This must happen BEFORE the vector load below; otherwise the
        // injected value is never compared and the printout would not match
        // the data that was actually tested. The two rand() calls are
        // sequenced explicitly so the index is always drawn first.
        const int slot = rand() % 4;
        values[slot] = rand() % 2;

        const __m256d val = _mm256_load_pd(values);

        // What's being done in hddm-solver/AVX: compare as packed floats.
        // The cast intrinsics only reinterpret the bits (no conversion).
        const __m256d d1 = _mm256_castps_pd(
            _mm256_cmp_ps(_mm256_castpd_ps(val), _mm256_castpd_ps(zero), _CMP_GT_OQ));
        const int result1 = (_mm256_movemask_pd(d1) != 0xf);

        // What logically should be done instead: compare as packed doubles.
        const __m256d d2 = _mm256_cmp_pd(val, zero, _CMP_GT_OQ);
        const int result2 = (_mm256_movemask_pd(d2) != 0xf);

        printf("%f %f %f %f %d %d\n", values[0], values[1], values[2], values[3], result1, result2);

        // Apparently, results are always equal.
        assert(result1 == result2);
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment