SSE vs Native Loop over FLOAT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <xmmintrin.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <time.h> | |
/*
 * Benchmark kernel: multiplies two 1024-element float arrays element-wise,
 * four floats per iteration, using SSE intrinsics.
 *
 * Only the first eight elements of each input are non-zero; the remaining
 * elements are zero-filled by the array initializer. The function takes no
 * arguments and returns nothing because it exists only to be timed; noinline
 * keeps every invocation a real call inside the caller's timing loop.
 */
void __attribute__ ((noinline)) SSE(void){
    enum { LEN = 1024, LANES = 4 };  /* LEN must stay a multiple of LANES */

    /*
     * _mm_load_ps and _mm_store_ps require 16-byte-aligned addresses and may
     * fault otherwise, so the alignment is requested explicitly instead of
     * relying on whatever the ABI happens to give a stack array.
     */
    float data1[LEN] __attribute__ ((aligned (16))) = {1, 2, 3, 4, 5, 6, 7, 8};
    float data2[LEN] __attribute__ ((aligned (16))) = {1, 2, 3, 4, 5, 6, 7, 8};
    float dest[LEN] __attribute__ ((aligned (16)));  /* fully overwritten below */

    /*
     * Empty asm with a "memory" clobber: the compiler must assume the inputs
     * may have been modified, so it cannot constant-fold the products.
     */
    __asm__ __volatile__("" : : "r"(&data1[0]), "r"(&data2[0]) : "memory");

    for (int i = 0; i < LEN; i += LANES) {
        __m128 lhs = _mm_load_ps(data1 + i);
        __m128 rhs = _mm_load_ps(data2 + i);
        _mm_store_ps(dest + i, _mm_mul_ps(lhs, rhs));
    }

    /*
     * Treat dest as observed. Without this the stores are dead and an
     * optimizing build is free to delete the whole loop, leaving nothing
     * to measure.
     */
    __asm__ __volatile__("" : : "r"(&dest[0]) : "memory");
}
/*
 * Benchmark kernel: multiplies two 1024-element float arrays element-wise
 * with a plain scalar loop, one float per iteration.
 *
 * Only the first eight elements of each input are non-zero; the remaining
 * elements are zero-filled by the array initializer. The function takes no
 * arguments and returns nothing because it exists only to be timed; noinline
 * keeps every invocation a real call inside the caller's timing loop.
 *
 * NOTE(review): depending on compiler flags this loop may be auto-vectorized,
 * in which case it is no longer a purely scalar baseline - check the
 * generated code when interpreting the timings.
 */
void __attribute__ ((noinline)) std(void){
    enum { LEN = 1024 };

    float data1[LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
    float data2[LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
    /*
     * Left uninitialized, exactly as in SSE(): every element is written by
     * the loop, and zero-filling here would add a 4 KiB store per call that
     * the SSE variant does not pay.
     */
    float dest[LEN];

    /*
     * Empty asm with a "memory" clobber: the compiler must assume the inputs
     * may have been modified, so it cannot constant-fold the products.
     */
    __asm__ __volatile__("" : : "r"(&data1[0]), "r"(&data2[0]) : "memory");

    for (int i = 0; i < LEN; i++) {
        dest[i] = data1[i] * data2[i];
    }

    /*
     * Treat dest as observed. Without this the stores are dead and an
     * optimizing build is free to delete the whole loop, leaving nothing
     * to measure.
     */
    __asm__ __volatile__("" : : "r"(&dest[0]) : "memory");
}
/*
 * Benchmark driver.
 *
 * Calls SSE() one million times, then std() one million times, bracketing
 * each batch with clock() and printing the elapsed time in seconds
 * (clock ticks divided by CLOCKS_PER_SEC). Always returns 0.
 */
int main(int argc, char *argv[]) {
    enum { ITERATIONS = 1000000 };
    clock_t sse_start, sse_stop;
    clock_t std_start, std_stop;

    printf("Hello, world!\n");

    /* Batch 1: SSE intrinsics version. */
    sse_start = clock();
    for (int call = 0; call < ITERATIONS; call++) {
        SSE();
    }
    sse_stop = clock();
    printf("SSE: %f\n", (double)(sse_stop - sse_start) / CLOCKS_PER_SEC);

    /* Batch 2: plain loop version. */
    std_start = clock();
    for (int call = 0; call < ITERATIONS; call++) {
        std();
    }
    std_stop = clock();
    printf("std: %f\n", (double)(std_stop - std_start) / CLOCKS_PER_SEC);

    printf("bye!\n");
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment