Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
SSE vs Native Loop over FLOAT
#include <xmmintrin.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
/*
 * SSE benchmark kernel: multiplies two 1024-element float arrays
 * element-wise, four floats per iteration, using SSE intrinsics.
 *
 * Takes no arguments and returns nothing. All arrays are locals, so each
 * call also pays for initializing both inputs (first eight elements are
 * 1..8, the remaining elements are zero).
 */
void __attribute__ ((noinline)) SSE(void){
    enum { LEN = 1024, LANES = 4 };

    /* _mm_load_ps / _mm_store_ps require 16-byte-aligned addresses;
     * request that alignment explicitly instead of relying on the stack
     * layout the compiler happens to choose. */
    float data1[LEN] __attribute__ ((aligned (16))) = {1, 2, 3, 4, 5, 6, 7, 8};
    float data2[LEN] __attribute__ ((aligned (16))) = {1, 2, 3, 4, 5, 6, 7, 8};
    float dest[LEN] __attribute__ ((aligned (16)));

    /* Empty asm with a "memory" clobber: the compiler must assume the
     * inputs may have been modified, so it cannot precompute the products
     * at compile time. */
    __asm__ __volatile__("" : : "r"(data1), "r"(data2) : "memory");

    /* Signed index matches the signed length (no mixed-sign comparison). */
    for (int i = 0; i + LANES <= LEN; i += LANES) {
        __m128 lhs = _mm_load_ps(data1 + i);
        __m128 rhs = _mm_load_ps(data2 + i);
        _mm_store_ps(dest + i, _mm_mul_ps(lhs, rhs));
    }

    /* Make the stores to dest observable so the optimizer cannot delete
     * the loop as dead code, which would make the timing meaningless. */
    __asm__ __volatile__("" : : "r"(dest) : "memory");
}
/*
 * Scalar benchmark kernel: multiplies two 1024-element float arrays
 * element-wise with a plain loop, one float per iteration.
 *
 * Takes no arguments and returns nothing. All arrays are locals, so each
 * call also pays for initializing both inputs (first eight elements are
 * 1..8, the remaining elements are zero).
 *
 * NOTE(review): at higher optimization levels the compiler may
 * auto-vectorize this loop; check the generated code before interpreting
 * the timings as "scalar".
 */
void __attribute__ ((noinline)) std(void){
    enum { LEN = 1024 };

    float data1[LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
    float data2[LEN] = {1, 2, 3, 4, 5, 6, 7, 8};
    /* Not zero-initialized: every element is written by the loop below,
     * and an extra clear would be setup work the SSE kernel does not do. */
    float dest[LEN];

    /* Empty asm with a "memory" clobber: the compiler must assume the
     * inputs may have been modified, so it cannot precompute the products
     * at compile time. */
    __asm__ __volatile__("" : : "r"(data1), "r"(data2) : "memory");

    /* Signed index matches the signed length (no mixed-sign comparison). */
    for (int i = 0; i < LEN; i++) {
        dest[i] = data1[i] * data2[i];
    }

    /* Make the stores to dest observable so the optimizer cannot delete
     * the loop as dead code, which would make the timing meaningless. */
    __asm__ __volatile__("" : : "r"(dest) : "memory");
}
/*
 * Benchmark driver: calls SSE() one million times, then std() one million
 * times, and prints the CPU time (as reported by clock()) consumed by each
 * batch in seconds.
 *
 * Command-line arguments are accepted but not used. Always returns 0.
 */
int main(int argc, char *argv[]) {
    const int iterations = 1000000;

    printf("Hello, world!\n");

    /* Batch 1: intrinsic-based kernel. */
    clock_t sse_start = clock();
    for (int run = 0; run < iterations; ++run) {
        SSE();
    }
    clock_t sse_stop = clock();
    double sse_seconds = (double)(sse_stop - sse_start) / CLOCKS_PER_SEC;
    printf("SSE: %f\n", sse_seconds);

    /* Batch 2: plain-loop kernel. */
    clock_t std_start = clock();
    for (int run = 0; run < iterations; ++run) {
        std();
    }
    clock_t std_stop = clock();
    double std_seconds = (double)(std_stop - std_start) / CLOCKS_PER_SEC;
    printf("std: %f\n", std_seconds);

    printf("bye!\n");
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.