Created
March 14, 2014 20:48
-
-
Save parastuffs/9556542 to your computer and use it in GitHub Desktop.
ELECH473 - SIMD - Evaluating the alignment performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "StdAfx.h" | |
#include "stdio.h" | |
#include "time.h" | |
#include <stdlib.h> | |
/*
 * Thresholds an 8-bit image into pure black/white, 16 pixels per iteration,
 * using aligned SSE2 loads and stores (MSVC 32-bit inline assembly).
 *
 *   pixels    : input bytes, read as unsigned 0..255; must be 16-byte aligned
 *   lSize     : number of bytes in pixels / output
 *   t         : unused; the threshold is taken from the threshold vector
 *   output    : destination, lSize bytes; must be 16-byte aligned
 *   threshold : 16 bytes holding the threshold in every lane; must be
 *               16-byte aligned
 *
 * For every byte: output = 0xFF when pixel >= threshold, 0x00 otherwise.
 * The aligned move (movdqa) faults on addresses that are not 16-byte aligned.
 */
void blackWhiteASMAligned(char* pixels, int lSize, int t, char* output, unsigned char* threshold) {
    int ii;      // number of complete 16-byte blocks
    int k;
    unsigned char limit;

    (void) t;

    // Nothing to do; also keeps a negative size from reaching the counter.
    if (lSize <= 0) {
        return;
    }

    ii = lSize / 16;
    limit = threshold[0];

    // The loop below decrements before testing, so entering it with a zero
    // counter would wrap around and run about 2^32 iterations.
    if (ii > 0) {
        _asm {
            mov esi , pixels;      // input pointer
            mov ecx , ii;          // block counter
            mov edi , output;      // output pointer
            mov eax , threshold;   // threshold vector pointer
            movdqa xmm7 , [eax];   // threshold in all 16 lanes
        l1:
            movdqa xmm0 , [esi];   // load 16 pixels
            movdqa xmm1 , xmm0;    // keep a copy for the compare
            pmaxub xmm1 , xmm7;    // unsigned max(pixel, threshold)
            pcmpeqb xmm0 , xmm1;   // 0xFF where pixel == max, i.e. pixel >= threshold
            movdqa [edi] , xmm0;   // store 16 results
            add edi , 16;          // next output block
            add esi , 16;          // next input block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes that do not fill a whole 16-byte block are handled in plain C.
    for (k = ii * 16; k < lSize; k++) {
        output[k] = ((unsigned char) pixels[k] >= limit) ? (char) 0xFF : (char) 0;
    }
}
/*
 * Thresholds an 8-bit image into pure black/white, 16 pixels per iteration,
 * using unaligned SSE2 loads and stores (MSVC 32-bit inline assembly).
 *
 *   pixels    : input bytes, read as unsigned 0..255; any alignment
 *   lSize     : number of bytes in pixels / output
 *   t         : unused; the threshold is taken from the threshold vector
 *   output    : destination, lSize bytes; any alignment
 *   threshold : 16 bytes holding the threshold in every lane; any alignment
 *
 * For every byte: output = 0xFF when pixel >= threshold, 0x00 otherwise.
 */
void blackWhiteASMUnaligned(char* pixels, int lSize, int t, char* output, unsigned char* threshold) {
    int ii;      // number of complete 16-byte blocks
    int k;
    unsigned char limit;

    (void) t;

    // Nothing to do; also keeps a negative size from reaching the counter.
    if (lSize <= 0) {
        return;
    }

    ii = lSize / 16;
    limit = threshold[0];

    // The loop below decrements before testing, so entering it with a zero
    // counter would wrap around and run about 2^32 iterations.
    if (ii > 0) {
        _asm {
            mov esi , pixels;      // input pointer
            mov ecx , ii;          // block counter
            mov edi , output;      // output pointer
            mov eax , threshold;   // threshold vector pointer
            movdqu xmm7 , [eax];   // threshold in all 16 lanes
        l1:
            movdqu xmm0 , [esi];   // load 16 pixels
            movdqa xmm1 , xmm0;    // register copy for the compare
            pmaxub xmm1 , xmm7;    // unsigned max(pixel, threshold)
            pcmpeqb xmm0 , xmm1;   // 0xFF where pixel == max, i.e. pixel >= threshold
            movdqu [edi] , xmm0;   // store 16 results
            add edi , 16;          // next output block
            add esi , 16;          // next input block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes that do not fill a whole 16-byte block are handled in plain C.
    for (k = ii * 16; k < lSize; k++) {
        output[k] = ((unsigned char) pixels[k] >= limit) ? (char) 0xFF : (char) 0;
    }
}
/*
 * Scratch smoke test for the aligned SIMD kernel: fills a small buffer with
 * the value 200, a threshold vector with 127, and runs four 16-byte blocks
 * through blackWhiteASMAligned. The result is not inspected; the function
 * only checks that the kernel runs on properly aligned memory.
 *
 * All three buffers come from _aligned_malloc with 16-byte alignment because
 * the aligned kernel uses 16-byte aligned moves, and plain malloc does not
 * promise that alignment on every target.
 */
void ASMtest() {
    enum { BLOCK_BYTES = 16, BLOCK_COUNT = 4, BUFFER_BYTES = 65 };
    unsigned char *pixels;
    unsigned char *threshold;
    char *output;
    int i;

    pixels = (unsigned char*) _aligned_malloc(BUFFER_BYTES, BLOCK_BYTES);
    output = (char*) _aligned_malloc(BUFFER_BYTES, BLOCK_BYTES);
    threshold = (unsigned char*) _aligned_malloc(BLOCK_BYTES, BLOCK_BYTES);

    if (pixels != NULL && output != NULL && threshold != NULL) {
        for (i = 0; i < BUFFER_BYTES; i++) {
            pixels[i] = 200;
        }
        for (i = 0; i < BLOCK_BYTES; i++) {
            threshold[i] = 127;
        }
        // Same amount of work as before: four full blocks (64 of the 65 bytes).
        blackWhiteASMAligned((char*) pixels, BLOCK_COUNT * BLOCK_BYTES, 127, output, threshold);
    }

    _aligned_free(threshold);
    _aligned_free(output);
    _aligned_free(pixels);
}
/*
 * Thresholds an 8-bit image in place.
 *
 *   pixels    : lSize bytes, each read as an unsigned value 0..255
 *   lSize     : number of bytes to process; values <= 0 do nothing
 *   threshold : pixels below this value become 0, all others become 255
 *
 * The byte is converted to unsigned char before the comparison. Plain char is
 * signed on many compilers, which would make pixel values 128..255 compare
 * as negative and be classified as dark.
 */
void blackWhiteBuffer(char* pixels, int lSize, int threshold) {
    int i;

    if (!pixels) {
        return;
    }
    for (i = 0; i < lSize; i++) {
        unsigned char value = (unsigned char) pixels[i];
        pixels[i] = (value < threshold) ? (char) 0 : (char) 0xFF;
    }
}
int main(int argc, char* argv) { | |
time_t startTimeA, endTimeA, startTimeB, endTimeB, startTimeC, endTimeC; | |
int const height = 1024; | |
int const width = 1024; | |
int threshold = 75; | |
int i,j; | |
unsigned char pixel; | |
float dtA, dtB, dtC; | |
int loops = 10000; | |
//****** | |
//ASM testing | |
//****** | |
//ASMtest(); | |
//*************** | |
//Using a buffer | |
//*************** | |
//Start time | |
startTimeA = clock(); | |
FILE* imgA = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputA = fopen("C:\\outputA.raw","wb"); | |
//file size | |
fseek(imgA , 0 , SEEK_END); | |
int lSize = ftell (imgA); | |
rewind (imgA); | |
// allocate memory to contain the whole file: | |
char *pixels = (char*) malloc(sizeof(char)*lSize); | |
char *output = (char*) malloc(sizeof(char)*lSize); | |
fread(pixels, sizeof(unsigned char), lSize, imgA); | |
fclose(imgA); | |
//black & white | |
blackWhiteBuffer(pixels, lSize, threshold); | |
/*for(i=0;i<lSize;i++) { | |
if(pixels[i]<threshold) { | |
pixels[i]=0; | |
} | |
else { | |
pixels[i]=255; | |
} | |
}*/ | |
fwrite(pixels, sizeof(unsigned char), lSize, imgOutputA); | |
fclose(imgOutputA); | |
//end time | |
endTimeA = clock(); | |
dtA = (endTimeA-startTimeA)/(float)(CLOCKS_PER_SEC); | |
//************** | |
//Streaming | |
//************** | |
//Start time | |
startTimeB = clock(); | |
FILE* imgB = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputB = fopen("C:\\outputB.raw","wb"); | |
//black & white | |
for(i=0;i<lSize;i++) { | |
pixel = fgetc(imgB); | |
if(pixel<threshold) { | |
pixel=0; | |
} | |
else { | |
pixel=255; | |
} | |
fputc(pixel, imgOutputB); | |
} | |
fclose(imgB); | |
fwrite(pixels, sizeof(unsigned char), lSize, imgOutputB); | |
fclose(imgOutputB); | |
//end time | |
endTimeB = clock(); | |
dtB = (endTimeB-startTimeB)/(float)(CLOCKS_PER_SEC); | |
//********** | |
//ASM - aligned | |
//********** | |
FILE* imgC = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputC = fopen("C:\\outputC.raw","wb"); | |
//file size | |
fseek(imgC , 0 , SEEK_END); | |
lSize = ftell (imgC); | |
rewind (imgC); | |
// allocate memory to contain the whole file: | |
//char *pixelsC = (char*) malloc(sizeof(char)*lSize); | |
printf("size: %d",lSize); | |
char *pixelsC = (char*) _aligned_malloc(sizeof(char)*lSize, sizeof(char)*lSize); | |
char *outputC = (char*) _aligned_malloc(sizeof(char)*lSize, sizeof(char)*lSize); | |
//char *outputC = (char*) malloc(sizeof(char)*lSize); | |
fread(pixelsC, sizeof(unsigned char), lSize, imgC); | |
fclose(imgC); | |
//black & white | |
unsigned char *thresArrayAl = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*16,sizeof(unsigned char)*16); | |
for(i=0;i<16;i++) { | |
thresArrayAl[i] = threshold; | |
} | |
//Start time | |
startTimeC = clock(); | |
for(i=0;i<loops;i++) { | |
blackWhiteASMAligned(pixelsC, lSize, threshold, outputC, thresArrayAl); | |
} | |
//end time | |
endTimeC = clock(); | |
fwrite(outputC, sizeof(unsigned char), lSize, imgOutputC); | |
fclose(imgOutputC); | |
dtC = (endTimeC-startTimeC)/(float)(CLOCKS_PER_SEC); | |
//************ | |
//ASM - unaligned | |
//************ | |
FILE* imgASM_unal = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputASM_unal = fopen("C:\\outputC.raw","wb"); | |
//file size | |
fseek(imgASM_unal , 0 , SEEK_END); | |
lSize = ftell (imgASM_unal); | |
rewind (imgASM_unal); | |
// allocate memory to contain the whole file: | |
char *pixelsASM_unal = (char*) malloc(sizeof(char)*lSize); | |
char *outputASM_unal = (char*) malloc(sizeof(char)*lSize); | |
fread(pixelsASM_unal, sizeof(unsigned char), lSize, imgASM_unal); | |
fclose(imgASM_unal); | |
//black & white | |
unsigned char *thresArrayUnal = (unsigned char*) malloc(sizeof(unsigned char)*16); | |
for(i=0;i<16;i++) { | |
thresArrayUnal[i] = threshold; | |
} | |
//Start time | |
time_t startTimeASM_unal = clock(); | |
for(i=0;i<loops;i++) { | |
blackWhiteASMUnaligned(pixelsASM_unal, lSize, threshold, outputASM_unal, thresArrayUnal); | |
} | |
//end time | |
time_t endTimeASM_unal = clock(); | |
fwrite(outputASM_unal, sizeof(unsigned char), lSize, imgOutputASM_unal); | |
fclose(imgOutputASM_unal); | |
float dtASM_unal = (endTimeASM_unal-startTimeASM_unal)/(float)(CLOCKS_PER_SEC); | |
//******* | |
//Display time spent. | |
//******* | |
printf("Time A (Buffer, C): %f\n",dtA); | |
printf("Start time A: %d\n",startTimeA); | |
printf("End time A: %d\n",endTimeA); | |
printf("Time B (Stream, C): %f\n",dtB); | |
printf("Start time B: %d\n",startTimeB); | |
printf("End time B: %d\n",endTimeB); | |
printf("##########\n#%d loops\n##########\n",loops); | |
printf("Time ASM aligned (Buffer, ASM, aligned): %f\n",dtC); | |
printf("Start time ASM aligned: %d\t",startTimeC); | |
printf("End time ASM aligned: %d\n",endTimeC); | |
printf("Time ASM_unal (Buffer, ASM, unaligned): %f\n",dtASM_unal); | |
printf("Start time ASM_unal: %d\t",startTimeASM_unal); | |
printf("End time ASM_unal: %d\n",endTimeASM_unal); | |
getchar(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment