Last active
August 29, 2015 13:56
-
-
Save parastuffs/9327934 to your computer and use it in GitHub Desktop.
ELECH473 - Labo 5 - Weird baby black & white
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "StdAfx.h" | |
#include "stdio.h" | |
#include "time.h" | |
#include <stdlib.h> | |
void ex1(); | |
void ex2(); | |
// Thresholds an 8-bit buffer with SSE2, 16 pixels per step, using aligned
// loads and stores.
//
// Each output byte is 255 when the input byte is >= the threshold and 0
// otherwise, which is the same rule blackWhiteBuffer() applies in C.
//
//   pixels    - input bytes; must be 16-byte aligned (movdqa is used)
//   lSize     - number of bytes in pixels and output
//   t         - scalar threshold; unused here, kept so existing calls compile
//   output    - destination, lSize bytes; must be 16-byte aligned
//   threshold - 16 bytes, each holding the threshold; must be 16-byte aligned
void blackWhiteASMAligned(unsigned char* pixels, int lSize, int t, unsigned char* output, unsigned char* threshold) {
    int blocks = lSize / 16;    // complete 16-byte groups handled by the SIMD loop
    int pos;

    (void)t;

    // The loop below is "do ... while (--ecx)": entering it with a zero count
    // would wrap ecx around, so skip it when there is no complete group.
    if (blocks > 0) {
        _asm {
            mov     esi, pixels         // source cursor
            mov     edi, output         // destination cursor
            mov     eax, threshold
            mov     ecx, blocks         // groups remaining, > 0 here
            movdqa  xmm7, [eax]         // 16 copies of the threshold
        l1:
            movdqa  xmm0, [esi]         // 16 pixels
            movdqa  xmm1, xmm0
            pmaxub  xmm1, xmm7          // unsigned max(pixel, threshold)
            pcmpeqb xmm0, xmm1          // 0xFF where pixel == max, i.e. pixel >= threshold
            movdqa  [edi], xmm0
            add     esi, 16
            add     edi, 16
            sub     ecx, 1
            jnz     l1
        }
    }

    // Bytes after the last complete group, if any, are handled one at a time.
    for (pos = blocks * 16; pos < lSize; pos++) {
        output[pos] = (pixels[pos] < threshold[0]) ? 0 : 255;
    }
}
// Thresholds an 8-bit buffer with SSE2, 16 pixels per step, using unaligned
// loads and stores, so none of the pointers needs any particular alignment.
//
// Each output byte is 255 when the input byte is >= the threshold and 0
// otherwise, which is the same rule blackWhiteBuffer() applies in C.
//
//   pixels    - input bytes
//   lSize     - number of bytes in pixels and output
//   t         - scalar threshold; unused here, kept so existing calls compile
//   output    - destination, lSize bytes
//   threshold - 16 bytes, each holding the threshold
void blackWhiteASMUnaligned(unsigned char* pixels, int lSize, int t, unsigned char* output, unsigned char* threshold) {
    int blocks = lSize / 16;    // complete 16-byte groups handled by the SIMD loop
    int pos;

    (void)t;

    // The loop below is "do ... while (--ecx)": entering it with a zero count
    // would wrap ecx around, so skip it when there is no complete group.
    if (blocks > 0) {
        _asm {
            mov     esi, pixels         // source cursor
            mov     edi, output         // destination cursor
            mov     eax, threshold
            mov     ecx, blocks         // groups remaining, > 0 here
            movdqu  xmm7, [eax]         // 16 copies of the threshold
        l1:
            movdqu  xmm0, [esi]         // 16 pixels
            movdqa  xmm1, xmm0          // register copy, no memory alignment involved
            pmaxub  xmm1, xmm7          // unsigned max(pixel, threshold)
            pcmpeqb xmm0, xmm1          // 0xFF where pixel == max, i.e. pixel >= threshold
            movdqu  [edi], xmm0
            add     esi, 16
            add     edi, 16
            sub     ecx, 1
            jnz     l1
        }
    }

    // Bytes after the last complete group, if any, are handled one at a time.
    for (pos = blocks * 16; pos < lSize; pos++) {
        output[pos] = (pixels[pos] < threshold[0]) ? 0 : 255;
    }
}
void ASMtest() { | |
int counter = 4; | |
unsigned char *pixels = (unsigned char*) malloc(sizeof(unsigned char*)*65); | |
char *output = (char*) malloc(sizeof(char)*65); | |
int i; | |
for(int i=0;i<65;i++) { | |
pixels[i] = 200; | |
} | |
unsigned char *threshold = (unsigned char*) malloc(sizeof(unsigned char)*16); | |
for(i=0;i<16;i++) { | |
threshold[i] = 127; | |
} | |
_asm { | |
mov esi , pixels; // datain ptr of the line in register | |
mov ecx , counter; // counter | |
mov edi , output; // dataout pointer | |
mov eax , threshold; // treshold value pointer | |
movapd xmm7 , [eax]; // 1st pipe mask | |
l1: | |
movapd xmm0 , [esi]; // load first line | |
pcmpeqb xmm0 , xmm7; // compare | |
movapd [edi] , xmm0; // Move result to memory destination | |
add edi , 16; // New result pointer | |
add esi , 16; // New source pointer | |
sub ecx , 1; | |
jnz l1; | |
//emms; | |
} | |
} | |
// In-place binarisation of a byte buffer: every value below `threshold`
// becomes 0, every other value becomes 255.
//
//   pixels    - buffer to rewrite
//   lSize     - number of bytes in the buffer
//   threshold - cut-off, compared as an int against each byte
void blackWhiteBuffer(unsigned char* pixels, int lSize, int threshold) {
    int pos = 0;
    while (pos < lSize) {
        pixels[pos] = (pixels[pos] >= threshold) ? 255 : 0;
        ++pos;
    }
}
// Entry point: runs the thresholding benchmark (ex1) and then waits for a
// key press so the console output stays visible.
// The command-line arguments are not used.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;
    ex1();
    //ex2();
    getchar();
    return 0;
}
void ex1() { | |
time_t startTimeA, endTimeA, startTimeB, endTimeB, startTimeC, endTimeC; | |
int const height = 1024; | |
int const width = 1024; | |
int threshold = 175; | |
int i,j; | |
unsigned char pixel; | |
float dtA, dtB, dtC; | |
int loops = 500; | |
//****** | |
//ASM testing | |
//****** | |
//ASMtest(); | |
//*************** | |
//Using a buffer | |
//*************** | |
FILE* imgA = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputA = fopen("C:\\outputA.raw","wb"); | |
//file size | |
fseek(imgA , 0 , SEEK_END); | |
int lSize = ftell (imgA); | |
rewind (imgA); | |
// allocate memory to contain the whole file: | |
unsigned char *pixels = (unsigned char*) malloc(sizeof(unsigned char)*lSize); | |
unsigned char *output = (unsigned char*) malloc(sizeof(unsigned char)*lSize); | |
fread(pixels, sizeof(unsigned char), lSize, imgA); | |
fclose(imgA); | |
//black & white | |
//Start time | |
startTimeA = clock(); | |
for(int i=0;i<loops;++i) { | |
blackWhiteBuffer(pixels, lSize, threshold); | |
} | |
//end time | |
endTimeA = clock(); | |
fwrite(pixels, sizeof(unsigned char), lSize, imgOutputA); | |
fclose(imgOutputA); | |
dtA = (endTimeA-startTimeA)/(float)(CLOCKS_PER_SEC); | |
//************** | |
//Streaming | |
//************** | |
FILE* imgB = fopen("C:\\test.raw","rb"); | |
//Start time | |
startTimeB = clock(); | |
FILE* imgOutputB = fopen("C:\\outputB.raw","wb"); | |
//black & white | |
for(i=0;i<lSize;i++) { | |
pixel = fgetc(imgB); | |
if(pixel<threshold) { | |
pixel=0; | |
} | |
else { | |
pixel=255; | |
} | |
fputc(pixel, imgOutputB); | |
} | |
fclose(imgB); | |
//end time | |
endTimeB = clock(); | |
fwrite(pixels, sizeof(unsigned char), lSize, imgOutputB); | |
fclose(imgOutputB); | |
dtB = (endTimeB-startTimeB)/(float)(CLOCKS_PER_SEC); | |
//********** | |
//ASM - aligned | |
//********** | |
FILE* imgC = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputC = fopen("C:\\outputC.raw","wb"); | |
//file size | |
fseek(imgC , 0 , SEEK_END); | |
lSize = ftell (imgC); | |
rewind (imgC); | |
// allocate memory to contain the whole file: | |
//char *pixelsC = (char*) malloc(sizeof(char)*lSize); | |
printf("size: %d",lSize); | |
unsigned char *pixelsC = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*lSize, sizeof(unsigned char)*16); | |
unsigned char *outputC = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*lSize, sizeof(unsigned char)*16); | |
//char *outputC = (char*) malloc(sizeof(char)*lSize); | |
fread(pixelsC, sizeof(unsigned char), lSize, imgC); | |
fclose(imgC); | |
//black & white | |
unsigned char *thresArrayAl = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*16,sizeof(unsigned char)*16); | |
for(i=0;i<16;i++) { | |
thresArrayAl[i] = threshold; | |
} | |
//Start time | |
startTimeC = clock(); | |
for(i=0;i<loops;i++) { | |
blackWhiteASMAligned(pixelsC, lSize, threshold, outputC, thresArrayAl); | |
} | |
//end time | |
endTimeC = clock(); | |
fwrite(outputC, sizeof(unsigned char), lSize, imgOutputC); | |
fclose(imgOutputC); | |
dtC = (endTimeC-startTimeC)/(float)(CLOCKS_PER_SEC); | |
//************ | |
//ASM - unaligned | |
//************ | |
FILE* imgASM_unal = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputASM_unal = fopen("C:\\outputC.raw","wb"); | |
//file size | |
fseek(imgASM_unal , 0 , SEEK_END); | |
lSize = ftell (imgASM_unal); | |
rewind (imgASM_unal); | |
// allocate memory to contain the whole file: | |
unsigned char *pixelsASM_unal = (unsigned char*) malloc(sizeof(unsigned char)*lSize); | |
unsigned char *outputASM_unal = (unsigned char*) malloc(sizeof(unsigned char)*lSize); | |
fread(pixelsASM_unal, sizeof(unsigned char), lSize, imgASM_unal); | |
fclose(imgASM_unal); | |
//black & white | |
unsigned char *thresArrayUnal = (unsigned char*) malloc(sizeof(unsigned char)*16); | |
for(i=0;i<16;i++) { | |
thresArrayUnal[i] = threshold; | |
} | |
//Start time | |
time_t startTimeASM_unal = clock(); | |
for(i=0;i<loops;i++) { | |
blackWhiteASMUnaligned(pixelsASM_unal, lSize, threshold, outputASM_unal, thresArrayUnal); | |
} | |
//end time | |
time_t endTimeASM_unal = clock(); | |
fwrite(outputASM_unal, sizeof(unsigned char), lSize, imgOutputASM_unal); | |
fclose(imgOutputASM_unal); | |
float dtASM_unal = (endTimeASM_unal-startTimeASM_unal)/(float)(CLOCKS_PER_SEC); | |
//******* | |
//Display time spent. | |
//******* | |
printf("Time B (Stream, C): %f\n",dtB); | |
printf("Start time B: %d\n",startTimeB); | |
printf("End time B: %d\n",endTimeB); | |
printf("##########\n#%d loops\n##########\n",loops); | |
printf("Time A (Buffer, C): %f\n",dtA); | |
printf("Start time A: %d\n",startTimeA); | |
printf("End time A: %d\n",endTimeA); | |
printf("Time ASM aligned (Buffer, ASM, aligned): %f\n",dtC); | |
printf("Start time ASM aligned: %d\t",startTimeC); | |
printf("End time ASM aligned: %d\n",endTimeC); | |
printf("Time ASM_unal (Buffer, ASM, unaligned): %f\n",dtASM_unal); | |
printf("Start time ASM_unal: %d\t",startTimeASM_unal); | |
printf("End time ASM_unal: %d\n",endTimeASM_unal); | |
} | |
// Returns the largest of the three given values.
int computeMax(int val1, int val2, int val3) {
    const int firstTwo = (val2 > val1) ? val2 : val1;
    return (val3 > firstTwo) ? val3 : firstTwo;
}
void simdMaxFilter(unsigned char* pixels, int lSize, unsigned char* output) { | |
int line=0,column=0; | |
int sizeX = 1024; | |
int sizeY = 1024; | |
unsigned char tabLine[3][3]; | |
unsigned char columnMax[3], newMax; | |
//for( ; line*column < lSize; column++) { | |
int i,j; | |
for(i=1;i<1022;++i) { | |
for(j=1;j<1022;++j) { | |
//Incrementation for colum & line. | |
//if(column%sizeY == 0) { | |
// column = 0; | |
// line++; | |
//} | |
//We ignore the borders | |
//if(column%sizeX == 0 || line%sizeY == 0) | |
// continue; | |
//We take the neighborhood | |
tabLine[0][0] = pixels[(i-1)*sizeX + (j-1)]; | |
tabLine[0][1] = pixels[(i-1)*sizeX + (j)]; | |
tabLine[0][2] = pixels[(i-1)*sizeX + (j+1)]; | |
tabLine[1][0] = pixels[i*sizeX + (j-1)]; | |
tabLine[1][1] = pixels[i*sizeX + (j)]; | |
tabLine[1][2] = pixels[i*sizeX + (j+1)]; | |
tabLine[2][0] = pixels[(i+1)*sizeX + (j-1)]; | |
tabLine[2][1] = pixels[(i+1)*sizeX + (j)]; | |
tabLine[2][2] = pixels[(i+1)*sizeX + (j+1)]; | |
//We calculate the max | |
columnMax[0] = computeMax(tabLine[0][0],tabLine[1][0],tabLine[2][0]); | |
columnMax[1] = computeMax(tabLine[0][1],tabLine[1][1],tabLine[2][1]); | |
columnMax[2] = computeMax(tabLine[0][2],tabLine[1][2],tabLine[2][2]); | |
//We calculate the max after the shift | |
newMax = computeMax(columnMax[0], columnMax[1], columnMax[2]); | |
//We save the new pixel | |
output[i*sizeX + (j)] = newMax; | |
} | |
} | |
} | |
// Runs `steps` consecutive steps of the SSE2 3x3 maximum kernel.
// One step reads 16 bytes at src, src+1024 and src+2048, and writes 16 bytes
// at dst+1025; the first 14 written bytes are complete 3x3 maxima, the last
// two only cover part of their neighbourhood. Cursors advance by 14 per step
// so the next step overwrites those two bytes.
static void simdMaxFilterRun(unsigned char* src, unsigned char* dst, int steps) {
    // The loop is "do ... while (--ecx)", so a zero count must not enter it.
    if (steps <= 0) {
        return;
    }
    _asm {
        mov     esi, src                // source cursor (top row of the window)
        mov     ecx, steps              // steps remaining, > 0 here
        mov     edi, dst                // destination cursor
    l1:
        movdqu  xmm0, [esi]             // row above
        movdqu  xmm1, [esi+1024]        // centre row (row stride is 1024 bytes)
        movdqu  xmm2, [esi+2048]        // row below
        pmaxub  xmm0, xmm1
        pmaxub  xmm0, xmm2              // xmm0 = per-column max of the 3 rows
        movdqa  xmm6, xmm0
        movdqa  xmm7, xmm0
        psrldq  xmm6, 1                 // byte k now holds column k+1
        psrldq  xmm7, 2                 // byte k now holds column k+2
        pmaxub  xmm6, xmm7
        pmaxub  xmm6, xmm0              // byte k = max of columns k, k+1, k+2
        movdqu  [edi+1025], xmm6        // one row down, one column right
        add     edi, 14
        add     esi, 14
        sub     ecx, 1
        jnz     l1
    }
}

// SSE2 3x3 maximum filter on an 8-bit image with 1024 bytes per row.
//
// The image is treated as one flat byte sequence: output[p] becomes the
// maximum of pixels[p + dr*1024 + dc] for dr, dc in {-1, 0, 1}, for every p
// whose whole window lies inside the first lSize bytes. Windows are not
// clipped at row ends, so values in the first and last column mix pixels of
// neighbouring rows. Buffers shorter than 2064 bytes are left untouched.
//
//   pixels - source image
//   lSize  - number of bytes in pixels and output
//   output - destination image
void simdMaxFilterASM(unsigned char* pixels, int lSize, unsigned char* output) {
    const int stepSpan = 2 * 1024 + 16;   // bytes a step reaches from its start
    int lastStart;
    int fullSteps;

    if (pixels == NULL || output == NULL || lSize < stepSpan) {
        return;
    }

    // Largest start offset whose reads and writes stay inside the buffers.
    lastStart = lSize - stepSpan;
    fullSteps = lastStart / 14 + 1;
    simdMaxFilterRun(pixels, output, fullSteps);

    // When the stride does not land exactly on lastStart, one overlapping
    // step there finishes the remaining pixels without leaving the buffers.
    if (lastStart % 14 != 0) {
        simdMaxFilterRun(pixels + lastStart, output + lastStart, 1);
    }
}
void ex2() { | |
int benchCount = 50; | |
//*************** | |
//Using a buffer with C for labo 2 | |
//*************** | |
FILE* imgLab2 = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputLab2 = fopen("C:\\outputLab2.raw","wb"); | |
//file size | |
fseek(imgLab2 , 0 , SEEK_END); | |
int lSizeLab2 = ftell (imgLab2); | |
rewind (imgLab2); | |
// allocate memory to contain the whole file: | |
unsigned char *pixelsLab2 = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2); | |
unsigned char *outputLab2 = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2); | |
fread(pixelsLab2, sizeof(unsigned char), lSizeLab2, imgLab2); | |
fclose(imgLab2); | |
//black & white | |
int i; | |
//Start time | |
time_t startTimeLab2 = clock(); | |
for(i=0;i<benchCount;++i) { | |
simdMaxFilter(pixelsLab2, lSizeLab2, outputLab2); | |
} | |
//end time | |
time_t endTimeLab2 = clock(); | |
fwrite(outputLab2, sizeof(unsigned char), lSizeLab2, imgOutputLab2); | |
fclose(imgOutputLab2); | |
float dtLab2 = (endTimeLab2-startTimeLab2)/(float)(CLOCKS_PER_SEC); | |
//*************** | |
//Using a buffer with ASM for labo 2 | |
//*************** | |
FILE* imgLab2_asm = fopen("C:\\test.raw","rb"); | |
FILE* imgOutputLab2_asm = fopen("C:\\outputLab2_asm.raw","wb"); | |
//file size | |
fseek(imgLab2_asm , 0 , SEEK_END); | |
int lSizeLab2_asm = ftell(imgLab2_asm); | |
rewind(imgLab2_asm); | |
// allocate memory to contain the whole file: | |
unsigned char *pixelsLab2_asm = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2_asm); | |
unsigned char *outputLab2_asm = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2_asm); | |
fread(pixelsLab2_asm, sizeof(unsigned char), lSizeLab2_asm, imgLab2_asm); | |
fclose(imgLab2_asm); | |
//black & white | |
//Start time | |
time_t startTimeLab2_asm = clock(); | |
for(i=0;i<benchCount;++i) { | |
simdMaxFilterASM(pixelsLab2_asm, lSizeLab2_asm, outputLab2_asm); | |
} | |
//end time | |
time_t endTimeLab2_asm = clock(); | |
fwrite(outputLab2_asm, sizeof(unsigned char), lSizeLab2_asm, imgOutputLab2_asm); | |
fclose(imgOutputLab2_asm); | |
float dtLab2_asm = (endTimeLab2_asm-startTimeLab2_asm)/(float)(CLOCKS_PER_SEC); | |
printf("For %d loops\n",benchCount); | |
printf("Time C (Buffer, Lab2): %f\n",dtLab2); | |
printf("Start time Lab2: %d\n",startTimeLab2); | |
printf("End time Lab2: %d\n\n",endTimeLab2); | |
printf("Time ASM (Buffer, Lab2): %f\n",dtLab2_asm); | |
printf("Start time Lab2: %d\n",startTimeLab2_asm); | |
printf("End time Lab2: %d\n\n",endTimeLab2_asm); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment