/*
 * ELECH473 - Labo 5 - Weird baby black & white
 *
 * Source: GitHub gist parastuffs/9327934 by @parastuffs
 * (last active August 29, 2015).
 */
#include "StdAfx.h"
#include "stdio.h"
#include "time.h"
#include <stdlib.h>
// Exercise 1: times the black & white threshold in four variants
// (C on a buffer, C streaming byte by byte, ASM aligned, ASM unaligned).
void ex1();
// Exercise 2: times the max filter written in C against its ASM counterpart.
void ex2();
/*
 * Thresholds a grayscale buffer to black & white with SSE2, 16 pixels per
 * loop iteration, using aligned loads and stores.
 *
 * For every byte: output[i] = 0 when pixels[i] < threshold, 255 otherwise,
 * i.e. the same rule as blackWhiteBuffer().
 *
 * pixels    - source bytes; must be 16-byte aligned (movdqa faults otherwise)
 * lSize     - number of bytes in pixels and in output
 * t         - not used; kept so existing callers keep compiling
 * output    - destination bytes; must be 16-byte aligned
 * threshold - 16-byte aligned array of 16 bytes, one threshold per SSE lane
 */
void blackWhiteASMAligned(unsigned char* pixels, int lSize, int t, unsigned char* output, unsigned char* threshold) {
    int ii = lSize / 16; // number of complete 16-byte blocks
    int i;

    (void) t;

    // The asm loop tests its counter only after the first pass, so a zero
    // count would wrap ecx around and run far past both buffers.
    if (ii > 0) {
        _asm {
            mov esi , pixels; // source pointer
            mov ecx , ii; // block counter
            mov edi , output; // destination pointer
            mov eax , threshold; // threshold array pointer
            movdqa xmm7 , [eax]; // 16 copies of the threshold
        l1:
            movdqa xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // min(pixel, threshold), unsigned
            pcmpeqb xmm0 , xmm7; // 0xFF where min == threshold, i.e. pixel >= threshold
            movdqa [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes left over after the last complete block are handled in C, each
    // against the threshold of the lane it would have occupied.
    for (i = (ii > 0) ? ii * 16 : 0; i < lSize; i++) {
        output[i] = (pixels[i] < threshold[i % 16]) ? 0 : 255;
    }
}
/*
 * Thresholds a grayscale buffer to black & white with SSE2, 16 pixels per
 * loop iteration, using unaligned loads and stores (no alignment requirement
 * on any of the pointers).
 *
 * For every byte: output[i] = 0 when pixels[i] < threshold, 255 otherwise,
 * i.e. the same rule as blackWhiteBuffer().
 *
 * pixels    - source bytes
 * lSize     - number of bytes in pixels and in output
 * t         - not used; kept so existing callers keep compiling
 * output    - destination bytes
 * threshold - array of 16 bytes, one threshold per SSE lane
 */
void blackWhiteASMUnaligned(unsigned char* pixels, int lSize, int t, unsigned char* output, unsigned char* threshold) {
    int ii = lSize / 16; // number of complete 16-byte blocks
    int i;

    (void) t;

    // The asm loop tests its counter only after the first pass, so a zero
    // count would wrap ecx around and run far past both buffers.
    if (ii > 0) {
        _asm {
            mov esi , pixels; // source pointer
            mov ecx , ii; // block counter
            mov edi , output; // destination pointer
            mov eax , threshold; // threshold array pointer
            movdqu xmm7 , [eax]; // 16 copies of the threshold
        l1:
            movdqu xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // min(pixel, threshold), unsigned
            pcmpeqb xmm0 , xmm7; // 0xFF where min == threshold, i.e. pixel >= threshold
            movdqu [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes left over after the last complete block are handled in C, each
    // against the threshold of the lane it would have occupied.
    for (i = (ii > 0) ? ii * 16 : 0; i < lSize; i++) {
        output[i] = (pixels[i] < threshold[i % 16]) ? 0 : 255;
    }
}
/*
 * Scratch routine for trying out the SSE2 threshold loop on a small,
 * constant buffer (65 pixels of value 200, threshold 127). It produces no
 * output; it is meant to be stepped through in a debugger.
 *
 * The buffers come from _aligned_malloc because the loop uses aligned
 * moves, which fault on addresses that are not multiples of 16.
 */
void ASMtest() {
    int counter = 4; // 4 blocks of 16 bytes: the first 64 of the 65 pixels
    int i;
    unsigned char *pixels = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*65, 16);
    unsigned char *output = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*65, 16);
    unsigned char *threshold = (unsigned char*) _aligned_malloc(sizeof(unsigned char)*16, 16);

    if (pixels != NULL && output != NULL && threshold != NULL) {
        for (i = 0; i < 65; i++) {
            pixels[i] = 200;
        }
        for (i = 0; i < 16; i++) {
            threshold[i] = 127;
        }
        _asm {
            mov esi , pixels; // source pointer
            mov ecx , counter; // block counter
            mov edi , output; // destination pointer
            mov eax , threshold; // threshold array pointer
            movdqa xmm7 , [eax]; // 16 copies of the threshold
        l1:
            movdqa xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // min(pixel, threshold), unsigned
            pcmpeqb xmm0 , xmm7; // 0xFF where pixel >= threshold
            movdqa [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // _aligned_free accepts NULL, so a partial allocation failure is fine here.
    _aligned_free(threshold);
    _aligned_free(output);
    _aligned_free(pixels);
}
/*
 * Thresholds a grayscale buffer in place: every byte strictly below
 * `threshold` becomes 0 (black), every other byte becomes 255 (white).
 *
 * pixels    - buffer to rewrite
 * lSize     - number of bytes in the buffer
 * threshold - cut-off value
 */
void blackWhiteBuffer(unsigned char* pixels, int lSize, int threshold) {
    unsigned char *cursor = pixels;
    int remaining = lSize;

    while (remaining > 0) {
        *cursor = (*cursor >= threshold) ? 255 : 0;
        ++cursor;
        --remaining;
    }
}
/*
 * Program entry point: runs exercise 1, then waits for a key press so the
 * console window stays open long enough to read the timings.
 *
 * argv is declared as an array of strings; the previous `char* argv` was
 * not a valid signature for main.
 */
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;
    ex1();
    //ex2();
    getchar();
    return 0;
}
/* Releases a buffer from ex1AllocBuffer(); `aligned` must match the allocation. */
static void ex1FreeBuffer(unsigned char* buffer, int aligned) {
    if (aligned) {
        _aligned_free(buffer);
    }
    else {
        free(buffer);
    }
}

/* Allocates `size` bytes, 16-byte aligned when `aligned` is non-zero; NULL on failure or size <= 0. */
static unsigned char* ex1AllocBuffer(int size, int aligned) {
    if (size <= 0) {
        return NULL;
    }
    if (aligned) {
        return (unsigned char*) _aligned_malloc((size_t) size, 16);
    }
    return (unsigned char*) malloc((size_t) size);
}

/*
 * Reads the whole file at `path` into a new buffer (see ex1AllocBuffer) and
 * stores its length in *size. Prints a message and returns NULL on failure.
 */
static unsigned char* ex1LoadImage(const char* path, int aligned, int* size) {
    unsigned char* data;
    int length;
    FILE* in = fopen(path, "rb");

    *size = 0;
    if (in == NULL) {
        printf("Cannot open %s\n", path);
        return NULL;
    }
    fseek(in, 0, SEEK_END);
    length = (int) ftell(in);
    rewind(in);

    data = ex1AllocBuffer(length, aligned);
    if (data != NULL && fread(data, sizeof(unsigned char), (size_t) length, in) != (size_t) length) {
        ex1FreeBuffer(data, aligned);
        data = NULL;
    }
    fclose(in);

    if (data == NULL) {
        printf("Cannot read %s\n", path);
        return NULL;
    }
    *size = length;
    return data;
}

/* Writes `size` bytes of `data` to the file at `path`, reporting any failure. */
static void ex1SaveImage(const char* path, const unsigned char* data, int size) {
    FILE* out = fopen(path, "wb");

    if (out == NULL) {
        printf("Cannot open %s\n", path);
        return;
    }
    if (fwrite(data, sizeof(unsigned char), (size_t) size, out) != (size_t) size) {
        printf("Cannot write %s\n", path);
    }
    fclose(out);
}

/*
 * Exercise 1: thresholds C:\test.raw to black & white in four ways and
 * prints the CPU time of each.
 *
 *   A - C loop over an in-memory buffer     -> C:\outputA.raw
 *   B - C loop streaming byte by byte       -> C:\outputB.raw
 *   C - SSE2 assembly, aligned buffers      -> C:\outputC.raw
 *   D - SSE2 assembly, unaligned buffers    -> C:\outputD.raw
 *
 * A, C and D repeat the conversion `loops` times; B runs once because it
 * is dominated by file I/O. If a file or allocation fails, a message is
 * printed and the function returns without printing timings.
 */
void ex1() {
    const char* inputPath = "C:\\test.raw";
    int threshold = 175;
    int loops = 500;
    int i;
    int lSize = 0;
    clock_t startTimeA, endTimeA, startTimeB, endTimeB, startTimeC, endTimeC;
    clock_t startTimeASM_unal, endTimeASM_unal;
    float dtA, dtB, dtC, dtASM_unal;
    unsigned char* pixels = NULL;
    unsigned char* pixelsC = NULL;
    unsigned char* outputC = NULL;
    unsigned char* thresArrayAl = NULL;
    unsigned char* pixelsASM_unal = NULL;
    unsigned char* outputASM_unal = NULL;
    unsigned char* thresArrayUnal = NULL;
    FILE* imgB = NULL;
    FILE* imgOutputB = NULL;

    //******
    //ASM testing
    //******
    //ASMtest();

    //***************
    //Using a buffer
    //***************
    pixels = ex1LoadImage(inputPath, 0, &lSize);
    if (pixels == NULL) {
        goto cleanup;
    }
    startTimeA = clock();
    for (i = 0; i < loops; ++i) {
        blackWhiteBuffer(pixels, lSize, threshold);
    }
    endTimeA = clock();
    ex1SaveImage("C:\\outputA.raw", pixels, lSize);
    dtA = (endTimeA-startTimeA)/(float)(CLOCKS_PER_SEC);

    //**************
    //Streaming
    //**************
    imgB = fopen(inputPath, "rb");
    if (imgB == NULL) {
        printf("Cannot open %s\n", inputPath);
        goto cleanup;
    }
    startTimeB = clock();
    imgOutputB = fopen("C:\\outputB.raw","wb");
    if (imgOutputB == NULL) {
        printf("Cannot open %s\n", "C:\\outputB.raw");
        fclose(imgB);
        goto cleanup;
    }
    for (i = 0; i < lSize; i++) {
        int value = fgetc(imgB);
        if (value == EOF) {
            break; // file shorter than expected: stop instead of writing junk
        }
        fputc((value < threshold) ? 0 : 255, imgOutputB);
    }
    fclose(imgB);
    endTimeB = clock();
    // The streamed bytes are the whole result; nothing more is appended.
    fclose(imgOutputB);
    dtB = (endTimeB-startTimeB)/(float)(CLOCKS_PER_SEC);

    //**********
    //ASM - aligned
    //**********
    pixelsC = ex1LoadImage(inputPath, 1, &lSize);
    if (pixelsC == NULL) {
        goto cleanup;
    }
    printf("size: %d",lSize);
    outputC = ex1AllocBuffer(lSize, 1);
    thresArrayAl = ex1AllocBuffer(16, 1);
    if (outputC == NULL || thresArrayAl == NULL) {
        printf("Out of memory\n");
        goto cleanup;
    }
    for (i = 0; i < 16; i++) {
        thresArrayAl[i] = (unsigned char) threshold;
    }
    startTimeC = clock();
    for (i = 0; i < loops; i++) {
        blackWhiteASMAligned(pixelsC, lSize, threshold, outputC, thresArrayAl);
    }
    endTimeC = clock();
    ex1SaveImage("C:\\outputC.raw", outputC, lSize);
    dtC = (endTimeC-startTimeC)/(float)(CLOCKS_PER_SEC);

    //************
    //ASM - unaligned
    //************
    pixelsASM_unal = ex1LoadImage(inputPath, 0, &lSize);
    if (pixelsASM_unal == NULL) {
        goto cleanup;
    }
    outputASM_unal = ex1AllocBuffer(lSize, 0);
    thresArrayUnal = ex1AllocBuffer(16, 0);
    if (outputASM_unal == NULL || thresArrayUnal == NULL) {
        printf("Out of memory\n");
        goto cleanup;
    }
    for (i = 0; i < 16; i++) {
        thresArrayUnal[i] = (unsigned char) threshold;
    }
    startTimeASM_unal = clock();
    for (i = 0; i < loops; i++) {
        blackWhiteASMUnaligned(pixelsASM_unal, lSize, threshold, outputASM_unal, thresArrayUnal);
    }
    endTimeASM_unal = clock();
    // Written to its own file so it no longer overwrites the aligned result.
    ex1SaveImage("C:\\outputD.raw", outputASM_unal, lSize);
    dtASM_unal = (endTimeASM_unal-startTimeASM_unal)/(float)(CLOCKS_PER_SEC);

    //*******
    //Display time spent.
    //*******
    printf("Time B (Stream, C): %f\n",dtB);
    printf("Start time B: %ld\n",(long) startTimeB);
    printf("End time B: %ld\n",(long) endTimeB);
    printf("##########\n#%d loops\n##########\n",loops);
    printf("Time A (Buffer, C): %f\n",dtA);
    printf("Start time A: %ld\n",(long) startTimeA);
    printf("End time A: %ld\n",(long) endTimeA);
    printf("Time ASM aligned (Buffer, ASM, aligned): %f\n",dtC);
    printf("Start time ASM aligned: %ld\t",(long) startTimeC);
    printf("End time ASM aligned: %ld\n",(long) endTimeC);
    printf("Time ASM_unal (Buffer, ASM, unaligned): %f\n",dtASM_unal);
    printf("Start time ASM_unal: %ld\t",(long) startTimeASM_unal);
    printf("End time ASM_unal: %ld\n",(long) endTimeASM_unal);

cleanup:
    ex1FreeBuffer(thresArrayUnal, 0);
    ex1FreeBuffer(outputASM_unal, 0);
    ex1FreeBuffer(pixelsASM_unal, 0);
    ex1FreeBuffer(thresArrayAl, 1);
    ex1FreeBuffer(outputC, 1);
    ex1FreeBuffer(pixelsC, 1);
    ex1FreeBuffer(pixels, 0);
}
/* Returns the largest of the three values. */
int computeMax(int val1, int val2, int val3) {
    int best = (val2 > val1) ? val2 : val1;
    return (val3 > best) ? val3 : best;
}
/*
 * 3x3 maximum filter written in plain C, for images that are 1024 pixels
 * wide and at most 1024 rows high.
 *
 * Every interior pixel of `output` receives the largest value found in the
 * 3x3 neighbourhood of the same position in `pixels`. The outermost rows
 * and columns have no complete neighbourhood and are left untouched.
 *
 * pixels - source image, row-major, one byte per pixel
 * lSize  - number of bytes in pixels and output; only complete rows are
 *          used, so a buffer smaller than 1024x1024 is never read past
 *          its end
 * output - destination image, same layout as pixels
 */
void simdMaxFilter(unsigned char* pixels, int lSize, unsigned char* output) {
    int sizeX = 1024;
    int sizeY = 1024;
    int rows = lSize / sizeX; // complete rows actually present in the buffer
    int i, j, di, dj;

    if (rows > sizeY) {
        rows = sizeY;
    }

    // Interior only: rows 1..rows-2 and columns 1..sizeX-2 inclusive.
    for (i = 1; i < rows - 1; ++i) {
        for (j = 1; j < sizeX - 1; ++j) {
            unsigned char newMax = 0;
            for (di = -1; di <= 1; ++di) {
                // First pixel of this neighbourhood row (column j-1).
                const unsigned char* window = pixels + (i + di) * sizeX + (j - 1);
                for (dj = 0; dj < 3; ++dj) {
                    if (window[dj] > newMax) {
                        newMax = window[dj];
                    }
                }
            }
            output[i * sizeX + j] = newMax;
        }
    }
}
/*
 * 3x3 maximum filter with SSE2, for images that are 1024 pixels wide.
 *
 * Each iteration loads 16 pixels from three consecutive rows, takes the
 * per-column maximum, then combines every column with its two right-hand
 * neighbours by shifting the register by 1 and 2 bytes. That yields 14
 * complete results, stored one row down and one column right of the load
 * position (offset 1025); both pointers then advance by 14.
 * Bytes 14 and 15 of each store are incomplete and are overwritten by the
 * following store, except after the very last iteration.
 *
 * The image is walked as one linear stream, so windows that straddle the
 * end of a row mix pixels of adjacent rows and also write into the first
 * and last columns.
 *
 * pixels - source image, row-major, one byte per pixel
 * lSize  - number of bytes in pixels and output
 * output - destination image, same layout as pixels
 */
void simdMaxFilterASM(unsigned char* pixels, int lSize, unsigned char* output) {
    // Furthest byte an iteration touches: the 16-byte load from the third row.
    int reach = 2 * 1024 + 16;
    int ii;

    // Too small for even one iteration; also keeps the counter from starting
    // at zero, which would wrap ecx in the do/while style loop below.
    if (lSize < reach) {
        return;
    }
    // Largest count for which the last load still ends inside the buffer
    // (the matching store at +1025..+1040 then fits as well).
    ii = (lSize - reach) / 14 + 1;

    _asm {
        mov esi , pixels; // source pointer (top row of the window)
        mov ecx , ii; // iteration counter
        mov edi , output; // destination pointer
    l1:
        movdqu xmm0 , [esi]; // 1st row
        movdqu xmm1 , [esi+1024]; // 2nd row
        movdqu xmm2 , [esi+2048]; // 3rd row
        pmaxub xmm0 , xmm1; // per-column max of the three rows
        pmaxub xmm0 , xmm2;
        movdqu xmm6 , xmm0; // two copies to shift
        movdqu xmm7 , xmm0;
        psrldq xmm6 , 1; // column + 1
        psrldq xmm7 , 2; // column + 2
        pmaxub xmm6 , xmm7; // max over the three columns
        pmaxub xmm6 , xmm0;
        movdqu [edi+1025] , xmm6; // store at the window centres (middle row)
        add edi , 14; // 14 valid results per iteration
        add esi , 14;
        sub ecx , 1;
        jnz l1;
    }
}
void ex2() {
int benchCount = 50;
//***************
//Using a buffer with C for labo 2
//***************
FILE* imgLab2 = fopen("C:\\test.raw","rb");
FILE* imgOutputLab2 = fopen("C:\\outputLab2.raw","wb");
//file size
fseek(imgLab2 , 0 , SEEK_END);
int lSizeLab2 = ftell (imgLab2);
rewind (imgLab2);
// allocate memory to contain the whole file:
unsigned char *pixelsLab2 = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2);
unsigned char *outputLab2 = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2);
fread(pixelsLab2, sizeof(unsigned char), lSizeLab2, imgLab2);
fclose(imgLab2);
//black & white
int i;
//Start time
time_t startTimeLab2 = clock();
for(i=0;i<benchCount;++i) {
simdMaxFilter(pixelsLab2, lSizeLab2, outputLab2);
}
//end time
time_t endTimeLab2 = clock();
fwrite(outputLab2, sizeof(unsigned char), lSizeLab2, imgOutputLab2);
fclose(imgOutputLab2);
float dtLab2 = (endTimeLab2-startTimeLab2)/(float)(CLOCKS_PER_SEC);
//***************
//Using a buffer with ASM for labo 2
//***************
FILE* imgLab2_asm = fopen("C:\\test.raw","rb");
FILE* imgOutputLab2_asm = fopen("C:\\outputLab2_asm.raw","wb");
//file size
fseek(imgLab2_asm , 0 , SEEK_END);
int lSizeLab2_asm = ftell(imgLab2_asm);
rewind(imgLab2_asm);
// allocate memory to contain the whole file:
unsigned char *pixelsLab2_asm = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2_asm);
unsigned char *outputLab2_asm = (unsigned char*) malloc(sizeof(unsigned char)*lSizeLab2_asm);
fread(pixelsLab2_asm, sizeof(unsigned char), lSizeLab2_asm, imgLab2_asm);
fclose(imgLab2_asm);
//black & white
//Start time
time_t startTimeLab2_asm = clock();
for(i=0;i<benchCount;++i) {
simdMaxFilterASM(pixelsLab2_asm, lSizeLab2_asm, outputLab2_asm);
}
//end time
time_t endTimeLab2_asm = clock();
fwrite(outputLab2_asm, sizeof(unsigned char), lSizeLab2_asm, imgOutputLab2_asm);
fclose(imgOutputLab2_asm);
float dtLab2_asm = (endTimeLab2_asm-startTimeLab2_asm)/(float)(CLOCKS_PER_SEC);
printf("For %d loops\n",benchCount);
printf("Time C (Buffer, Lab2): %f\n",dtLab2);
printf("Start time Lab2: %d\n",startTimeLab2);
printf("End time Lab2: %d\n\n",endTimeLab2);
printf("Time ASM (Buffer, Lab2): %f\n",dtLab2_asm);
printf("Start time Lab2: %d\n",startTimeLab2_asm);
printf("End time Lab2: %d\n\n",endTimeLab2_asm);
}
/* Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */