Skip to content

Instantly share code, notes, and snippets.

@parastuffs
Created March 14, 2014 20:48
Show Gist options
  • Save parastuffs/9556542 to your computer and use it in GitHub Desktop.
Save parastuffs/9556542 to your computer and use it in GitHub Desktop.
ELECH473 - SIMD - Evaluating the alignment performance
#include "StdAfx.h"
#include "stdio.h"
#include "time.h"
#include <stdlib.h>
#include <string.h>
/*
 * blackWhiteASMAligned - threshold a byte buffer to black/white with SSE2,
 * using aligned 16-byte loads and stores (movdqa).
 *
 * pixels    : input bytes; must be 16-byte aligned (movdqa faults otherwise).
 * lSize     : number of bytes in both pixels and output.
 * t         : unused; kept so existing callers keep compiling.
 * output    : destination of at least lSize bytes; must be 16-byte aligned.
 * threshold : 16 threshold bytes, one per SIMD lane; must be 16-byte aligned.
 *
 * Every output byte is set to 0xFF when the input byte, read as unsigned, is
 * greater than or equal to the threshold byte of its lane, and to 0x00
 * otherwise. Nothing is done for NULL pointers or a non-positive lSize.
 */
void blackWhiteASMAligned(char* pixels, int lSize, int t, char* output, unsigned char* threshold) {
    int blocks; // number of complete 16-byte blocks for the SIMD loop
    int k;      // byte index used by the scalar tail

    (void)t;
    if (!pixels || !output || !threshold || lSize <= 0) {
        return;
    }

    blocks = lSize / 16;

    // The asm loop decrements before it tests, so entering it with a count of
    // zero would wrap ecx around and run far past the end of both buffers.
    if (blocks > 0) {
        _asm {
            mov esi , pixels; // source pointer
            mov ecx , blocks; // remaining 16-byte blocks
            mov edi , output; // destination pointer
            mov eax , threshold; // pointer to the 16 threshold bytes
            movdqa xmm7 , [eax]; // xmm7 = threshold vector
        l1:
            movdqa xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // xmm0 = unsigned min(pixel, threshold)
            pcmpeqb xmm0 , xmm7; // 0xFF where min == threshold, i.e. pixel >= threshold
            movdqa [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes after the last complete block are handled one at a time with the
    // same rule, so the whole output buffer is always written.
    for (k = blocks * 16; k < lSize; k++) {
        output[k] = ((unsigned char)pixels[k] >= threshold[k % 16]) ? (char)0xFF : (char)0;
    }
}
/*
 * blackWhiteASMUnaligned - threshold a byte buffer to black/white with SSE2,
 * using unaligned 16-byte loads and stores (movdqu), so the buffers may sit
 * at any address.
 *
 * pixels    : input bytes.
 * lSize     : number of bytes in both pixels and output.
 * t         : unused; kept so existing callers keep compiling.
 * output    : destination of at least lSize bytes.
 * threshold : 16 threshold bytes, one per SIMD lane.
 *
 * Every output byte is set to 0xFF when the input byte, read as unsigned, is
 * greater than or equal to the threshold byte of its lane, and to 0x00
 * otherwise. Nothing is done for NULL pointers or a non-positive lSize.
 */
void blackWhiteASMUnaligned(char* pixels, int lSize, int t, char* output, unsigned char* threshold) {
    int blocks; // number of complete 16-byte blocks for the SIMD loop
    int k;      // byte index used by the scalar tail

    (void)t;
    if (!pixels || !output || !threshold || lSize <= 0) {
        return;
    }

    blocks = lSize / 16;

    // The asm loop decrements before it tests, so entering it with a count of
    // zero would wrap ecx around and run far past the end of both buffers.
    if (blocks > 0) {
        _asm {
            mov esi , pixels; // source pointer
            mov ecx , blocks; // remaining 16-byte blocks
            mov edi , output; // destination pointer
            mov eax , threshold; // pointer to the 16 threshold bytes
            movdqu xmm7 , [eax]; // xmm7 = threshold vector
        l1:
            movdqu xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // xmm0 = unsigned min(pixel, threshold)
            pcmpeqb xmm0 , xmm7; // 0xFF where min == threshold, i.e. pixel >= threshold
            movdqu [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // Bytes after the last complete block are handled one at a time with the
    // same rule, so the whole output buffer is always written.
    for (k = blocks * 16; k < lSize; k++) {
        output[k] = ((unsigned char)pixels[k] >= threshold[k % 16]) ? (char)0xFF : (char)0;
    }
}
/*
 * ASMtest - smoke test for the aligned SSE2 threshold loop.
 *
 * Fills four 16-byte blocks with the value 200, thresholds them against 127
 * and discards the result. The buffers come from _aligned_malloc because the
 * loop uses movdqa, which requires 16-byte aligned addresses; plain malloc
 * does not guarantee that alignment. All buffers are released before return.
 */
void ASMtest() {
    enum { BLOCK_BYTES = 16, BLOCK_COUNT = 4, BUFFER_BYTES = BLOCK_BYTES * BLOCK_COUNT };
    int counter = BLOCK_COUNT; // number of 16-byte blocks the loop processes
    int i;
    unsigned char *pixels = (unsigned char*) _aligned_malloc(BUFFER_BYTES, BLOCK_BYTES);
    char *output = (char*) _aligned_malloc(BUFFER_BYTES, BLOCK_BYTES);
    unsigned char *threshold = (unsigned char*) _aligned_malloc(BLOCK_BYTES, BLOCK_BYTES);

    if (pixels && output && threshold) {
        for (i = 0; i < BUFFER_BYTES; i++) {
            pixels[i] = 200;
        }
        for (i = 0; i < BLOCK_BYTES; i++) {
            threshold[i] = 127;
        }

        _asm {
            mov esi , pixels; // source pointer
            mov ecx , counter; // remaining 16-byte blocks
            mov edi , output; // destination pointer
            mov eax , threshold; // pointer to the 16 threshold bytes
            movdqa xmm7 , [eax]; // xmm7 = threshold vector
        l1:
            movdqa xmm0 , [esi]; // load 16 pixels
            pminub xmm0 , xmm7; // xmm0 = unsigned min(pixel, threshold)
            pcmpeqb xmm0 , xmm7; // 0xFF where min == threshold, i.e. pixel >= threshold
            movdqa [edi] , xmm0; // store 16 results
            add edi , 16; // next destination block
            add esi , 16; // next source block
            sub ecx , 1;
            jnz l1;
        }
    }

    // _aligned_free accepts NULL, so a partially failed allocation is fine here.
    _aligned_free(threshold);
    _aligned_free(output);
    _aligned_free(pixels);
}
/*
 * blackWhiteBuffer - in-place black/white thresholding of a byte buffer.
 *
 * pixels    : buffer of lSize bytes, overwritten with the result.
 * lSize     : number of bytes to process; nothing happens when it is <= 0.
 * threshold : bytes below this value become 0, all others become 255 (0xFF).
 *
 * A NULL buffer is ignored.
 */
void blackWhiteBuffer(char* pixels, int lSize, int threshold) {
    int i;

    if (!pixels) {
        return;
    }

    for (i = 0; i < lSize; i++) {
        // Read the byte as unsigned: where plain char is signed, the values
        // 128..255 would otherwise compare as negative and always turn black.
        unsigned char value = (unsigned char)pixels[i];

        pixels[i] = (value < threshold) ? (char)0 : (char)0xFF;
    }
}
/* Input image and the output file written by each variant. */
static const char benchInputPath[] = "C:\\test.raw";
static const char benchOutputPathA[] = "C:\\outputA.raw";
static const char benchOutputPathB[] = "C:\\outputB.raw";
static const char benchOutputPathC[] = "C:\\outputC.raw";
static const char benchOutputPathD[] = "C:\\outputD.raw";

/* Allocates size zero-filled bytes, 16-byte aligned when aligned is non-zero.
 * Returns NULL on failure or when size <= 0. Release with releaseBuffer. */
static char *allocBuffer(int size, int aligned) {
    char *buf;

    if (size <= 0) {
        return NULL;
    }
    buf = aligned ? (char*) _aligned_malloc((size_t)size, 16)
                  : (char*) malloc((size_t)size);
    if (buf) {
        memset(buf, 0, (size_t)size);
    }
    return buf;
}

/* Frees a buffer from allocBuffer; aligned must match the allocation. */
static void releaseBuffer(void *buf, int aligned) {
    if (aligned) {
        _aligned_free(buf);
    } else {
        free(buf);
    }
}

/* Reads the whole file at path into a new buffer (see allocBuffer) and stores
 * its length in *sizeOut. Returns NULL if the file cannot be opened, is empty,
 * is too large for an int, or cannot be read completely. */
static char *loadImage(const char *path, int aligned, int *sizeOut) {
    FILE *in = fopen(path, "rb");
    long size = 0;
    char *buf = NULL;

    if (!in) {
        return NULL;
    }
    if (fseek(in, 0, SEEK_END) == 0) {
        size = ftell(in);
        rewind(in);
        if (size > 0 && (long)(int)size == size) {
            buf = allocBuffer((int)size, aligned);
            if (buf && fread(buf, 1, (size_t)size, in) != (size_t)size) {
                releaseBuffer(buf, aligned);
                buf = NULL;
            }
        }
    }
    fclose(in);
    if (buf) {
        *sizeOut = (int)size;
    }
    return buf;
}

/* Writes size bytes of data to path. Returns non-zero on success. */
static int saveImage(const char *path, const char *data, int size) {
    FILE *out = fopen(path, "wb");
    int ok;

    if (!out) {
        return 0;
    }
    ok = fwrite(data, 1, (size_t)size, out) == (size_t)size;
    if (fclose(out) != 0) {
        ok = 0;
    }
    return ok;
}

/* Converts a pair of clock() readings into elapsed seconds. */
static float secondsBetween(clock_t start, clock_t end) {
    return (end - start) / (float)(CLOCKS_PER_SEC);
}

/*
 * Benchmarks four ways of thresholding C:\test.raw to black/white:
 *   A - read the whole file, threshold in C, write it back (I/O is timed);
 *   B - stream byte by byte with fgetc/fputc (I/O is timed);
 *   C - SSE2 kernel on 16-byte aligned buffers, repeated `loops` times;
 *   D - SSE2 kernel on plain malloc buffers, repeated `loops` times.
 * Prints the timings, waits for a key press, and returns 0 on success or 1
 * when a file or allocation error stopped the run.
 */
int main(int argc, char* argv[]) {
    const int threshold = 75;
    const int loops = 10000;
    int rc = 1;
    int i;
    int lSize = 0;
    clock_t startTimeA = 0, endTimeA = 0;
    clock_t startTimeB = 0, endTimeB = 0;
    clock_t startTimeC = 0, endTimeC = 0;
    clock_t startTimeASM_unal = 0, endTimeASM_unal = 0;
    float dtA, dtB, dtC, dtASM_unal;
    char *pixels = NULL;
    char *pixelsC = NULL;
    char *outputC = NULL;
    char *pixelsASM_unal = NULL;
    char *outputASM_unal = NULL;
    unsigned char *thresArrayAl = NULL;
    unsigned char *thresArrayUnal = NULL;
    FILE *imgB = NULL;
    FILE *imgOutputB = NULL;

    (void)argc;
    (void)argv;

    //***************
    //Using a buffer
    //***************
    startTimeA = clock();
    pixels = loadImage(benchInputPath, 0, &lSize);
    if (!pixels) {
        fprintf(stderr, "Cannot read %s\n", benchInputPath);
        goto cleanup;
    }
    blackWhiteBuffer(pixels, lSize, threshold);
    if (!saveImage(benchOutputPathA, pixels, lSize)) {
        fprintf(stderr, "Cannot write %s\n", benchOutputPathA);
        goto cleanup;
    }
    endTimeA = clock();
    dtA = secondsBetween(startTimeA, endTimeA);

    //**************
    //Streaming
    //**************
    startTimeB = clock();
    imgB = fopen(benchInputPath, "rb");
    imgOutputB = fopen(benchOutputPathB, "wb");
    if (!imgB || !imgOutputB) {
        fprintf(stderr, "Cannot open files for the streaming variant\n");
        goto cleanup;
    }
    for (i = 0; i < lSize; i++) {
        int c = fgetc(imgB);
        if (c == EOF) {
            break; // file shorter than expected: stop instead of writing junk
        }
        fputc(c < threshold ? 0 : 255, imgOutputB);
    }
    fclose(imgB);
    imgB = NULL;
    // Only the streamed bytes belong in this file; writing the buffer from
    // variant A as well would double its size and inflate the measured time.
    fclose(imgOutputB);
    imgOutputB = NULL;
    endTimeB = clock();
    dtB = secondsBetween(startTimeB, endTimeB);

    //**********
    //ASM - aligned
    //**********
    pixelsC = loadImage(benchInputPath, 1, &lSize);
    if (!pixelsC) {
        fprintf(stderr, "Cannot read %s\n", benchInputPath);
        goto cleanup;
    }
    printf("size: %d",lSize);
    // The alignment argument must be a power of two; 16 is what the 128-bit
    // aligned loads and stores in the kernel require.
    outputC = allocBuffer(lSize, 1);
    thresArrayAl = (unsigned char*) _aligned_malloc(16, 16);
    if (!outputC || !thresArrayAl) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }
    for (i = 0; i < 16; i++) {
        thresArrayAl[i] = (unsigned char)threshold;
    }
    startTimeC = clock();
    for (i = 0; i < loops; i++) {
        blackWhiteASMAligned(pixelsC, lSize, threshold, outputC, thresArrayAl);
    }
    endTimeC = clock();
    if (!saveImage(benchOutputPathC, outputC, lSize)) {
        fprintf(stderr, "Cannot write %s\n", benchOutputPathC);
        goto cleanup;
    }
    dtC = secondsBetween(startTimeC, endTimeC);

    //************
    //ASM - unaligned
    //************
    pixelsASM_unal = loadImage(benchInputPath, 0, &lSize);
    if (!pixelsASM_unal) {
        fprintf(stderr, "Cannot read %s\n", benchInputPath);
        goto cleanup;
    }
    outputASM_unal = allocBuffer(lSize, 0);
    thresArrayUnal = (unsigned char*) malloc(16);
    if (!outputASM_unal || !thresArrayUnal) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }
    for (i = 0; i < 16; i++) {
        thresArrayUnal[i] = (unsigned char)threshold;
    }
    startTimeASM_unal = clock();
    for (i = 0; i < loops; i++) {
        blackWhiteASMUnaligned(pixelsASM_unal, lSize, threshold, outputASM_unal, thresArrayUnal);
    }
    endTimeASM_unal = clock();
    // Written to its own file so it does not overwrite the aligned result.
    if (!saveImage(benchOutputPathD, outputASM_unal, lSize)) {
        fprintf(stderr, "Cannot write %s\n", benchOutputPathD);
        goto cleanup;
    }
    dtASM_unal = secondsBetween(startTimeASM_unal, endTimeASM_unal);

    //*******
    //Display time spent.
    //*******
    printf("Time A (Buffer, C): %f\n",dtA);
    printf("Start time A: %ld\n",(long)startTimeA);
    printf("End time A: %ld\n",(long)endTimeA);
    printf("Time B (Stream, C): %f\n",dtB);
    printf("Start time B: %ld\n",(long)startTimeB);
    printf("End time B: %ld\n",(long)endTimeB);
    printf("##########\n#%d loops\n##########\n",loops);
    printf("Time ASM aligned (Buffer, ASM, aligned): %f\n",dtC);
    printf("Start time ASM aligned: %ld\t",(long)startTimeC);
    printf("End time ASM aligned: %ld\n",(long)endTimeC);
    printf("Time ASM_unal (Buffer, ASM, unaligned): %f\n",dtASM_unal);
    printf("Start time ASM_unal: %ld\t",(long)startTimeASM_unal);
    printf("End time ASM_unal: %ld\n",(long)endTimeASM_unal);
    rc = 0;

cleanup:
    if (imgB) {
        fclose(imgB);
    }
    if (imgOutputB) {
        fclose(imgOutputB);
    }
    free(thresArrayUnal);
    releaseBuffer(outputASM_unal, 0);
    releaseBuffer(pixelsASM_unal, 0);
    _aligned_free(thresArrayAl);
    releaseBuffer(outputC, 1);
    releaseBuffer(pixelsC, 1);
    releaseBuffer(pixels, 0);
    getchar(); // keep the console window open until a key is pressed
    return rc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment