Created
March 14, 2014 16:59
-
-
Save parastuffs/9551969 to your computer and use it in GitHub Desktop.
ELECH473 - SIMD _ movapd surprisingly working
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "StdAfx.h"
#include "stdio.h"
#include "time.h"
#include <limits.h>
#include <stdlib.h>
void blackWhiteASM(char* pixels, int lSize, int t, char* output) { | |
int ii=lSize/16; // Set the counter | |
unsigned char *threshold = (unsigned char*) malloc(sizeof(unsigned char)*16); | |
int i; | |
for(i=0;i<16;i++) { | |
threshold[i] = t; | |
} | |
_asm { | |
mov esi , pixels; // datain ptr of the line in register | |
mov ecx , ii; // counter | |
mov edi , output; // dataout pointer | |
mov eax , threshold; // treshold value pointer | |
movapd xmm7 , [eax]; // 1st pipe mask | |
l1: | |
movapd xmm0 , [esi]; // load first line | |
pcmpeqb xmm0 , xmm7; // compare | |
movapd [edi] , xmm0; // Move result to memory destination | |
add edi , 16; // New result pointer | |
add esi , 16; // New source pointer | |
sub ecx , 1; | |
jnz l1; | |
//emms; | |
} | |
} | |
/*
 * Smoke test for the SIMD routine: fills a small buffer with the constant
 * pixel value 200 and runs blackWhiteASM over its first 64 bytes (four
 * 16-byte blocks) with a threshold of 127.
 *
 * The result is not inspected; the function only exercises the assembly
 * path (e.g. under a debugger). All buffers are released before returning.
 */
void ASMtest() {
    enum { BUFFER_BYTES = 65, TESTED_BYTES = 64, TEST_THRESHOLD = 127 };
    unsigned char *pixels = (unsigned char*) malloc(sizeof(unsigned char) * BUFFER_BYTES);
    char *output = (char*) malloc(sizeof(char) * BUFFER_BYTES);
    int i;

    if (pixels != NULL && output != NULL) {
        for (i = 0; i < BUFFER_BYTES; i++) {
            pixels[i] = 200;
        }
        /* Reuse the production routine instead of a private copy of its loop. */
        blackWhiteASM((char*) pixels, TESTED_BYTES, TEST_THRESHOLD, output);
    }

    free(output);
    free(pixels);
}
/*
 * Thresholds a buffer of 8-bit pixels in place.
 *
 * pixels    : lSize bytes, each interpreted as an unsigned value 0..255
 * lSize     : number of bytes to process; nothing happens when <= 0
 * threshold : a pixel below this value becomes 0, any other pixel 255 (0xFF)
 *
 * Each byte is converted to unsigned char before the comparison: plain char
 * may be signed, in which case bright pixels (128..255) would otherwise
 * compare as negative and be turned black.
 */
void blackWhiteBuffer(char* pixels, int lSize, int threshold) {
    int i;

    if (pixels == NULL) {
        return;
    }

    for (i = 0; i < lSize; i++) {
        unsigned char value = (unsigned char) pixels[i];
        pixels[i] = (value < threshold) ? 0 : (char) 0xFF;
    }
}
/* Input file shared by the three timed variants. */
static const char *const INPUT_PATH = "C:\\test.raw";

/*
 * Reads the whole file at path into a newly malloc'ed buffer.
 * On success returns the buffer (to be released with free) and stores the
 * byte count in *size; on any failure returns NULL and leaves *size untouched.
 */
static char *loadFile(const char *path, int *size) {
    FILE *in;
    char *data;
    long len;

    in = fopen(path, "rb");
    if (in == NULL) {
        return NULL;
    }
    if (fseek(in, 0, SEEK_END) != 0) {
        fclose(in);
        return NULL;
    }
    len = ftell(in);
    /* The processing routines take an int length. */
    if (len < 0 || len > INT_MAX) {
        fclose(in);
        return NULL;
    }
    rewind(in);

    /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
    data = (char*) malloc(len > 0 ? (size_t) len : 1);
    if (data != NULL && fread(data, 1, (size_t) len, in) != (size_t) len) {
        free(data);
        data = NULL;
    }
    fclose(in);

    if (data != NULL) {
        *size = (int) len;
    }
    return data;
}

/* Writes size bytes of data to path; returns 1 on success, 0 on failure. */
static int saveFile(const char *path, const char *data, int size) {
    FILE *out;
    int ok;

    out = fopen(path, "wb");
    if (out == NULL) {
        return 0;
    }
    ok = (fwrite(data, 1, (size_t) size, out) == (size_t) size);
    if (fclose(out) != 0) {
        ok = 0;
    }
    return ok;
}

/* Variant A: load the whole file, threshold it in C, write it back out. */
static int timeBufferC(int threshold, clock_t *start, clock_t *end) {
    char *pixels;
    int size = 0;
    int ok = 0;

    *start = clock();
    pixels = loadFile(INPUT_PATH, &size);
    if (pixels != NULL) {
        blackWhiteBuffer(pixels, size, threshold);
        ok = saveFile("C:\\outputA.raw", pixels, size);
    }
    free(pixels);
    *end = clock();
    return ok;
}

/* Variant B: threshold the file one byte at a time with fgetc/fputc. */
static int timeStreamC(int threshold, clock_t *start, clock_t *end) {
    FILE *in;
    FILE *out;
    int c;
    int ok = 0;

    *start = clock();
    in = fopen(INPUT_PATH, "rb");
    out = fopen("C:\\outputB.raw", "wb");
    if (in != NULL && out != NULL) {
        ok = 1;
        /* fgetc yields the byte as an unsigned value, so the compare is unsigned. */
        while ((c = fgetc(in)) != EOF) {
            if (fputc(c < threshold ? 0 : 255, out) == EOF) {
                ok = 0;
                break;
            }
        }
        if (ferror(in)) {
            ok = 0;
        }
    }
    if (in != NULL) {
        fclose(in);
    }
    if (out != NULL && fclose(out) != 0) {
        ok = 0;
    }
    *end = clock();
    return ok;
}

/* Variant C: load the whole file, threshold it with blackWhiteASM, write the result. */
static int timeBufferASM(int threshold, clock_t *start, clock_t *end) {
    char *pixels;
    char *output = NULL;
    int size = 0;
    int ok = 0;

    *start = clock();
    pixels = loadFile(INPUT_PATH, &size);
    if (pixels != NULL) {
        /* Zero-filled so that no uninitialised byte can reach the output file. */
        output = (char*) calloc(size > 0 ? (size_t) size : 1, 1);
    }
    if (pixels != NULL && output != NULL) {
        blackWhiteASM(pixels, size, threshold, output);
        ok = saveFile("C:\\outputC.raw", output, size);
    }
    free(output);
    free(pixels);
    *end = clock();
    return ok;
}

/* Converts a pair of clock() readings into elapsed seconds. */
static double elapsedSeconds(clock_t start, clock_t end) {
    return (double) (end - start) / (double) CLOCKS_PER_SEC;
}

/*
 * Benchmarks three ways of thresholding C:\test.raw (threshold 75):
 *   A - whole file in a buffer, processed in C      -> C:\outputA.raw
 *   B - byte-by-byte streaming in C                 -> C:\outputB.raw
 *   C - whole file in a buffer, processed with SSE2 -> C:\outputC.raw
 * Each measurement includes the file I/O of its variant. The clock()
 * readings and elapsed times are printed, then the program waits for a key
 * press. Returns EXIT_SUCCESS when all three variants completed,
 * EXIT_FAILURE otherwise.
 */
int main(int argc, char* argv[]) {
    int const threshold = 75;
    clock_t startTimeA, endTimeA, startTimeB, endTimeB, startTimeC, endTimeC;
    int okA, okB, okC;

    (void) argc;
    (void) argv;

    okA = timeBufferC(threshold, &startTimeA, &endTimeA);
    okB = timeStreamC(threshold, &startTimeB, &endTimeB);
    okC = timeBufferASM(threshold, &startTimeC, &endTimeC);

    if (!okA) {
        fprintf(stderr, "Variant A (Buffer, C) failed: file I/O or allocation error\n");
    }
    if (!okB) {
        fprintf(stderr, "Variant B (Stream, C) failed: file I/O error\n");
    }
    if (!okC) {
        fprintf(stderr, "Variant C (Buffer, ASM) failed: file I/O or allocation error\n");
    }

    /* clock_t has no printf conversion of its own; print it through long. */
    printf("Time A (Buffer, C): %f\n", elapsedSeconds(startTimeA, endTimeA));
    printf("Start time A: %ld\n", (long) startTimeA);
    printf("End time A: %ld\n", (long) endTimeA);
    printf("Time B (Stream, C): %f\n", elapsedSeconds(startTimeB, endTimeB));
    printf("Start time B: %ld\n", (long) startTimeB);
    printf("End time B: %ld\n", (long) endTimeB);
    printf("Time C (Buffer, ASM): %f\n", elapsedSeconds(startTimeC, endTimeC));
    printf("Start time C: %ld\n", (long) startTimeC);
    printf("End time C: %ld\n", (long) endTimeC);

    /* Keep the console window open until a key is pressed. */
    getchar();
    return (okA && okB && okC) ? EXIT_SUCCESS : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment