Created
April 25, 2018 09:21
-
-
Save parastuffs/49e14df634258239696722a627455f08 to your computer and use it in GitHub Desktop.
ELECH473 - Arthur Valingot - Lab 2 AT&T version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ConsoleApplication2.cpp : Defines the entry point for the console application.
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// #include <windows.h>
// #pragma warning(disable : 4996)
/*
Results
C code: debug: 0.003631, release: 0.002187 (significant gain)
Assembly code: debug: 0.001466, release: 0.001410 (no significant gain; very little change between debug and release)
Conclusion: the assembly version is faster. Moreover, switching the build mode (debug / release) has no effect on it, whereas the effect on the C code is spectacular.
*/
/*
 * CHUNK_BYTES: width of one SSE register load/store.
 * CHUNK_STEP:  bytes advanced per iteration. The horizontal pass combines each
 *              lane with the two lanes to its right, so the top two lanes of a
 *              chunk see shifted-in zeros and are recomputed by the next chunk.
 */
enum { CHUNK_BYTES = 16, CHUNK_STEP = 14 };

/*
 * Contour detector: 3x3 (max - min) filter over a flat 8-bit buffer, written
 * with SSE2 inline assembly (GCC extended asm, AT&T syntax).
 *
 * For the chunk starting at byte offset p the routine:
 *   1. loads 16 bytes at p, p + width and p + 2*width (three consecutive rows);
 *   2. reduces them lane by lane to a vertical max and a vertical min;
 *   3. combines every lane with its two right-hand neighbours (byte shifts by
 *      one and two), giving the max and min over a 3x3 window;
 *   4. stores the saturating difference max - min at dst + p.
 *
 * The result for a window is stored at the window's top-left position, and the
 * buffer is walked as one flat array, so windows near the end of a row take
 * their right-hand columns from the start of the next row.
 *
 * Only chunks whose third-row load stays inside the buffer are processed; the
 * remaining tail of dst is left untouched. The last two bytes of the final
 * stored chunk are computed with shifted-in zeros (their min is 0).
 *
 * src    : input pixels, width * height bytes, read only.
 * dst    : output pixels, width * height bytes; must not overlap src.
 * width  : row length in bytes (distance between vertically adjacent pixels).
 * height : number of rows.
 */
static void contour_detect_sse(const unsigned char *src, unsigned char *dst,
                               size_t width, size_t height)
{
    const size_t total = width * height;
    /* Furthest byte touched by one iteration, relative to its start offset. */
    const size_t reach = 2 * width + CHUNK_BYTES;
    const unsigned char *in = src;
    unsigned char *out = dst;
    size_t chunks;

    /* The asm loop tests its counter at the bottom, so it must never be
     * entered with zero chunks. */
    if (total < reach) {
        return;
    }
    chunks = (total - reach) / CHUNK_STEP + 1;

    __asm__ volatile(
        "1:\n\t"
        /* Three vertically adjacent 16-byte rows. */
        "movdqu (%[in]), %%xmm0\n\t"
        "movdqu (%[in],%[w]), %%xmm1\n\t"
        "movdqu (%[in],%[w],2), %%xmm2\n\t"
        /* Vertical max -> xmm0, vertical min -> xmm3. */
        "movdqa %%xmm0, %%xmm3\n\t"
        "pmaxub %%xmm1, %%xmm0\n\t"
        "pmaxub %%xmm2, %%xmm0\n\t"
        "pminub %%xmm1, %%xmm3\n\t"
        "pminub %%xmm2, %%xmm3\n\t"
        /* Copies shifted by one and two bytes bring the right-hand
         * neighbours into each lane. */
        "movdqa %%xmm0, %%xmm6\n\t"
        "movdqa %%xmm0, %%xmm7\n\t"
        "movdqa %%xmm3, %%xmm4\n\t"
        "movdqa %%xmm3, %%xmm5\n\t"
        "psrldq $1, %%xmm6\n\t"
        "psrldq $2, %%xmm7\n\t"
        "psrldq $1, %%xmm4\n\t"
        "psrldq $2, %%xmm5\n\t"
        /* Horizontal max -> xmm6, horizontal min -> xmm4. */
        "pmaxub %%xmm7, %%xmm6\n\t"
        "pmaxub %%xmm0, %%xmm6\n\t"
        "pminub %%xmm5, %%xmm4\n\t"
        "pminub %%xmm3, %%xmm4\n\t"
        /* Saturating max - min, stored at the window's top-left position. */
        "psubusb %%xmm4, %%xmm6\n\t"
        "movdqu %%xmm6, (%[out])\n\t"
        "add %[step], %[in]\n\t"
        "add %[step], %[out]\n\t"
        "sub $1, %[n]\n\t"
        "jnz 1b\n\t"
        /* Early-clobber: these are modified while %[w] is still needed. */
        : [in] "+&r"(in), [out] "+&r"(out), [n] "+&r"(chunks)
        : [w] "r"(width), [step] "i"(CHUNK_STEP)
        : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
          "cc", "memory");
}

/*
 * Reads a 1024x1024 8-bit raw image, runs the SSE contour detector on it and
 * writes the result as a raw image of the same size.
 *
 * Returns 0 on success and 1 on any allocation or file error (a message is
 * printed on standard output in that case).
 */
int main()
{
    enum { W = 1024, H = 1024 };
    const size_t pixel_count = (size_t)W * H;
    int status = 1;
    int write_ok;
    FILE *fp1 = NULL;
    FILE *fp2 = NULL;
    unsigned char *src = malloc(pixel_count);
    unsigned char *dst = malloc(pixel_count);

    if (src == NULL || dst == NULL) {
        printf("Out of memory!");
        goto cleanup;
    }

    /* Raw pixel data: binary mode keeps the bytes intact on every platform. */
    fp1 = fopen("/home/para/Documents/ULB/ELECH473/2017-2018/SIMD/students/LAB7ArthurValingot/test.raw", "rb");
    if (fp1 == NULL) {
        printf("Can't open specified file!");
        goto cleanup;
    }
    if (fread(src, sizeof(unsigned char), pixel_count, fp1) != pixel_count) {
        /* A short read would leave part of src uninitialised. */
        printf("Can't read specified file!");
        goto cleanup;
    }

    /* Bytes the filter does not write keep their source value. */
    memcpy(dst, src, pixel_count);
    contour_detect_sse(src, dst, W, H);
    printf("ASM computation over.\n");

    /* Opened only now so a failed run does not truncate an existing output. */
    fp2 = fopen("/home/para/Documents/ULB/ELECH473/2017-2018/SIMD/students/LAB7ArthurValingot/test_contourDetector.raw", "wb");
    if (fp2 == NULL) {
        printf("Can't open specified file!");
        goto cleanup;
    }
    write_ok = fwrite(dst, sizeof(unsigned char), pixel_count, fp2) == pixel_count;
    /* fclose flushes buffered data, so its result is part of the write. */
    if (fclose(fp2) != 0) {
        write_ok = 0;
    }
    fp2 = NULL;
    if (!write_ok) {
        printf("Can't write specified file!");
        goto cleanup;
    }

    status = 0;

cleanup:
    if (fp2 != NULL) {
        fclose(fp2);
    }
    if (fp1 != NULL) {
        fclose(fp1);
    }
    free(dst);
    free(src);
    return status;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment