Skip to content

Instantly share code, notes, and snippets.

@parastuffs
Created April 25, 2018 09:21
Show Gist options
  • Save parastuffs/49e14df634258239696722a627455f08 to your computer and use it in GitHub Desktop.
Save parastuffs/49e14df634258239696722a627455f08 to your computer and use it in GitHub Desktop.
ELECH473 - Arthur Valingot - Lab 2 AT&T version
// ConsoleApplication2.cpp : Defines the entry point for the console application.
//
#include <stdlib.h>
#include <stdio.h>
// #include <windows.h>
// #pragma warning(disable : 4996)
/*
Results
C code: debug: 0.003631, release: 0.002187 -> significant gain
Assembly code: debug: 0.001466, release: 0.001410 -> no significant gain (very little change between debug and release)
Conclusion: the assembly version is faster; moreover, switching the build mode (debug / release) has no effect on it, whereas the effect is spectacular on the C code.
*/
/* Bytes held by one XMM register, and valid result bytes produced per iteration
 * (the two highest lanes lack their right-hand neighbours after the byte shifts). */
enum { VEC_BYTES = 16, STEP_BYTES = 14 };

/*
 * Contour detector: for every processed byte offset i,
 *
 *     dst[i] = max(window) - min(window)
 *
 * where window is the 3x3 neighbourhood src[i + dy*width + dx], dx,dy in {0,1,2}.
 * The window is anchored at its top-left corner and the image is addressed as a
 * flat buffer, so windows near the right edge wrap into the following row.
 *
 * src   - input pixels, `size` bytes, one byte per pixel
 * dst   - output pixels, `size` bytes; must not overlap src
 * width - number of bytes per image row
 * size  - total number of bytes in each buffer
 *
 * Only offsets whose whole window (including the 16-byte vector loads) lies
 * inside the buffer are computed; every other byte of dst is left untouched.
 */
static void contour_detect(const unsigned char *src, unsigned char *dst,
                           size_t width, size_t size)
{
    /* Furthest byte touched by a block starting at offset o is o + span - 1. */
    const size_t span = 2 * width + VEC_BYTES;
    if (size < span) {
        return; /* image too small for a single block */
    }
    const size_t blocks = (size - span) / STEP_BYTES + 1;

#if defined(__GNUC__) && defined(__SSE2__)
    /* Each iteration stores 16 bytes but only 14 are valid; the next iteration
     * overwrites the 2 partial ones. Save the bytes the last store would
     * corrupt so they can be restored afterwards. */
    const size_t tail = blocks * STEP_BYTES;
    const unsigned char keep0 = dst[tail];
    const unsigned char keep1 = dst[tail + 1];

    const unsigned char *in = src;
    unsigned char *out = dst;
    size_t remaining = blocks;

    __asm__ volatile(
        "1:\n\t"
        /* load 16 pixels from three consecutive rows */
        "movdqu (%[in]), %%xmm0\n\t"
        "movdqu (%[in],%[row]), %%xmm1\n\t"
        "movdqu (%[in],%[row],2), %%xmm2\n\t"
        /* vertical max -> xmm0, vertical min -> xmm3 */
        "movdqa %%xmm0, %%xmm3\n\t"
        "pmaxub %%xmm1, %%xmm0\n\t"
        "pmaxub %%xmm2, %%xmm0\n\t"
        "pminub %%xmm1, %%xmm3\n\t"
        "pminub %%xmm2, %%xmm3\n\t"
        /* copies shifted by 1 and 2 bytes bring the right-hand neighbours */
        "movdqa %%xmm0, %%xmm6\n\t"
        "movdqa %%xmm0, %%xmm7\n\t"
        "movdqa %%xmm3, %%xmm4\n\t"
        "movdqa %%xmm3, %%xmm5\n\t"
        "psrldq $1, %%xmm6\n\t"
        "psrldq $2, %%xmm7\n\t"
        "psrldq $1, %%xmm4\n\t"
        "psrldq $2, %%xmm5\n\t"
        /* horizontal max -> xmm6, horizontal min -> xmm4 */
        "pmaxub %%xmm7, %%xmm6\n\t"
        "pmaxub %%xmm0, %%xmm6\n\t"
        "pminub %%xmm5, %%xmm4\n\t"
        "pminub %%xmm3, %%xmm4\n\t"
        /* max - min */
        "psubusb %%xmm4, %%xmm6\n\t"
        "movdqu %%xmm6, (%[out])\n\t"
        "add $14, %[in]\n\t"
        "add $14, %[out]\n\t"
        "sub $1, %[cnt]\n\t"
        "jnz 1b\n\t"
        /* early-clobber: these are modified while [row] is still needed */
        : [in] "+&r"(in), [out] "+&r"(out), [cnt] "+&r"(remaining)
        : [row] "r"(width)
        : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
          "cc", "memory");

    dst[tail] = keep0;
    dst[tail + 1] = keep1;
#else
    /* Portable reference implementation producing the same valid bytes. */
    for (size_t i = 0; i < blocks * STEP_BYTES; i++) {
        unsigned char lo = 255;
        unsigned char hi = 0;
        for (size_t dy = 0; dy < 3; dy++) {
            for (size_t dx = 0; dx < 3; dx++) {
                const unsigned char v = src[i + dy * width + dx];
                if (v < lo) {
                    lo = v;
                }
                if (v > hi) {
                    hi = v;
                }
            }
        }
        dst[i] = (unsigned char)(hi - lo);
    }
#endif
}

/*
 * Reads a 1024x1024 8-bit raw image, runs the contour detector on it and
 * writes the result as a raw image of the same size.
 *
 * Pixels the detector cannot compute (the tail of the buffer) keep the value
 * of the source image. The output file is only created once the input has been
 * read and processed successfully.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on any allocation or I/O error.
 */
int main(void)
{
    const size_t width = 1024;
    const size_t height = 1024;
    const size_t pixels = width * height;
    const char *in_path = "/home/para/Documents/ULB/ELECH473/2017-2018/SIMD/students/LAB7ArthurValingot/test.raw";
    const char *out_path = "/home/para/Documents/ULB/ELECH473/2017-2018/SIMD/students/LAB7ArthurValingot/test_contourDetector.raw";

    int status = EXIT_FAILURE;
    FILE *fin = NULL;
    FILE *fout = NULL;
    unsigned char *src = malloc(pixels);
    unsigned char *dst = malloc(pixels);

    if (src == NULL || dst == NULL) {
        printf("Out of memory!");
        goto cleanup;
    }

    /* Raw pixel data: open in binary mode so no newline translation occurs. */
    fin = fopen(in_path, "rb");
    if (fin == NULL) {
        printf("Can't open specified file!");
        goto cleanup;
    }
    if (fread(src, sizeof *src, pixels, fin) != pixels) {
        printf("Can't read the whole input image!");
        goto cleanup;
    }

    /* Start from a copy so bytes the detector does not compute stay defined. */
    for (size_t i = 0; i < pixels; i++) {
        dst[i] = src[i];
    }

    contour_detect(src, dst, width, pixels);
    printf("ASM computation over.\n");

    fout = fopen(out_path, "wb");
    if (fout == NULL) {
        printf("Can't open specified file!");
        goto cleanup;
    }
    if (fwrite(dst, sizeof *dst, pixels, fout) != pixels) {
        printf("Can't write the whole output image!");
        goto cleanup;
    }
    status = EXIT_SUCCESS;

cleanup:
    if (fin != NULL) {
        fclose(fin);
    }
    /* fclose flushes buffered output, so a failure here means data was lost. */
    if (fout != NULL && fclose(fout) != 0) {
        printf("Can't write the whole output image!");
        status = EXIT_FAILURE;
    }
    free(dst);
    free(src);
    return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment