Created
June 26, 2020 08:58
-
-
Save jwbensley/2e67f096ff5af74bbcba6f60fa06a481 to your computer and use it in GitHub Desktop.
memcpy() using cache and page aligned values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h> // uint*_t | |
#include <stdio.h> // perror(), printf() | |
#include <stdlib.h> // free(), malloc(), posix_memalign()
#include <string.h> // memcpy()
#include <time.h> // clock_t, CLOCKS_PER_SEC | |
#include <unistd.h> // getpagesize() | |
/**
 * Copy `len` bytes from `from` to `to`, issuing the bulk of the copy as
 * fixed-size 64-byte memcpy() calls.
 *
 * Every complete 64-byte chunk is copied with a constant-size memcpy().
 * Whatever is left over (when `len` is not a multiple of 64) is copied with
 * its exact size, so no byte beyond `len` is ever read from `from` or
 * written to `to`. A `len` of 0 copies nothing.
 *
 * NOTE(review): 64 is presumably the cache-line size of the CPU this was
 * written for -- confirm before relying on it elsewhere.
 *
 * @param to    Destination buffer, at least `len` bytes long.
 * @param from  Source buffer, at least `len` bytes long.
 * @param len   Number of bytes to copy.
 *
 * The two regions must not overlap (same contract as memcpy()).
 */
static inline void memcpy_aligned(void *to, const void *from, size_t len) {
    enum { CHUNK = 64 };

    uint8_t *dst = to;
    const uint8_t *src = from;
    size_t offset = 0;

    /* Whole chunks: `len - offset` cannot underflow because offset <= len. */
    for (; len - offset >= CHUNK; offset += CHUNK) {
        memcpy(dst + offset, src + offset, CHUNK);
    }

    /* Tail: copy only the bytes that remain instead of rounding up. */
    if (offset < len) {
        memcpy(dst + offset, src + offset, len - offset);
    }
}
/* Size in bytes of the page-aligned scratch buffer used for the
 * fixed-offset copies at the end of main(). */
enum { SCRATCH_BYTES = 16384 };

/* Print one result line for `label` given the clock() readings around the run. */
static void report_elapsed(const char *label, int size, clock_t start, clock_t end) {
    const double time_taken = ((double)(end - start)) / CLOCKS_PER_SEC;
    printf("%s(%d) took %f seconds to execute \n", label, size, time_taken);
}

/* Time `loops` consecutive memcpy() calls of `size` bytes and print the result. */
static void time_memcpy(void *dst, const void *src, int size, uint32_t loops) {
    const clock_t start = clock();
    for (uint32_t j = 0; j < loops; j += 1) {
        memcpy(dst, src, (size_t)size);
    }
    report_elapsed("memcpy", size, start, clock());
}

/* Time `loops` consecutive memcpy_aligned() calls of `size` bytes and print the result. */
static void time_memcpy_aligned(void *dst, const void *src, int size, uint32_t loops) {
    const clock_t start = clock();
    for (uint32_t j = 0; j < loops; j += 1) {
        memcpy_aligned(dst, src, (size_t)size);
    }
    report_elapsed("memcpy_aligned", size, start, clock());
}

/*
 * Run the timing for the first `count` entries of `sizes`.
 * memcpy() is always timed; memcpy_aligned() is timed right after it for the
 * same size when `with_aligned` is non-zero.
 */
static void run_sizes(void *dst, const void *src, const int *sizes, size_t count,
                      uint32_t loops, int with_aligned) {
    for (size_t i = 0; i < count; i += 1) {
        time_memcpy(dst, src, sizes[i], loops);
        if (with_aligned) {
            time_memcpy_aligned(dst, src, sizes[i], loops);
        }
    }
}

/*
 * posix_memalign() wrapper that reports failures on stderr.
 * posix_memalign() returns the error number and does not set errno, so the
 * return value (not perror()) is used to build the message.
 * Returns the allocation, or NULL on failure. `call_name` prefixes the message.
 */
static void *alloc_aligned(size_t alignment, size_t size, const char *call_name) {
    void *ptr = NULL;
    const int err = posix_memalign(&ptr, alignment, size);
    if (err != 0) {
        fprintf(stderr, "%s failed: %s\n", call_name, strerror(err));
        return NULL;
    }
    return ptr;
}

/*
 * Benchmark driver: times memcpy() and memcpy_aligned() for a range of copy
 * sizes using
 *   1. malloc()ed 1024-byte buffers,
 *   2. page-aligned 1024-byte buffers,
 *   3. malloc()ed 4096-byte buffers,
 *   4. page-aligned 4096-byte buffers,
 * and then times memcpy() alone between two regions of one page-aligned
 * scratch buffer, with the destination placed 4096..9216 bytes past the
 * source in 1024-byte steps.
 *
 * Returns 0 on success and 1 if any allocation fails; every buffer that was
 * obtained is released on both paths.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    static const int sizes[9] = {32, 64, 96, 128, 256, 512, 1024, 2048, 4096};
    const uint32_t loops = 100000;
    const int page_size = getpagesize();

    int status = 1;
    void *src = NULL;
    void *dst = NULL;
    void *buf = NULL; // buf is for n-way cache associated CPUs

    printf("Loop count is %" PRIu32 "\n", loops);
    printf("Page size is %d\n\n", page_size);

    buf = alloc_aligned((size_t)page_size, SCRATCH_BYTES, "posix_memalign");
    if (buf == NULL) {
        goto cleanup;
    }

    /* 1. Plain malloc(), 1024-byte buffers: only sizes up to 1024 fit. */
    src = malloc(1024);
    dst = malloc(1024);
    if (src == NULL || dst == NULL) {
        perror("malloc failed");
        goto cleanup;
    }
    printf("Malloc size is 1024, src=%p, dst=%p\n", src, dst);
    run_sizes(dst, src, sizes, 7, loops, 1);
    free(src);
    free(dst);
    src = NULL;
    dst = NULL;
    printf("\n");

    /* 2. Page-aligned 1024-byte buffers. */
    src = alloc_aligned((size_t)page_size, 1024, "posix_memalign");
    if (src == NULL) {
        goto cleanup;
    }
    dst = alloc_aligned((size_t)page_size, 1024, "posix_memalign");
    if (dst == NULL) {
        goto cleanup;
    }
    printf("Page aligned malloc size is 1024, src=%p, dst=%p\n", src, dst);
    run_sizes(dst, src, sizes, 7, loops, 1);
    free(src);
    free(dst);
    src = NULL;
    dst = NULL;
    printf("\n");

    /* 3. Plain malloc(), 4096-byte buffers: all nine sizes fit. */
    src = malloc(4096);
    dst = malloc(4096);
    if (src == NULL || dst == NULL) {
        perror("malloc failed");
        goto cleanup;
    }
    printf("Malloc size is 4096, src=%p, dst=%p\n", src, dst);
    run_sizes(dst, src, sizes, 9, loops, 1);
    free(src);
    free(dst);
    src = NULL;
    dst = NULL;
    printf("\n");

    /* 4. Page-aligned 4096-byte buffers. */
    src = alloc_aligned((size_t)page_size, 4096, "posix_memalign(src)");
    if (src == NULL) {
        goto cleanup;
    }
    dst = alloc_aligned((size_t)page_size, 4096, "posix_memalign(dst)");
    if (dst == NULL) {
        goto cleanup;
    }
    printf("Page aligned malloc size is 4096, src=%p, dst=%p\n", src, dst);
    run_sizes(dst, src, sizes, 9, loops, 1);
    free(src);
    free(dst);
    src = NULL;
    dst = NULL;
    printf("\n");

    /*
     * 5. Source at the start of the scratch buffer, destination 4096 bytes
     * further on plus an extra 0..5120 bytes. The largest destination ends at
     * 9216 + 4096 = 13312, inside the SCRATCH_BYTES allocation.
     * uint8_t pointers are used because arithmetic on void * is not standard C.
     */
    for (size_t step = 0; step < 6; step += 1) {
        const size_t extra = step * 1024;
        uint8_t *to = (uint8_t *)buf + 4096 + extra;

        if (extra == 0) {
            printf("Cache aligned malloc, offset is 0, buf=%p, src=%p, dst=%p\n",
                   buf, buf, (void *)to);
        } else {
            printf("Cache aligned malloc, offset is +%zu, buf=%p, src=%p, dst=%p\n",
                   extra, buf, buf, (void *)to);
        }
        run_sizes(to, buf, sizes, 9, loops, 0);
        printf("\n");
    }

    status = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe from every exit point. */
    free(src);
    free(dst);
    free(buf);
    return status;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment