Created
March 3, 2014 19:03
-
-
Save JossWhittle/9332200 to your computer and use it in GitHub Desktop.
A fast AVX memcpy macro which copies the contents of a 64-byte source buffer into a 64-byte destination buffer. Both buffers must be 32-byte aligned.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <immintrin.h> | |
/* Shorthand for the 256-bit (32-byte) AVX integer vector type. */
#define I32B __m256i

/*
 * Stores the 32-byte vector that ptrS points to into the memory at ptrD.
 *
 * ptrD and ptrS are both expected to be I32B pointers. The store is done
 * with _mm256_store_si256, the aligned store, so ptrD must be 32-byte
 * aligned. Both arguments are parenthesized so that expressions such as
 * `p + 1` expand correctly.
 */
#define STORE_I32B(ptrD, ptrS) _mm256_store_si256((ptrD), *(ptrS))
/**
 * Copies the content of one 32-byte-aligned
 * 64-byte buffer into another, as two 32-byte
 * vector stores.
 *
 * Each argument is evaluated exactly once. The macro
 * expands to a single statement, so it must be followed
 * by a semicolon and is safe inside an unbraced if/else.
 *
 * @param dest
 * A pointer to the destination buffer
 * (64 bytes, 32-byte aligned)
 *
 * @param src
 * A pointer to the source buffer
 * (64 bytes, 32-byte aligned); it is only read
 */
#define A_memcpy_64(dest,src) do { \
    const I32B *A_memcpy_64_bufferS = (const I32B *) (src); \
    I32B *A_memcpy_64_bufferD = (I32B *) (dest); \
    /* First 32-byte half. */ \
    STORE_I32B(A_memcpy_64_bufferD, A_memcpy_64_bufferS); \
    /* Step both cursors forward by one vector (32 bytes). */ \
    A_memcpy_64_bufferS++; \
    A_memcpy_64_bufferD++; \
    /* Second 32-byte half. */ \
    STORE_I32B(A_memcpy_64_bufferD, A_memcpy_64_bufferS); \
} while (0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment