// Silly memset function with optimizations for 4-, 8-, and 16-byte aligned destinations.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <xmmintrin.h>
///////////////////////////////////////////////////////////////////////////////
namespace detail
{
    // Returns true when the address p is a multiple of `alignment`.
    // The mask test is only meaningful when `alignment` is a power of two.
    static inline bool IsAligned(const void* p, const size_t alignment)
    {
        return (reinterpret_cast<uintptr_t>(p) & (alignment - 1)) == 0;
    }

    // Builds a value of type T in which every byte equals fillValue.
    // T is copied bytewise, so it is expected to be a trivially copyable type
    // (the callers in this file use uint32_t, uint64_t and __m128).
    template<typename T>
    static inline T Expand(const uint8_t fillValue)
    {
        uint8_t expanded[sizeof(T)];
        for (size_t i = 0; i < sizeof(T); ++i)
        {
            expanded[i] = fillValue;
        }

        // Copy the bytes into a real T instead of reading the byte array
        // through a T lvalue, which would violate the aliasing rules.
        T result;
        std::memcpy(&result, expanded, sizeof(T));
        return result;
    }

    // Writes fillValue into sizeBytes consecutive bytes starting at pDest,
    // one byte at a time. Makes no assumption about the alignment of pDest.
    static inline void MemsetUnaligned(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
    {
        auto* pDestBytes = static_cast<uint8_t*>(pDest);
        for (size_t i = 0; i < sizeBytes; ++i)
        {
            *pDestBytes++ = fillValue;
        }
    }

    // Writes fillValue into sizeBytes bytes starting at pDest using stores of
    // sizeof(T) bytes for as long as whole elements fit, then finishes the
    // tail (fewer than sizeof(T) bytes) bytewise.
    // The caller is responsible for pDest being suitably aligned for T.
    template<typename T>
    static inline void MemsetAlignedT(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
    {
        const size_t numElements = sizeBytes / sizeof(T);
        const size_t remainder = sizeBytes % sizeof(T);

        // Kept outside the branch so that it ends up pointing just past the
        // last full element, which is where the tail begins.
        auto* pDestElements = static_cast<T*>(pDest);
        if (numElements != 0)
        {
            const T fillValExpanded = Expand<T>(fillValue);
            for (size_t i = 0; i < numElements; ++i)
            {
                *pDestElements++ = fillValExpanded;
            }
        }

        // Remaining bytes < sizeof T. These live after the elements written
        // above, so the tail fill must start there and not at pDest.
        if (remainder != 0)
        {
            MemsetUnaligned(pDestElements, fillValue, remainder);
        }
    }
} // detail
void Memset(void* pDest, const uint8_t fillValue, const size_t sizeBytes) | |
{ | |
if (detail::IsAligned(pDest, sizeof(__m128))) // Set 16 bytes at a time | |
{ | |
detail::MemsetAlignedT<__m128>(pDest, fillValue, sizeBytes); | |
} | |
else if (detail::IsAligned(pDest, sizeof(uint64_t))) // Set 8 bytes at a time | |
{ | |
detail::MemsetAlignedT<uint64_t>(pDest, fillValue, sizeBytes); | |
} | |
else if (detail::IsAligned(pDest, sizeof(uint32_t))) // Set 4 bytes at a time | |
{ | |
detail::MemsetAlignedT<uint32_t>(pDest, fillValue, sizeBytes); | |
} | |
else // Unaligned - one byte at a time | |
{ | |
detail::MemsetUnaligned(pDest, fillValue, sizeBytes); | |
} | |
} | |
///////////////////////////////////////////////////////////////////////////////
// Asserts that each of the sizeof(T) bytes of the object at pTestVal equals
// fillValue. The check is done through assert, so it is inactive when asserts
// are compiled out.
template<typename T>
static void CheckBytes(const T* pTestVal, const uint8_t fillValue)
{
    const uint8_t* pCursor = reinterpret_cast<const uint8_t*>(pTestVal);
    const uint8_t* const pEnd = pCursor + sizeof(T);

    // Walk the object's storage byte by byte.
    while (pCursor != pEnd)
    {
        assert(*pCursor == fillValue);
        ++pCursor;
    }
}
int main() | |
{ | |
const uint8_t fillValue = 0xAB; | |
uint8_t i8Val = {}; | |
Memset(&i8Val, fillValue, sizeof(i8Val)); | |
assert(i8Val == 0xAB); | |
uint16_t i16Val = {}; | |
Memset(&i16Val, fillValue, sizeof(i16Val)); | |
assert(i16Val == 0xABAB); | |
uint32_t i32Val = {}; | |
Memset(&i32Val, fillValue, sizeof(i32Val)); | |
assert(i32Val == 0xABABABAB); | |
uint64_t i64Val = {}; | |
Memset(&i64Val, fillValue, sizeof(i64Val)); | |
assert(i64Val == 0xABABABABABABABAB); | |
__m128 i128Val = {}; | |
Memset(&i128Val, fillValue, sizeof(i128Val)); | |
CheckBytes(&i128Val, fillValue); | |
char unaligned1[128] = {}; | |
Memset(&unaligned1, fillValue, sizeof(unaligned1)); | |
CheckBytes(unaligned1, fillValue); | |
char unaligned2[10] = {}; | |
Memset(&unaligned2, fillValue, sizeof(unaligned2)); | |
CheckBytes(unaligned2, fillValue); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment