Skip to content

Instantly share code, notes, and snippets.

@glampert
Created February 16, 2020 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glampert/6695d76f614c3de71870fd5921be79fe to your computer and use it in GitHub Desktop.
Save glampert/6695d76f614c3de71870fd5921be79fe to your computer and use it in GitHub Desktop.
Silly memset function with optimizations for 4-, 8-, and 16-byte-aligned destinations.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

#include <xmmintrin.h>
///////////////////////////////////////////////////////////////////////////////
namespace detail
{

// Returns true when the address held by p is a multiple of alignment.
// The test is a bit mask against (alignment - 1), so alignment is expected
// to be a non-zero power of two.
static inline bool IsAligned(const void* p, const size_t alignment)
{
    return (reinterpret_cast<uintptr_t>(p) & (alignment - 1)) == 0;
}

// Builds a value of type T in which every byte equals fillValue.
// The byte pattern is transferred into the result with memcpy instead of
// dereferencing a reinterpret_cast'ed pointer, so the uint8_t buffer is
// never read through an lvalue of type T.
template<typename T>
static inline T Expand(const uint8_t fillValue)
{
    uint8_t pattern[sizeof(T)];
    for (size_t i = 0; i < sizeof(T); ++i)
    {
        pattern[i] = fillValue;
    }

    T expanded;
    std::memcpy(&expanded, pattern, sizeof(T));
    return expanded;
}

// Writes fillValue into sizeBytes consecutive bytes starting at pDest,
// one byte per iteration. Makes no alignment assumption about pDest.
static inline void MemsetUnaligned(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
{
    auto* pDestBytes = static_cast<uint8_t*>(pDest);
    for (size_t i = 0; i < sizeBytes; ++i)
    {
        *pDestBytes++ = fillValue;
    }
}

// Fills sizeBytes bytes at pDest with fillValue, storing sizeof(T) bytes per
// iteration for as many whole T elements as fit, then finishing the tail
// (fewer than sizeof(T) bytes) one byte at a time.
// pDest is written through a T*, so the caller must pass a pointer that is
// suitably aligned for T.
template<typename T>
static inline void MemsetAlignedT(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
{
    const size_t numElements = sizeBytes / sizeof(T);
    const size_t remainder   = sizeBytes % sizeof(T);

    // Kept outside the branch: after the loop it points one past the last
    // whole element, which is exactly where the tail bytes begin.
    auto* pDestElements = static_cast<T*>(pDest);

    if (numElements != 0)
    {
        const T fillValExpanded = Expand<T>(fillValue);
        for (size_t i = 0; i < numElements; ++i)
        {
            *pDestElements++ = fillValExpanded;
        }
    }

    // Remaining bytes < sizeof T. They must be written after the whole
    // elements, not at the start of the destination again.
    if (remainder != 0)
    {
        MemsetUnaligned(pDestElements, fillValue, remainder);
    }
}

} // detail
// Forwards a byte-fill request to the helper that matches the alignment of
// pDest. The widest store width is tried first (16 bytes, then 8, then 4);
// a pointer that satisfies none of them is handed to the byte-wise helper.
//
//   pDest     - start of the destination memory
//   fillValue - byte value to write
//   sizeBytes - size of the destination in bytes
void Memset(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
{
    // 16-byte aligned: store one __m128 per step.
    if (detail::IsAligned(pDest, sizeof(__m128)))
    {
        detail::MemsetAlignedT<__m128>(pDest, fillValue, sizeBytes);
        return;
    }

    // 8-byte aligned: store one uint64_t per step.
    if (detail::IsAligned(pDest, sizeof(uint64_t)))
    {
        detail::MemsetAlignedT<uint64_t>(pDest, fillValue, sizeBytes);
        return;
    }

    // 4-byte aligned: store one uint32_t per step.
    if (detail::IsAligned(pDest, sizeof(uint32_t)))
    {
        detail::MemsetAlignedT<uint32_t>(pDest, fillValue, sizeBytes);
        return;
    }

    // No usable alignment: fall back to one byte per step.
    detail::MemsetUnaligned(pDest, fillValue, sizeBytes);
}
///////////////////////////////////////////////////////////////////////////////
// Asserts that each of the sizeof(T) bytes of the object at pTestVal equals
// fillValue. The object is inspected through a byte pointer, so T may be any
// object type, including an array type when the array's address is passed.
template<typename T>
static void CheckBytes(const T* pTestVal, const uint8_t fillValue)
{
    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(pTestVal);
    const uint8_t* const pastEnd = cursor + sizeof(T);

    while (cursor != pastEnd)
    {
        assert(*cursor == fillValue);
        ++cursor;
    }
}
// Self-test: fills objects of several sizes with Memset and asserts that the
// expected byte pattern was written.
int main()
{
    const uint8_t fillValue = 0xAB;

    uint8_t i8Val = {};
    Memset(&i8Val, fillValue, sizeof(i8Val));
    assert(i8Val == 0xAB);

    uint16_t i16Val = {};
    Memset(&i16Val, fillValue, sizeof(i16Val));
    assert(i16Val == 0xABAB);

    uint32_t i32Val = {};
    Memset(&i32Val, fillValue, sizeof(i32Val));
    assert(i32Val == 0xABABABAB);

    uint64_t i64Val = {};
    Memset(&i64Val, fillValue, sizeof(i64Val));
    assert(i64Val == 0xABABABABABABABAB);

    __m128 i128Val = {};
    Memset(&i128Val, fillValue, sizeof(i128Val));
    CheckBytes(&i128Val, fillValue);

    // For the arrays, pass the array's address (not the decayed element
    // pointer): that makes CheckBytes deduce T as the whole array type, so
    // sizeof(T) covers every byte instead of only the first char.
    char unaligned1[128] = {};
    Memset(&unaligned1, fillValue, sizeof(unaligned1));
    CheckBytes(&unaligned1, fillValue);

    char unaligned2[10] = {};
    Memset(&unaligned2, fillValue, sizeof(unaligned2));
    CheckBytes(&unaligned2, fillValue);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment