Skip to content

Instantly share code, notes, and snippets.

@theraot
Created November 2, 2015 06:41
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save theraot/1bfd0deb4a1aab0a27d8 to your computer and use it in GitHub Desktop.
Save theraot/1bfd0deb4a1aab0a27d8 to your computer and use it in GitHub Desktop.
using System;
using System.Diagnostics;
using System.Reflection.Emit;
using System.Runtime.InteropServices;
using System.Security;
namespace TestPerf
{
/// <summary>
/// Console micro-benchmark that compares the throughput of six ways of copying a block of
/// bytes: <see cref="Buffer.BlockCopy"/>, the IL <c>cpblk</c> instruction (through a dynamic
/// method), the native <c>memcpy</c> exported by msvcrt.dll, <see cref="Array.Copy(Array, Array, int)"/>,
/// a hand-written pointer loop, and <see cref="Marshal.Copy(byte[], int, IntPtr, int)"/>.
/// One row of results (in MB/s) is printed per block size.
/// </summary>
public class Benchmark
{
    /// <summary>Number of copy calls written out inside every timed loop iteration.
    /// Must match the number of unrolled calls in the <c>Time*</c> helpers.</summary>
    private const int CopiesPerIteration = 10;

    /// <summary>Bytes moved by one copy call multiplied by the iteration count; keeps the
    /// amount of data per measurement constant while the block size changes.</summary>
    private const int BytesPerPass = 1 << 26;

    /// <summary>Smallest block size measured, in bytes.</summary>
    private const int FirstBlockSize = 4;

    /// <summary>How many block sizes are measured; the size doubles at every step.</summary>
    private const int BlockSizeSteps = 20;

    /// <summary>Composite format shared by the header row and the result rows.</summary>
    private const string RowFormat = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}";

    /// <summary>Delegate bound to the dynamic method that wraps the <c>cpblk</c> instruction.</summary>
    private static readonly CopyBlockDelegate _cpBlk = GenerateCpBlk();

    /// <summary>Signature of the generated block-copy routine.</summary>
    /// <param name="des">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="bytes">Number of bytes to copy.</param>
    public unsafe delegate void CopyBlockDelegate(void* des, void* src, uint bytes);

    /// <summary>P/Invoke binding for the C runtime <c>memcpy</c> exported by msvcrt.dll.</summary>
    /// <remarks>
    /// NOTE(review): the native <c>count</c> parameter is <c>size_t</c> (pointer-sized);
    /// <see cref="UIntPtr"/> would be the exact managed match. The <c>ulong</c> declaration is
    /// kept so existing callers still compile - confirm it is acceptable for 32-bit processes.
    /// </remarks>
    [DllImport("msvcrt.dll", EntryPoint = "memcpy", CallingConvention = CallingConvention.Cdecl, SetLastError = false), SuppressUnmanagedCodeSecurity]
    public static unsafe extern void* memcpy(void* dest, void* src, ulong count);

    /// <summary>Copies <paramref name="count"/> bytes using the generated <c>cpblk</c> routine.</summary>
    private static unsafe void CpBlk(void* dest, void* src, uint count)
    {
        _cpBlk(dest, src, count);
    }

    /// <summary>
    /// Hand-written copy: moves the data eight bytes at a time through <c>long*</c> and then
    /// finishes the remaining (at most seven) bytes one by one.
    /// </summary>
    /// <param name="dest">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="count">Number of bytes to copy; zero or negative values copy nothing.</param>
    private static unsafe void Custom(void* dest, void* src, int count)
    {
        // Without this guard a negative count skips the word loop but still yields a
        // positive remainder (for example -1 gives 7), so stray bytes would be copied.
        if (count <= 0)
        {
            return;
        }

        var wordCount = count >> 3;
        var destWord = (long*)dest;
        var srcWord = (long*)src;
        for (var i = 0; i < wordCount; i++)
        {
            *destWord++ = *srcWord++;
        }

        // For a positive count the low three bits are exactly count - wordCount * 8.
        var tailCount = count & 7;
        var destByte = (byte*)destWord;
        var srcByte = (byte*)srcWord;
        for (var i = 0; i < tailCount; i++)
        {
            *destByte++ = *srcByte++;
        }
    }

    /// <summary>
    /// Emits a dynamic method whose whole body is <c>ldarg.0; ldarg.1; ldarg.2; cpblk; ret</c>
    /// and returns it as a <see cref="CopyBlockDelegate"/>.
    /// </summary>
    private static unsafe CopyBlockDelegate GenerateCpBlk()
    {
        var parameterTypes = new[] { typeof(void*), typeof(void*), typeof(uint) };
        var method = new DynamicMethod("CopyBlockIL", typeof(void), parameterTypes, typeof(Benchmark));

        var il = method.GetILGenerator();
        il.Emit(OpCodes.Ldarg_0); // destination address
        il.Emit(OpCodes.Ldarg_1); // source address
        il.Emit(OpCodes.Ldarg_2); // byte count
        il.Emit(OpCodes.Cpblk);
        il.Emit(OpCodes.Ret);

        return (CopyBlockDelegate)method.CreateDelegate(typeof(CopyBlockDelegate));
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating
    /// <see cref="Buffer.BlockCopy"/> calls and returns the elapsed milliseconds.</summary>
    private static long TimeBufferBlockCopy(byte[] src, byte[] dest, int blockSize, int count)
    {
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            Buffer.BlockCopy(src, 0, dest, 0, blockSize);
            Buffer.BlockCopy(dest, 0, src, 0, blockSize);
            Buffer.BlockCopy(src, 0, dest, 0, blockSize);
            Buffer.BlockCopy(dest, 0, src, 0, blockSize);
            Buffer.BlockCopy(src, 0, dest, 0, blockSize);
            Buffer.BlockCopy(dest, 0, src, 0, blockSize);
            Buffer.BlockCopy(src, 0, dest, 0, blockSize);
            Buffer.BlockCopy(dest, 0, src, 0, blockSize);
            Buffer.BlockCopy(src, 0, dest, 0, blockSize);
            Buffer.BlockCopy(dest, 0, src, 0, blockSize);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating
    /// <see cref="CpBlk"/> calls and returns the elapsed milliseconds.</summary>
    private static unsafe long TimeCpBlk(void* pDest, void* pSrc, int blockSize, int count)
    {
        var size = (uint)blockSize;
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            CpBlk(pDest, pSrc, size);
            CpBlk(pSrc, pDest, size);
            CpBlk(pDest, pSrc, size);
            CpBlk(pSrc, pDest, size);
            CpBlk(pDest, pSrc, size);
            CpBlk(pSrc, pDest, size);
            CpBlk(pDest, pSrc, size);
            CpBlk(pSrc, pDest, size);
            CpBlk(pDest, pSrc, size);
            CpBlk(pSrc, pDest, size);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating native
    /// <see cref="memcpy"/> calls and returns the elapsed milliseconds.</summary>
    private static unsafe long TimeMemcpy(void* pDest, void* pSrc, int blockSize, int count)
    {
        var size = (ulong)blockSize;
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            memcpy(pDest, pSrc, size);
            memcpy(pSrc, pDest, size);
            memcpy(pDest, pSrc, size);
            memcpy(pSrc, pDest, size);
            memcpy(pDest, pSrc, size);
            memcpy(pSrc, pDest, size);
            memcpy(pDest, pSrc, size);
            memcpy(pSrc, pDest, size);
            memcpy(pDest, pSrc, size);
            memcpy(pSrc, pDest, size);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating
    /// <see cref="Array.Copy(Array, Array, int)"/> calls and returns the elapsed milliseconds.</summary>
    private static long TimeArrayCopy(byte[] src, byte[] dest, int blockSize, int count)
    {
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            Array.Copy(src, dest, blockSize);
            Array.Copy(dest, src, blockSize);
            Array.Copy(src, dest, blockSize);
            Array.Copy(dest, src, blockSize);
            Array.Copy(src, dest, blockSize);
            Array.Copy(dest, src, blockSize);
            Array.Copy(src, dest, blockSize);
            Array.Copy(dest, src, blockSize);
            Array.Copy(src, dest, blockSize);
            Array.Copy(dest, src, blockSize);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating
    /// <see cref="Custom"/> calls and returns the elapsed milliseconds.</summary>
    private static unsafe long TimeCustom(void* pDest, void* pSrc, int blockSize, int count)
    {
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            Custom(pDest, pSrc, blockSize);
            Custom(pSrc, pDest, blockSize);
            Custom(pDest, pSrc, blockSize);
            Custom(pSrc, pDest, blockSize);
            Custom(pDest, pSrc, blockSize);
            Custom(pSrc, pDest, blockSize);
            Custom(pDest, pSrc, blockSize);
            Custom(pSrc, pDest, blockSize);
            Custom(pDest, pSrc, blockSize);
            Custom(pSrc, pDest, blockSize);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>Times <paramref name="count"/> iterations of ten alternating
    /// <see cref="Marshal.Copy(byte[], int, IntPtr, int)"/> calls (managed array to pinned
    /// address) and returns the elapsed milliseconds.</summary>
    /// <param name="src">Managed source array.</param>
    /// <param name="dest">Managed destination array.</param>
    /// <param name="pSrcPtr">Address of the first element of <paramref name="src"/>.</param>
    /// <param name="pDestPtr">Address of the first element of <paramref name="dest"/>.</param>
    /// <param name="blockSize">Bytes copied per call.</param>
    /// <param name="count">Number of loop iterations.</param>
    private static long TimeMarshalCopy(byte[] src, byte[] dest, IntPtr pSrcPtr, IntPtr pDestPtr, int blockSize, int count)
    {
        var watch = Stopwatch.StartNew();
        for (var i = 0; i < count; i++)
        {
            Marshal.Copy(src, 0, pDestPtr, blockSize);
            Marshal.Copy(dest, 0, pSrcPtr, blockSize);
            Marshal.Copy(src, 0, pDestPtr, blockSize);
            Marshal.Copy(dest, 0, pSrcPtr, blockSize);
            Marshal.Copy(src, 0, pDestPtr, blockSize);
            Marshal.Copy(dest, 0, pSrcPtr, blockSize);
            Marshal.Copy(src, 0, pDestPtr, blockSize);
            Marshal.Copy(dest, 0, pSrcPtr, blockSize);
            Marshal.Copy(src, 0, pDestPtr, blockSize);
            Marshal.Copy(dest, 0, pSrcPtr, blockSize);
        }
        watch.Stop();
        return watch.ElapsedMilliseconds;
    }

    /// <summary>
    /// Runs every technique once, untimed, on a tiny buffer so that one-time costs (JIT
    /// compilation of the helpers and the dynamic method, P/Invoke binding of
    /// <see cref="memcpy"/>) do not end up in the first measured row.
    /// </summary>
    private static unsafe void WarmUp()
    {
        var dest = new byte[FirstBlockSize];
        var src = new byte[FirstBlockSize];
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            TimeBufferBlockCopy(src, dest, FirstBlockSize, 1);
            TimeCpBlk(pDest, pSrc, FirstBlockSize, 1);
            TimeMemcpy(pDest, pSrc, FirstBlockSize, 1);
            TimeArrayCopy(src, dest, FirstBlockSize, 1);
            TimeCustom(pDest, pSrc, FirstBlockSize, 1);
            TimeMarshalCopy(src, dest, (IntPtr)pSrc, (IntPtr)pDest, FirstBlockSize, 1);
        }
    }

    /// <summary>
    /// Entry point. Prints the process bitness and a header row, then for each block size
    /// (starting at <see cref="FirstBlockSize"/> and doubling <see cref="BlockSizeSteps"/> times)
    /// measures every technique and prints its throughput in MB/s.
    /// </summary>
    private static unsafe void Main()
    {
        Console.WriteLine(Environment.Is64BitProcess ? "64 bits" : "32 bits");
        Console.WriteLine(RowFormat, "Size", nameof(Buffer), nameof(CpBlk), nameof(memcpy), nameof(Array), nameof(Custom), nameof(Marshal));

        WarmUp();

        var blockSize = FirstBlockSize;
        for (var step = 0; step < BlockSizeSteps; step++, blockSize *= 2)
        {
            var dest = new byte[blockSize];
            var src = new byte[blockSize];
            for (var i = 0; i < blockSize; i++)
            {
                src[i] = (byte)i;
            }

            // Both arrays stay pinned for the whole measurement so the raw addresses
            // handed to the pointer-based techniques remain valid.
            fixed (void* pDest = &dest[0])
            fixed (void* pSrc = &src[0])
            {
                // Fewer iterations for bigger blocks: every measurement moves the same volume.
                var count = BytesPerPass / blockSize;

                var bufferBlockCopyTime = TimeBufferBlockCopy(src, dest, blockSize, count);
                var cpBlkTime = TimeCpBlk(pDest, pSrc, blockSize, count);
                var copyMemoryTime = TimeMemcpy(pDest, pSrc, blockSize, count);
                var arrayCopyTime = TimeArrayCopy(src, dest, blockSize, count);
                var customCopyTime = TimeCustom(pDest, pSrc, blockSize, count);
                var marshalCopyTime = TimeMarshalCopy(src, dest, (IntPtr)pSrc, (IntPtr)pDest, blockSize, count);

                // Total bytes copied, scaled so that dividing by milliseconds yields MB/s.
                var memFactor = count * (double)CopiesPerIteration * blockSize / 0.001 / (1024 * 1024);

                Console.WriteLine(
                    RowFormat,
                    blockSize,
                    (long)(memFactor / bufferBlockCopyTime),
                    (long)(memFactor / cpBlkTime),
                    (long)(memFactor / copyMemoryTime),
                    (long)(memFactor / arrayCopyTime),
                    (long)(memFactor / customCopyTime),
                    (long)(memFactor / marshalCopyTime));
            }
        }
    }
}
}
@theraot
Copy link
Author

theraot commented Nov 2, 2015

The code above is based on the article "High performance memcpy gotchas in C#" by Alexandre Mutel, published at xoofx.com.


Some experimental results are shared via Google Drive. They were measured on a computer running 64-bit Windows 10, with an Intel(R) Core(TM) i3-3240 CPU @ 3.40GHz and 8 GB of RAM. The code was compiled in the Release configuration against .NET 4.0 in Visual Studio 2015, with the "Optimize code" flag set, for both the x86 and x64 target platforms.


Notes:

  • Buffer.MemoryCopy is not included. It is new in .NET 4.5, and I'm interested in the best options for older .NET versions.
  • Also, I haven't done any testing on Linux. Maybe somebody can share some results?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment