Skip to content

Instantly share code, notes, and snippets.

@tgsstdio
Forked from theraot/Benchmark.cs
Last active June 28, 2016 02:01
Show Gist options
  • Save tgsstdio/1ad429532f8b8138ea03 to your computer and use it in GitHub Desktop.
Save tgsstdio/1ad429532f8b8138ea03 to your computer and use it in GitHub Desktop.
using BenchmarkDotNet.Attributes;
using System;
using System.Diagnostics;
using System.Reflection.Emit;
using System.Runtime.InteropServices;
using System.Security;
// Based on a fork of theraot/Benchmark.cs; see also:
// http://xoofx.com/blog/2010/10/23/high-performance-memcpy-gotchas-in-c/
namespace TestPerf
{
/// <summary>
/// BenchmarkDotNet benchmark that copies the first <see cref="BlockSize"/> bytes of one
/// managed byte array into another using several different copy mechanisms.
/// </summary>
public class UnsafeMemCopy
{
    /// <summary>Length of both buffers; larger than every <see cref="BlockSize"/> value.</summary>
    private const int MaxBufferSize = 32768;

    /// <summary>Delegate over the dynamically emitted <c>cpblk</c> method, built once per process.</summary>
    private static unsafe readonly CopyBlockDelegate _cpBlk = GenerateCpBlk();

    private byte[] src;
    private byte[] dest;

    /// <summary>Signature of the emitted block-copy method.</summary>
    /// <param name="des">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="bytes">Number of bytes to copy.</param>
    public unsafe delegate void CopyBlockDelegate(void* des, void* src, uint bytes);

    /// <summary>Number of bytes copied per benchmark invocation.</summary>
    [Params(4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384)]
    public int BlockSize { get; set; }

    /// <summary>
    /// Native <c>memcpy</c> from msvcrt.dll. <paramref name="count"/> is declared as
    /// <see cref="UIntPtr"/> so that it has the width of the native <c>size_t</c> in both
    /// 32-bit and 64-bit processes.
    /// </summary>
    /// <param name="dest">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="count">Number of bytes to copy.</param>
    /// <returns>The pointer returned by the native function.</returns>
    [DllImport("msvcrt.dll", EntryPoint = "memcpy", CallingConvention = CallingConvention.Cdecl, SetLastError = false), SuppressUnmanagedCodeSecurity]
    public static unsafe extern void* memcpy(void* dest, void* src, UIntPtr count);

    /// <summary>
    /// Backward-compatible overload taking a 64-bit count. It forwards to the
    /// pointer-sized overload instead of marshalling 8 bytes to a native parameter
    /// that is only 4 bytes wide in a 32-bit process.
    /// </summary>
    /// <param name="dest">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="count">Number of bytes to copy.</param>
    /// <returns>The pointer returned by the native function.</returns>
    /// <exception cref="OverflowException">
    /// <paramref name="count"/> does not fit in a pointer-sized unsigned integer.
    /// </exception>
    public static unsafe void* memcpy(void* dest, void* src, ulong count)
    {
        return memcpy(dest, src, checked((UIntPtr)count));
    }

    /// <summary>Copies <paramref name="count"/> bytes through the emitted <c>cpblk</c> delegate.</summary>
    static unsafe void CpBlk(void* dest, void* src, uint count)
    {
        var local = _cpBlk;
        local(dest, src, count);
    }

    /// <summary>
    /// Hand-written copy: moves 8 bytes at a time, then finishes the remaining
    /// 0-7 bytes one at a time.
    /// </summary>
    /// <param name="dest">Destination address.</param>
    /// <param name="src">Source address.</param>
    /// <param name="count">Number of bytes to copy; values of zero or less copy nothing.</param>
    static unsafe void Custom(void* dest, void* src, int count)
    {
        // Without this guard a negative count skips the 8-byte loop but still yields a
        // positive "remainder" (e.g. -1 => 7), so bytes would be copied unexpectedly.
        if (count <= 0)
        {
            return;
        }

        var block = count >> 3;
        var pDest = (long*)dest;
        var pSrc = (long*)src;
        for (var i = 0; i < block; i++)
        {
            *pDest = *pSrc;
            pDest++;
            pSrc++;
        }

        // Bytes left over after the 8-byte chunks.
        var remaining = count - (block << 3);
        var pDestB = (byte*)pDest;
        var pSrcB = (byte*)pSrc;
        for (var i = 0; i < remaining; i++)
        {
            *pDestB = *pSrcB;
            pDestB++;
            pSrcB++;
        }
    }

    /// <summary>
    /// Emits a dynamic method whose body is <c>ldarg.0; ldarg.1; ldarg.2; cpblk; ret</c>
    /// and wraps it in a <see cref="CopyBlockDelegate"/>.
    /// </summary>
    /// <remarks>Marked <c>unsafe</c> because <c>typeof(void*)</c> names a pointer type.</remarks>
    static unsafe CopyBlockDelegate GenerateCpBlk()
    {
        var method = new DynamicMethod("CopyBlockIL", typeof(void), new[] { typeof(void*), typeof(void*), typeof(uint) }, typeof(UnsafeMemCopy));
        var emitter = method.GetILGenerator();

        // Push destination, source and byte count, then copy the block.
        emitter.Emit(OpCodes.Ldarg_0);
        emitter.Emit(OpCodes.Ldarg_1);
        emitter.Emit(OpCodes.Ldarg_2);
        emitter.Emit(OpCodes.Cpblk);
        emitter.Emit(OpCodes.Ret);

        // Compile to a delegate.
        return (CopyBlockDelegate)method.CreateDelegate(typeof(CopyBlockDelegate));
    }

    /// <summary>
    /// Allocates both buffers and fills the source with the low byte of each index.
    /// </summary>
    [Setup]
    public void SetupData()
    {
        src = new byte[MaxBufferSize];
        for (var i = 0; i < MaxBufferSize; i++)
        {
            src[i] = (byte)i;
        }

        dest = new byte[MaxBufferSize];
    }

    /// <summary>Copies via <see cref="Buffer.BlockCopy"/>.</summary>
    [Benchmark]
    public unsafe void CopyViaBlockCopy()
    {
        // The pinned pointers are not used here.
        // NOTE(review): presumably the arrays are pinned so every benchmark pays the
        // same pinning cost - confirm before removing.
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            Buffer.BlockCopy(src, 0, dest, 0, BlockSize);
        }
    }

    /// <summary>Copies via the emitted <c>cpblk</c> IL instruction.</summary>
    [Benchmark]
    public unsafe void CopyViaCpBlk()
    {
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            CpBlk(pDest, pSrc, (uint)BlockSize);
        }
    }

    /// <summary>Copies via the native <c>memcpy</c> P/Invoke.</summary>
    [Benchmark]
    public unsafe void CopyViaMemcpy()
    {
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            // Call the pointer-sized overload directly so no wrapper sits in the measured path.
            memcpy(pDest, pSrc, (UIntPtr)(uint)BlockSize);
        }
    }

    /// <summary>Copies via <see cref="Array.Copy(Array, Array, int)"/>.</summary>
    [Benchmark]
    public unsafe void CopyViaArrayCopy()
    {
        // The pinned pointers are not used here.
        // NOTE(review): presumably the arrays are pinned so every benchmark pays the
        // same pinning cost - confirm before removing.
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            Array.Copy(src, dest, BlockSize);
        }
    }

    /// <summary>Copies via the hand-written <see cref="Custom"/> loop.</summary>
    [Benchmark]
    public unsafe void CopyViaCustomCopy()
    {
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            Custom(pDest, pSrc, BlockSize);
        }
    }

    /// <summary>
    /// Copies via <see cref="Marshal.Copy(byte[], int, IntPtr, int)"/>, passing the managed
    /// source array and the pinned destination address.
    /// </summary>
    [Benchmark]
    public unsafe void CopyViaMarshalCopy()
    {
        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            IntPtr pDestPtr = (IntPtr)pDest;
            Marshal.Copy(src, 0, pDestPtr, BlockSize);
        }
    }
}
}
@tgsstdio
Copy link
Author

Preliminary RELEASE-build results with BenchmarkDotNet:

BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-4770K CPU 3.50GHz, ProcessorCount=8
Frequency=3415986 ticks, Resolution=292.7412 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=32-bit RELEASE
JitModules=clrjit-v4.6.1080.0

Type=UnsafeMemCopy  Mode=Throughput  
Method BlockSize Median StdDev
CopyViaBlockCopy 4 20.1239 ns 0.1478 ns
CopyViaCpBlk 4 15.4555 ns 0.2688 ns
CopyViaMemcpy 4 8.7056 ns 0.2557 ns
CopyViaArrayCopy 4 39.4497 ns 0.6011 ns
CopyViaCustomCopy 4 6.5002 ns 0.7458 ns
CopyViaMarshalCopy 4 27.6666 ns 0.5619 ns
CopyViaBlockCopy 8 20.2705 ns 0.6780 ns
CopyViaCpBlk 8 15.3735 ns 0.7270 ns
CopyViaMemcpy 8 9.3683 ns 0.3059 ns
CopyViaArrayCopy 8 39.8129 ns 1.1938 ns
CopyViaCustomCopy 8 4.8734 ns 0.1611 ns
CopyViaMarshalCopy 8 28.9601 ns 0.8762 ns
CopyViaBlockCopy 16 20.6568 ns 2.3650 ns
CopyViaCpBlk 16 15.4625 ns 0.3095 ns
CopyViaMemcpy 16 9.6592 ns 0.9297 ns
CopyViaArrayCopy 16 40.2312 ns 0.6522 ns
CopyViaCustomCopy 16 5.4181 ns 0.1825 ns
CopyViaMarshalCopy 16 28.2772 ns 0.7484 ns
CopyViaBlockCopy 32 20.2266 ns 0.2855 ns
CopyViaCpBlk 32 15.4003 ns 0.1753 ns
CopyViaMemcpy 32 15.3931 ns 0.1745 ns
CopyViaArrayCopy 32 39.1545 ns 0.1856 ns
CopyViaCustomCopy 32 7.0603 ns 0.3989 ns
CopyViaMarshalCopy 32 27.9486 ns 0.6591 ns
CopyViaBlockCopy 64 20.8282 ns 0.8874 ns
CopyViaCpBlk 64 15.5394 ns 0.2940 ns
CopyViaMemcpy 64 16.0735 ns 0.3452 ns
CopyViaArrayCopy 64 39.5693 ns 0.7831 ns
CopyViaCustomCopy 64 10.9344 ns 0.5837 ns
CopyViaMarshalCopy 64 29.7324 ns 1.2355 ns
CopyViaBlockCopy 128 23.6070 ns 4.6554 ns
CopyViaCpBlk 128 17.7501 ns 0.3727 ns
CopyViaMemcpy 128 21.3531 ns 0.3276 ns
CopyViaArrayCopy 128 41.6006 ns 1.1796 ns
CopyViaCustomCopy 128 16.3575 ns 0.5406 ns
CopyViaMarshalCopy 128 36.9803 ns 1.7866 ns
CopyViaBlockCopy 256 27.7144 ns 3.7794 ns
CopyViaCpBlk 256 22.5643 ns 3.6143 ns
CopyViaMemcpy 256 32.6273 ns 0.6868 ns
CopyViaArrayCopy 256 46.6956 ns 4.6812 ns
CopyViaCustomCopy 256 27.3929 ns 0.3050 ns
CopyViaMarshalCopy 256 43.2228 ns 7.1780 ns
CopyViaBlockCopy 512 45.7667 ns 8.8410 ns
CopyViaCpBlk 512 36.2004 ns 8.5977 ns
CopyViaMemcpy 512 69.9357 ns 0.2709 ns
CopyViaArrayCopy 512 65.2994 ns 8.5572 ns
CopyViaCustomCopy 512 56.4534 ns 0.3857 ns
CopyViaMarshalCopy 512 64.0364 ns 11.1595 ns
CopyViaBlockCopy 1024 83.4732 ns 23.1634 ns
CopyViaCpBlk 1024 72.7215 ns 15.0372 ns
CopyViaMemcpy 1024 104.2895 ns 0.6934 ns
CopyViaArrayCopy 1024 104.8888 ns 22.4465 ns
CopyViaCustomCopy 1024 101.3223 ns 0.4833 ns
CopyViaMarshalCopy 1024 102.2398 ns 25.4855 ns
CopyViaBlockCopy 2048 140.8724 ns 61.2754 ns
CopyViaCpBlk 2048 136.0149 ns 40.6683 ns
CopyViaMemcpy 2048 172.0688 ns 0.8153 ns
CopyViaArrayCopy 2048 160.6302 ns 40.3950 ns
CopyViaCustomCopy 2048 190.2832 ns 159.3131 ns
CopyViaMarshalCopy 2048 164.8697 ns 40.3632 ns
CopyViaBlockCopy 4096 236.8672 ns 67.9468 ns
CopyViaCpBlk 4096 225.6976 ns 30.6664 ns
CopyViaMemcpy 4096 306.9859 ns 1.0134 ns
CopyViaArrayCopy 4096 259.5906 ns 64.2187 ns
CopyViaCustomCopy 4096 367.7106 ns 3.5250 ns
CopyViaMarshalCopy 4096 253.9492 ns 58.8593 ns
CopyViaBlockCopy 8192 424.8318 ns 112.7825 ns
CopyViaCpBlk 8192 415.7624 ns 111.4617 ns
CopyViaMemcpy 8192 579.7509 ns 2.2122 ns
CopyViaArrayCopy 8192 432.4439 ns 106.2646 ns
CopyViaCustomCopy 8192 724.1218 ns 581.2734 ns
CopyViaMarshalCopy 8192 434.6137 ns 112.5997 ns
CopyViaBlockCopy 16384 755.1101 ns 191.9747 ns
CopyViaCpBlk 16384 749.1528 ns 206.8295 ns
CopyViaMemcpy 16384 1,254.6983 ns 55.0115 ns
CopyViaArrayCopy 16384 801.9736 ns 190.9383 ns
CopyViaCustomCopy 16384 1,445.2949 ns 3.4539 ns
CopyViaMarshalCopy 16384 782.3009 ns 190.2557 ns

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment