Skip to content

Instantly share code, notes, and snippets.

@gfoidl
Last active October 30, 2020 15:21
Show Gist options
  • Save gfoidl/346ea7472f06e2024b8bdc9c598e547e to your computer and use it in GitHub Desktop.
Save gfoidl/346ea7472f06e2024b8bdc9c598e547e to your computer and use it in GitHub Desktop.
.NET Array cache line alignment / length -- false sharing caution
using System;
using System.Linq;
namespace ConsoleApp3
{
static unsafe class Program
{
static void Main()
{
int[] arr = Enumerable.Range(2, 122).ToArray();
fixed (int* pArr = arr)
{
PrintCacheInfo(pArr - 2 , "(IntPtr)ptr - 1 = Length");
PrintCacheInfo(pArr , "arr[0]");
PrintCacheInfo(pArr + 64, "arr[64]");
}
}
private static void PrintCacheInfo(int* ptr, string caption)
{
const int cacheBlockBits = 6;
const int cacheBlockSize = 1 << cacheBlockBits; // 64
const int cacheBlockMask = cacheBlockSize - 1; // 63
long addr = (long)ptr;
long cacheBlockOffset = addr & cacheBlockMask; // % 64
long cacheBlockAlignedAddr = addr & ~cacheBlockMask;
Console.WriteLine(caption);
Console.WriteLine($"Address: 0x{addr:X}");
Console.WriteLine($"Cache line addr: 0x{cacheBlockAlignedAddr:X}");
Console.WriteLine($"Offset in cl: {cacheBlockOffset} | 0x{cacheBlockOffset:X}");
Console.WriteLine($"Value: {*ptr}");
Console.WriteLine();
}
}
}
@gfoidl
Copy link
Author

gfoidl commented Nov 30, 2018

Output is something like this (win-x64, .NET Core 2.1)

(IntPtr)ptr - 1 = Length
Address:         0x1DBC0903C70
Cache line addr: 0x1DBC0903C40
Offset in cl:    48 | 0x30
Value:           122

arr[0]
Address:         0x1DBC0903C78
Cache line addr: 0x1DBC0903C40
Offset in cl:    56 | 0x38
Value:           2

So the Length and arr[0] are in the same cache line. Hence The layout of arrays on the CLR is such that an array’s length resides before the first element. So unless you pad the first element of the array, all accesses will perform bounds checking that touches the first element’s line. Because this line is being mutated by the thread incrementing the first counter, terrible false sharing results. has to be taken into account when trying to avoid false sharing.

@gfoidl
Copy link
Author

gfoidl commented Nov 30, 2018

Demo for false sharing

Result

false-sharing

C# code
using System;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
    class Program
    {
#if DEBUG
        private const int ITERS = 2;
#else
        private const int ITERS = 100_000_000;
#endif
        //---------------------------------------------------------------------
        private static int[]                s_counter = new int[1 << 20];
        private static CountdownEvent       s_cde     = new CountdownEvent(4);
        private static ManualResetEventSlim s_mre     = new ManualResetEventSlim(false);
        private static Stopwatch            s_watch   = new Stopwatch();
        //---------------------------------------------------------------------
        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        private static void UpdateCounter(int position)
        {
            s_cde.Signal();
            s_mre.Wait();

            for (int j = 0; j < ITERS; j++)
            {
                s_counter[position] += 3;   // any number, just to do a read and a write
            }
        }
        //---------------------------------------------------------------------
        static void Main()
        {
            // Parallel.Invoke will run one task in the main-thread (inlineing of tasks),
            // so be aware of potential deadlocks. Thus "free" the main-thread.

            // JIT
            s_watch.Start();
            Task.Run(() =>
                Parallel.Invoke(
                    () => UpdateCounter(64 * 0 / sizeof(int)),
                    () => UpdateCounter(64 * 1 / sizeof(int)),
                    () => UpdateCounter(64 * 2 / sizeof(int)),
                    () => UpdateCounter(64 * 3 / sizeof(int))
                )
            );
            s_cde  .Wait();
            s_mre  .Set();
            s_watch.Stop();

            Run(padFirstElement: false, "results-no-padfirst.csv");
            Run(padFirstElement: true , "results-padfirst.csv");
        }
        //---------------------------------------------------------------------
        private static void Run(bool padFirstElement, string resultsFileName)
        {
            int pad = padFirstElement ? 1 : 0;

            using (StreamWriter sw = File.CreateText(resultsFileName))
            {
                sw.WriteLine("#spacing [bytes];time [ms]");     // # for GnuPlot

                for (int i = 0; i < 15; ++i)
                {
                    int spacing        = 1 << i;
                    int spacingInBytes = spacing * sizeof(int);

                    Console.Write($"spacing: {spacingInBytes,5} bytes");

                    s_cde.Reset();
                    s_mre.Reset();

                    var task = Task.Run(() =>
                        Parallel.Invoke(
                            () => UpdateCounter(spacingInBytes * (pad + 0) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 1) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 2) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 3) / sizeof(int))
                        )
                    );

                    s_cde  .Wait();
                    s_watch.Restart();
                    s_mre  .Set();
                    task   .Wait();
                    s_watch.Stop();

                    Console.WriteLine($", time: {s_watch.ElapsedMilliseconds}");
                    sw.WriteLine($"{spacingInBytes};{s_watch.ElapsedMilliseconds}");
                }
            }
        }
    }
}
GnuPlot
reset

set datafile separator ';'

set term pngcairo size 1200,600 enhanced
set output 'false-sharing.png'

set grid

set title  'False sharing on updating counter-array'
set title font ",12"

set xlabel 'spacing [bytes]'
set ylabel 'time [ms]'

set logscale x;
set xtics (4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536)

set arrow from 64, graph 0 to 64, graph 1 nohead lc rgb 'red' lw 1
set label at 60,1000 'Cache line size' rotate by 90 front textcolor rgb 'red'

plot'results-no-padfirst.csv' with linespoints lw 2 title 'no padding for first', \
    'results-padfirst.csv'    with linespoints lw 2 title 'padding for first'

@gfoidl
Copy link
Author

gfoidl commented Oct 30, 2020

False sharing in Intel VTune Profiler

Microarchitecture Exploration

w/o padding

grafik

with padding

grafik

Memory Access

w/o padding

grafik

with padding

grafik

Note: It's L1-bound because of the RAW-hazard from the increment now.

BenchmarkDotNet

Shows this effect with the use of hardware counters also clearly:

Method Mean Error StdDev Ratio RatioSD CacheMisses/Op LLCReference/Op LLCMisses/Op
NoPadding 2.325 s 0.0409 s 0.0363 s 1.00 0.00 1,047,666 146,520,838 1,048,121
Padding 1.658 s 0.0239 s 0.0223 s 0.71 0.02 622,924 2,056,746 622,592

demo program
#define PAD

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;

Console.WriteLine($"Size of struct: {Unsafe.SizeOf<Data>()}");

Worker worker = new();
Parallel.Invoke(
    () => Console.WriteLine($"x: {worker.IncX()}"),
    () => Console.WriteLine($"y: {worker.IncY()}")
);

public class Worker
{
    private const uint N = 3_000_000_000;
    private Data _data;

    public uint IncX()
    {
        for (uint i = 0; i < N; ++i)
        {
            _data.X++;
        }

        return _data.X;
    }

    public uint IncY()
    {
        for (uint i = 0; i < N; ++i)
        {
            _data.Y++;
        }

        return _data.Y;
    }
}

[StructLayout(LayoutKind.Explicit)]
public struct Data
{
    [FieldOffset(0)]
    public uint X;

#if PAD
    [FieldOffset(64)]
#else
    [FieldOffset(4)]
#endif
    public uint Y;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment