Skip to content

Instantly share code, notes, and snippets.

@gfoidl
Created December 4, 2020 10:59
Show Gist options
  • Save gfoidl/45d1f4b47fae81f49656d4c52a169ac9 to your computer and use it in GitHub Desktop.
Save gfoidl/45d1f4b47fae81f49656d4c52a169ac9 to your computer and use it in GitHub Desktop.
Codegen for ModuleInitializer and static readonly fields
using System;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
// Program entry (top-level statement): passes the Bench class to BenchmarkDotNet's runner.
BenchmarkRunner.Run<Bench>();
/// <summary>
/// Benchmarks that read the <c>Sqrt2</c> static field from three holder classes
/// (<c>DefaultConst</c>, <c>CCtorConst</c>, <c>ModuleInitializerConst</c>),
/// either once per call or 1,000 times inside a loop.
/// </summary>
[ShortRunJob]
[DisassemblyDiagnoser]
public class Bench
{
    /// <summary>Single read of <c>DefaultConst.Sqrt2</c>.</summary>
    [Benchmark]
    public double GetConstValue_DefaultConst()
    {
        return DefaultConst.Sqrt2;
    }

    /// <summary>Single read of <c>CCtorConst.Sqrt2</c>.</summary>
    [Benchmark]
    public double GetConstValue_CCtorConst()
    {
        return CCtorConst.Sqrt2;
    }

    /// <summary>Single read of <c>ModuleInitializerConst.Sqrt2</c>.</summary>
    [Benchmark]
    public double GetConstValue_ModuleInitializerConst()
    {
        return ModuleInitializerConst.Sqrt2;
    }

    /// <summary>Adds <c>DefaultConst.Sqrt2</c> to a running total 1,000 times.</summary>
    /// <returns>The accumulated total.</returns>
    [Benchmark]
    public double UseValueInLoop_DefaultConst()
    {
        double total = 0;

        // The static field is read in the loop body on purpose; do not hoist it by hand.
        for (int iteration = 0; iteration < 1_000; iteration++)
        {
            total += DefaultConst.Sqrt2;
        }

        return total;
    }

    /// <summary>Adds <c>CCtorConst.Sqrt2</c> to a running total 1,000 times.</summary>
    /// <returns>The accumulated total.</returns>
    [Benchmark]
    public double UseValueInLoop_CCtorConst()
    {
        double total = 0;

        // The static field is read in the loop body on purpose; do not hoist it by hand.
        for (int iteration = 0; iteration < 1_000; iteration++)
        {
            total += CCtorConst.Sqrt2;
        }

        return total;
    }

    /// <summary>Adds <c>ModuleInitializerConst.Sqrt2</c> to a running total 1,000 times.</summary>
    /// <returns>The accumulated total.</returns>
    [Benchmark]
    public double UseValueInLoop_ModuleInitializerConst()
    {
        double total = 0;

        // The static field is read in the loop body on purpose; do not hoist it by hand.
        for (int iteration = 0; iteration < 1_000; iteration++)
        {
            total += ModuleInitializerConst.Sqrt2;
        }

        return total;
    }
}
/// <summary>
/// Holder whose static readonly field is assigned by a field initializer;
/// the class declares no explicit static constructor.
/// </summary>
public static class DefaultConst
{
/// <summary>Result of <c>Math.Sqrt(2)</c>, computed in the field initializer.</summary>
public static readonly double Sqrt2 = Math.Sqrt(2);
}
/// <summary>
/// Holder whose static readonly field is assigned inside an explicit static
/// constructor instead of by a field initializer.
/// </summary>
public static class CCtorConst
{
/// <summary>Result of <c>Math.Sqrt(2)</c>; assigned by the static constructor.</summary>
public static readonly double Sqrt2;
// Explicit static constructor: the only place where Sqrt2 is assigned.
static CCtorConst() => Sqrt2 = Math.Sqrt(2);
}
/// <summary>
/// Holder whose static readonly field is assigned by a field initializer and
/// which additionally exposes a <c>[ModuleInitializer]</c> method that reads that field.
/// </summary>
public static class ModuleInitializerConst
{
/// <summary>Result of <c>Math.Sqrt(2)</c>, computed in the field initializer.</summary>
public static readonly double Sqrt2 = Math.Sqrt(2);
/// <summary>
/// Reads <see cref="Sqrt2"/> and discards the value. Marked as a module initializer;
/// per the accompanying gist discussion the read exists so that the static
/// initialization has already happened when the JIT compiles code using the field.
/// </summary>
[ModuleInitializer]
public static void DummyAccessToInit()
{
// The value itself is not needed; only the access matters.
_ = Sqrt2;
}
}
@gfoidl
Copy link
Author

gfoidl commented Dec 4, 2020

; Bench.GetConstValue_DefaultConst()
; Loads the 8-byte value at a fixed address into xmm0 and returns; no call precedes the load.
       vzeroupper
       vmovsd    xmm0,qword ptr [7FFAE44D2DA8]
       ret
; Total bytes of code 12

; Bench.GetConstValue_CCtorConst()
; Same three-instruction shape as the DefaultConst variant: one load, then return.
       vzeroupper
       vmovsd    xmm0,qword ptr [7FFAE44D2DA8]
       ret
; Total bytes of code 12

; Bench.GetConstValue_ModuleInitializerConst()
; Same three-instruction shape again; only the load address differs.
       vzeroupper
       vmovsd    xmm0,qword ptr [7FFAE44F2D38]
       ret
; Total bytes of code 12
; Bench.UseValueInLoop_DefaultConst()
; The helper call and the field load both happen once, before the loop label;
; the loop body itself only adds a register to the accumulator.
       push      rsi
       sub       rsp,30
       vzeroupper
; xmm6 is saved here and restored before returning.
       vmovaps   [rsp+20],xmm6
; xmm6 = accumulator (zeroed), esi = loop counter (zeroed).
       vxorps    xmm6,xmm6,xmm6
       xor       esi,esi
; Runtime helper call, executed once ahead of the loop
; (presumably the static-base lookup that also ensures class initialization -- confirm).
       mov       rcx,7FFAE441BA80
       mov       edx,3
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
; Field value loaded into xmm0 once, outside the loop.
       vmovsd    xmm0,qword ptr [7FFAE44EBAB8]
M00_L00:
       vaddsd    xmm6,xmm6,xmm0
       inc       esi
; 3E8h = 1000, the loop bound from the C# source.
       cmp       esi,3E8
       jl        short M00_L00
; Move the accumulated sum into xmm0 before returning.
       vmovaps   xmm0,xmm6
       vmovaps   xmm6,[rsp+20]
       add       rsp,30
       pop       rsi
       ret
; Total bytes of code 78

; Bench.UseValueInLoop_CCtorConst()
; The loop label sits BEFORE the helper call, so the call is executed on every
; one of the 1000 iterations, and the field is re-read from memory each time.
       push      rsi
       sub       rsp,30
       vzeroupper
; xmm6 is saved here and restored before returning.
       vmovaps   [rsp+20],xmm6
; xmm6 = accumulator (zeroed), esi = loop counter (zeroed).
       vxorps    xmm6,xmm6,xmm6
       xor       esi,esi
M00_L00:
; Runtime helper call inside the loop body
; (presumably the static-base lookup that also ensures class initialization -- confirm).
       mov       rcx,7FFAE43FBA80
       mov       edx,4
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
; Add directly from the field's memory location.
       vaddsd    xmm6,xmm6,qword ptr [7FFAE44CBAC0]
       inc       esi
; 3E8h = 1000, the loop bound from the C# source.
       cmp       esi,3E8
       jl        short M00_L00
; Move the accumulated sum into xmm0 before returning.
       vmovaps   xmm0,xmm6
       vmovaps   xmm6,[rsp+20]
       add       rsp,30
       pop       rsi
       ret
; Total bytes of code 74

; Bench.UseValueInLoop_ModuleInitializerConst()
; No helper call and no stack frame at all: the loop adds straight from the
; field's memory location. 27 bytes versus 78 and 74 for the other two variants.
       vzeroupper
; xmm0 = accumulator (zeroed), eax = loop counter (zeroed).
       vxorps    xmm0,xmm0,xmm0
       xor       eax,eax
M00_L00:
       vaddsd    xmm0,xmm0,qword ptr [7FFAE44D0238]
       inc       eax
; 3E8h = 1000, the loop bound from the C# source.
       cmp       eax,3E8
       jl        short M00_L00
       ret
; Total bytes of code 27

So this is a "better way" than manually enabling quick JIT for loops (which is a process-wide setting). A module initializer runs per module (~ assembly) and can trigger the static initialization there, so the JIT knows that the static initialization has already happened.

@gfoidl
Copy link
Author

gfoidl commented Dec 4, 2020

Side note

Codegen for the loop can be improved by "breaking" the CSE in the JIT.
Tracking issue: dotnet/runtime#35257

C# code
using System;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Program entry (top-level statement): passes the Bench class to BenchmarkDotNet's runner.
BenchmarkRunner.Run<Bench>();

/// <summary>
/// Compares two ways of consuming the same static value inside a 1,000-iteration loop:
/// <see cref="A"/> reads a <c>static readonly double</c> directly, while <see cref="B"/>
/// goes through a property that converts a <c>static readonly long</c> bit pattern
/// back to a double (the gist's way of "breaking" the JIT's CSE).
/// </summary>
[ShortRunJob]
[DisassemblyDiagnoser]
public class Bench
{
    // sqrt(2) stored as a double; consumed by A().
    private static readonly double s_sqrt2 = Math.Sqrt(2);

    // sqrt(2) stored as its raw 64-bit pattern; consumed by B() through the Sqrt2 property.
    private static readonly long s_sqrt2Long = BitConverter.DoubleToInt64Bits(Math.Sqrt(2));

    /// <summary>Converts the stored bit pattern back into a double.</summary>
    private static double Sqrt2
    {
        get { return BitConverter.Int64BitsToDouble(s_sqrt2Long); }
    }

    /// <summary>
    /// Module initializer that reads both statics and discards the values;
    /// only the access matters, not the result.
    /// </summary>
    [ModuleInitializer]
    public static void Init()
    {
        _ = s_sqrt2;
        _ = Sqrt2;
    }

    /// <summary>Adds the double-typed static to a running total 1,000 times.</summary>
    /// <returns>The accumulated total.</returns>
    [Benchmark]
    public double A()
    {
        double total = 0;

        for (int iteration = 0; iteration < 1_000; iteration++)
        {
            total += s_sqrt2;
        }

        return total;
    }

    /// <summary>Adds the value obtained via <see cref="Sqrt2"/> to a running total 1,000 times.</summary>
    /// <returns>The accumulated total.</returns>
    [Benchmark]
    public double B()
    {
        double total = 0;

        for (int iteration = 0; iteration < 1_000; iteration++)
        {
            total += Sqrt2;
        }

        return total;
    }
}
; Bench.A()
; The loop adds from the field's memory location on every iteration; no helper call.
       vzeroupper
; xmm0 = accumulator (zeroed), eax = loop counter (zeroed).
       vxorps    xmm0,xmm0,xmm0
       xor       eax,eax
M00_L00:
       vaddsd    xmm0,xmm0,qword ptr [7FFAE0570448]
       inc       eax
; 3E8h = 1000, the loop bound from the C# source.
       cmp       eax,3E8
       jl        short M00_L00
       ret
; Total bytes of code 27

; Bench.B()
; The value is materialized as an immediate and moved into xmm1 once, before the
; loop; the loop body contains no memory operand, only register moves and the add.
       vzeroupper
; xmm0 = accumulator (zeroed), eax = loop counter (zeroed).
       vxorps    xmm0,xmm0,xmm0
       xor       eax,eax
; NOTE(review): the IEEE-754 bit pattern of sqrt(2) is 3FF6A09E667F3BCD; the immediate
; as printed below lacks the leading 3FF6 -- possibly truncated by the disassembly
; output. Confirm against a fresh run before relying on the literal.
       mov       rdx,0A09E667F3BCD
       vmovq     xmm1,rdx
M00_L00:
       vmovaps   xmm2,xmm1
       vaddsd    xmm0,xmm0,xmm2
       inc       eax
; 3E8h = 1000, the loop bound from the C# source.
       cmp       eax,3E8
       jl        short M00_L00
       ret
; Total bytes of code 42

@gfoidl
Copy link
Author

gfoidl commented Jun 25, 2021

(At least for .NET 6) a cctor combined with a module initializer yields good code too.

/// <summary>
/// Holder that combines both approaches: the field is assigned in an explicit
/// static constructor, and a <c>[ModuleInitializer]</c> method reads the field.
/// </summary>
public static class CCtorConst1
{
    /// <summary>Result of <c>Math.Sqrt(2)</c>; assigned by the static constructor.</summary>
    public static readonly double Sqrt2;

    // Explicit static constructor: the only place where Sqrt2 is assigned.
    static CCtorConst1() => Sqrt2 = Math.Sqrt(2);

    /// <summary>
    /// Module initializer that reads <see cref="Sqrt2"/> and discards the value;
    /// only the access matters, not the result.
    /// </summary>
    [ModuleInitializer]
    internal static void Init()
    {
        _ = Sqrt2;
    }
}

results in the same code as with ModuleInitializerConst.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment