Skip to content

Instantly share code, notes, and snippets.

@MihuBot
Created December 22, 2023 00:33
Show Gist options
  • Save MihuBot/2242caaa757df98bd798a145e7cbc7c1 to your computer and use it in GitHub Desktop.
JIT diffs: CoreLib improvements for https://github.com/MihuBot/runtime-utils/issues/213

Top method improvements

-55 (-27.64 % of base) - System.Threading.WaitHandle:set_Handle(long):this
 ; Assembly listing for method System.Threading.WaitHandle:set_Handle(long):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; fully interruptible
+; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 7 single block inlinees; 2 inlinees without PGO data
+; 0 inlinees with PGO data; 7 single block inlinees; 1 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T04] (  5,  3.50)     ref  ->  rbx         this class-hnd single-def <System.Threading.WaitHandle>
-;  V01 arg1         [V01,T05] (  4,  3.50)    long  ->  r15         single-def
+;  V00 this         [V00,T01] (  5,  3.50)     ref  ->  rbx         this class-hnd single-def <System.Threading.WaitHandle>
+;  V01 arg1         [V01,T02] (  4,  3.50)    long  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T03] (  7,  7   )     ref  ->  rax         class-hnd exact single-def "NewObj constructor temp" <Microsoft.Win32.SafeHandles.SafeWaitHandle>
-;  V04 tmp2         [V04,T06] (  5,  5   )     ref  ->  r15         class-hnd exact single-def "Inlining Arg" <Microsoft.Win32.SafeHandles.SafeWaitHandle>
-;  V05 tmp3         [V05,T02] (  3, 10   )   byref  ->  rdi         single-def "Inlining Arg"
-;  V06 tmp4         [V06,T00] (  5, 16.50)     int  ->  [rbp-0x14]  "Inline stloc first use temp"
-;* V07 tmp5         [V07    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V08 tmp6         [V08,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V09 cse0         [V09,T07] (  3,  1.50)     ref  ->  r15         "CSE - moderate"
+;  V03 tmp1         [V03,T00] (  7,  7   )     ref  ->  rax         class-hnd exact single-def "NewObj constructor temp" <Microsoft.Win32.SafeHandles.SafeWaitHandle>
+;  V04 tmp2         [V04,T03] (  4,  4   )     ref  ->  r15         class-hnd exact single-def "Inlining Arg" <Microsoft.Win32.SafeHandles.SafeWaitHandle>
+;  V05 cse0         [V05,T04] (  3,  1.50)     ref  ->  r15         "CSE - moderate"
 ;
-; Lcl frame size = 16
+; Lcl frame size = 0
 
 G_M39114_IG01:
        push     rbp
        push     r15
        push     rbx
-       sub      rsp, 16
-       lea      rbp, [rsp+0x20]
+       lea      rbp, [rsp+0x10]
        mov      rbx, rdi
        mov      r15, rsi
-						;; size=19 bbWeight=1 PerfScore 4.25
+						;; size=15 bbWeight=1 PerfScore 4.00
 G_M39114_IG02:
        cmp      r15, -1
-       jne      SHORT G_M39114_IG07
+       jne      SHORT G_M39114_IG05
 						;; size=6 bbWeight=1 PerfScore 1.25
 G_M39114_IG03:
        mov      r15, gword ptr [rbx+0x08]
        test     r15, r15
-       je       G_M39114_IG08
-       cmp      byte  ptr [r15], r15b
+       je       SHORT G_M39114_IG06
        lea      rdi, bword ptr [r15+0x10]
-       mov      eax, dword ptr [rdi]
-       jmp      SHORT G_M39114_IG04
-       align    [15 bytes for IG04]
-						;; size=39 bbWeight=0.50 PerfScore 5.38
-G_M39114_IG04:
-       mov      esi, eax
-       or       esi, 1
-       mov      dword ptr [rbp-0x14], eax
+       mov      esi, 1
        lock     
-       cmpxchg  dword ptr [rdi], esi
-       mov      esi, dword ptr [rbp-0x14]
-       cmp      eax, esi
-       je       SHORT G_M39114_IG05
-       mov      esi, eax
-       mov      eax, esi
-       jmp      SHORT G_M39114_IG04
-						;; size=25 bbWeight=4 PerfScore 97.00
-G_M39114_IG05:
+       or       dword ptr [rdi], esi
        mov      rdi, r15
        mov      rsi, 0xD1FFAB1E      ; 'obj'
 
        mov      rax, 0xD1FFAB1E      ; code for System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        call     [rax]System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        mov      rdi, r15
        call     System.GC:_SuppressFinalize(System.Object)
        xor      rdi, rdi
        mov      gword ptr [rbx+0x08], rdi
-						;; size=39 bbWeight=0.50 PerfScore 3.12
-G_M39114_IG06:
-       add      rsp, 16
+						;; size=60 bbWeight=0.50 PerfScore 13.12
+G_M39114_IG04:
        pop      rbx
        pop      r15
        pop      rbp
        ret      
-						;; size=9 bbWeight=0.50 PerfScore 1.38
-G_M39114_IG07:
+						;; size=5 bbWeight=0.50 PerfScore 1.25
+G_M39114_IG05:
        mov      rdi, 0xD1FFAB1E      ; Microsoft.Win32.SafeHandles.SafeWaitHandle
        call     CORINFO_HELP_NEWFAST
        xor      edi, edi
        mov      qword ptr [rax+0x08], rdi
        mov      dword ptr [rax+0x10], 4
        mov      byte  ptr [rax+0x14], 1
        mov      byte  ptr [rax+0x15], 1
        mov      qword ptr [rax+0x08], r15
        lea      rdi, bword ptr [rbx+0x08]
        mov      rsi, rax
        call     CORINFO_HELP_ASSIGN_REF
 						;; size=52 bbWeight=0.50 PerfScore 4.12
-G_M39114_IG08:
+G_M39114_IG06:
        nop      
 						;; size=1 bbWeight=0.50 PerfScore 0.12
-G_M39114_IG09:
-       add      rsp, 16
+G_M39114_IG07:
        pop      rbx
        pop      r15
        pop      rbp
        ret      
-						;; size=9 bbWeight=0.50 PerfScore 1.38
+						;; size=5 bbWeight=0.50 PerfScore 1.25
 
-; Total bytes of code 199, prolog size 19, PerfScore 137.90, instruction count 58, allocated bytes for code 199 (MethodHash=a6df6735) for method System.Threading.WaitHandle:set_Handle(long):this (FullOpts)
+; Total bytes of code 144, prolog size 9, PerfScore 39.53, instruction count 43, allocated bytes for code 144 (MethodHash=a6df6735) for method System.Threading.WaitHandle:set_Handle(long):this (FullOpts)
-47 (-62.67 % of base) - System.Runtime.InteropServices.SafeHandle:SetHandleAsInvalid():this
 ; Assembly listing for method System.Runtime.InteropServices.SafeHandle:SetHandleAsInvalid():this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
-; rbp based frame
+; rsp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T03] (  4,  4   )     ref  ->  rdi         this class-hnd single-def <System.Runtime.InteropServices.SafeHandle>
+;  V00 this         [V00,T00] (  5,  5   )     ref  ->  rdi         this class-hnd single-def <System.Runtime.InteropServices.SafeHandle>
 ;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V02 tmp1         [V02,T01] (  3, 20   )   byref  ->  rcx         single-def "Inlining Arg"
-;  V03 tmp2         [V03,T00] (  5, 29   )     int  ->  [rbp-0x04]  "Inline stloc first use temp"
-;* V04 tmp3         [V04    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V05 tmp4         [V05,T02] (  3, 20   )     int  ->  rax         "Inline stloc first use temp"
 ;
-; Lcl frame size = 16
+; Lcl frame size = 0
 
 G_M36207_IG01:
-       push     rbp
-       sub      rsp, 16
-       lea      rbp, [rsp+0x10]
-						;; size=10 bbWeight=1 PerfScore 1.75
+						;; size=0 bbWeight=1 PerfScore 0.00
 G_M36207_IG02:
-       lea      rcx, bword ptr [rdi+0x10]
-       mov      eax, dword ptr [rcx]
-       jmp      SHORT G_M36207_IG03
-       align    [14 bytes for IG03]
-						;; size=22 bbWeight=1 PerfScore 4.50
-G_M36207_IG03:
-       mov      edx, eax
-       or       edx, 1
-       mov      dword ptr [rbp-0x04], eax
+       cmp      byte  ptr [rdi], dil
+       lea      rax, bword ptr [rdi+0x10]
+       mov      ecx, 1
        lock     
-       cmpxchg  dword ptr [rcx], edx
-       mov      edx, dword ptr [rbp-0x04]
-       cmp      eax, edx
-       je       SHORT G_M36207_IG05
-						;; size=19 bbWeight=8 PerfScore 174.00
-G_M36207_IG04:
-       mov      edx, eax
-       mov      eax, edx
-       jmp      SHORT G_M36207_IG03
-						;; size=6 bbWeight=4 PerfScore 10.00
-G_M36207_IG05:
+       or       dword ptr [rax], ecx
        mov      rax, 0xD1FFAB1E      ; code for System.GC:SuppressFinalize(System.Object)
-						;; size=10 bbWeight=1 PerfScore 0.25
-G_M36207_IG06:
-       add      rsp, 16
-       pop      rbp
+						;; size=25 bbWeight=1 PerfScore 20.00
+G_M36207_IG03:
        tail.jmp [rax]System.GC:SuppressFinalize(System.Object)
-						;; size=8 bbWeight=1 PerfScore 2.75
+						;; size=3 bbWeight=1 PerfScore 2.00
 
-; Total bytes of code 75, prolog size 10, PerfScore 200.75, instruction count 22, allocated bytes for code 75 (MethodHash=25a67290) for method System.Runtime.InteropServices.SafeHandle:SetHandleAsInvalid():this (FullOpts)
+; Total bytes of code 28, prolog size 0, PerfScore 24.80, instruction count 7, allocated bytes for code 28 (MethodHash=25a67290) for method System.Runtime.InteropServices.SafeHandle:SetHandleAsInvalid():this (FullOpts)
-42 (-13.55 % of base) - System.Threading.EventWaitHandle:CreateEventCore(ubyte,int,System.String,byref):this
 ; Assembly listing for method System.Threading.EventWaitHandle:CreateEventCore(ubyte,int,System.String,byref):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; fully interruptible
+; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 3 single block inlinees; 2 inlinees without PGO data
+; 0 inlinees with PGO data; 3 single block inlinees; 1 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T03] (  3,  3   )     ref  ->  rbx         this class-hnd single-def <System.Threading.EventWaitHandle>
-;  V01 arg1         [V01,T06] (  3,  3   )   ubyte  ->  rsi         single-def
-;  V02 arg2         [V02,T07] (  3,  3   )     int  ->  rdx         single-def
-;  V03 arg3         [V03,T04] (  3,  3   )     ref  ->  rcx         class-hnd single-def <System.String>
-;  V04 arg4         [V04,T05] (  3,  3   )   byref  ->  r15         single-def
-;  V05 loc0         [V05,T08] (  4,  4   )     int  ->  rsi        
-;  V06 loc1         [V06,T09] (  6,  3.50)     ref  ->  r14         class-hnd exact single-def <Microsoft.Win32.SafeHandles.SafeWaitHandle>
-;  V07 loc2         [V07,T11] (  3,  2   )     int  ->  r13        
+;  V00 this         [V00,T00] (  3,  3   )     ref  ->  rbx         this class-hnd single-def <System.Threading.EventWaitHandle>
+;  V01 arg1         [V01,T03] (  3,  3   )   ubyte  ->  rsi         single-def
+;  V02 arg2         [V02,T04] (  3,  3   )     int  ->  rdx         single-def
+;  V03 arg3         [V03,T01] (  3,  3   )     ref  ->  rcx         class-hnd single-def <System.String>
+;  V04 arg4         [V04,T02] (  3,  3   )   byref  ->  r15         single-def
+;  V05 loc0         [V05,T05] (  4,  4   )     int  ->  rsi        
+;  V06 loc1         [V06,T06] (  6,  3   )     ref  ->  r14         class-hnd exact single-def <Microsoft.Win32.SafeHandles.SafeWaitHandle>
+;  V07 loc2         [V07,T08] (  3,  2   )     int  ->  r13        
 ;# V08 OutArgs      [V08    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V09 tmp1         [V09,T10] (  3,  3   )     int  ->  rdi        
+;  V09 tmp1         [V09,T07] (  3,  3   )     int  ->  rdi        
 ;* V10 tmp2         [V10    ] (  0,  0   )     ref  ->  zero-ref    class-hnd exact single-def "NewObj constructor temp" <System.Threading.WaitHandleCannotBeOpenedException>
-;  V11 tmp3         [V11,T12] (  3,  0   )     ref  ->  r14         class-hnd exact single-def "NewObj constructor temp" <System.PlatformNotSupportedException>
-;  V12 tmp4         [V12,T02] (  3, 10   )   byref  ->  rdi         single-def "Inlining Arg"
-;  V13 tmp5         [V13,T00] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V14 tmp6         [V14    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V15 tmp7         [V15,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;* V16 tmp8         [V16,T14] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V17 tmp9         [V17    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
-;* V18 tmp10        [V18    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
-;  V19 tmp11        [V19,T13] (  2,  0   )     ref  ->  rsi         single-def "argument with side effect"
+;  V11 tmp3         [V11,T09] (  3,  0   )     ref  ->  r14         class-hnd exact single-def "NewObj constructor temp" <System.PlatformNotSupportedException>
+;* V12 tmp4         [V12,T11] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V13 tmp5         [V13    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
+;* V14 tmp6         [V14    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
+;  V15 tmp7         [V15,T10] (  2,  0   )     ref  ->  rsi         single-def "argument with side effect"
 ;
-; Lcl frame size = 16
+; Lcl frame size = 0
 
 G_M17468_IG01:
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
-       sub      rsp, 16
-       lea      rbp, [rsp+0x30]
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15, r8
-						;; size=23 bbWeight=1 PerfScore 6.25
+						;; size=19 bbWeight=1 PerfScore 6.00
 G_M17468_IG02:
        test     rcx, rcx
-       jne      G_M17468_IG07
+       jne      SHORT G_M17468_IG04
        mov      edi, 2
        xor      ecx, ecx
        test     sil, sil
        cmove    edi, ecx
        mov      esi, edi
        or       edi, 1
        cmp      edx, 1
        cmove    esi, edi
        mov      edx, esi
        xor      edi, edi
        xor      rsi, rsi
        mov      ecx, 0xD1FFAB1E
        mov      rax, 0xD1FFAB1E      ; code for Interop+Kernel32:CreateEventEx(long,System.String,uint,uint):Microsoft.Win32.SafeHandles.SafeWaitHandle
        call     [rax]Interop+Kernel32:CreateEventEx(long,System.String,uint,uint):Microsoft.Win32.SafeHandles.SafeWaitHandle
        mov      r14, rax
        call     System.Runtime.InteropServices.Marshal:GetLastPInvokeError():int
        mov      r13d, eax
        mov      rdi, r14
        mov      rax, 0xD1FFAB1E      ; code for Microsoft.Win32.SafeHandles.SafeHandleZeroOrMinusOneIsInvalid:get_IsInvalid():ubyte:this
        cmp      dword ptr [rdi], edi
        call     [rax]Microsoft.Win32.SafeHandles.SafeHandleZeroOrMinusOneIsInvalid:get_IsInvalid():ubyte:this
        test     eax, eax
-       je       SHORT G_M17468_IG05
-						;; size=88 bbWeight=1 PerfScore 16.75
-G_M17468_IG03:
-       lea      rdi, bword ptr [r14+0x10]
-       mov      eax, dword ptr [rdi]
-       jmp      SHORT G_M17468_IG04
-       align    [9 bytes for IG04]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M17468_IG04:
-       mov      esi, eax
-       or       esi, 1
-       mov      dword ptr [rbp-0x24], eax
-       lock     
-       cmpxchg  dword ptr [rdi], esi
-       mov      esi, dword ptr [rbp-0x24]
-       cmp      eax, esi
-       je       SHORT G_M17468_IG08
-       mov      esi, eax
-       mov      eax, esi
-       jmp      SHORT G_M17468_IG04
-						;; size=25 bbWeight=4 PerfScore 97.00
-G_M17468_IG05:
+       jne      SHORT G_M17468_IG05
        cmp      r13d, 183
        setne    dil
        mov      byte  ptr [r15], dil
        lea      rdi, bword ptr [rbx+0x08]
        mov      rsi, r14
        call     CORINFO_HELP_ASSIGN_REF
        nop      
-						;; size=27 bbWeight=1 PerfScore 4.25
-G_M17468_IG06:
-       add      rsp, 16
+						;; size=111 bbWeight=1 PerfScore 21.00
+G_M17468_IG03:
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
-						;; size=13 bbWeight=1 PerfScore 3.75
-G_M17468_IG07:
+						;; size=9 bbWeight=1 PerfScore 3.50
+G_M17468_IG04:
        mov      rdi, 0xD1FFAB1E      ; System.PlatformNotSupportedException
        call     CORINFO_HELP_NEWSFAST
        mov      r14, rax
        mov      rax, 0xD1FFAB1E      ; code for System.SR:get_PlatformNotSupported_NamedSynchronizationPrimitives():System.String
        call     [rax]System.SR:get_PlatformNotSupported_NamedSynchronizationPrimitives():System.String
        mov      rsi, rax
        mov      rdi, r14
        mov      rax, 0xD1FFAB1E      ; code for System.PlatformNotSupportedException:.ctor(System.String):this
        call     [rax]System.PlatformNotSupportedException:.ctor(System.String):this
        mov      rdi, r14
        call     CORINFO_HELP_THROW
 						;; size=56 bbWeight=0 PerfScore 0.00
-G_M17468_IG08:
+G_M17468_IG05:
+       lea      rdi, bword ptr [r14+0x10]
+       mov      esi, 1
+       lock     
+       or       dword ptr [rdi], esi
        mov      rdi, r14
        mov      rsi, 0xD1FFAB1E      ; 'obj'
 
        mov      rax, 0xD1FFAB1E      ; code for System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        call     [rax]System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        mov      rdi, r14
        call     System.GC:_SuppressFinalize(System.Object)
        mov      edi, r13d
        xor      rsi, rsi
        xor      rdx, rdx
        mov      rax, 0xD1FFAB1E      ; code for System.IO.Win32Marshal:GetExceptionForWin32Error(int,System.String,System.String):System.Exception
        call     [rax]System.IO.Win32Marshal:GetExceptionForWin32Error(int,System.String,System.String):System.Exception
        mov      rdi, rax
        call     CORINFO_HELP_THROW
        int3     
-						;; size=61 bbWeight=0 PerfScore 0.00
+						;; size=73 bbWeight=0 PerfScore 0.00
 
-; Total bytes of code 310, prolog size 23, PerfScore 161.25, instruction count 88, allocated bytes for code 310 (MethodHash=f983bbc3) for method System.Threading.EventWaitHandle:CreateEventCore(ubyte,int,System.String,byref):this (FullOpts)
+; Total bytes of code 268, prolog size 13, PerfScore 57.30, instruction count 75, allocated bytes for code 268 (MethodHash=f983bbc3) for method System.Threading.EventWaitHandle:CreateEventCore(ubyte,int,System.String,byref):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Threading.Tasks.VoidTaskResult]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Threading.Tasks.VoidTaskResult]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[System.Threading.Tasks.VoidTaskResult]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M55510_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M55510_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M55510_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M55510_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Threading.Tasks.VoidTaskResult]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Threading.Tasks.VoidTaskResult]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Threading.Tasks.VoidTaskResult]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Threading.Tasks.VoidTaskResult]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M55510_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M55510_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M55510_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M55510_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M55510_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M55510_IG09
+       je       SHORT G_M55510_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M55510_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M55510_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M55510_IG09
+       jne      SHORT G_M55510_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M55510_IG09
+       je       SHORT G_M55510_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M55510_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M55510_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M55510_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M55510_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M55510_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M55510_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M55510_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M55510_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=ad392729) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=ad392729) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[double]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[double]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[double]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M58574_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M58574_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M58574_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M58574_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[double]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[double]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M58574_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M58574_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M58574_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M58574_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M58574_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M58574_IG09
+       je       SHORT G_M58574_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M58574_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M58574_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M58574_IG09
+       jne      SHORT G_M58574_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M58574_IG09
+       je       SHORT G_M58574_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M58574_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M58574_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M58574_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M58574_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M58574_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M58574_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M58574_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M58574_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=127c1b31) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=127c1b31) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[double]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[double]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[double]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M62388_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M62388_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M62388_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M62388_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[double]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[double]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M62388_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M62388_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M62388_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M62388_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M62388_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M62388_IG09
+       je       SHORT G_M62388_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M62388_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M62388_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M62388_IG09
+       jne      SHORT G_M62388_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M62388_IG09
+       je       SHORT G_M62388_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M62388_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M62388_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M62388_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M62388_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M62388_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M62388_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M62388_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M62388_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=3b400c4b) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=3b400c4b) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[double]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[int]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[int]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[int]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M55432_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M55432_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M55432_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M55432_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[int]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[int]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M55432_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M55432_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M55432_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M55432_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M55432_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M55432_IG09
+       je       SHORT G_M55432_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M55432_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M55432_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M55432_IG09
+       jne      SHORT G_M55432_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M55432_IG09
+       je       SHORT G_M55432_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M55432_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M55432_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M55432_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M55432_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M55432_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M55432_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M55432_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M55432_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=16f62777) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=16f62777) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[int]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[int]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[int]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M50994_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M50994_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M50994_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M50994_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[int]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[int]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M50994_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M50994_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M50994_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M50994_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M50994_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M50994_IG09
+       je       SHORT G_M50994_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M50994_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M50994_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M50994_IG09
+       jne      SHORT G_M50994_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M50994_IG09
+       je       SHORT G_M50994_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M50994_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M50994_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M50994_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M50994_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M50994_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M50994_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M50994_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M50994_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=38ce38cd) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=38ce38cd) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[int]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[long]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[long]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[long]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M39185_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M39185_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M39185_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M39185_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[long]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[long]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M39185_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M39185_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M39185_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M39185_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M39185_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M39185_IG09
+       je       SHORT G_M39185_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M39185_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M39185_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M39185_IG09
+       jne      SHORT G_M39185_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M39185_IG09
+       je       SHORT G_M39185_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M39185_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M39185_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M39185_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M39185_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M39185_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M39185_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M39185_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M39185_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=269766ee) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=269766ee) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[long]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[long]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[long]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M51787_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M51787_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M51787_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M51787_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[long]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[long]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M51787_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M51787_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M51787_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M51787_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M51787_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M51787_IG09
+       je       SHORT G_M51787_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M51787_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M51787_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M51787_IG09
+       jne      SHORT G_M51787_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M51787_IG09
+       je       SHORT G_M51787_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M51787_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M51787_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M51787_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M51787_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M51787_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M51787_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M51787_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M51787_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=886b35b4) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=886b35b4) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[long]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[short]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[short]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[short]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M34921_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M34921_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M34921_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M34921_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[short]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[short]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M34921_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M34921_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M34921_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M34921_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M34921_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M34921_IG09
+       je       SHORT G_M34921_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M34921_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M34921_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M34921_IG09
+       jne      SHORT G_M34921_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M34921_IG09
+       je       SHORT G_M34921_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M34921_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M34921_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M34921_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M34921_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M34921_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M34921_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M34921_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M34921_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=00ba7796) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=00ba7796) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[short]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[short]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[short]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M23027_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M23027_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M23027_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M23027_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[short]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[short]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M23027_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M23027_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M23027_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M23027_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M23027_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M23027_IG09
+       je       SHORT G_M23027_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M23027_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M23027_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M23027_IG09
+       jne      SHORT G_M23027_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M23027_IG09
+       je       SHORT G_M23027_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M23027_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M23027_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M23027_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M23027_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M23027_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M23027_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M23027_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M23027_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=48e7a60c) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=48e7a60c) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[short]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M52005_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M52005_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M52005_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M52005_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Nullable`1[int]]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Nullable`1[int]]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M52005_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M52005_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M52005_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M52005_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M52005_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M52005_IG09
+       je       SHORT G_M52005_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M52005_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M52005_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M52005_IG09
+       jne      SHORT G_M52005_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M52005_IG09
+       je       SHORT G_M52005_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M52005_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M52005_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M52005_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M52005_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M52005_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M52005_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M52005_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M52005_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=104534da) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=104534da) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[System.Nullable`1[int]]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M11967_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M11967_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M11967_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M11967_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Nullable`1[int]]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Nullable`1[int]]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M11967_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M11967_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M11967_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M11967_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M11967_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M11967_IG09
+       je       SHORT G_M11967_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M11967_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M11967_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M11967_IG09
+       jne      SHORT G_M11967_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M11967_IG09
+       je       SHORT G_M11967_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M11967_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M11967_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M11967_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M11967_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M11967_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M11967_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M11967_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M11967_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=b9ded140) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=b9ded140) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Nullable`1[int]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M47896_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M47896_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M47896_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M47896_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M47896_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M47896_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M47896_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M47896_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M47896_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M47896_IG09
+       je       SHORT G_M47896_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M47896_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M47896_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M47896_IG09
+       jne      SHORT G_M47896_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M47896_IG09
+       je       SHORT G_M47896_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M47896_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M47896_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M47896_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M47896_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M47896_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M47896_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M47896_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M47896_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=994144e7) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=994144e7) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M6050_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M6050_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M6050_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M6050_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.Numerics.Vector`1[float]]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M6050_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M6050_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M6050_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M6050_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M6050_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M6050_IG09
+       je       SHORT G_M6050_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M6050_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M6050_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M6050_IG09
+       jne      SHORT G_M6050_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M6050_IG09
+       je       SHORT G_M6050_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M6050_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M6050_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M6050_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M6050_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M6050_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M6050_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M6050_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M6050_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=0893e85d) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=0893e85d) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.Numerics.Vector`1[float]]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  3,  3   )   ubyte  ->  r15         single-def
-;  V01 arg1         [V01,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 arg0         [V00,T05] (  3,  3   )   ubyte  ->  r15         single-def
+;  V01 arg1         [V01,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V02 loc0         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[ubyte]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp2         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp3         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[ubyte]>
+;  V04 tmp1         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp2         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp3         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[ubyte]>
 ;* V07 tmp4         [V07    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V08 tmp5         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V09 tmp6         [V09,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V09 tmp6         [V09,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V10 tmp7         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V11 tmp8         [V11,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V12 tmp9         [V12,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V13 tmp10        [V13,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V14 tmp11        [V14,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V17 cse0         [V17,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V12 tmp9         [V12,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V13 cse0         [V13,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M47364_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      r15d, edi
        mov      rbx, rsi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M47364_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M47364_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M47364_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[ubyte]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[ubyte]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M47364_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M47364_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M47364_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M47364_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M47364_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M47364_IG09
+       je       SHORT G_M47364_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M47364_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M47364_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M47364_IG09
+       jne      SHORT G_M47364_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M47364_IG09
+       je       SHORT G_M47364_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M47364_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M47364_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M47364_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M47364_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M47364_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M47364_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M47364_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M47364_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=319946fb) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=319946fb) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-37 (-18.59 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte):this
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T07] (  4,  3.50)   byref  ->  rbx         this single-def
-;  V01 arg1         [V01,T08] (  3,  3   )   ubyte  ->  r15         single-def
+;  V00 this         [V00,T04] (  4,  3.50)   byref  ->  rbx         this single-def
+;  V01 arg1         [V01,T05] (  3,  3   )   ubyte  ->  r15         single-def
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   byref  ->  zero-ref    single-def "Inlining Arg"
-;  V04 tmp2         [V04,T09] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V05 tmp3         [V05,T06] (  8,  5.50)     ref  ->  rcx        
-;  V06 tmp4         [V06,T10] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[ubyte]>
+;  V04 tmp2         [V04,T06] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V05 tmp3         [V05,T03] (  8,  5.50)     ref  ->  rcx        
+;  V06 tmp4         [V06,T07] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[ubyte]>
 ;* V07 tmp5         [V07    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def "Inline stloc first use temp" <System.Threading.Tasks.Task`1[ubyte]>
 ;* V08 tmp6         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp7         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp8         [V10,T11] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp8         [V10,T08] (  5,  2.50)     int  ->  [rbp-0x1C]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp10        [V12,T00] (  6, 20.50)     int  ->  [rbp-0x20]  "Inline stloc first use temp"
-;  V13 tmp11        [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp12        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp13        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x24]  "Inline stloc first use temp"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp15        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - moderate"
+;  V13 tmp11        [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 8
 
 G_M25918_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x30]
+       push     rax
+       lea      rbp, [rsp+0x20]
        mov      rbx, rdi
        mov      r15d, esi
-						;; size=21 bbWeight=1 PerfScore 5.25
+						;; size=18 bbWeight=1 PerfScore 6.00
 G_M25918_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M25918_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M25918_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[ubyte]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[ubyte]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M25918_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M25918_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M25918_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x1C], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M25918_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M25918_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x1C]
-       je       SHORT G_M25918_IG09
+       je       SHORT G_M25918_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M25918_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M25918_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M25918_IG09
+       jne      SHORT G_M25918_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x20], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x20]
        cmp      eax, ecx
-       je       SHORT G_M25918_IG09
+       je       SHORT G_M25918_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M25918_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M25918_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M25918_IG08
-       align    [6 bytes for IG08]
-						;; size=17 bbWeight=0.50 PerfScore 2.25
-G_M25918_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x24], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x24]
-       cmp      eax, ecx
-       je       SHORT G_M25918_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M25918_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M25918_IG09:
-       add      rsp, 24
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M25918_IG08:
+       add      rsp, 8
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 21, PerfScore 254.90, instruction count 70, allocated bytes for code 199 (MethodHash=1c9d9ac1) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
+; Total bytes of code 162, prolog size 18, PerfScore 161.08, instruction count 59, allocated bytes for code 162 (MethodHash=1c9d9ac1) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[ubyte]:SetNotificationForWaitCompletion(ubyte):this (FullOpts)
-30 (-15.08 % of base) - System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:SetNotificationForWaitCompletion(ubyte,byref)
 ; Assembly listing for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 0 single block inlinees; 3 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 TypeCtx      [V00,T08] (  4,  3.50)    long  ->  rdi         single-def
-;  V01 arg0         [V01,T09] (  3,  3   )   ubyte  ->  r15         single-def
-;  V02 arg1         [V02,T07] (  4,  3.50)   byref  ->  rbx         single-def
+;  V00 TypeCtx      [V00,T05] (  4,  3.50)    long  ->  rdi         single-def
+;  V01 arg0         [V01,T06] (  3,  3   )   ubyte  ->  r15         single-def
+;  V02 arg1         [V02,T04] (  4,  3.50)   byref  ->  rbx         single-def
 ;* V03 loc0         [V03    ] (  0,  0   )     ref  ->  zero-ref    class-hnd single-def <System.Threading.Tasks.Task`1[System.__Canon]>
 ;# V04 OutArgs      [V04    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V05 tmp1         [V05,T10] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
-;  V06 tmp2         [V06,T06] (  8,  5.50)     ref  ->  rcx        
-;  V07 tmp3         [V07,T11] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.__Canon]>
+;  V05 tmp1         [V05,T07] (  2,  4   )     ref  ->  rcx         class-hnd single-def "dup spill" <System.Object>
+;  V06 tmp2         [V06,T03] (  8,  5.50)     ref  ->  rcx        
+;  V07 tmp3         [V07,T08] (  3,  3   )     ref  ->  r14         class-hnd single-def "dup spill" <System.Threading.Tasks.Task`1[System.__Canon]>
 ;* V08 tmp4         [V08    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V09 tmp5         [V09    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V10 tmp6         [V10,T12] (  5,  2.50)     int  ->  [rbp-0x24]  spill-single-def "Inline stloc first use temp"
+;  V10 tmp6         [V10,T09] (  5,  2.50)     int  ->  [rbp-0x24]  spill-single-def "Inline stloc first use temp"
 ;* V11 tmp7         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;  V12 tmp8         [V12,T00] (  6, 20.50)     int  ->  [rbp-0x28]  "Inline stloc first use temp"
-;  V13 tmp9         [V13,T02] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp10        [V14,T04] (  3, 10   )   byref  ->  rdx         single-def "Inlining Arg"
-;  V15 tmp11        [V15,T01] (  5, 16.50)     int  ->  [rbp-0x2C]  "Inline stloc first use temp"
-;* V16 tmp12        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp13        [V17,T03] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V18 cse0         [V18,T05] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
+;  V13 tmp9         [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
+;  V14 cse0         [V14,T02] (  5,  6   )   byref  ->  rdx         multi-def "CSE - aggressive"
 ;
 ; Lcl frame size = 24
 
 G_M44445_IG01:
        push     rbp
        push     r15
        push     r14
        push     rbx
        sub      rsp, 24
        lea      rbp, [rsp+0x30]
        mov      qword ptr [rbp-0x20], rdi
        mov      r15d, esi
        mov      rbx, rdx
 						;; size=25 bbWeight=1 PerfScore 6.25
 G_M44445_IG02:
        mov      rcx, gword ptr [rbx]
        test     rcx, rcx
        jne      SHORT G_M44445_IG04
 						;; size=8 bbWeight=1 PerfScore 3.25
 G_M44445_IG03:
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.__Canon]
        call     [rax]System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:CreateWeaklyTypedStateMachineBox():System.Threading.Tasks.Task`1[System.__Canon]
        mov      r14, rax
        mov      rdi, rbx
        mov      rsi, r14
        call     CORINFO_HELP_CHECKED_ASSIGN_REF
        mov      rcx, r14
 						;; size=29 bbWeight=0.50 PerfScore 2.62
 G_M44445_IG04:
        cmp      byte  ptr [rcx], cl
        test     r15b, r15b
        je       SHORT G_M44445_IG07
 						;; size=7 bbWeight=1 PerfScore 4.25
 G_M44445_IG05:
        mov      eax, dword ptr [rcx+0x34]
        mov      dword ptr [rbp-0x24], eax
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M44445_IG09
-		  ;; NOP compensation instructions of 4 bytes.
+       jne      SHORT G_M44445_IG08
        lea      rdx, bword ptr [rcx+0x34]
        mov      edi, eax
        or       edi, 0xD1FFAB1E
        lock     
        cmpxchg  dword ptr [rdx], edi
        cmp      eax, dword ptr [rbp-0x24]
-       je       SHORT G_M44445_IG09
+       je       SHORT G_M44445_IG08
        mov      eax, dword ptr [rcx+0x34]
        jmp      SHORT G_M44445_IG06
        align    [0 bytes for IG06]
-						;; size=43 bbWeight=0.50 PerfScore 15.12
+						;; size=39 bbWeight=0.50 PerfScore 15.12
 G_M44445_IG06:
        test     eax, 0xD1FFAB1E
-       jne      SHORT G_M44445_IG09
+       jne      SHORT G_M44445_IG08
        mov      ecx, eax
        or       ecx, 0xD1FFAB1E
        mov      dword ptr [rbp-0x28], eax
        lock     
        cmpxchg  dword ptr [rdx], ecx
        mov      ecx, dword ptr [rbp-0x28]
        cmp      eax, ecx
-       je       SHORT G_M44445_IG09
+       je       SHORT G_M44445_IG08
        mov      ecx, eax
        mov      eax, ecx
        jmp      SHORT G_M44445_IG06
 						;; size=35 bbWeight=4 PerfScore 102.00
 G_M44445_IG07:
        add      rcx, 52
        mov      rdx, rcx
-       mov      eax, dword ptr [rdx]
-       jmp      SHORT G_M44445_IG08
-       align    [2 bytes for IG08]
-						;; size=13 bbWeight=0.50 PerfScore 2.25
-G_M44445_IG08:
-       mov      ecx, eax
-       and      ecx, 0xD1FFAB1E
-       mov      dword ptr [rbp-0x2C], eax
+       mov      eax, 0xD1FFAB1E
        lock     
-       cmpxchg  dword ptr [rdx], ecx
-       mov      ecx, dword ptr [rbp-0x2C]
-       cmp      eax, ecx
-       je       SHORT G_M44445_IG09
-       mov      ecx, eax
-       mov      eax, ecx
-       jmp      SHORT G_M44445_IG08
-						;; size=28 bbWeight=4 PerfScore 97.00
-G_M44445_IG09:
+       and      dword ptr [rdx], eax
+						;; size=15 bbWeight=0.50 PerfScore 8.38
+G_M44445_IG08:
        add      rsp, 24
        pop      rbx
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 199, prolog size 25, PerfScore 255.90, instruction count 71, allocated bytes for code 199 (MethodHash=6daa5262) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
+; Total bytes of code 169, prolog size 25, PerfScore 162.03, instruction count 60, allocated bytes for code 169 (MethodHash=6daa5262) for method System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[System.__Canon]:SetNotificationForWaitCompletion(ubyte,byref) (FullOpts)
-26 (-6.57 % of base) - System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref):this
 ; Assembly listing for method System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref):this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; fully interruptible
+; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 3 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 this         [V00,T04] (  3,  3   )     ref  ->  r15         this class-hnd single-def <System.Threading.Mutex>
-;  V01 arg1         [V01,T06] (  3,  3   )   ubyte  ->  rsi         single-def
-;  V02 arg2         [V02,T03] (  5,  3   )     ref  ->  rbx         class-hnd single-def <System.String>
-;  V03 arg3         [V03,T05] (  3,  3   )   byref  ->  r14         single-def
-;  V04 loc0         [V04,T07] (  6,  3.50)     ref  ->  r13         class-hnd exact single-def <Microsoft.Win32.SafeHandles.SafeWaitHandle>
+;  V00 this         [V00,T01] (  3,  3   )     ref  ->  r15         this class-hnd single-def <System.Threading.Mutex>
+;  V01 arg1         [V01,T03] (  3,  3   )   ubyte  ->  rsi         single-def
+;  V02 arg2         [V02,T00] (  5,  3   )     ref  ->  rbx         class-hnd single-def <System.String>
+;  V03 arg3         [V03,T02] (  3,  3   )   byref  ->  r14         single-def
+;  V04 loc0         [V04,T04] (  6,  3   )     ref  ->  r13         class-hnd exact single-def <Microsoft.Win32.SafeHandles.SafeWaitHandle>
 ;  V05 loc1         [V05    ] (  5,  2   )     int  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed ld-addr-op
 ;  V06 loc2         [V06    ] (  2,  1   )     ref  ->  [rbp-0x30]  do-not-enreg[X] must-init addr-exposed ld-addr-op class-hnd <System.String>
 ;# V07 OutArgs      [V07    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V08 tmp1         [V08,T08] (  3,  0   )     ref  ->  r15         class-hnd exact single-def "NewObj constructor temp" <System.Threading.WaitHandleCannotBeOpenedException>
-;  V09 tmp2         [V09,T09] (  3,  0   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.ArgumentException>
-;  V10 tmp3         [V10,T02] (  3, 10   )   byref  ->  rdi         single-def "Inlining Arg"
-;  V11 tmp4         [V11,T00] (  5, 16.50)     int  ->  [rbp-0x34]  "Inline stloc first use temp"
-;* V12 tmp5         [V12    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V13 tmp6         [V13,T01] (  3, 12   )     int  ->  rax         "Inline stloc first use temp"
-;  V14 tmp7         [V14,T10] (  2,  0   )     ref  ->  rdi         single-def "argument with side effect"
-;  V15 tmp8         [V15,T11] (  2,  0   )     ref  ->  rsi         single-def "argument with side effect"
-;  V16 tmp9         [V16,T12] (  2,  0   )     ref  ->  r15         single-def "argument with side effect"
-;  V17 tmp10        [V17,T13] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
+;  V08 tmp1         [V08,T05] (  3,  0   )     ref  ->  r15         class-hnd exact single-def "NewObj constructor temp" <System.Threading.WaitHandleCannotBeOpenedException>
+;  V09 tmp2         [V09,T06] (  3,  0   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.ArgumentException>
+;  V10 tmp3         [V10,T07] (  2,  0   )     ref  ->  rdi         single-def "argument with side effect"
+;  V11 tmp4         [V11,T08] (  2,  0   )     ref  ->  rsi         single-def "argument with side effect"
+;  V12 tmp5         [V12,T09] (  2,  0   )     ref  ->  r15         single-def "argument with side effect"
+;  V13 tmp6         [V13,T10] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
 ;
-; Lcl frame size = 32
+; Lcl frame size = 16
 
 G_M5154_IG01:
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
-       sub      rsp, 32
-       lea      rbp, [rsp+0x40]
+       sub      rsp, 16
+       lea      rbp, [rsp+0x30]
        xor      eax, eax
        mov      qword ptr [rbp-0x30], rax
        mov      r15, rdi
        mov      rbx, rdx
        mov      r14, rcx
 						;; size=32 bbWeight=1 PerfScore 7.75
 G_M5154_IG02:
        movzx    rdi, sil
        lea      rcx, [rbp-0x30]
        lea      rdx, [rbp-0x28]
        mov      rsi, rbx
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref,byref):Microsoft.Win32.SafeHandles.SafeWaitHandle
        call     [rax]System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref,byref):Microsoft.Win32.SafeHandles.SafeWaitHandle
        mov      r13, rax
        mov      rdi, r13
        mov      rax, 0xD1FFAB1E      ; code for Microsoft.Win32.SafeHandles.SafeHandleZeroOrMinusOneIsInvalid:get_IsInvalid():ubyte:this
        cmp      dword ptr [rdi], edi
        call     [rax]Microsoft.Win32.SafeHandles.SafeHandleZeroOrMinusOneIsInvalid:get_IsInvalid():ubyte:this
        test     eax, eax
-       je       SHORT G_M5154_IG05
-						;; size=51 bbWeight=1 PerfScore 12.75
-G_M5154_IG03:
-       lea      rdi, bword ptr [r13+0x10]
-       mov      eax, dword ptr [rdi]
-       jmp      SHORT G_M5154_IG04
-       align    [5 bytes for IG04]
-						;; size=13 bbWeight=0.50 PerfScore 2.25
-G_M5154_IG04:
-       mov      esi, eax
-       or       esi, 1
-       mov      dword ptr [rbp-0x34], eax
-       lock     
-       cmpxchg  dword ptr [rdi], esi
-       mov      esi, dword ptr [rbp-0x34]
-       cmp      eax, esi
-       je       SHORT G_M5154_IG07
-       mov      esi, eax
-       mov      eax, esi
-       jmp      SHORT G_M5154_IG04
-						;; size=25 bbWeight=4 PerfScore 97.00
-G_M5154_IG05:
+       jne      SHORT G_M5154_IG04
        cmp      dword ptr [rbp-0x28], 183
        setne    dil
        mov      byte  ptr [r14], dil
        lea      rdi, bword ptr [r15+0x08]
        mov      rsi, r13
        call     CORINFO_HELP_ASSIGN_REF
        nop      
-						;; size=27 bbWeight=1 PerfScore 6.00
-G_M5154_IG06:
-       add      rsp, 32
+						;; size=78 bbWeight=1 PerfScore 18.75
+G_M5154_IG03:
+       add      rsp, 16
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=1 PerfScore 3.75
-G_M5154_IG07:
+G_M5154_IG04:
+       lea      rdi, bword ptr [r13+0x10]
+       mov      esi, 1
+       lock     
+       or       dword ptr [rdi], esi
        mov      rdi, r13
        mov      rsi, 0xD1FFAB1E      ; 'obj'
 
        mov      rax, 0xD1FFAB1E      ; code for System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        call     [rax]System.ArgumentNullException:ThrowIfNull(System.Object,System.String)
        mov      rdi, r13
        call     System.GC:_SuppressFinalize(System.Object)
        cmp      dword ptr [rbp-0x28], 206
-       jne      SHORT G_M5154_IG08
+       jne      SHORT G_M5154_IG05
        mov      rdi, 0xD1FFAB1E      ; System.ArgumentException
        call     CORINFO_HELP_NEWSFAST
        mov      rbx, rax
        mov      rax, 0xD1FFAB1E      ; code for System.SR:get_Argument_WaitHandleNameTooLong():System.String
        call     [rax]System.SR:get_Argument_WaitHandleNameTooLong():System.String
        mov      r15, rax
        mov      edi, 0x51B
        mov      rsi, 0xD1FFAB1E
        call     CORINFO_HELP_STRCNS
        mov      rdx, rax
        mov      rsi, r15
        mov      rdi, rbx
        mov      rax, 0xD1FFAB1E      ; code for System.ArgumentException:.ctor(System.String,System.String):this
        call     [rax]System.ArgumentException:.ctor(System.String,System.String):this
        mov      rdi, rbx
        call     CORINFO_HELP_THROW
-						;; size=124 bbWeight=0 PerfScore 0.00
-G_M5154_IG08:
+						;; size=136 bbWeight=0 PerfScore 0.00
+G_M5154_IG05:
        cmp      dword ptr [rbp-0x28], 6
-       jne      SHORT G_M5154_IG09
+       jne      SHORT G_M5154_IG06
        mov      rdi, 0xD1FFAB1E      ; System.Threading.WaitHandleCannotBeOpenedException
        call     CORINFO_HELP_NEWSFAST
        mov      r15, rax
        mov      rax, 0xD1FFAB1E      ; code for System.SR:get_Threading_WaitHandleCannotBeOpenedException_InvalidHandle():System.String
        call     [rax]System.SR:get_Threading_WaitHandleCannotBeOpenedException_InvalidHandle():System.String
        mov      rdi, rax
        mov      rsi, rbx
        mov      rax, 0xD1FFAB1E      ; code for System.SR:Format(System.String,System.Object):System.String
        call     [rax]System.SR:Format(System.String,System.Object):System.String
        mov      rsi, rax
        mov      rdi, r15
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.WaitHandleCannotBeOpenedException:.ctor(System.String):this
        call     [rax]System.Threading.WaitHandleCannotBeOpenedException:.ctor(System.String):this
        mov      rdi, r15
        call     CORINFO_HELP_THROW
 						;; size=80 bbWeight=0 PerfScore 0.00
-G_M5154_IG09:
+G_M5154_IG06:
        mov      edi, dword ptr [rbp-0x28]
        mov      rsi, rbx
        mov      rdx, gword ptr [rbp-0x30]
        mov      rax, 0xD1FFAB1E      ; code for System.IO.Win32Marshal:GetExceptionForWin32Error(int,System.String,System.String):System.Exception
        call     [rax]System.IO.Win32Marshal:GetExceptionForWin32Error(int,System.String,System.String):System.Exception
        mov      rdi, rax
        call     CORINFO_HELP_THROW
        int3     
 						;; size=31 bbWeight=0 PerfScore 0.00
 
-; Total bytes of code 396, prolog size 32, PerfScore 169.10, instruction count 103, allocated bytes for code 396 (MethodHash=8294ebdd) for method System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref):this (FullOpts)
+; Total bytes of code 370, prolog size 23, PerfScore 67.25, instruction count 92, allocated bytes for code 370 (MethodHash=8294ebdd) for method System.Threading.Mutex:CreateMutexCore(ubyte,System.String,byref):this (FullOpts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment