36 (10.68 % of base) - System.Text.Ascii:GetIndexOfFirstNonAsciiChar_Vector(ulong,ulong):ulong
; Assembly listing for method System.Text.Ascii:GetIndexOfFirstNonAsciiChar_Vector(ulong,ulong):ulong (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 8 single block inlinees; 5 inlinees without PGO data
+; 0 inlinees with PGO data; 18 single block inlinees; 25 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 32, 74 ) long -> rdi
; V01 arg1 [V01,T01] ( 16, 20.50) long -> rsi
; V02 loc0 [V02,T04] ( 12, 7 ) long -> rax
; V03 loc1 [V03,T02] ( 7, 10.50) int -> rcx
; V04 loc2 [V04,T05] ( 2, 4.50) long -> rcx
; V05 loc3 [V05,T06] ( 2, 4.50) long -> rcx
; V06 loc4 [V06,T07] ( 2, 4.50) long -> rcx
; V07 loc5 [V07,T03] ( 3, 8.50) int -> rdx
;# V08 OutArgs [V08 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V09 tmp1 [V09 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-;* V10 tmp2 [V10 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
-;* V11 tmp3 [V11 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
-;* V12 tmp4 [V12 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;* V13 tmp5 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V14 tmp6 [V14 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V09 tmp1 [V09,T21] ( 2, 2 ) simd16 -> mm0 "spilled call-like call argument"
+; V10 tmp2 [V10,T22] ( 2, 2 ) simd32 -> mm0 "spilled call-like call argument"
+; V11 tmp3 [V11,T23] ( 2, 2 ) simd64 -> mm0 "spilled call-like call argument"
+;* V12 tmp4 [V12 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V13 tmp5 [V13 ] ( 0, 0 ) simd64 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V14 tmp6 [V14,T12] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
;* V15 tmp7 [V15 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V16 tmp8 [V16 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V17 tmp9 [V17 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V18 tmp10 [V18 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V19 tmp11 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V20 tmp12 [V20 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V21 tmp13 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V22 tmp14 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V23 tmp15 [V23 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-;* V24 tmp16 [V24,T08] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V25 cse0 [V25,T09] ( 3, 5 ) simd64 -> mm0 "CSE #01: aggressive"
-; V26 cse1 [V26,T10] ( 3, 5 ) simd32 -> mm0 "CSE #04: aggressive"
-; V27 cse2 [V27,T11] ( 3, 5 ) simd16 -> mm0 "CSE #05: aggressive"
+;* V16 tmp8 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V17 tmp9 [V17 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V18 tmp10 [V18 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V19 tmp11 [V19 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V20 tmp12 [V20,T15] ( 2, 16 ) simd64 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V21 tmp13 [V21 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V22 tmp14 [V22 ] ( 0, 0 ) simd64 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V23 tmp15 [V23,T08] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V24 tmp16 [V24 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V25 tmp17 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V26 tmp18 [V26 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V27 tmp19 [V27 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V28 tmp20 [V28 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V29 tmp21 [V29 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V30 tmp22 [V30 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V31 tmp23 [V31,T13] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V32 tmp24 [V32 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V33 tmp25 [V33 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V34 tmp26 [V34 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V35 tmp27 [V35 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V36 tmp28 [V36 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V37 tmp29 [V37,T16] ( 2, 16 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V38 tmp30 [V38 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V39 tmp31 [V39 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V40 tmp32 [V40,T09] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V41 tmp33 [V41 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V42 tmp34 [V42 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V43 tmp35 [V43 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V44 tmp36 [V44 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V45 tmp37 [V45 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V46 tmp38 [V46 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V47 tmp39 [V47 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V48 tmp40 [V48,T14] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V49 tmp41 [V49 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V50 tmp42 [V50 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V51 tmp43 [V51 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V52 tmp44 [V52 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V53 tmp45 [V53 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V54 tmp46 [V54,T17] ( 2, 16 ) simd16 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V55 tmp47 [V55 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V56 tmp48 [V56 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V57 tmp49 [V57,T10] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V58 tmp50 [V58 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V59 tmp51 [V59 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V60 tmp52 [V60 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V61 tmp53 [V61 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V62 tmp54 [V62 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V63 tmp55 [V63 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V64 tmp56 [V64,T11] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V65 cse0 [V65,T18] ( 3, 5 ) simd64 -> mm1 "CSE #01: moderate"
+; V66 cse1 [V66,T19] ( 3, 5 ) simd32 -> mm1 "CSE #04: moderate"
+; V67 cse2 [V67,T20] ( 3, 5 ) simd16 -> mm1 "CSE #05: moderate"
;
; Lcl frame size = 0
G_M42618_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M42618_IG02:
mov rax, rdi
cmp rsi, 64
jb SHORT G_M42618_IG05
;; size=9 bbWeight=1 PerfScore 1.50
G_M42618_IG03:
- vmovups zmm0, zmmword ptr [reloc @RWD00]
- vptestmw k1, zmm0, zmmword ptr [rax]
+ vmovups zmm0, zmmword ptr [rax]
+ vmovups zmm1, zmmword ptr [reloc @RWD00]
+ vptestmw k1, zmm1, zmm0
kortestd k1, k1
;; NOP compensation instructions of 3 bytes.
jne G_M42618_IG10
lea rcx, [rax+2*rsi-0x40]
lea rdi, [rax+0x40]
and rdi, -64
- align [0 bytes for IG04]
- ;; size=43 bbWeight=0.50 PerfScore 6.38
+ align [2 bytes for IG04]
+ ;; size=51 bbWeight=0.50 PerfScore 7.00
G_M42618_IG04:
- vptestmw k1, zmm0, zmmword ptr [rdi]
+ vmovdqa32 zmm0, zmmword ptr [rdi]
+ vptestmw k1, zmm1, zmm0
kortestd k1, k1
;; NOP compensation instructions of 3 bytes.
jne G_M42618_IG09
add rdi, 64
cmp rdi, rcx
jbe SHORT G_M42618_IG04
- jmp SHORT G_M42618_IG09
- ;; NOP compensation instructions of 3 bytes.
- ;; size=34 bbWeight=4 PerfScore 46.00
+ jmp G_M42618_IG09
+ ;; size=40 bbWeight=4 PerfScore 50.00
G_M42618_IG05:
cmp rsi, 32
jb SHORT G_M42618_IG07
- vmovups ymm0, ymmword ptr [reloc @RWD00]
- vptest ymm0, ymmword ptr [rax]
+ vmovups ymm0, ymmword ptr [rax]
+ vmovups ymm1, ymmword ptr [reloc @RWD00]
+ vptest ymm1, ymm0
jne SHORT G_M42618_IG10
+ ;; NOP compensation instructions of 4 bytes.
lea rcx, [rax+2*rsi-0x20]
lea rdi, [rax+0x20]
and rdi, -32
- align [4 bytes for IG06]
- ;; size=38 bbWeight=0.50 PerfScore 8.12
+ align [14 bytes for IG06]
+ ;; size=56 bbWeight=0.50 PerfScore 9.12
G_M42618_IG06:
- vptest ymm0, ymmword ptr [rdi]
+ vmovdqa ymm0, ymmword ptr [rdi]
+ vptest ymm1, ymm0
jne SHORT G_M42618_IG09
add rdi, 32
cmp rdi, rcx
jbe SHORT G_M42618_IG06
jmp SHORT G_M42618_IG09
- ;; size=18 bbWeight=4 PerfScore 50.00
+ ;; size=22 bbWeight=4 PerfScore 58.00
G_M42618_IG07:
cmp rsi, 16
jb SHORT G_M42618_IG10
- vmovups xmm0, xmmword ptr [reloc @RWD00]
- vptest xmm0, xmmword ptr [rax]
+ vmovups xmm0, xmmword ptr [rax]
+ vmovups xmm1, xmmword ptr [reloc @RWD00]
+ vptest xmm1, xmm0
jne SHORT G_M42618_IG10
lea rcx, [rax+2*rsi-0x10]
lea rdi, [rax+0x10]
and rdi, -16
- align [12 bytes for IG08]
- ;; size=46 bbWeight=0.50 PerfScore 6.62
+ align [4 bytes for IG08]
+ ;; size=42 bbWeight=0.50 PerfScore 7.12
G_M42618_IG08:
- vptest xmm0, xmmword ptr [rdi]
+ vmovdqa xmm0, xmmword ptr [rdi]
+ vptest xmm1, xmm0
jne SHORT G_M42618_IG09
add rdi, 16
cmp rdi, rcx
jbe SHORT G_M42618_IG08
- ;; size=16 bbWeight=4 PerfScore 34.00
+ ;; size=20 bbWeight=4 PerfScore 38.00
G_M42618_IG09:
mov rcx, rdi
sub rcx, rax
shr rcx, 1
sub rsi, rcx
;; size=12 bbWeight=0.50 PerfScore 0.62
G_M42618_IG10:
cmp rsi, 4
jb SHORT G_M42618_IG15
align [0 bytes for IG11]
;; size=6 bbWeight=1 PerfScore 1.25
G_M42618_IG11:
mov ecx, dword ptr [rdi]
mov edx, dword ptr [rdi+0x04]
mov r8d, ecx
or r8d, edx
test r8d, 0xD1FFAB1E
je SHORT G_M42618_IG14
;; size=20 bbWeight=4 PerfScore 23.00
G_M42618_IG12:
test ecx, 0xD1FFAB1E
jne SHORT G_M42618_IG13
mov ecx, edx
add rdi, 4
;; size=14 bbWeight=0.50 PerfScore 0.88
G_M42618_IG13:
test ecx, 0xFF80
jne SHORT G_M42618_IG18
jmp SHORT G_M42618_IG17
;; size=10 bbWeight=0.50 PerfScore 1.62
G_M42618_IG14:
add rdi, 8
add rsi, -4
cmp rsi, 4
jae SHORT G_M42618_IG11
;; size=14 bbWeight=4 PerfScore 7.00
G_M42618_IG15:
test sil, 2
je SHORT G_M42618_IG16
mov ecx, dword ptr [rdi]
test ecx, 0xD1FFAB1E
jne SHORT G_M42618_IG13
add rdi, 4
;; size=20 bbWeight=0.50 PerfScore 2.38
G_M42618_IG16:
test sil, 1
je SHORT G_M42618_IG18
cmp word ptr [rdi], 127
ja SHORT G_M42618_IG18
;; size=12 bbWeight=0.50 PerfScore 2.62
G_M42618_IG17:
add rdi, 2
;; size=4 bbWeight=0.50 PerfScore 0.12
G_M42618_IG18:
mov rcx, rdi
sub rcx, rax
mov rax, rcx
shr rax, 1
;; size=12 bbWeight=1 PerfScore 1.25
G_M42618_IG19:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=1 PerfScore 2.50
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
-; Total bytes of code 337, prolog size 4, PerfScore 197.12, instruction count 91, allocated bytes for code 337 (MethodHash=bc9a5985) for method System.Text.Ascii:GetIndexOfFirstNonAsciiChar_Vector(ulong,ulong):ulong (FullOpts)
+; Total bytes of code 373, prolog size 4, PerfScore 215.25, instruction count 97, allocated bytes for code 373 (MethodHash=bc9a5985) for method System.Text.Ascii:GetIndexOfFirstNonAsciiChar_Vector(ulong,ulong):ulong (FullOpts)
12 (3.40 % of base) - System.Text.Ascii:IsValidCore[int](byref,int):ubyte
; Assembly listing for method System.Text.Ascii:IsValidCore[int](byref,int):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 2 single block inlinees; 7 inlinees without PGO data
+; 0 inlinees with PGO data; 12 single block inlinees; 12 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 18, 13 ) byref -> rdi
; V01 arg1 [V01,T03] ( 9, 6 ) int -> rsi single-def
; V02 loc0 [V02,T07] ( 5, 2.50) byref -> rdx single-def
;* V03 loc1 [V03 ] ( 0, 0 ) int -> zero-ref
;* V04 loc2 [V04 ] ( 0, 0 ) long -> zero-ref
;* V05 loc3 [V05,T10] ( 0, 0 ) long -> zero-ref
;* V06 loc4 [V06 ] ( 0, 0 ) long -> zero-ref
; V07 loc5 [V07,T01] ( 6, 17 ) long -> r8
;* V08 loc6 [V08 ] ( 0, 0 ) long -> zero-ref
; V09 loc7 [V09,T04] ( 4, 5.50) long -> rcx
; V10 loc8 [V10,T00] ( 5, 20 ) byref -> rax
;* V11 loc9 [V11 ] ( 0, 0 ) long -> zero-ref
;* V12 loc10 [V12 ] ( 0, 0 ) long -> zero-ref
;* V13 loc11 [V13 ] ( 0, 0 ) long -> zero-ref
;* V14 loc12 [V14 ] ( 0, 0 ) byref -> zero-ref
;# V15 OutArgs [V15 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V16 tmp1 [V16 ] ( 0, 0 ) int -> zero-ref
;* V17 tmp2 [V17 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inlining Arg"
; V18 tmp3 [V18,T09] ( 2, 1 ) ubyte -> r8 "Inline return value spill temp"
;* V19 tmp4 [V19 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
-;* V20 tmp5 [V20 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V21 tmp6 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V22 tmp7 [V22 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V23 tmp8 [V23 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;* V24 tmp9 [V24 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V25 tmp10 [V25,T13] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;* V26 tmp11 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V27 tmp12 [V27,T11] ( 2, 16 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;* V28 tmp13 [V28 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V29 tmp14 [V29,T14] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-; V30 tmp15 [V30,T05] ( 5, 5 ) int -> r8 "Single return block return value"
-; V31 tmp16 [V31,T08] ( 2, 2 ) long -> rax "Cast away GC"
-; V32 cse0 [V32,T12] ( 7, 7 ) simd32 -> mm1 multi-def "CSE #03: aggressive"
-; V33 cse1 [V33,T06] ( 6, 3 ) long -> rcx multi-def "CSE #01: aggressive"
+;* V20 tmp5 [V20 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V21 tmp6 [V21 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V22 tmp7 [V22 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V23 tmp8 [V23 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V24 tmp9 [V24 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V25 tmp10 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V26 tmp11 [V26 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V27 tmp12 [V27 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V28 tmp13 [V28 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V29 tmp14 [V29 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V30 tmp15 [V30 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V31 tmp16 [V31 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V32 tmp17 [V32,T15] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V33 tmp18 [V33 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V34 tmp19 [V34 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V35 tmp20 [V35 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V36 tmp21 [V36 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V37 tmp22 [V37 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V38 tmp23 [V38,T11] ( 2, 16 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V39 tmp24 [V39 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V40 tmp25 [V40 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V41 tmp26 [V41 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V42 tmp27 [V42 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V43 tmp28 [V43 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V44 tmp29 [V44,T16] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V45 tmp30 [V45 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V46 tmp31 [V46 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V47 tmp32 [V47 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V48 tmp33 [V48 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V49 tmp34 [V49 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V50 tmp35 [V50,T05] ( 5, 5 ) int -> r8 "Single return block return value"
+; V51 tmp36 [V51,T08] ( 2, 2 ) long -> rax "Cast away GC"
+; V52 cse0 [V52,T12] ( 7, 7 ) simd32 -> mm2 multi-def "CSE #03: aggressive"
+; V53 cse1 [V53,T06] ( 6, 3 ) long -> rcx multi-def "CSE #01: aggressive"
+; V54 rat0 [V54,T13] ( 3, 3 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
+; V55 rat1 [V55,T14] ( 3, 3 ) simd32 -> mm0 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M8346_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M8346_IG02:
cmp esi, 4
jge SHORT G_M8346_IG04
;; size=5 bbWeight=1 PerfScore 1.25
G_M8346_IG03:
movsxd rcx, esi
cmp rcx, 2
jge G_M8346_IG13
test esi, esi
je G_M8346_IG16
jmp G_M8346_IG18
;; size=26 bbWeight=0.50 PerfScore 2.38
G_M8346_IG04:
movsxd rcx, esi
lea rdx, bword ptr [rdi+4*rcx]
cmp esi, 8
jg SHORT G_M8346_IG05
vmovups xmm0, xmmword ptr [rdi]
- vpor xmm0, xmm0, xmmword ptr [rdx-0x10]
- vptest xmm0, xmmword ptr [reloc @RWD00]
+ vmovups xmm1, xmmword ptr [rdx-0x10]
+ vpternlogd xmm0, xmm1, xmmword ptr [reloc @RWD00], -88
+ vptest xmm0, xmm0
sete r8b
movzx r8, r8b
jmp G_M8346_IG14
- align [2 bytes for IG07]
- ;; size=45 bbWeight=0.50 PerfScore 8.62
+ align [0 bytes for IG07]
+ ;; size=50 bbWeight=0.50 PerfScore 9.12
G_M8346_IG05:
cmp esi, 16
jg SHORT G_M8346_IG06
vmovups ymm0, ymmword ptr [rdi]
- vpor ymm0, ymm0, ymmword ptr [rdx-0x20]
- vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
+ vmovups ymm1, ymmword ptr [rdx-0x20]
+ vmovups ymm2, ymmword ptr [reloc @RWD32]
+ vpternlogd ymm0, ymm1, ymm2, -88
+ vptest ymm0, ymm0
sete r8b
movzx r8, r8b
jmp G_M8346_IG14
- ;; size=40 bbWeight=0.50 PerfScore 10.75
+ ;; size=47 bbWeight=0.50 PerfScore 12.00
G_M8346_IG06:
cmp esi, 32
jle SHORT G_M8346_IG12
- vmovups ymm1, ymmword ptr [rdi]
+ vmovups ymm2, ymmword ptr [rdi]
vmovups ymm0, ymmword ptr [rdi+0x20]
- vpternlogd ymm1, ymm0, ymmword ptr [rdi+0x40], -2
- vpor ymm0, ymm1, ymmword ptr [rdi+0x60]
- vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
+ vpternlogd ymm2, ymm0, ymmword ptr [rdi+0x40], -2
+ vpor ymm0, ymm2, ymmword ptr [rdi+0x60]
+ vmovups ymm2, ymmword ptr [reloc @RWD32]
+ vptest ymm2, ymm0
jne SHORT G_M8346_IG08
mov rax, rdi
and rax, 31
shr rax, 2
mov r8, rax
neg r8
add r8, 32
add rcx, -32
cmp r8, rcx
jae SHORT G_M8346_IG11
;; size=72 bbWeight=0.50 PerfScore 15.25
G_M8346_IG07:
lea rax, bword ptr [rdi+4*r8]
vmovups ymm0, ymmword ptr [rax]
- vmovups ymm2, ymmword ptr [rax+0x20]
- vpternlogd ymm0, ymm2, ymmword ptr [rax+0x40], -2
+ vmovups ymm1, ymmword ptr [rax+0x20]
+ vpternlogd ymm0, ymm1, ymmword ptr [rax+0x40], -2
vpor ymm0, ymm0, ymmword ptr [rax+0x60]
- vptest ymm0, ymm1
+ vptest ymm2, ymm0
je SHORT G_M8346_IG10
;; size=33 bbWeight=4 PerfScore 90.00
G_M8346_IG08:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M8346_IG09:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M8346_IG10:
add r8, 32
cmp r8, rcx
jb SHORT G_M8346_IG07
;; size=9 bbWeight=4 PerfScore 6.00
G_M8346_IG11:
lea rdi, bword ptr [rdi+4*rcx]
;; size=4 bbWeight=0.50 PerfScore 0.25
G_M8346_IG12:
- vmovups ymm1, ymmword ptr [rdi]
+ vmovups ymm2, ymmword ptr [rdi]
vmovups ymm0, ymmword ptr [rdi+0x20]
- vpternlogd ymm1, ymm0, ymmword ptr [rdx-0x40], -2
- vpor ymm0, ymm1, ymmword ptr [rdx-0x20]
- vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
+ vpternlogd ymm2, ymm0, ymmword ptr [rdx-0x40], -2
+ vpor ymm0, ymm2, ymmword ptr [rdx-0x20]
+ vmovups ymm2, ymmword ptr [reloc @RWD32]
+ vptest ymm2, ymm0
sete r8b
movzx r8, r8b
jmp SHORT G_M8346_IG14
;; size=45 bbWeight=0.50 PerfScore 14.12
G_M8346_IG13:
mov r8, qword ptr [rdi]
or r8, qword ptr [rdi+4*rcx-0x08]
mov rax, 0xD1FFAB1E
test r8, rax
sete r8b
movzx r8, r8b
;; size=29 bbWeight=0.50 PerfScore 3.38
G_M8346_IG14:
movzx rax, r8b
;; size=4 bbWeight=0.50 PerfScore 0.12
G_M8346_IG15:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M8346_IG16:
mov eax, 1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M8346_IG17:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M8346_IG18:
cmp dword ptr [rdi], edi
mov rax, 0xD1FFAB1E ; code for System.ThrowHelper:ThrowNotSupportedException()
call [rax]System.ThrowHelper:ThrowNotSupportedException()
int3
;; size=15 bbWeight=0 PerfScore 0.00
-RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD00 dq FFFFFF80FFFFFF80h, FFFFFF80FFFFFF80h
RWD16 dd 00000000h, 00000000h, 00000000h, 00000000h
-RWD32 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD32 dq FFFFFF80FFFFFF80h, FFFFFF80FFFFFF80h, FFFFFF80FFFFFF80h, FFFFFF80FFFFFF80h
-; Total bytes of code 353, prolog size 4, PerfScore 157.38, instruction count 90, allocated bytes for code 353 (MethodHash=10a8df65) for method System.Text.Ascii:IsValidCore[int](byref,int):ubyte (FullOpts)
+; Total bytes of code 365, prolog size 4, PerfScore 159.12, instruction count 92, allocated bytes for code 365 (MethodHash=10a8df65) for method System.Text.Ascii:IsValidCore[int](byref,int):ubyte (FullOpts)
11 (2.96 % of base) - System.Text.Ascii:GetIndexOfFirstNonAsciiByte_Vector(ulong,ulong):ulong
; Assembly listing for method System.Text.Ascii:GetIndexOfFirstNonAsciiByte_Vector(ulong,ulong):ulong (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 8 single block inlinees; 2 inlinees without PGO data
+; 0 inlinees with PGO data; 12 single block inlinees; 8 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 38, 77 ) long -> rdi
; V01 arg1 [V01,T01] ( 17, 21 ) long -> rsi
; V02 loc0 [V02,T04] ( 12, 7 ) long -> rax
; V03 loc1 [V03,T02] ( 9, 11.50) int -> rcx
; V04 loc2 [V04,T05] ( 2, 4.50) long -> rcx
; V05 loc3 [V05,T06] ( 2, 4.50) long -> rcx
; V06 loc4 [V06,T07] ( 2, 4.50) long -> rcx
; V07 loc5 [V07,T03] ( 3, 8.50) int -> rdx
;# V08 OutArgs [V08 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V09 tmp1 [V09 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V09 tmp1 [V09,T12] ( 2, 2 ) simd16 -> mm0 "spilled call-like call argument"
;* V10 tmp2 [V10 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
;* V11 tmp3 [V11 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
-;* V12 tmp4 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V13 tmp5 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V14 tmp6 [V14 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V15 tmp7 [V15 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-; V16 cse0 [V16,T08] ( 3, 5 ) simd16 -> mm0 "CSE #02: aggressive"
+;* V12 tmp4 [V12 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V13 tmp5 [V13 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V14 tmp6 [V14,T09] ( 0, 0 ) ubyte -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V15 tmp7 [V15 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V16 tmp8 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V17 tmp9 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+;* V18 tmp10 [V18 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+;* V19 tmp11 [V19 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V20 tmp12 [V20,T10] ( 2, 16 ) simd16 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V21 tmp13 [V21 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V22 tmp14 [V22 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V23 tmp15 [V23,T08] ( 0, 0 ) ubyte -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V24 tmp16 [V24 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V25 tmp17 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V26 tmp18 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+;* V27 tmp19 [V27 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+;* V28 tmp20 [V28 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V29 tmp21 [V29 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+; V30 cse0 [V30,T11] ( 3, 5 ) simd16 -> mm1 "CSE #02: moderate"
;
; Lcl frame size = 0
G_M50024_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M50024_IG02:
mov rax, rdi
cmp rsi, 128
jb SHORT G_M50024_IG05
;; size=12 bbWeight=1 PerfScore 1.50
G_M50024_IG03:
vmovups zmm0, zmmword ptr [rax]
vpmovb2m k1, zmm0
kmovq rcx, k1
;; NOP compensation instructions of 3 bytes.
test rcx, rcx
jne G_M50024_IG10
lea rcx, [rax+rsi-0x40]
lea rdi, [rax+0x40]
and rdi, -64
align [6 bytes for IG04]
;; size=48 bbWeight=0.50 PerfScore 5.62
G_M50024_IG04:
vmovdqa32 zmm0, zmmword ptr [rdi]
vpmovb2m k1, zmm0
kmovq rdx, k1
;; NOP compensation instructions of 3 bytes.
test rdx, rdx
jne G_M50024_IG09
add rdi, 64
cmp rdi, rcx
jbe SHORT G_M50024_IG04
jmp SHORT G_M50024_IG09
;; NOP compensation instructions of 3 bytes.
;; size=43 bbWeight=4 PerfScore 51.00
G_M50024_IG05:
cmp rsi, 64
jb SHORT G_M50024_IG07
vmovups ymm0, ymmword ptr [rax]
vpmovmskb ecx, ymm0
test ecx, ecx
jne SHORT G_M50024_IG10
+ ;; NOP compensation instructions of 4 bytes.
lea rcx, [rax+rsi-0x20]
lea rdi, [rax+0x20]
and rdi, -32
- align [6 bytes for IG06]
+ align [2 bytes for IG06]
;; size=37 bbWeight=0.50 PerfScore 6.25
G_M50024_IG06:
vmovdqa ymm0, ymmword ptr [rdi]
vpmovmskb edx, ymm0
test edx, edx
jne SHORT G_M50024_IG09
add rdi, 32
cmp rdi, rcx
jbe SHORT G_M50024_IG06
jmp SHORT G_M50024_IG09
;; size=23 bbWeight=4 PerfScore 51.00
G_M50024_IG07:
cmp rsi, 32
jb SHORT G_M50024_IG10
- vmovups xmm0, xmmword ptr [reloc @RWD00]
- vptest xmm0, xmmword ptr [rax]
+ vmovups xmm0, xmmword ptr [rax]
+ vmovups xmm1, xmmword ptr [reloc @RWD00]
+ vptest xmm1, xmm0
jne SHORT G_M50024_IG10
lea rcx, [rax+rsi-0x10]
lea rdi, [rax+0x10]
and rdi, -16
- align [0 bytes for IG08]
- ;; size=34 bbWeight=0.50 PerfScore 6.50
+ align [3 bytes for IG08]
+ ;; size=41 bbWeight=0.50 PerfScore 7.12
G_M50024_IG08:
- vptest xmm0, xmmword ptr [rdi]
+ vmovdqa xmm0, xmmword ptr [rdi]
+ vptest xmm1, xmm0
jne SHORT G_M50024_IG09
add rdi, 16
cmp rdi, rcx
jbe SHORT G_M50024_IG08
- ;; size=16 bbWeight=4 PerfScore 34.00
+ ;; size=20 bbWeight=4 PerfScore 38.00
G_M50024_IG09:
sub rsi, rdi
add rsi, rax
;; size=6 bbWeight=0.50 PerfScore 0.25
G_M50024_IG10:
cmp rsi, 8
jb SHORT G_M50024_IG15
align [0 bytes for IG11]
;; size=6 bbWeight=1 PerfScore 1.25
G_M50024_IG11:
mov ecx, dword ptr [rdi]
mov edx, dword ptr [rdi+0x04]
mov r8d, ecx
or r8d, edx
test r8d, 0xD1FFAB1E
je SHORT G_M50024_IG14
;; size=20 bbWeight=4 PerfScore 23.00
G_M50024_IG12:
test ecx, 0xD1FFAB1E
jne SHORT G_M50024_IG13
mov ecx, edx
add rdi, 4
;; size=14 bbWeight=0.50 PerfScore 0.88
G_M50024_IG13:
and ecx, 0xD1FFAB1E
xor esi, esi
tzcnt esi, ecx
shr esi, 3
mov ecx, esi
add rdi, rcx
jmp SHORT G_M50024_IG18
;; size=22 bbWeight=0.50 PerfScore 2.75
G_M50024_IG14:
add rdi, 8
add rsi, -8
cmp rsi, 8
jae SHORT G_M50024_IG11
;; size=14 bbWeight=4 PerfScore 7.00
G_M50024_IG15:
test sil, 4
je SHORT G_M50024_IG16
mov ecx, dword ptr [rdi]
test ecx, 0xD1FFAB1E
jne SHORT G_M50024_IG13
add rdi, 4
;; size=20 bbWeight=0.50 PerfScore 2.38
G_M50024_IG16:
test sil, 2
je SHORT G_M50024_IG17
movzx rcx, word ptr [rdi]
test ecx, 0xD1FFAB1E
jne SHORT G_M50024_IG13
add rdi, 2
;; size=21 bbWeight=0.50 PerfScore 2.38
G_M50024_IG17:
test sil, 1
je SHORT G_M50024_IG18
lea rcx, [rdi+0x01]
cmp byte ptr [rdi], 0
cmovge rdi, rcx
;; size=17 bbWeight=0.50 PerfScore 2.50
G_M50024_IG18:
mov rcx, rdi
sub rcx, rax
mov rax, rcx
;; size=9 bbWeight=1 PerfScore 0.75
G_M50024_IG19:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=1 PerfScore 2.50
RWD00 dq 8080808080808080h, 8080808080808080h
-; Total bytes of code 371, prolog size 4, PerfScore 202.75, instruction count 104, allocated bytes for code 371 (MethodHash=58923c97) for method System.Text.Ascii:GetIndexOfFirstNonAsciiByte_Vector(ulong,ulong):ulong (FullOpts)
+; Total bytes of code 382, prolog size 4, PerfScore 207.38, instruction count 106, allocated bytes for code 382 (MethodHash=58923c97) for method System.Text.Ascii:GetIndexOfFirstNonAsciiByte_Vector(ulong,ulong):ulong (FullOpts)
9 (12.68 % of base) - System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadAvxVector256(ulong,ulong,int,byref):ubyte:this
; Assembly listing for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadAvxVector256(ulong,ulong,int,byref):ubyte:this (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
;* V00 this [V00 ] ( 0, 0 ) byref -> zero-ref this single-def
; V01 arg1 [V01,T00] ( 4, 4 ) long -> rsi single-def
;* V02 arg2 [V02 ] ( 0, 0 ) long -> zero-ref single-def
;* V03 arg3 [V03 ] ( 0, 0 ) int -> zero-ref single-def
; V04 arg4 [V04,T01] ( 4, 3 ) byref -> r8 single-def
-; V05 loc0 [V05,T02] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[ushort]>
-; V06 loc1 [V06,T03] ( 3, 2.50) simd32 -> mm1 <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V05 loc0 [V05,T04] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V06 loc1 [V06,T05] ( 3, 2.50) simd32 -> mm1 <System.Runtime.Intrinsics.Vector256`1[ushort]>
;# V07 OutArgs [V07 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V08 tmp1 [V08 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V09 tmp2 [V09 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V10 tmp3 [V10 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V08 tmp1 [V08,T03] ( 2, 4 ) simd32 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V09 tmp2 [V09 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V10 tmp3 [V10 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V11 tmp4 [V11,T02] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V12 tmp5 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V13 tmp6 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V14 tmp7 [V14 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V15 tmp8 [V15 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V16 tmp9 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
;
-; Lcl frame size = 0
+; Lcl frame size = 8
G_M46395_IG01:
- ;; size=0 bbWeight=1 PerfScore 0.00
+ push rax
+ ;; size=1 bbWeight=1 PerfScore 1.00
G_M46395_IG02:
vmovups ymm0, ymmword ptr [rsi]
vmovups ymm1, ymmword ptr [rsi+0x20]
vpor ymm2, ymm0, ymm1
vptest ymm2, ymmword ptr [reloc @RWD00]
je SHORT G_M46395_IG05
;; size=24 bbWeight=1 PerfScore 18.33
G_M46395_IG03:
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [r8], ymm0
xor eax, eax
;; size=11 bbWeight=0.50 PerfScore 1.29
G_M46395_IG04:
vzeroupper
+ add rsp, 8
ret
- ;; size=4 bbWeight=0.50 PerfScore 1.00
+ ;; size=8 bbWeight=0.50 PerfScore 1.12
G_M46395_IG05:
vpmovwb ymm0, ymm0
vpmovwb ymm1, ymm1
vinserti128 ymm0, ymm0, xmm1, 1
vmovups ymmword ptr [r8], ymm0
mov eax, 1
;; size=28 bbWeight=0.50 PerfScore 5.12
G_M46395_IG06:
vzeroupper
+ add rsp, 8
ret
- ;; size=4 bbWeight=0.50 PerfScore 1.00
+ ;; size=8 bbWeight=0.50 PerfScore 1.12
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
-; Total bytes of code 71, prolog size 0, PerfScore 26.75, instruction count 17, allocated bytes for code 71 (MethodHash=fb454ac4) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadAvxVector256(ulong,ulong,int,byref):ubyte:this (FullOpts)
+; Total bytes of code 80, prolog size 1, PerfScore 28.00, instruction count 20, allocated bytes for code 80 (MethodHash=fb454ac4) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadAvxVector256(ulong,ulong,int,byref):ubyte:this (FullOpts)
9 (17.65 % of base) - System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector128(ulong,ulong,int,byref):ubyte:this
; Assembly listing for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector128(ulong,ulong,int,byref):ubyte:this (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 2 inlinees without PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 5 inlinees without PGO data
; Final local variable assignments
;
;* V00 this [V00 ] ( 0, 0 ) byref -> zero-ref this single-def
; V01 arg1 [V01,T00] ( 4, 4 ) long -> rsi single-def
;* V02 arg2 [V02 ] ( 0, 0 ) long -> zero-ref single-def
;* V03 arg3 [V03 ] ( 0, 0 ) int -> zero-ref single-def
; V04 arg4 [V04,T01] ( 4, 3 ) byref -> r8 single-def
-; V05 loc0 [V05,T02] ( 3, 2.50) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ushort]>
-; V06 loc1 [V06,T03] ( 3, 2.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V05 loc0 [V05,T04] ( 3, 2.50) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V06 loc1 [V06,T05] ( 3, 2.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[ushort]>
;# V07 OutArgs [V07 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V08 tmp1 [V08 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V09 tmp2 [V09 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V10 tmp3 [V10 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V11 tmp4 [V11 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V08 tmp1 [V08,T03] ( 2, 4 ) simd16 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V09 tmp2 [V09 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V10 tmp3 [V10 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V11 tmp4 [V11,T02] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V12 tmp5 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V13 tmp6 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V14 tmp7 [V14 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V15 tmp8 [V15 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V16 tmp9 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V17 tmp10 [V17 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;
-; Lcl frame size = 0
+; Lcl frame size = 8
G_M11006_IG01:
- ;; size=0 bbWeight=1 PerfScore 0.00
+ push rax
+ ;; size=1 bbWeight=1 PerfScore 1.00
G_M11006_IG02:
vmovups xmm0, xmmword ptr [rsi]
vmovups xmm1, xmmword ptr [rsi+0x10]
vpor xmm2, xmm0, xmm1
vptest xmm2, xmmword ptr [reloc @RWD00]
je SHORT G_M11006_IG05
;; size=24 bbWeight=1 PerfScore 14.33
G_M11006_IG03:
vxorps xmm0, xmm0, xmm0
vmovups xmmword ptr [r8], xmm0
xor eax, eax
;; size=11 bbWeight=0.50 PerfScore 1.29
G_M11006_IG04:
+ add rsp, 8
ret
- ;; size=1 bbWeight=0.50 PerfScore 0.50
+ ;; size=5 bbWeight=0.50 PerfScore 0.62
G_M11006_IG05:
vpackuswb xmm0, xmm0, xmm1
vmovups xmmword ptr [r8], xmm0
mov eax, 1
;; size=14 bbWeight=0.50 PerfScore 1.62
G_M11006_IG06:
+ add rsp, 8
ret
- ;; size=1 bbWeight=0.50 PerfScore 0.50
+ ;; size=5 bbWeight=0.50 PerfScore 0.62
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
-; Total bytes of code 51, prolog size 0, PerfScore 18.25, instruction count 13, allocated bytes for code 51 (MethodHash=0badd501) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector128(ulong,ulong,int,byref):ubyte:this (FullOpts)
+; Total bytes of code 60, prolog size 1, PerfScore 19.50, instruction count 16, allocated bytes for code 60 (MethodHash=0badd501) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector128(ulong,ulong,int,byref):ubyte:this (FullOpts)
7 (1.77 % of base) - System.Text.Latin1Utility:GetIndexOfFirstNonLatin1Char_Sse2(ulong,ulong):ulong
; Assembly listing for method System.Text.Latin1Utility:GetIndexOfFirstNonLatin1Char_Sse2(ulong,ulong):ulong (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 2 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 32, 34.50) long -> rbx
; V01 arg1 [V01,T01] ( 17, 10 ) long -> rsi
;* V02 loc0 [V02,T08] ( 0, 0 ) int -> zero-ref
;* V03 loc1 [V03,T09] ( 0, 0 ) int -> zero-ref
-; V04 loc2 [V04,T10] ( 9, 11.50) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[ushort]>
-; V05 loc3 [V05,T11] ( 3, 8.50) simd16 -> mm3 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V04 loc2 [V04,T11] ( 9, 11.50) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V05 loc3 [V05,T12] ( 3, 8.50) simd16 -> mm3 <System.Runtime.Intrinsics.Vector128`1[ushort]>
; V06 loc4 [V06,T04] ( 4, 2 ) int -> r14
; V07 loc5 [V07,T03] ( 8, 4 ) long -> r15
-; V08 loc6 [V08,T12] ( 5, 6 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ushort]>
-; V09 loc7 [V09,T13] ( 3, 1.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V08 loc6 [V08,T13] ( 5, 6 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V09 loc7 [V09,T14] ( 3, 1.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[ushort]>
; V10 loc8 [V10,T05] ( 3, 1.50) int -> rdi
; V11 loc9 [V11,T02] ( 2, 4.50) long -> rdi
;* V12 loc10 [V12 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[ushort]>
; V13 loc11 [V13,T07] ( 2, 1 ) long -> rdi
;* V14 loc12 [V14 ] ( 0, 0 ) int -> zero-ref
;# V15 OutArgs [V15 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V16 cse0 [V16,T06] ( 3, 1.50) long -> rdi "CSE #01: moderate"
+; V17 rat0 [V17,T10] ( 3, 24 ) simd16 -> mm4 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 8
G_M38868_IG01:
push rbp
push r15
push r14
push rbx
push rax
lea rbp, [rsp+0x20]
mov rbx, rdi
;; size=15 bbWeight=1 PerfScore 5.75
G_M38868_IG02:
test rsi, rsi
jne SHORT G_M38868_IG05
;; size=5 bbWeight=1 PerfScore 1.25
G_M38868_IG03:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M38868_IG04:
add rsp, 8
pop rbx
pop r14
pop r15
pop rbp
ret
;; size=11 bbWeight=0.50 PerfScore 1.62
G_M38868_IG05:
mov r15, rbx
cmp rsi, 8
jb G_M38868_IG10
vmovups xmm0, xmmword ptr [reloc @RWD00]
vmovups xmm1, xmmword ptr [reloc @RWD16]
vpaddusw xmm2, xmm1, xmmword ptr [r15]
vpmovmskb r14d, xmm2
test r14d, 0xAAAA
jne G_M38868_IG18
add rsi, rsi
cmp rsi, 32
jb SHORT G_M38868_IG08
lea rbx, [r15+0x10]
and rbx, -16
add rsi, r15
sub rsi, rbx
cmp rsi, 32
jb SHORT G_M38868_IG07
lea rdi, [rbx+rsi-0x20]
align [0 bytes for IG06]
;; size=85 bbWeight=0.50 PerfScore 9.38
G_M38868_IG06:
vmovdqa xmm2, xmmword ptr [rbx]
vmovdqa xmm3, xmmword ptr [rbx+0x10]
- vpor xmm4, xmm2, xmm3
- vptest xmm4, xmm0
+ vmovaps xmm4, xmm2
+ vpternlogd xmm4, xmm3, xmm0, -88
+ vptest xmm4, xmm4
jne G_M38868_IG16
add rbx, 32
cmp rbx, rdi
jbe SHORT G_M38868_IG06
- ;; size=33 bbWeight=4 PerfScore 55.33
+ ;; size=40 bbWeight=4 PerfScore 57.00
G_M38868_IG07:
test sil, 16
je SHORT G_M38868_IG09
vmovdqa xmm2, xmmword ptr [rbx]
vptest xmm2, xmm0
jne G_M38868_IG17
;; size=21 bbWeight=0.50 PerfScore 4.62
G_M38868_IG08:
add rbx, 16
;; size=4 bbWeight=0.50 PerfScore 0.12
G_M38868_IG09:
movzx rdi, sil
test dil, 15
je G_M38868_IG19
and rsi, 15
add rsi, rbx
mov rbx, rsi
sub rbx, 16
vmovups xmm2, xmmword ptr [rbx]
vptest xmm2, xmm0
jne G_M38868_IG17
add rbx, 16
jmp G_M38868_IG19
;; size=52 bbWeight=0.50 PerfScore 6.38
G_M38868_IG10:
test sil, 4
je SHORT G_M38868_IG12
mov rdi, qword ptr [r15]
mov rax, 0xD1FFAB1E
and rdi, rax
je SHORT G_M38868_IG11
xor ebx, ebx
tzcnt rbx, rdi
shr rbx, 3
and rbx, -2
add rbx, r15
jmp SHORT G_M38868_IG19
;; size=44 bbWeight=0.50 PerfScore 5.00
G_M38868_IG11:
lea rbx, [r15+0x08]
;; size=4 bbWeight=0.50 PerfScore 0.25
G_M38868_IG12:
test sil, 2
je SHORT G_M38868_IG13
mov edi, dword ptr [rbx]
test edi, 0xD1FFAB1E
jne SHORT G_M38868_IG14
add rbx, 4
;; size=20 bbWeight=0.50 PerfScore 2.38
G_M38868_IG13:
test sil, 1
je SHORT G_M38868_IG19
cmp word ptr [rbx], 255
ja SHORT G_M38868_IG19
jmp SHORT G_M38868_IG15
;; size=15 bbWeight=0.50 PerfScore 3.62
G_M38868_IG14:
mov rax, 0xD1FFAB1E ; code for System.Text.Latin1Utility:FirstCharInUInt32IsLatin1(uint):ubyte
call [rax]System.Text.Latin1Utility:FirstCharInUInt32IsLatin1(uint):ubyte
test eax, eax
je SHORT G_M38868_IG19
;; size=16 bbWeight=0.50 PerfScore 2.25
G_M38868_IG15:
add rbx, 2
jmp SHORT G_M38868_IG19
;; size=6 bbWeight=0.50 PerfScore 1.12
G_M38868_IG16:
vptest xmm2, xmm0
jne SHORT G_M38868_IG17
add rbx, 16
vmovaps xmm2, xmm3
;; size=15 bbWeight=0.50 PerfScore 2.25
G_M38868_IG17:
vpaddusw xmm0, xmm2, xmm1
vpmovmskb r14d, xmm0
;; size=8 bbWeight=0.50 PerfScore 1.17
G_M38868_IG18:
and r14d, 0xAAAA
xor eax, eax
tzcnt eax, r14d
lea rbx, [rbx+rax-0x01]
;; size=19 bbWeight=0.50 PerfScore 1.75
G_M38868_IG19:
mov rax, rbx
sub rax, r15
shr rax, 1
;; size=9 bbWeight=0.50 PerfScore 0.50
G_M38868_IG20:
add rsp, 8
pop rbx
pop r14
pop r15
pop rbp
ret
;; size=11 bbWeight=0.50 PerfScore 1.62
RWD00 dq FF00FF00FF00FF00h, FF00FF00FF00FF00h
RWD16 dq 7F007F007F007F00h, 7F007F007F007F00h
-; Total bytes of code 395, prolog size 15, PerfScore 106.50, instruction count 111, allocated bytes for code 395 (MethodHash=0f68682b) for method System.Text.Latin1Utility:GetIndexOfFirstNonLatin1Char_Sse2(ulong,ulong):ulong (FullOpts)
+; Total bytes of code 402, prolog size 15, PerfScore 108.17, instruction count 112, allocated bytes for code 402 (MethodHash=0f68682b) for method System.Text.Latin1Utility:GetIndexOfFirstNonLatin1Char_Sse2(ulong,ulong):ulong (FullOpts)
7 (4.70 % of base) - System.Text.Latin1Utility:NarrowUtf16ToLatin1_Sse2(ulong,ulong,ulong):ulong
; Assembly listing for method System.Text.Latin1Utility:NarrowUtf16ToLatin1_Sse2(ulong,ulong,ulong):ulong (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 6, 11.50) long -> rdi single-def
; V01 arg1 [V01,T02] ( 8, 8.50) long -> rsi single-def
; V02 arg2 [V02,T03] ( 3, 2.50) long -> rdx single-def
;* V03 loc0 [V03,T05] ( 0, 0 ) int -> zero-ref
;* V04 loc1 [V04 ] ( 0, 0 ) long -> zero-ref
-; V05 loc2 [V05,T08] ( 5, 7 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[short]>
+; V05 loc2 [V05,T09] ( 5, 7 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[short]>
;* V06 loc3 [V06 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[ushort]>
-; V07 loc4 [V07,T06] ( 14, 18.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[short]>
+; V07 loc4 [V07,T07] ( 14, 18.50) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[short]>
;* V08 loc5 [V08 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V09 loc6 [V09,T00] ( 12, 27 ) long -> rax
; V10 loc7 [V10,T04] ( 2, 4.50) long -> rdx
-; V11 loc8 [V11,T07] ( 3, 12 ) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[short]>
+; V11 loc8 [V11,T08] ( 3, 12 ) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[short]>
;* V12 loc9 [V12 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[short]>
;# V13 OutArgs [V13 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+; V14 rat0 [V14,T06] ( 3, 24 ) simd16 -> mm3 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M23879_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M23879_IG02:
vmovups xmm0, xmmword ptr [reloc @RWD00]
vmovups xmm1, xmmword ptr [rdi]
vptest xmm1, xmm0
je SHORT G_M23879_IG05
;; size=19 bbWeight=1 PerfScore 11.00
G_M23879_IG03:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M23879_IG04:
pop rbp
ret
;; size=2 bbWeight=0.50 PerfScore 0.75
G_M23879_IG05:
vpackuswb xmm1, xmm1, xmm1
vmovq qword ptr [rsi], xmm1
mov eax, 8
test sil, 8
jne SHORT G_M23879_IG06
vmovups xmm1, xmmword ptr [rdi+0x10]
vptest xmm1, xmm0
jne SHORT G_M23879_IG08
vpackuswb xmm1, xmm1, xmm1
vmovq qword ptr [rsi+0x08], xmm1
;; size=40 bbWeight=0.50 PerfScore 7.75
G_M23879_IG06:
mov rax, rsi
and rax, 15
neg rax
add rax, 16
add rdx, -16
align [0 bytes for IG07]
;; size=18 bbWeight=0.50 PerfScore 0.62
G_M23879_IG07:
vmovups xmm1, xmmword ptr [rdi+2*rax]
vmovups xmm2, xmmword ptr [rdi+2*rax+0x10]
- vpor xmm3, xmm1, xmm2
- vptest xmm3, xmm0
+ vmovaps xmm3, xmm1
+ vpternlogd xmm3, xmm2, xmm0, -88
+ vptest xmm3, xmm3
jne SHORT G_M23879_IG09
vpackuswb xmm1, xmm1, xmm2
vmovdqa xmmword ptr [rsi+rax], xmm1
add rax, 16
cmp rax, rdx
jbe SHORT G_M23879_IG07
- ;; size=40 bbWeight=4 PerfScore 67.33
+ ;; size=47 bbWeight=4 PerfScore 69.00
G_M23879_IG08:
pop rbp
ret
;; size=2 bbWeight=0.50 PerfScore 0.75
G_M23879_IG09:
vptest xmm1, xmm0
jne SHORT G_M23879_IG08
vpackuswb xmm0, xmm1, xmm1
vmovq qword ptr [rsi+rax], xmm0
add rax, 8
jmp SHORT G_M23879_IG08
;; size=22 bbWeight=0.50 PerfScore 4.62
RWD00 dq FF00FF00FF00FF00h, FF00FF00FF00FF00h
-; Total bytes of code 149, prolog size 4, PerfScore 94.21, instruction count 43, allocated bytes for code 149 (MethodHash=f65ba2b8) for method System.Text.Latin1Utility:NarrowUtf16ToLatin1_Sse2(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 156, prolog size 4, PerfScore 95.88, instruction count 44, allocated bytes for code 156 (MethodHash=f65ba2b8) for method System.Text.Latin1Utility:NarrowUtf16ToLatin1_Sse2(ulong,ulong,ulong):ulong (FullOpts)
6 (12.24 % of base) - System.Text.Ascii+PlainLoader`1[int]:EqualAndAscii256(byref,byref):ubyte
; Assembly listing for method System.Text.Ascii+PlainLoader`1[int]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 2 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) byref -> rdi single-def
; V01 arg1 [V01,T01] ( 3, 3 ) byref -> rsi single-def
-; V02 loc0 [V02,T02] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[int]>
+; V02 loc0 [V02,T03] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[int]>
;* V03 loc1 [V03 ] ( 0, 0 ) simd32 -> zero-ref <System.Runtime.Intrinsics.Vector256`1[int]>
;# V04 OutArgs [V04 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V05 tmp1 [V05 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V05 tmp1 [V05 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V06 tmp2 [V06 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;* V07 tmp3 [V07 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inline stloc first use temp"
+;* V08 tmp4 [V08 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
+;* V09 tmp5 [V09 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V10 rat0 [V10,T02] ( 3, 3 ) simd32 -> mm0 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M61666_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M61666_IG02:
vmovups ymm0, ymmword ptr [rdi]
vpcmpd k1, ymm0, ymmword ptr [rsi], 4
kortestb k1, k1
jne SHORT G_M61666_IG04
;; size=17 bbWeight=1 PerfScore 13.00
G_M61666_IG03:
- vptest ymm0, ymmword ptr [reloc @RWD00]
+ vpandd ymm0, ymm0, dword ptr [reloc @RWD00] {1to8}
+ vptest ymm0, ymm0
je SHORT G_M61666_IG06
- ;; size=11 bbWeight=0.50 PerfScore 4.00
+ ;; size=17 bbWeight=0.50 PerfScore 4.00
G_M61666_IG04:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M61666_IG05:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M61666_IG06:
mov eax, 1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M61666_IG07:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
-RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD00 dd FFFFFF80h
-; Total bytes of code 49, prolog size 4, PerfScore 21.00, instruction count 16, allocated bytes for code 51 (MethodHash=a7ac0f1d) for method System.Text.Ascii+PlainLoader`1[int]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
+; Total bytes of code 55, prolog size 4, PerfScore 21.00, instruction count 17, allocated bytes for code 57 (MethodHash=a7ac0f1d) for method System.Text.Ascii+PlainLoader`1[int]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
6 (12.24 % of base) - System.Text.Ascii+PlainLoader`1[long]:EqualAndAscii256(byref,byref):ubyte
; Assembly listing for method System.Text.Ascii+PlainLoader`1[long]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) byref -> rdi single-def
; V01 arg1 [V01,T01] ( 3, 3 ) byref -> rsi single-def
-; V02 loc0 [V02,T02] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[long]>
+; V02 loc0 [V02,T03] ( 3, 2.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[long]>
;* V03 loc1 [V03 ] ( 0, 0 ) simd32 -> zero-ref <System.Runtime.Intrinsics.Vector256`1[long]>
;# V04 OutArgs [V04 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V05 tmp1 [V05 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V05 tmp1 [V05 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V06 tmp2 [V06 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V07 tmp3 [V07 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V08 tmp4 [V08 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V09 tmp5 [V09 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V10 tmp6 [V10 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V11 tmp7 [V11 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+;* V12 tmp8 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V13 rat0 [V13,T02] ( 3, 3 ) simd32 -> mm0 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M4539_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M4539_IG02:
vmovups ymm0, ymmword ptr [rdi]
vpcmpq k1, ymm0, ymmword ptr [rsi], 4
kortestb k1, k1
jne SHORT G_M4539_IG04
;; size=17 bbWeight=1 PerfScore 13.00
G_M4539_IG03:
- vptest ymm0, ymmword ptr [reloc @RWD00]
+ vpandq ymm0, ymm0, qword ptr [reloc @RWD00] {1to4}
+ vptest ymm0, ymm0
je SHORT G_M4539_IG06
- ;; size=11 bbWeight=0.50 PerfScore 4.00
+ ;; size=17 bbWeight=0.50 PerfScore 4.00
G_M4539_IG04:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M4539_IG05:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M4539_IG06:
mov eax, 1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M4539_IG07:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
-RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD00 dq FFFFFFFFFFFFFF80h
-; Total bytes of code 49, prolog size 4, PerfScore 21.00, instruction count 16, allocated bytes for code 51 (MethodHash=0669ee44) for method System.Text.Ascii+PlainLoader`1[long]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
+; Total bytes of code 55, prolog size 4, PerfScore 21.00, instruction count 17, allocated bytes for code 57 (MethodHash=0669ee44) for method System.Text.Ascii+PlainLoader`1[long]:EqualAndAscii256(byref,byref):ubyte (FullOpts)
2 (2.15 % of base) - System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector512(ulong,ulong,int,byref):ubyte:this
; Assembly listing for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector512(ulong,ulong,int,byref):ubyte:this (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
;* V00 this [V00 ] ( 0, 0 ) byref -> zero-ref this single-def
; V01 arg1 [V01,T00] ( 4, 4 ) long -> rsi single-def
;* V02 arg2 [V02 ] ( 0, 0 ) long -> zero-ref single-def
;* V03 arg3 [V03 ] ( 0, 0 ) int -> zero-ref single-def
; V04 arg4 [V04,T01] ( 4, 3 ) byref -> r8 single-def
-; V05 loc0 [V05,T03] ( 3, 2.50) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ushort]>
-; V06 loc1 [V06,T04] ( 3, 2.50) simd64 -> mm1 <System.Runtime.Intrinsics.Vector512`1[ushort]>
+; V05 loc0 [V05,T04] ( 3, 2.50) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ushort]>
+; V06 loc1 [V06,T05] ( 3, 2.50) simd64 -> mm1 <System.Runtime.Intrinsics.Vector512`1[ushort]>
;# V07 OutArgs [V07 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V08 tmp1 [V08 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
-;* V09 tmp2 [V09 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-; V10 rat0 [V10,T02] ( 3, 6 ) simd64 -> mm2 "ReplaceWithLclVar is creating a new local variable"
+; V09 tmp2 [V09,T03] ( 2, 4 ) simd64 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V10 tmp3 [V10 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V11 tmp4 [V11 ] ( 0, 0 ) simd64 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V12 tmp5 [V12,T02] ( 0, 0 ) ushort -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V13 tmp6 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V14 tmp7 [V14 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V15 tmp8 [V15 ] ( 0, 0 ) ushort -> zero-ref "Inline stloc first use temp"
+;* V16 tmp9 [V16 ] ( 0, 0 ) ushort -> zero-ref "Inlining Arg"
+;* V17 tmp10 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
;
-; Lcl frame size = 0
+; Lcl frame size = 8
G_M39699_IG01:
- ;; size=0 bbWeight=1 PerfScore 0.00
+ push rax
+ ;; size=1 bbWeight=1 PerfScore 1.00
G_M39699_IG02:
vmovups zmm0, zmmword ptr [rsi]
vmovups zmm1, zmmword ptr [rsi+0x40]
- vmovaps zmm2, zmm0
- vpternlogd zmm2, zmm1, zmmword ptr [reloc @RWD00], -88
- vptestmw k1, zmm2, zmm2
+ vpord zmm2, zmm0, zmm1
+ vptestmw k1, zmm2, zmmword ptr [reloc @RWD00]
kortestd k1, k1
je SHORT G_M39699_IG05
- ;; size=43 bbWeight=1 PerfScore 15.25
+ ;; size=36 bbWeight=1 PerfScore 15.33
G_M39699_IG03:
vxorps ymm0, ymm0, ymm0
vmovups zmmword ptr [r8], zmm0
xor eax, eax
;; size=12 bbWeight=0.50 PerfScore 1.29
G_M39699_IG04:
vzeroupper
+ add rsp, 8
ret
- ;; size=4 bbWeight=0.50 PerfScore 1.00
+ ;; size=8 bbWeight=0.50 PerfScore 1.12
G_M39699_IG05:
vpmovwb zmm0, zmm0
vpmovwb zmm1, zmm1
vinserti64x4 zmm0, zmm0, ymm1, 1
vmovups zmmword ptr [r8], zmm0
mov eax, 1
;; size=30 bbWeight=0.50 PerfScore 5.12
G_M39699_IG06:
vzeroupper
+ add rsp, 8
ret
- ;; size=4 bbWeight=0.50 PerfScore 1.00
+ ;; size=8 bbWeight=0.50 PerfScore 1.12
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
-; Total bytes of code 93, prolog size 0, PerfScore 23.67, instruction count 19, allocated bytes for code 96 (MethodHash=65b664ec) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector512(ulong,ulong,int,byref):ubyte:this (FullOpts)
+; Total bytes of code 95, prolog size 1, PerfScore 25.00, instruction count 21, allocated bytes for code 98 (MethodHash=65b664ec) for method System.Buffers.Text.Base64Url+Base64UrlDecoderChar:TryLoadVector512(ulong,ulong,int,byref):ubyte:this (FullOpts)
2 (0.57 % of base) - System.Text.Ascii:IsValidCore[long](byref,int):ubyte
; Assembly listing for method System.Text.Ascii:IsValidCore[long](byref,int):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 2 single block inlinees; 7 inlinees without PGO data
+; 0 inlinees with PGO data; 12 single block inlinees; 22 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 18, 13 ) byref -> rdi
-; V01 arg1 [V01,T03] ( 9, 6 ) int -> rsi single-def
-; V02 loc0 [V02,T07] ( 5, 2.50) byref -> rdx single-def
+; V01 arg1 [V01,T03] ( 11, 6.50) int -> rsi single-def
+; V02 loc0 [V02,T06] ( 5, 2.50) byref -> rcx single-def
;* V03 loc1 [V03 ] ( 0, 0 ) int -> zero-ref
;* V04 loc2 [V04 ] ( 0, 0 ) long -> zero-ref
-;* V05 loc3 [V05,T10] ( 0, 0 ) long -> zero-ref
+;* V05 loc3 [V05 ] ( 0, 0 ) long -> zero-ref
;* V06 loc4 [V06 ] ( 0, 0 ) long -> zero-ref
-; V07 loc5 [V07,T01] ( 6, 17 ) long -> r8
+; V07 loc5 [V07,T01] ( 6, 17 ) long -> rdx
;* V08 loc6 [V08 ] ( 0, 0 ) long -> zero-ref
-; V09 loc7 [V09,T04] ( 4, 5.50) long -> rcx
+; V09 loc7 [V09,T04] ( 4, 5.50) long -> rsi
; V10 loc8 [V10,T00] ( 5, 20 ) byref -> rax
;* V11 loc9 [V11 ] ( 0, 0 ) long -> zero-ref
;* V12 loc10 [V12 ] ( 0, 0 ) long -> zero-ref
;* V13 loc11 [V13 ] ( 0, 0 ) long -> zero-ref
;* V14 loc12 [V14 ] ( 0, 0 ) byref -> zero-ref
;# V15 OutArgs [V15 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V16 tmp1 [V16 ] ( 0, 0 ) int -> zero-ref
;* V17 tmp2 [V17 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inlining Arg"
-; V18 tmp3 [V18,T09] ( 2, 1 ) ubyte -> r8 "Inline return value spill temp"
+; V18 tmp3 [V18,T08] ( 2, 1 ) ubyte -> rdx "Inline return value spill temp"
;* V19 tmp4 [V19 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
-;* V20 tmp5 [V20 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V21 tmp6 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V22 tmp7 [V22 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V23 tmp8 [V23 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+; V20 tmp5 [V20,T12] ( 2, 2 ) simd16 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V21 tmp6 [V21 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V22 tmp7 [V22 ] ( 0, 0 ) simd16 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V23 tmp8 [V23 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
;* V24 tmp9 [V24 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V25 tmp10 [V25,T13] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V25 tmp10 [V25 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V26 tmp11 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V27 tmp12 [V27,T11] ( 2, 16 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V27 tmp12 [V27 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V28 tmp13 [V28 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V29 tmp14 [V29,T14] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-; V30 tmp15 [V30,T05] ( 5, 5 ) int -> r8 "Single return block return value"
-; V31 tmp16 [V31,T08] ( 2, 2 ) long -> rax "Cast away GC"
-; V32 cse0 [V32,T12] ( 7, 7 ) simd32 -> mm1 multi-def "CSE #03: aggressive"
-; V33 cse1 [V33,T06] ( 6, 3 ) long -> rcx multi-def "CSE #01: aggressive"
+; V29 tmp14 [V29,T13] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V30 tmp15 [V30 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V31 tmp16 [V31 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V32 tmp17 [V32 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V33 tmp18 [V33 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V34 tmp19 [V34 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V35 tmp20 [V35 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V36 tmp21 [V36 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+;* V37 tmp22 [V37 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V38 tmp23 [V38,T14] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V39 tmp24 [V39 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V40 tmp25 [V40 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V41 tmp26 [V41 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V42 tmp27 [V42 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V43 tmp28 [V43 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V44 tmp29 [V44 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V45 tmp30 [V45 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+;* V46 tmp31 [V46 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V47 tmp32 [V47,T09] ( 2, 16 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V48 tmp33 [V48 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V49 tmp34 [V49 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V50 tmp35 [V50 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V51 tmp36 [V51 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V52 tmp37 [V52 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V53 tmp38 [V53 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V54 tmp39 [V54 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+;* V55 tmp40 [V55 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V56 tmp41 [V56,T15] ( 2, 2 ) simd32 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V57 tmp42 [V57 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V58 tmp43 [V58 ] ( 0, 0 ) simd32 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;* V59 tmp44 [V59 ] ( 0, 0 ) long -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V60 tmp45 [V60 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V61 tmp46 [V61 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V62 tmp47 [V62 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V63 tmp48 [V63 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+;* V64 tmp49 [V64 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V65 tmp50 [V65,T05] ( 5, 5 ) int -> rdx "Single return block return value"
+; V66 tmp51 [V66,T07] ( 2, 2 ) long -> rax "Cast away GC"
+; V67 cse0 [V67,T10] ( 7, 7 ) simd32 -> mm1 multi-def "CSE #04: aggressive"
+; V68 rat0 [V68,T11] ( 3, 3 ) simd16 -> mm0 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M33379_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M33379_IG02:
cmp esi, 2
jge SHORT G_M33379_IG04
;; size=5 bbWeight=1 PerfScore 1.25
G_M33379_IG03:
- movsxd rcx, esi
- test rcx, rcx
+ movsxd rax, esi
+ test rax, rax
jg G_M33379_IG13
- test esi, esi
- je G_M33379_IG16
- jmp G_M33379_IG18
- ;; size=25 bbWeight=0.50 PerfScore 2.38
+ jmp G_M33379_IG16
+ ;; size=17 bbWeight=0.50 PerfScore 1.75
G_M33379_IG04:
- movsxd rcx, esi
- lea rdx, bword ptr [rdi+8*rcx]
+ movsxd rax, esi
+ lea rcx, bword ptr [rdi+8*rax]
cmp esi, 4
jg SHORT G_M33379_IG05
vmovups xmm0, xmmword ptr [rdi]
- vpor xmm0, xmm0, xmmword ptr [rdx-0x10]
- vptest xmm0, xmmword ptr [reloc @RWD00]
- sete r8b
- movzx r8, r8b
+ vpor xmm0, xmm0, xmmword ptr [rcx-0x10]
+ vpandq xmm0, xmm0, qword ptr [reloc @RWD00] {1to2}
+ vptest xmm0, xmm0
+ sete dl
+ movzx rdx, dl
jmp G_M33379_IG14
- align [3 bytes for IG07]
- ;; size=46 bbWeight=0.50 PerfScore 8.62
+ align [6 bytes for IG07]
+ ;; size=53 bbWeight=0.50 PerfScore 8.62
G_M33379_IG05:
cmp esi, 8
jg SHORT G_M33379_IG06
vmovups ymm0, ymmword ptr [rdi]
- vpor ymm0, ymm0, ymmword ptr [rdx-0x20]
+ vpor ymm0, ymm0, ymmword ptr [rcx-0x20]
vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
- sete r8b
- movzx r8, r8b
+ vptest ymm1, ymm0
+ sete dl
+ movzx rdx, dl
jmp G_M33379_IG14
- ;; size=40 bbWeight=0.50 PerfScore 10.75
+ ;; size=38 bbWeight=0.50 PerfScore 10.75
G_M33379_IG06:
cmp esi, 16
jle SHORT G_M33379_IG12
vmovups ymm1, ymmword ptr [rdi]
vmovups ymm0, ymmword ptr [rdi+0x20]
vpternlogq ymm1, ymm0, ymmword ptr [rdi+0x40], -2
vpor ymm0, ymm1, ymmword ptr [rdi+0x60]
vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
+ vptest ymm1, ymm0
jne SHORT G_M33379_IG08
mov rax, rdi
and rax, 31
shr rax, 3
- mov r8, rax
- neg r8
- add r8, 16
- add rcx, -16
- cmp r8, rcx
+ mov rdx, rax
+ neg rdx
+ add rdx, 16
+ movsxd rsi, esi
+ add rsi, -16
+ cmp rdx, rsi
jae SHORT G_M33379_IG11
- ;; size=72 bbWeight=0.50 PerfScore 15.25
+ ;; size=75 bbWeight=0.50 PerfScore 15.38
G_M33379_IG07:
- lea rax, bword ptr [rdi+8*r8]
+ lea rax, bword ptr [rdi+8*rdx]
vmovups ymm0, ymmword ptr [rax]
vmovups ymm2, ymmword ptr [rax+0x20]
vpternlogq ymm0, ymm2, ymmword ptr [rax+0x40], -2
vpor ymm0, ymm0, ymmword ptr [rax+0x60]
- vptest ymm0, ymm1
+ vptest ymm1, ymm0
je SHORT G_M33379_IG10
;; size=33 bbWeight=4 PerfScore 90.00
G_M33379_IG08:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M33379_IG09:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M33379_IG10:
- add r8, 16
- cmp r8, rcx
+ add rdx, 16
+ cmp rdx, rsi
jb SHORT G_M33379_IG07
;; size=9 bbWeight=4 PerfScore 6.00
G_M33379_IG11:
- lea rdi, bword ptr [rdi+8*rcx]
+ lea rdi, bword ptr [rdi+8*rsi]
;; size=4 bbWeight=0.50 PerfScore 0.25
G_M33379_IG12:
vmovups ymm1, ymmword ptr [rdi]
vmovups ymm0, ymmword ptr [rdi+0x20]
- vpternlogq ymm1, ymm0, ymmword ptr [rdx-0x40], -2
- vpor ymm0, ymm1, ymmword ptr [rdx-0x20]
+ vpternlogq ymm1, ymm0, ymmword ptr [rcx-0x40], -2
+ vpor ymm0, ymm1, ymmword ptr [rcx-0x20]
vmovups ymm1, ymmword ptr [reloc @RWD32]
- vptest ymm0, ymm1
- sete r8b
- movzx r8, r8b
+ vptest ymm1, ymm0
+ sete dl
+ movzx rdx, dl
jmp SHORT G_M33379_IG14
- ;; size=45 bbWeight=0.50 PerfScore 14.12
+ ;; size=43 bbWeight=0.50 PerfScore 14.12
G_M33379_IG13:
- mov r8, qword ptr [rdi]
- or r8, qword ptr [rdi+8*rcx-0x08]
+ mov rdx, qword ptr [rdi]
+ movsxd rax, esi
+ or rdx, qword ptr [rdi+8*rax-0x08]
mov rax, 0xD1FFAB1E
- test r8, rax
- sete r8b
- movzx r8, r8b
- ;; size=29 bbWeight=0.50 PerfScore 3.38
+ test rdx, rax
+ sete dl
+ movzx rdx, dl
+ ;; size=30 bbWeight=0.50 PerfScore 3.50
G_M33379_IG14:
- movzx rax, r8b
- ;; size=4 bbWeight=0.50 PerfScore 0.12
+ movzx rax, dl
+ ;; size=3 bbWeight=0.50 PerfScore 0.12
G_M33379_IG15:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M33379_IG16:
- mov eax, 1
- ;; size=5 bbWeight=0.50 PerfScore 0.12
-G_M33379_IG17:
- vzeroupper
- pop rbp
- ret
- ;; size=5 bbWeight=0.50 PerfScore 1.25
-G_M33379_IG18:
+ test esi, esi
+ je SHORT G_M33379_IG17
cmp dword ptr [rdi], edi
mov rax, 0xD1FFAB1E ; code for System.ThrowHelper:ThrowNotSupportedException()
call [rax]System.ThrowHelper:ThrowNotSupportedException()
int3
- ;; size=15 bbWeight=0 PerfScore 0.00
-RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
-RWD16 dd 00000000h, 00000000h, 00000000h, 00000000h
-RWD32 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+ ;; size=19 bbWeight=0 PerfScore 0.00
+G_M33379_IG17:
+ mov eax, 1
+ ;; size=5 bbWeight=0 PerfScore 0.00
+G_M33379_IG18:
+ vzeroupper
+ pop rbp
+ ret
+ ;; size=5 bbWeight=0 PerfScore 0.00
+RWD00 dq FFFFFFFFFFFFFF80h
+RWD08 dd 00000000h, 00000000h, 00000000h, 00000000h, 00000000h, 00000000h
+RWD32 dq FFFFFFFFFFFFFF80h, FFFFFFFFFFFFFF80h, FFFFFFFFFFFFFF80h, FFFFFFFFFFFFFF80h
-; Total bytes of code 353, prolog size 4, PerfScore 157.38, instruction count 90, allocated bytes for code 353 (MethodHash=a1267d9c) for method System.Text.Ascii:IsValidCore[long](byref,int):ubyte (FullOpts)
+; Total bytes of code 355, prolog size 4, PerfScore 155.62, instruction count 93, allocated bytes for code 355 (MethodHash=a1267d9c) for method System.Text.Ascii:IsValidCore[long](byref,int):ubyte (FullOpts)
1 (1.75 % of base) - System.Text.Ascii+PlainLoader`1[ubyte]:EqualAndAscii512(byref,byref):ubyte
; Assembly listing for method System.Text.Ascii+PlainLoader`1[ubyte]:EqualAndAscii512(byref,byref):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 3, 3 ) byref -> rdi single-def
; V01 arg1 [V01,T01] ( 3, 3 ) byref -> rsi single-def
-; V02 loc0 [V02,T02] ( 3, 2.50) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+; V02 loc0 [V02,T03] ( 3, 2.50) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ubyte]>
;* V03 loc1 [V03 ] ( 0, 0 ) simd64 -> zero-ref <System.Runtime.Intrinsics.Vector512`1[ubyte]>
;# V04 OutArgs [V04 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V05 tmp1 [V05 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V05 tmp1 [V05 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V06 tmp2 [V06 ] ( 0, 0 ) simd64 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V07 tmp3 [V07,T02] ( 0, 0 ) ubyte -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V08 tmp4 [V08 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V09 tmp5 [V09 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V10 tmp6 [V10 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+;* V11 tmp7 [V11 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+;* V12 tmp8 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
;
; Lcl frame size = 0
G_M30537_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M30537_IG02:
vmovups zmm0, zmmword ptr [rdi]
vpcmpub k1, zmm0, zmmword ptr [rsi], 4
kortestq k1, k1
jne SHORT G_M30537_IG04
;; size=20 bbWeight=1 PerfScore 12.00
G_M30537_IG03:
- vpmovb2m k1, zmm0
- kmovq rax, k1
- test rax, rax
+ vptestmb k1, zmm0, zmmword ptr [reloc @RWD00]
+ kortestq k1, k1
je SHORT G_M30537_IG06
- ;; size=16 bbWeight=0.50 PerfScore 2.62
+ ;; size=17 bbWeight=0.50 PerfScore 3.50
G_M30537_IG04:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M30537_IG05:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M30537_IG06:
mov eax, 1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M30537_IG07:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
+RWD00 dq 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h
+
-; Total bytes of code 57, prolog size 4, PerfScore 18.62, instruction count 18, allocated bytes for code 63 (MethodHash=8c8d88b6) for method System.Text.Ascii+PlainLoader`1[ubyte]:EqualAndAscii512(byref,byref):ubyte (FullOpts)
+; Total bytes of code 58, prolog size 4, PerfScore 19.50, instruction count 17, allocated bytes for code 64 (MethodHash=8c8d88b6) for method System.Text.Ascii+PlainLoader`1[ubyte]:EqualAndAscii512(byref,byref):ubyte (FullOpts)
1 (0.99 % of base) - System.Text.Ascii+WideningLoader:EqualAndAscii512(byref,byref):ubyte
; Assembly listing for method System.Text.Ascii+WideningLoader:EqualAndAscii512(byref,byref):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 2 single block inlinees; 1 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T01] ( 3, 3 ) byref -> rdi single-def
; V01 arg1 [V01,T00] ( 4, 3 ) byref -> rsi single-def
-; V02 loc0 [V02,T02] ( 4, 3 ) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+; V02 loc0 [V02,T03] ( 4, 3 ) simd64 -> mm0 <System.Runtime.Intrinsics.Vector512`1[ubyte]>
;* V03 loc1 [V03 ] ( 0, 0 ) simd64 -> zero-ref <System.Runtime.Intrinsics.Vector512`1[ushort]>
;* V04 loc2 [V04 ] ( 0, 0 ) simd64 -> zero-ref <System.Runtime.Intrinsics.Vector512`1[ushort]>
;* V05 loc3 [V05 ] ( 0, 0 ) simd64 -> zero-ref <System.Runtime.Intrinsics.Vector512`1[ushort]>
;* V06 loc4 [V06 ] ( 0, 0 ) simd64 -> zero-ref <System.Runtime.Intrinsics.Vector512`1[ushort]>
;# V07 OutArgs [V07 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V08 tmp1 [V08 ] ( 0, 0 ) struct (128) zero-ref "dup spill" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector512`1[ushort],System.Runtime.Intrinsics.Vector512`1[ushort]]>
-;* V09 tmp2 [V09 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V10 tmp3 [V10 ] ( 0, 0 ) struct (128) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector512`1[ushort],System.Runtime.Intrinsics.Vector512`1[ushort]]>
-; V11 tmp4 [V11,T04] ( 2, 2 ) simd64 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-; V12 tmp5 [V12,T05] ( 2, 2 ) simd64 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;* V13 tmp6 [V13 ] ( 0, 0 ) simd64 -> zero-ref "field V08.Item1 (fldOffset=0x0)" P-INDEP
-;* V14 tmp7 [V14 ] ( 0, 0 ) simd64 -> zero-ref "field V08.Item2 (fldOffset=0x40)" P-INDEP
-;* V15 tmp8 [V15 ] ( 0, 0 ) simd64 -> zero-ref "field V10.Item1 (fldOffset=0x0)" P-INDEP
-;* V16 tmp9 [V16 ] ( 0, 0 ) simd64 -> zero-ref "field V10.Item2 (fldOffset=0x40)" P-INDEP
-; V17 rat0 [V17,T03] ( 3, 3 ) simd64 -> mm1 "ReplaceWithLclVar is creating a new local variable"
+;* V09 tmp2 [V09 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V10 tmp3 [V10 ] ( 0, 0 ) simd64 -> zero-ref ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V11 tmp4 [V11,T02] ( 0, 0 ) ubyte -> zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+;* V12 tmp5 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V13 tmp6 [V13 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V14 tmp7 [V14 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+;* V15 tmp8 [V15 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+;* V16 tmp9 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V17 tmp10 [V17 ] ( 0, 0 ) struct (128) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector512`1[ushort],System.Runtime.Intrinsics.Vector512`1[ushort]]>
+; V18 tmp11 [V18,T05] ( 2, 2 ) simd64 -> mm1 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+; V19 tmp12 [V19,T06] ( 2, 2 ) simd64 -> mm0 "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V20 tmp13 [V20 ] ( 0, 0 ) simd64 -> zero-ref "field V08.Item1 (fldOffset=0x0)" P-INDEP
+;* V21 tmp14 [V21 ] ( 0, 0 ) simd64 -> zero-ref "field V08.Item2 (fldOffset=0x40)" P-INDEP
+;* V22 tmp15 [V22 ] ( 0, 0 ) simd64 -> zero-ref "field V17.Item1 (fldOffset=0x0)" P-INDEP
+;* V23 tmp16 [V23 ] ( 0, 0 ) simd64 -> zero-ref "field V17.Item2 (fldOffset=0x40)" P-INDEP
+; V24 rat0 [V24,T04] ( 3, 3 ) simd64 -> mm1 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M19370_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M19370_IG02:
vmovups zmm0, zmmword ptr [rdi]
- vpmovb2m k1, zmm0
- kmovq rax, k1
- test rax, rax
+ vptestmb k1, zmm0, zmmword ptr [reloc @RWD00]
+ kortestq k1, k1
je SHORT G_M19370_IG05
- ;; size=22 bbWeight=1 PerfScore 9.25
+ ;; size=23 bbWeight=1 PerfScore 11.00
G_M19370_IG03:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M19370_IG04:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M19370_IG05:
vmovaps zmm1, zmm0
vpmovzxbw zmm1, zmm1
vextracti64x4 ymm0, zmm0, 1
vpmovzxbw zmm0, zmm0
vmovups zmm2, zmmword ptr [rsi]
vpxord zmm0, zmm0, zmmword ptr [rsi+0x40]
vpternlogd zmm1, zmm2, zmm0, -66
vptestmw k1, zmm1, zmm1
kortestd k1, k1
jne SHORT G_M19370_IG03
mov eax, 1
;; size=63 bbWeight=0.50 PerfScore 8.50
G_M19370_IG06:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
+RWD00 dq 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h
-; Total bytes of code 101, prolog size 4, PerfScore 21.62, instruction count 25, allocated bytes for code 107 (MethodHash=9e74b455) for method System.Text.Ascii+WideningLoader:EqualAndAscii512(byref,byref):ubyte (FullOpts)
+
+; Total bytes of code 102, prolog size 4, PerfScore 23.38, instruction count 24, allocated bytes for code 108 (MethodHash=9e74b455) for method System.Text.Ascii+WideningLoader:EqualAndAscii512(byref,byref):ubyte (FullOpts)