Skip to content

Instantly share code, notes, and snippets.

@MihuBot
Created June 5, 2024 09:51
Show Gist options
  • Save MihuBot/f6ef2f25cbdf1a6fa108ff971f43d624 to your computer and use it in GitHub Desktop.
Save MihuBot/f6ef2f25cbdf1a6fa108ff971f43d624 to your computer and use it in GitHub Desktop.

Top method regressions

540 (29.51 % of base) - System.Runtime.Intrinsics.Vector512:Dot[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):ubyte
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Dot[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):ubyte (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; partially interruptible
+; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 38 single block inlinees; 47 inlinees without PGO data
+; 0 inlinees with PGO data; 42 single block inlinees; 24 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;* V00 arg0         [V00    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[ubyte]>
 ;* V01 arg1         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[ubyte]>
-;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T24] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V04 tmp2         [V04,T47] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V05 tmp3         [V05,T48] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V06 tmp4         [V06,T25] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V07 tmp5         [V07,T49] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V08 tmp6         [V08,T50] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V09 tmp7         [V09,T26] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V10 tmp8         [V10,T08] (  8,  8   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
-;* V11 tmp9         [V11,T39] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V12 tmp10        [V12    ] (  9, 18   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V13 tmp11        [V13    ] (  9, 18   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V14 tmp12        [V14    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V15 tmp13        [V15,T00] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V16 tmp14        [V16    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V17 tmp15        [V17    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V18 tmp16        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V19 tmp17        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V20 tmp18        [V20    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V21 tmp19        [V21    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V22 tmp20        [V22,T16] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V23 tmp21        [V23    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V24 tmp22        [V24    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V25 tmp23        [V25    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V26 tmp24        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V27 tmp25        [V27    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;  V02 OutArgs      [V02    ] (  1,  1   )  struct (16) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V03 tmp1         [V03,T20] (  2,  4   )     int  ->  rbx         "impAppendStmt"
+;  V04 tmp2         [V04,T55] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V05 tmp3         [V05,T56] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V06 tmp4         [V06,T21] (  2,  4   )     int  ->  rbx         "impAppendStmt"
+;  V07 tmp5         [V07,T57] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V08 tmp6         [V08,T58] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V09 tmp7         [V09,T22] (  2,  4   )     int  ->  rsi         "impAppendStmt"
+;* V10 tmp8         [V10    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V11 tmp9         [V11    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V12 tmp10        [V12    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V13 tmp11        [V13    ] (  2,  5   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V14 tmp12        [V14,T12] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
+;  V15 tmp13        [V15    ] (  2, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V16 tmp14        [V16    ] (  2, 10   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V17 tmp15        [V17    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V18 tmp16        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V19 tmp17        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V20 tmp18        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V21 tmp19        [V21    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V22 tmp20        [V22,T04] ( 16, 16   )   ubyte  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V23 tmp21        [V23,T47] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V24 tmp22        [V24    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V25 tmp23        [V25    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V26 tmp24        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V27 tmp25        [V27    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V28 tmp26        [V28    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V29 tmp27        [V29,T09] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V30 tmp28        [V30,T40] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V31 tmp29        [V31    ] (  9, 18   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V32 tmp30        [V32    ] (  9, 18   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V33 tmp31        [V33    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V34 tmp32        [V34,T01] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V35 tmp33        [V35    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V36 tmp34        [V36    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V37 tmp35        [V37    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V38 tmp36        [V38    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V39 tmp37        [V39    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V40 tmp38        [V40    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V41 tmp39        [V41,T17] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V42 tmp40        [V42    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V43 tmp41        [V43    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V44 tmp42        [V44    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V45 tmp43        [V45    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V46 tmp44        [V46    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V29 tmp27        [V29    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V30 tmp28        [V30    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V31 tmp29        [V31    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V32 tmp30        [V32    ] (  2,  5   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V33 tmp31        [V33,T13] (  5, 17   )     int  ->  rdi         "Inline stloc first use temp"
+;  V34 tmp32        [V34    ] (  2, 10   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V35 tmp33        [V35    ] (  2, 10   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V36 tmp34        [V36    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V37 tmp35        [V37    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V38 tmp36        [V38    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V39 tmp37        [V39    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V40 tmp38        [V40    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V41 tmp39        [V41,T05] ( 16, 16   )   ubyte  ->  rdi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V42 tmp40        [V42,T48] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V43 tmp41        [V43    ] (  9, 18   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V44 tmp42        [V44    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V45 tmp43        [V45    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V46 tmp44        [V46    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V47 tmp45        [V47    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V48 tmp46        [V48    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V49 tmp47        [V49    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V50 tmp48        [V50    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V51 tmp49        [V51,T51] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V52 tmp50        [V52,T52] (  3,  6   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V53 tmp51        [V53,T27] (  2,  4   )     int  ->  rcx         "impAppendStmt"
-;  V54 tmp52        [V54,T10] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V55 tmp53        [V55,T41] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V56 tmp54        [V56    ] (  9, 18   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V57 tmp55        [V57    ] (  9, 18   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V58 tmp56        [V58    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V59 tmp57        [V59,T02] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V60 tmp58        [V60    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V61 tmp59        [V61    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V62 tmp60        [V62    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V63 tmp61        [V63    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V64 tmp62        [V64    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V65 tmp63        [V65    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V66 tmp64        [V66,T18] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V67 tmp65        [V67    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V68 tmp66        [V68    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V69 tmp67        [V69    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V70 tmp68        [V70    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V71 tmp69        [V71    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;  V51 tmp49        [V51,T59] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V52 tmp50        [V52,T60] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V53 tmp51        [V53,T23] (  2,  4   )     int  ->  r15         "impAppendStmt"
+;* V54 tmp52        [V54    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V55 tmp53        [V55    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V56 tmp54        [V56    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V57 tmp55        [V57    ] (  2,  5   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V58 tmp56        [V58,T14] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
+;  V59 tmp57        [V59    ] (  2, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V60 tmp58        [V60    ] (  2, 10   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V61 tmp59        [V61    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V62 tmp60        [V62    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V63 tmp61        [V63    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V64 tmp62        [V64    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V65 tmp63        [V65    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V66 tmp64        [V66,T06] ( 16, 16   )   ubyte  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V67 tmp65        [V67,T49] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V68 tmp66        [V68    ] (  9, 18   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V69 tmp67        [V69    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V70 tmp68        [V70    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V71 tmp69        [V71    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V72 tmp70        [V72    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V73 tmp71        [V73,T11] (  8,  8   )   ubyte  ->  rdx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V74 tmp72        [V74,T42] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V75 tmp73        [V75    ] (  9, 18   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V76 tmp74        [V76    ] (  9, 18   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V77 tmp75        [V77    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V78 tmp76        [V78,T03] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V79 tmp77        [V79    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V80 tmp78        [V80    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V81 tmp79        [V81    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V82 tmp80        [V82    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V83 tmp81        [V83    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V84 tmp82        [V84    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V85 tmp83        [V85,T19] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V86 tmp84        [V86    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V87 tmp85        [V87    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V88 tmp86        [V88    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V89 tmp87        [V89    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V90 tmp88        [V90    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V91 tmp89        [V91    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V92 tmp90        [V92    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V93 tmp91        [V93    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V94 tmp92        [V94    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V95 tmp93        [V95    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V96 tmp94        [V96    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V97 tmp95        [V97,T53] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V98 tmp96        [V98,T54] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V99 tmp97        [V99,T28] (  2,  4   )     int  ->  rcx         "impAppendStmt"
-;  V100 tmp98       [V100,T55] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V101 tmp99       [V101,T56] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V102 tmp100      [V102,T29] (  2,  4   )     int  ->  rcx         "impAppendStmt"
-;  V103 tmp101      [V103,T12] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V104 tmp102      [V104,T43] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V105 tmp103      [V105    ] (  9, 18   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V106 tmp104      [V106    ] (  9, 18   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V73 tmp71        [V73    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V74 tmp72        [V74    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V75 tmp73        [V75    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V76 tmp74        [V76    ] (  2,  5   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V77 tmp75        [V77,T15] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
+;  V78 tmp76        [V78    ] (  2, 10   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V79 tmp77        [V79    ] (  2, 10   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V80 tmp78        [V80    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V81 tmp79        [V81    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V82 tmp80        [V82    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V83 tmp81        [V83    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V84 tmp82        [V84    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V85 tmp83        [V85,T07] ( 16, 16   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
+;* V86 tmp84        [V86,T50] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V87 tmp85        [V87    ] (  9, 18   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V88 tmp86        [V88    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;  V89 tmp87        [V89,T61] (  3,  6   )  simd32  ->  [rbp-0x110]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V90 tmp88        [V90,T62] (  3,  6   )  simd32  ->  [rbp-0x130]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V91 tmp89        [V91,T24] (  2,  4   )     int  ->  r15         "impAppendStmt"
+;  V92 tmp90        [V92,T63] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V93 tmp91        [V93,T64] (  3,  6   )  simd16  ->  [rbp-0x150]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V94 tmp92        [V94,T25] (  2,  4   )     int  ->  r15         "impAppendStmt"
+;* V95 tmp93        [V95    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V96 tmp94        [V96    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V97 tmp95        [V97    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V98 tmp96        [V98    ] (  2,  5   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V99 tmp97        [V99,T01] (  7, 25   )     int  ->  r15         "Inline stloc first use temp"
+;  V100 tmp98       [V100    ] (  2, 10   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V101 tmp99       [V101    ] (  2, 10   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V102 tmp100      [V102    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V103 tmp101      [V103    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V104 tmp102      [V104,T08] ( 16, 16   )   ubyte  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V105 tmp103      [V105,T51] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V106 tmp104      [V106    ] (  9, 18   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V107 tmp105      [V107    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V108 tmp106      [V108,T04] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V109 tmp107      [V109    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V110 tmp108      [V110    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V111 tmp109      [V111    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V112 tmp110      [V112    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V113 tmp111      [V113    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V114 tmp112      [V114    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V115 tmp113      [V115,T20] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V116 tmp114      [V116    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V117 tmp115      [V117    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V118 tmp116      [V118    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V119 tmp117      [V119    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V120 tmp118      [V120    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V121 tmp119      [V121    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V122 tmp120      [V122,T13] (  8,  8   )   ubyte  ->  rdx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V123 tmp121      [V123,T44] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V124 tmp122      [V124    ] (  9, 18   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V125 tmp123      [V125    ] (  9, 18   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V126 tmp124      [V126    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V127 tmp125      [V127,T05] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V128 tmp126      [V128    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V129 tmp127      [V129    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V130 tmp128      [V130    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V131 tmp129      [V131    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V132 tmp130      [V132    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V133 tmp131      [V133    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V134 tmp132      [V134,T21] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V135 tmp133      [V135    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V136 tmp134      [V136    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V137 tmp135      [V137    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V138 tmp136      [V138    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V139 tmp137      [V139    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V140 tmp138      [V140    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V141 tmp139      [V141    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V142 tmp140      [V142    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V143 tmp141      [V143    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V144 tmp142      [V144,T57] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V145 tmp143      [V145,T58] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V146 tmp144      [V146,T30] (  2,  4   )     int  ->  rdx         "impAppendStmt"
-;  V147 tmp145      [V147,T14] (  8,  8   )   ubyte  ->  rdx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V148 tmp146      [V148,T45] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V149 tmp147      [V149    ] (  9, 18   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V150 tmp148      [V150    ] (  9, 18   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V151 tmp149      [V151    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V152 tmp150      [V152,T06] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V153 tmp151      [V153    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V154 tmp152      [V154    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V155 tmp153      [V155    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V156 tmp154      [V156    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V157 tmp155      [V157    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V158 tmp156      [V158    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V159 tmp157      [V159,T22] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V160 tmp158      [V160    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V161 tmp159      [V161    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V162 tmp160      [V162    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V163 tmp161      [V163    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V164 tmp162      [V164    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V165 tmp163      [V165    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V166 tmp164      [V166,T15] (  8,  8   )   ubyte  ->  rdi         ld-addr-op "Inline ldloca(s) first use temp"
-;* V167 tmp165      [V167,T46] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V168 tmp166      [V168    ] (  9, 18   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V169 tmp167      [V169    ] (  9, 18   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V170 tmp168      [V170    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V171 tmp169      [V171,T07] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V172 tmp170      [V172    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V173 tmp171      [V173    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V174 tmp172      [V174    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V175 tmp173      [V175    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V176 tmp174      [V176    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V177 tmp175      [V177    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V178 tmp176      [V178,T23] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V179 tmp177      [V179    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V180 tmp178      [V180    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V181 tmp179      [V181    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V182 tmp180      [V182    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V183 tmp181      [V183    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V184 tmp182      [V184    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V185 tmp183      [V185    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V186 tmp184      [V186    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V187 tmp185      [V187    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V188 tmp186      [V188    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V189 tmp187      [V189    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V190 tmp188      [V190    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V191 tmp189      [V191    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V192 tmp190      [V192,T59] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V00._lower (fldOffset=0x0)" P-INDEP
-;  V193 tmp191      [V193,T60] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V00._upper (fldOffset=0x20)" P-INDEP
-;  V194 tmp192      [V194,T61] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V195 tmp193      [V195,T62] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V196 tmp194      [V196    ] (  9, 17   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V197 tmp195      [V197    ] (  9, 17   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-;  V198 tmp196      [V198    ] (  9, 17   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V199 tmp197      [V199    ] (  9, 17   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V32._00 (fldOffset=0x0)" P-DEP
-;  V200 tmp198      [V200    ] (  9, 17   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V56._00 (fldOffset=0x0)" P-DEP
-;  V201 tmp199      [V201    ] (  9, 17   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
-;  V202 tmp200      [V202    ] (  9, 17   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V75._00 (fldOffset=0x0)" P-DEP
-;  V203 tmp201      [V203    ] (  9, 17   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V76._00 (fldOffset=0x0)" P-DEP
-;  V204 tmp202      [V204    ] (  9, 17   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V105._00 (fldOffset=0x0)" P-DEP
-;  V205 tmp203      [V205    ] (  9, 17   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V106._00 (fldOffset=0x0)" P-DEP
-;  V206 tmp204      [V206    ] (  9, 17   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V124._00 (fldOffset=0x0)" P-DEP
-;  V207 tmp205      [V207    ] (  9, 17   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V125._00 (fldOffset=0x0)" P-DEP
-;  V208 tmp206      [V208    ] (  9, 17   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V149._00 (fldOffset=0x0)" P-DEP
-;  V209 tmp207      [V209    ] (  9, 17   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V150._00 (fldOffset=0x0)" P-DEP
-;  V210 tmp208      [V210    ] (  9, 17   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V168._00 (fldOffset=0x0)" P-DEP
-;  V211 tmp209      [V211    ] (  9, 17   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V169._00 (fldOffset=0x0)" P-DEP
-;  V212 cse0        [V212,T31] (  2,  2   )     int  ->  rcx         "CSE #01: conservative"
-;  V213 cse1        [V213,T32] (  2,  2   )     int  ->  rdx         "CSE #02: conservative"
-;  V214 cse2        [V214,T33] (  2,  2   )     int  ->  rdx         "CSE #03: conservative"
-;  V215 cse3        [V215,T34] (  2,  2   )     int  ->  rdi         "CSE #04: conservative"
-;  V216 cse4        [V216,T35] (  2,  2   )     int  ->  rdx         "CSE #05: conservative"
-;  V217 cse5        [V217,T36] (  2,  2   )     int  ->  rdi         "CSE #06: conservative"
-;  V218 cse6        [V218,T37] (  2,  2   )     int  ->  rdi         "CSE #07: conservative"
-;  V219 cse7        [V219,T38] (  2,  2   )     int  ->  rsi         "CSE #08: conservative"
+;* V108 tmp106      [V108    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V109 tmp107      [V109    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V110 tmp108      [V110    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V111 tmp109      [V111    ] (  2,  5   )  struct ( 8) [rbp-0x178]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V112 tmp110      [V112,T02] (  7, 25   )     int  ->  r14         "Inline stloc first use temp"
+;  V113 tmp111      [V113    ] (  2, 10   )  struct ( 8) [rbp-0x180]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V114 tmp112      [V114    ] (  2, 10   )  struct ( 8) [rbp-0x188]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V115 tmp113      [V115    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V116 tmp114      [V116    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V117 tmp115      [V117,T09] ( 16, 16   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
+;* V118 tmp116      [V118,T52] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V119 tmp117      [V119    ] (  9, 18   )  struct ( 8) [rbp-0x190]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V120 tmp118      [V120    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;  V121 tmp119      [V121,T65] (  3,  6   )  simd16  ->  [rbp-0x1A0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V122 tmp120      [V122,T66] (  3,  6   )  simd16  ->  [rbp-0x1B0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V123 tmp121      [V123,T26] (  2,  4   )     int  ->  r14         "impAppendStmt"
+;* V124 tmp122      [V124    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V125 tmp123      [V125    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V126 tmp124      [V126    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V127 tmp125      [V127    ] (  2,  5   )  struct ( 8) [rbp-0x1B8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V128 tmp126      [V128,T03] (  7, 25   )     int  ->  r14         "Inline stloc first use temp"
+;  V129 tmp127      [V129    ] (  2, 10   )  struct ( 8) [rbp-0x1C0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V130 tmp128      [V130    ] (  2, 10   )  struct ( 8) [rbp-0x1C8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V131 tmp129      [V131    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V132 tmp130      [V132    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V133 tmp131      [V133,T10] ( 16, 16   )   ubyte  ->  r14         ld-addr-op "Inline ldloca(s) first use temp"
+;* V134 tmp132      [V134,T53] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V135 tmp133      [V135    ] (  9, 18   )  struct ( 8) [rbp-0x1D0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V136 tmp134      [V136    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V137 tmp135      [V137    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V138 tmp136      [V138    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V139 tmp137      [V139    ] (  2,  4   )  struct ( 8) [rbp-0x1D8]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V140 tmp138      [V140,T11] ( 16, 16   )   ubyte  ->  r13         ld-addr-op "Inline ldloca(s) first use temp"
+;* V141 tmp139      [V141,T54] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V142 tmp140      [V142    ] (  9, 18   )  struct ( 8) [rbp-0x1E0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V143 tmp141      [V143    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V144 tmp142      [V144    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V145 tmp143      [V145    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V146 tmp144      [V146    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V147 tmp145      [V147,T67] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V00._lower (fldOffset=0x0)" P-INDEP
+;  V148 tmp146      [V148,T68] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V00._upper (fldOffset=0x20)" P-INDEP
+;  V149 tmp147      [V149,T69] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V150 tmp148      [V150,T70] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V151 tmp149      [V151,T30] (  2,  2   )    long  ->  rsi         "field V10._00 (fldOffset=0x0)" P-INDEP
+;  V152 tmp150      [V152,T31] (  2,  2   )    long  ->  rdi         "field V11._00 (fldOffset=0x0)" P-INDEP
+;* V153 tmp151      [V153    ] (  0,  0   )    long  ->  zero-ref    "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V154 tmp152      [V154    ] (  2,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V155 tmp153      [V155    ] (  2,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V156 tmp154      [V156    ] (  2,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V157 tmp155      [V157    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
+;  V158 tmp156      [V158,T32] (  2,  2   )    long  ->  rdi         "field V29._00 (fldOffset=0x0)" P-INDEP
+;  V159 tmp157      [V159,T33] (  2,  2   )    long  ->  rax         "field V30._00 (fldOffset=0x0)" P-INDEP
+;* V160 tmp158      [V160    ] (  0,  0   )    long  ->  zero-ref    "field V31._00 (fldOffset=0x0)" P-INDEP
+;  V161 tmp159      [V161    ] (  2,  5   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V32._00 (fldOffset=0x0)" P-DEP
+;  V162 tmp160      [V162    ] (  2,  9   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
+;  V163 tmp161      [V163    ] (  2,  9   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V164 tmp162      [V164    ] (  9, 17   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+;  V165 tmp163      [V165,T34] (  2,  2   )    long  ->  rsi         "field V54._00 (fldOffset=0x0)" P-INDEP
+;  V166 tmp164      [V166,T35] (  2,  2   )    long  ->  rdi         "field V55._00 (fldOffset=0x0)" P-INDEP
+;* V167 tmp165      [V167    ] (  0,  0   )    long  ->  zero-ref    "field V56._00 (fldOffset=0x0)" P-INDEP
+;  V168 tmp166      [V168    ] (  2,  5   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+;  V169 tmp167      [V169    ] (  2,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V59._00 (fldOffset=0x0)" P-DEP
+;  V170 tmp168      [V170    ] (  2,  9   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V60._00 (fldOffset=0x0)" P-DEP
+;  V171 tmp169      [V171    ] (  9, 17   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
+;  V172 tmp170      [V172,T36] (  2,  2   )    long  ->  rsi         "field V73._00 (fldOffset=0x0)" P-INDEP
+;  V173 tmp171      [V173,T37] (  2,  2   )    long  ->  rdi         "field V74._00 (fldOffset=0x0)" P-INDEP
+;* V174 tmp172      [V174    ] (  0,  0   )    long  ->  zero-ref    "field V75._00 (fldOffset=0x0)" P-INDEP
+;  V175 tmp173      [V175    ] (  2,  5   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V76._00 (fldOffset=0x0)" P-DEP
+;  V176 tmp174      [V176    ] (  2,  9   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V78._00 (fldOffset=0x0)" P-DEP
+;  V177 tmp175      [V177    ] (  2,  9   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V79._00 (fldOffset=0x0)" P-DEP
+;  V178 tmp176      [V178    ] (  9, 17   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V87._00 (fldOffset=0x0)" P-DEP
+;  V179 tmp177      [V179,T38] (  2,  2   )    long  ->  rdi         "field V95._00 (fldOffset=0x0)" P-INDEP
+;  V180 tmp178      [V180,T39] (  2,  2   )    long  ->  rsi         "field V96._00 (fldOffset=0x0)" P-INDEP
+;* V181 tmp179      [V181    ] (  0,  0   )    long  ->  zero-ref    "field V97._00 (fldOffset=0x0)" P-INDEP
+;  V182 tmp180      [V182    ] (  2,  5   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V98._00 (fldOffset=0x0)" P-DEP
+;  V183 tmp181      [V183    ] (  2,  9   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V100._00 (fldOffset=0x0)" P-DEP
+;  V184 tmp182      [V184    ] (  2,  9   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V101._00 (fldOffset=0x0)" P-DEP
+;  V185 tmp183      [V185    ] (  9, 17   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V106._00 (fldOffset=0x0)" P-DEP
+;  V186 tmp184      [V186,T40] (  2,  2   )    long  ->  rdi         "field V108._00 (fldOffset=0x0)" P-INDEP
+;  V187 tmp185      [V187,T41] (  2,  2   )    long  ->  rsi         "field V109._00 (fldOffset=0x0)" P-INDEP
+;* V188 tmp186      [V188    ] (  0,  0   )    long  ->  zero-ref    "field V110._00 (fldOffset=0x0)" P-INDEP
+;  V189 tmp187      [V189    ] (  2,  5   )    long  ->  [rbp-0x178]  do-not-enreg[X] addr-exposed "field V111._00 (fldOffset=0x0)" P-DEP
+;  V190 tmp188      [V190    ] (  2,  9   )    long  ->  [rbp-0x180]  do-not-enreg[X] addr-exposed "field V113._00 (fldOffset=0x0)" P-DEP
+;  V191 tmp189      [V191    ] (  2,  9   )    long  ->  [rbp-0x188]  do-not-enreg[X] addr-exposed "field V114._00 (fldOffset=0x0)" P-DEP
+;  V192 tmp190      [V192    ] (  9, 17   )    long  ->  [rbp-0x190]  do-not-enreg[X] addr-exposed "field V119._00 (fldOffset=0x0)" P-DEP
+;  V193 tmp191      [V193,T42] (  2,  2   )    long  ->  rdi         "field V124._00 (fldOffset=0x0)" P-INDEP
+;  V194 tmp192      [V194,T43] (  2,  2   )    long  ->  rsi         "field V125._00 (fldOffset=0x0)" P-INDEP
+;* V195 tmp193      [V195    ] (  0,  0   )    long  ->  zero-ref    "field V126._00 (fldOffset=0x0)" P-INDEP
+;  V196 tmp194      [V196    ] (  2,  5   )    long  ->  [rbp-0x1B8]  do-not-enreg[X] addr-exposed "field V127._00 (fldOffset=0x0)" P-DEP
+;  V197 tmp195      [V197    ] (  2,  9   )    long  ->  [rbp-0x1C0]  do-not-enreg[X] addr-exposed "field V129._00 (fldOffset=0x0)" P-DEP
+;  V198 tmp196      [V198    ] (  2,  9   )    long  ->  [rbp-0x1C8]  do-not-enreg[X] addr-exposed "field V130._00 (fldOffset=0x0)" P-DEP
+;  V199 tmp197      [V199    ] (  9, 17   )    long  ->  [rbp-0x1D0]  do-not-enreg[X] addr-exposed "field V135._00 (fldOffset=0x0)" P-DEP
+;* V200 tmp198      [V200    ] (  0,  0   )    long  ->  zero-ref    "field V137._00 (fldOffset=0x0)" P-INDEP
+;* V201 tmp199      [V201    ] (  0,  0   )    long  ->  zero-ref    "field V138._00 (fldOffset=0x0)" P-INDEP
+;  V202 tmp200      [V202,T29] (  2,  3   )    long  ->  [rbp-0x1D8]  do-not-enreg[H] hidden-struct-arg "field V139._00 (fldOffset=0x0)" P-DEP
+;  V203 tmp201      [V203    ] (  9, 17   )    long  ->  [rbp-0x1E0]  do-not-enreg[X] addr-exposed "field V142._00 (fldOffset=0x0)" P-DEP
+;  V204 tmp202      [V204,T27] (  2,  4   )     int  ->  rsi         "argument with side effect"
+;  V205 tmp203      [V205,T00] ( 16, 32   )     int  ->  rax         "argument with side effect"
+;  V206 tmp204      [V206,T28] (  2,  4   )     int  ->  rsi         "argument with side effect"
+;  V207 cse0        [V207,T44] (  2,  2   )     int  ->  rsi         "CSE #02: conservative"
+;  V208 cse1        [V208,T45] (  2,  2   )     int  ->  rdi         "CSE #04: conservative"
+;  V209 cse2        [V209,T46] (  2,  2   )     int  ->  rsi         "CSE #06: conservative"
+;  V210 cse3        [V210,T16] (  4, 16   )    long  ->  rax         "CSE #01: moderate"
+;  V211 cse4        [V211,T17] (  4, 16   )    long  ->  rcx         "CSE #03: moderate"
+;  V212 cse5        [V212,T18] (  4, 16   )    long  ->  rax         "CSE #05: moderate"
+;  V213 cse6        [V213,T19] (  4, 16   )    long  ->  rax         "CSE #07: moderate"
 ;
-; Lcl frame size = 256
+; Lcl frame size = 464
 
 G_M29434_IG01:
        push     rbp
-       sub      rsp, 256
-       lea      rbp, [rsp+0x100]
-						;; size=16 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     r14
+       push     r13
+       push     rbx
+       sub      rsp, 464
+       lea      rbp, [rsp+0x1F0]
+						;; size=23 bbWeight=1 PerfScore 5.75
 G_M29434_IG02:
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x50]
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x28], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x30], rax
-       movzx    rax, byte  ptr [rbp-0x28]
-       movzx    rcx, byte  ptr [rbp-0x30]
-       imul     eax, ecx
-       movzx    rax, al
-       movzx    rcx, byte  ptr [rbp-0x27]
-       movzx    rdx, byte  ptr [rbp-0x2F]
-       imul     ecx, edx
-       movzx    rcx, cl
-       add      eax, ecx
-       movzx    rcx, al
-       movzx    rax, cl
-       movzx    rcx, byte  ptr [rbp-0x26]
-       movzx    rdx, byte  ptr [rbp-0x2E]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x25]
-       movzx    rdi, byte  ptr [rbp-0x2D]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      eax, ecx
-       movzx    rax, al
-       movzx    rdx, byte  ptr [rbp-0x24]
-       movzx    rcx, byte  ptr [rbp-0x2C]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x23]
-       movzx    rdi, byte  ptr [rbp-0x2B]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       movzx    rdx, byte  ptr [rbp-0x22]
-       movzx    rcx, byte  ptr [rbp-0x2A]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x21]
-       movzx    rdi, byte  ptr [rbp-0x29]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x38], rcx
-       mov      rcx, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x40], rcx
-       movzx    rcx, byte  ptr [rbp-0x38]
-       movzx    rdx, byte  ptr [rbp-0x40]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x37]
-       movzx    rdi, byte  ptr [rbp-0x3F]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0x36]
-       movzx    rdi, byte  ptr [rbp-0x3E]
-						;; size=250 bbWeight=1 PerfScore 66.75
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rsi, qword ptr [rbp-0x30]
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rsi
+       mov      qword ptr [rbp-0x58], rdi
+       xor      esi, esi
+       align    [0 bytes for IG03]
+						;; size=46 bbWeight=1 PerfScore 14.75
 G_M29434_IG03:
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x35]
-       movzx    rsi, byte  ptr [rbp-0x3D]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0x34]
-       movzx    rdx, byte  ptr [rbp-0x3C]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x33]
-       movzx    rsi, byte  ptr [rbp-0x3B]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       movzx    rdi, byte  ptr [rbp-0x32]
-       movzx    rdx, byte  ptr [rbp-0x3A]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x31]
-       movzx    rsi, byte  ptr [rbp-0x39]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       add      eax, ecx
-       movzx    rax, al
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x60], xmm0
-       mov      rcx, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x68], rcx
-       mov      rcx, qword ptr [rbp-0x60]
-       mov      qword ptr [rbp-0x70], rcx
-       movzx    rcx, byte  ptr [rbp-0x68]
-       movzx    rdx, byte  ptr [rbp-0x70]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x67]
-       movzx    rdi, byte  ptr [rbp-0x6F]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0x66]
-       movzx    rdi, byte  ptr [rbp-0x6E]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x65]
-       movzx    rsi, byte  ptr [rbp-0x6D]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0x64]
-       movzx    rdx, byte  ptr [rbp-0x6C]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x63]
-       movzx    rsi, byte  ptr [rbp-0x6B]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-						;; size=266 bbWeight=1 PerfScore 64.00
+       lea      rdi, [rbp-0x50]
+       movsxd   rax, esi
+       movzx    rdi, byte  ptr [rdi+rax]
+       lea      rcx, [rbp-0x58]
+       movzx    rcx, byte  ptr [rcx+rax]
+       imul     edi, ecx
+       lea      rcx, [rbp-0x48]
+       mov      byte  ptr [rcx+rax], dil
+       inc      esi
+       cmp      esi, 8
+       jl       SHORT G_M29434_IG03
+						;; size=38 bbWeight=4 PerfScore 41.00
 G_M29434_IG04:
-       movzx    rdi, byte  ptr [rbp-0x62]
-       movzx    rdx, byte  ptr [rbp-0x6A]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x61]
-       movzx    rsi, byte  ptr [rbp-0x69]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       mov      rdx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x78], rdx
-       mov      rdx, qword ptr [rbp-0x58]
-       mov      qword ptr [rbp-0x80], rdx
-       movzx    rdx, byte  ptr [rbp-0x78]
-       movzx    rdi, byte  ptr [rbp-0x80]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x77]
-       movzx    rsi, byte  ptr [rbp-0x7F]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       movzx    rdi, dl
-       movzx    rdx, dil
-       movzx    rdi, byte  ptr [rbp-0x76]
-       movzx    rsi, byte  ptr [rbp-0x7E]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x75]
-       movzx    r8, byte  ptr [rbp-0x7D]
-       imul     esi, r8d
+       mov      rsi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x60], rsi
+       movzx    rsi, byte  ptr [rbp-0x60]
+       movzx    rdi, byte  ptr [rbp-0x5F]
+       add      esi, edi
        movzx    rsi, sil
+       movzx    rdi, byte  ptr [rbp-0x5E]
        add      edi, esi
-       add      edx, edi
-       movzx    rdx, dl
-       movzx    rsi, byte  ptr [rbp-0x74]
-       movzx    rdi, byte  ptr [rbp-0x7C]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x73]
-       movzx    r8, byte  ptr [rbp-0x7B]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0x5D]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       movzx    rsi, byte  ptr [rbp-0x72]
-       movzx    rdi, byte  ptr [rbp-0x7A]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x71]
-       movzx    r8, byte  ptr [rbp-0x79]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0x5C]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       add      ecx, edx
-       add      eax, ecx
-       movzx    rax, al
-       vmovups  ymm0, ymmword ptr [rbp+0x30]
-       vmovups  ymm1, ymmword ptr [rbp+0x70]
-       vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x90], xmm2
-       vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0xA0], xmm2
-       mov      rcx, qword ptr [rbp-0x90]
-       mov      qword ptr [rbp-0xA8], rcx
-       mov      rcx, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xB0], rcx
-						;; size=294 bbWeight=1 PerfScore 65.50
-G_M29434_IG05:
-       movzx    rcx, byte  ptr [rbp-0xA8]
-       movzx    rdx, byte  ptr [rbp-0xB0]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0xA7]
-       movzx    rdi, byte  ptr [rbp-0xAF]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0xA6]
-       movzx    rdi, byte  ptr [rbp-0xAE]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0xA5]
-       movzx    rsi, byte  ptr [rbp-0xAD]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0xA4]
-       movzx    rdx, byte  ptr [rbp-0xAC]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0xA3]
-       movzx    rsi, byte  ptr [rbp-0xAB]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       movzx    rdi, byte  ptr [rbp-0xA2]
-       movzx    rdx, byte  ptr [rbp-0xAA]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0xA1]
-       movzx    rsi, byte  ptr [rbp-0xA9]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       mov      rdx, qword ptr [rbp-0x88]
-       mov      qword ptr [rbp-0xB8], rdx
-       mov      rdx, qword ptr [rbp-0x98]
-       mov      qword ptr [rbp-0xC0], rdx
-       movzx    rdx, byte  ptr [rbp-0xB8]
-       movzx    rdi, byte  ptr [rbp-0xC0]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0xB7]
-       movzx    rsi, byte  ptr [rbp-0xBF]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       movzx    rdi, dl
-       movzx    rdx, dil
-       movzx    rdi, byte  ptr [rbp-0xB6]
-       movzx    rsi, byte  ptr [rbp-0xBE]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xB5]
-       movzx    r8, byte  ptr [rbp-0xBD]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0x5B]
        add      edi, esi
-       add      edx, edi
-       movzx    rdx, dl
-       movzx    rsi, byte  ptr [rbp-0xB4]
-       movzx    rdi, byte  ptr [rbp-0xBC]
-       imul     edi, esi
-						;; size=357 bbWeight=1 PerfScore 63.50
-G_M29434_IG06:
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xB3]
-       movzx    r8, byte  ptr [rbp-0xBB]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0x5A]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       movzx    rsi, byte  ptr [rbp-0xB2]
-       movzx    rdi, byte  ptr [rbp-0xBA]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xB1]
-       movzx    r8, byte  ptr [rbp-0xB9]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0x59]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       add      ecx, edx
-       movzx    rcx, cl
+       movzx    rsi, dil
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      rax, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rdi
+       mov      qword ptr [rbp-0x78], rax
+       xor      edi, edi
+       align    [0 bytes for IG05]
+						;; size=108 bbWeight=1 PerfScore 17.75
+G_M29434_IG05:
+       lea      rax, [rbp-0x70]
+       movsxd   rcx, edi
+       movzx    rax, byte  ptr [rax+rcx]
+       lea      rdx, [rbp-0x78]
+       movzx    rdx, byte  ptr [rdx+rcx]
+       imul     eax, edx
+       lea      rdx, [rbp-0x68]
+       mov      byte  ptr [rdx+rcx], al
+       inc      edi
+       cmp      edi, 8
+       jl       SHORT G_M29434_IG05
+						;; size=36 bbWeight=4 PerfScore 41.00
+G_M29434_IG06:
+       mov      rdi, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0x80], rdi
+       movzx    rdi, byte  ptr [rbp-0x80]
+       movzx    rax, byte  ptr [rbp-0x7F]
+       add      edi, eax
+       movzx    rdi, dil
+       movzx    rax, byte  ptr [rbp-0x7E]
+       add      eax, edi
+       movzx    rdi, al
+       movzx    rax, byte  ptr [rbp-0x7D]
+       add      eax, edi
+       movzx    rdi, al
+       movzx    rax, byte  ptr [rbp-0x7C]
+       add      eax, edi
+       movzx    rdi, al
+       movzx    rax, byte  ptr [rbp-0x7B]
+       add      eax, edi
+       movzx    rdi, al
+       movzx    rax, byte  ptr [rbp-0x7A]
+       add      eax, edi
+       movzx    rdi, al
+       movzx    rax, byte  ptr [rbp-0x79]
+       add      eax, edi
+       movzx    rdi, al
+       add      esi, edi
+       movzx    rbx, sil
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0xD0], xmm0
+       vmovaps  xmmword ptr [rbp-0x90], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0xE0], xmm0
-       mov      rdx, qword ptr [rbp-0xD0]
-       mov      qword ptr [rbp-0xE8], rdx
-       mov      rdx, qword ptr [rbp-0xE0]
-       mov      qword ptr [rbp-0xF0], rdx
-       movzx    rdx, byte  ptr [rbp-0xE8]
-       movzx    rdi, byte  ptr [rbp-0xF0]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0xE7]
-       movzx    rsi, byte  ptr [rbp-0xEF]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       movzx    rdi, dl
-       movzx    rdx, dil
-       movzx    rdi, byte  ptr [rbp-0xE6]
-       movzx    rsi, byte  ptr [rbp-0xEE]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xE5]
-       movzx    r8, byte  ptr [rbp-0xED]
-       imul     esi, r8d
+       vmovaps  xmmword ptr [rbp-0xA0], xmm0
+       mov      rsi, qword ptr [rbp-0x90]
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB0], rsi
+       mov      qword ptr [rbp-0xB8], rdi
+       xor      esi, esi
+       align    [0 bytes for IG07]
+						;; size=147 bbWeight=1 PerfScore 24.25
+G_M29434_IG07:
+       lea      rdi, [rbp-0xB0]
+       movsxd   rax, esi
+       movzx    rdi, byte  ptr [rdi+rax]
+       lea      rcx, [rbp-0xB8]
+       movzx    rcx, byte  ptr [rcx+rax]
+       imul     edi, ecx
+       lea      rcx, [rbp-0xA8]
+       mov      byte  ptr [rcx+rax], dil
+       inc      esi
+       cmp      esi, 8
+       jl       SHORT G_M29434_IG07
+						;; size=47 bbWeight=4 PerfScore 41.00
+G_M29434_IG08:
+       mov      rsi, qword ptr [rbp-0xA8]
+       mov      qword ptr [rbp-0xC0], rsi
+       movzx    rsi, byte  ptr [rbp-0xC0]
+       movzx    rdi, byte  ptr [rbp-0xBF]
+       add      esi, edi
        movzx    rsi, sil
+       movzx    rdi, byte  ptr [rbp-0xBE]
        add      edi, esi
-       add      edx, edi
-       movzx    rdx, dl
-       movzx    rsi, byte  ptr [rbp-0xE4]
-       movzx    rdi, byte  ptr [rbp-0xEC]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xE3]
-       movzx    r8, byte  ptr [rbp-0xEB]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0xBD]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       movzx    rsi, byte  ptr [rbp-0xE2]
-       movzx    rdi, byte  ptr [rbp-0xEA]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xE1]
-       movzx    r8, byte  ptr [rbp-0xE9]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0xBC]
        add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       mov      rdi, qword ptr [rbp-0xC8]
-						;; size=377 bbWeight=1 PerfScore 63.00
-G_M29434_IG07:
-       mov      qword ptr [rbp-0xF8], rdi
-       mov      rdi, qword ptr [rbp-0xD8]
-       mov      qword ptr [rbp-0x100], rdi
-       movzx    rdi, byte  ptr [rbp-0xF8]
-       movzx    rsi, byte  ptr [rbp-0x100]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0xF7]
-       movzx    r8, byte  ptr [rbp-0xFF]
-       imul     esi, r8d
-       movzx    rsi, sil
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0xBB]
        add      edi, esi
        movzx    rsi, dil
-       movzx    rdi, sil
-       movzx    rsi, byte  ptr [rbp-0xF6]
-       movzx    r8, byte  ptr [rbp-0xFE]
-       imul     esi, r8d
-       movzx    rsi, sil
-       movzx    r8, byte  ptr [rbp-0xF5]
-       movzx    r9, byte  ptr [rbp-0xFD]
-       imul     r8d, r9d
-       movzx    r8, r8b
-       add      esi, r8d
+       movzx    rdi, byte  ptr [rbp-0xBA]
        add      edi, esi
-       movzx    rdi, dil
-       movzx    r8, byte  ptr [rbp-0xF4]
-       movzx    rsi, byte  ptr [rbp-0xFC]
-       imul     esi, r8d
-       movzx    rsi, sil
-       movzx    r8, byte  ptr [rbp-0xF3]
-       movzx    r9, byte  ptr [rbp-0xFB]
-       imul     r8d, r9d
-       movzx    r8, r8b
-       add      esi, r8d
-       add      esi, edi
-       movzx    rdi, sil
-       movzx    r8, byte  ptr [rbp-0xF2]
-       movzx    rsi, byte  ptr [rbp-0xFA]
-       imul     esi, r8d
-       movzx    rsi, sil
-       movzx    r8, byte  ptr [rbp-0xF1]
-       movzx    r9, byte  ptr [rbp-0xF9]
-       imul     r8d, r9d
-       movzx    r8, r8b
-       add      esi, r8d
-       add      esi, edi
-       movzx    rdi, sil
-       add      edx, edi
-       add      ecx, edx
-       add      eax, ecx
+       movzx    rsi, dil
+       movzx    rdi, byte  ptr [rbp-0xB9]
+       add      edi, esi
+       movzx    rsi, dil
+       mov      r15d, esi
+       mov      rsi, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD0], rsi
+       mov      qword ptr [rbp-0xD8], rdi
+       xor      esi, esi
+       align    [0 bytes for IG09]
+						;; size=153 bbWeight=1 PerfScore 18.00
+G_M29434_IG09:
+       lea      rdi, [rbp-0xD0]
+       movsxd   rax, esi
+       movzx    rdi, byte  ptr [rdi+rax]
+       lea      rcx, [rbp-0xD8]
+       movzx    rcx, byte  ptr [rcx+rax]
+       imul     edi, ecx
+       lea      rcx, [rbp-0xC8]
+       mov      byte  ptr [rcx+rax], dil
+       inc      esi
+       cmp      esi, 8
+       jl       SHORT G_M29434_IG09
+						;; size=47 bbWeight=4 PerfScore 41.00
+G_M29434_IG10:
+       mov      rsi, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0xE0], rsi
+       movzx    rsi, byte  ptr [rbp-0xE0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDF]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDE]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDD]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDC]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDB]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDA]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xD9]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rdi, r15b
+       mov      esi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      esi, eax
+       movzx    rdi, bl
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       vmovups  ymm0, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x110], ymm0
+       vmovups  ymm1, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x130], ymm1
+       vmovaps  ymm2, ymm0
+       vmovaps  xmmword ptr [rbp-0x140], xmm2
+       vmovaps  ymm2, ymm1
+       vmovaps  xmmword ptr [rbp-0x150], xmm2
+       mov      rdi, qword ptr [rbp-0x140]
+       mov      rsi, qword ptr [rbp-0x150]
+       mov      qword ptr [rbp-0x160], rdi
+       mov      qword ptr [rbp-0x168], rsi
+       xor      r15d, r15d
+						;; size=309 bbWeight=1 PerfScore 62.50
+G_M29434_IG11:
+       lea      rdi, [rbp-0x160]
+       movsxd   rsi, r15d
+       movzx    rdi, byte  ptr [rdi+rsi]
+       lea      rsi, [rbp-0x168]
+       movsxd   rax, r15d
+       movzx    rsi, byte  ptr [rsi+rax]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0x158]
+       movsxd   rdi, r15d
+       mov      byte  ptr [rsi+rdi], al
+       inc      r15d
+       cmp      r15d, 8
+       jl       SHORT G_M29434_IG11
+						;; size=64 bbWeight=4 PerfScore 48.00
+G_M29434_IG12:
+       mov      rsi, qword ptr [rbp-0x158]
+       mov      qword ptr [rbp-0x170], rsi
+       movzx    rsi, byte  ptr [rbp-0x170]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16F]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16E]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16D]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16C]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16B]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x16A]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0x169]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       mov      rdi, qword ptr [rbp-0x138]
+       mov      rsi, qword ptr [rbp-0x148]
+       mov      qword ptr [rbp-0x180], rdi
+       mov      qword ptr [rbp-0x188], rsi
+       xor      r14d, r14d
+						;; size=252 bbWeight=1 PerfScore 44.25
+G_M29434_IG13:
+       lea      rdi, [rbp-0x180]
+       movsxd   rsi, r14d
+       movzx    rdi, byte  ptr [rdi+rsi]
+       lea      rsi, [rbp-0x188]
+       movsxd   rax, r14d
+       movzx    rsi, byte  ptr [rsi+rax]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0x178]
+       movsxd   rdi, r14d
+       mov      byte  ptr [rsi+rdi], al
+       inc      r14d
+       cmp      r14d, 8
+       jl       SHORT G_M29434_IG13
+						;; size=64 bbWeight=4 PerfScore 48.00
+G_M29434_IG14:
+       mov      rsi, qword ptr [rbp-0x178]
+       mov      qword ptr [rbp-0x190], rsi
+       movzx    rsi, byte  ptr [rbp-0x190]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18F]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18E]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18D]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18C]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18B]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x18A]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x189]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rdi, r15b
+       mov      esi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       vmovups  ymm0, ymmword ptr [rbp-0x110]
+       vextractf128 xmm0, ymm0, 1
+       vmovaps  xmmword ptr [rbp-0x1A0], xmm0
+       vmovups  ymm1, ymmword ptr [rbp-0x130]
+       vextractf128 xmm0, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x1B0], xmm0
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      rsi, qword ptr [rbp-0x1B0]
+       mov      qword ptr [rbp-0x1C0], rdi
+       mov      qword ptr [rbp-0x1C8], rsi
+       xor      r14d, r14d
+						;; size=286 bbWeight=1 PerfScore 60.25
+G_M29434_IG15:
+       lea      rdi, [rbp-0x1C0]
+       movsxd   rsi, r14d
+       movzx    rdi, byte  ptr [rdi+rsi]
+       lea      rsi, [rbp-0x1C8]
+       movsxd   rax, r14d
+       movzx    rsi, byte  ptr [rsi+rax]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0x1B8]
+       movsxd   rdi, r14d
+       mov      byte  ptr [rsi+rdi], al
+       inc      r14d
+       cmp      r14d, 8
+       jl       SHORT G_M29434_IG15
+						;; size=64 bbWeight=4 PerfScore 48.00
+G_M29434_IG16:
+       mov      rsi, qword ptr [rbp-0x1B8]
+       mov      qword ptr [rbp-0x1D0], rsi
+       movzx    rsi, byte  ptr [rbp-0x1D0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CF]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CE]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CD]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CC]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CB]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1CA]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       movzx    rsi, byte  ptr [rbp-0x1C9]
+       mov      edi, r14d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r14d, eax
+       mov      rdi, qword ptr [rbp-0x198]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1A8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x1D8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[ubyte]:op_Multiply(System.Runtime.Intrinsics.Vector64`1[ubyte],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[ubyte]:op_Multiply(System.Runtime.Intrinsics.Vector64`1[ubyte],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte]
+       mov      rdi, qword ptr [rbp-0x1D8]
+       mov      qword ptr [rbp-0x1E0], rdi
+       lea      rdi, [rbp-0x1E0]
+       xor      esi, esi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 1
+						;; size=329 bbWeight=1 PerfScore 58.50
+G_M29434_IG17:
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 2
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 3
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 4
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 5
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 6
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       lea      rdi, [rbp-0x1E0]
+       mov      esi, 7
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Vector64:GetElementUnsafe[ubyte](byref,int):ubyte
+       mov      esi, eax
+       mov      edi, r13d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r13d, eax
+       movzx    rdi, r14b
+       mov      esi, r13d
+						;; size=303 bbWeight=1 PerfScore 55.75
+G_M29434_IG18:
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      esi, eax
+       movzx    rdi, r15b
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       add      eax, ebx
        movzx    rax, al
-						;; size=258 bbWeight=1 PerfScore 41.00
-G_M29434_IG08:
+						;; size=35 bbWeight=1 PerfScore 7.50
+G_M29434_IG19:
        vzeroupper 
-       add      rsp, 256
+       add      rsp, 464
+       pop      rbx
+       pop      r13
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=19 bbWeight=1 PerfScore 4.75
 
-; Total bytes of code 1830, prolog size 16, PerfScore 368.25, instruction count 422, allocated bytes for code 1830 (MethodHash=b8b18d05) for method System.Runtime.Intrinsics.Vector512:Dot[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):ubyte (FullOpts)
+; Total bytes of code 2370, prolog size 23, PerfScore 682.00, instruction count 496, allocated bytes for code 2370 (MethodHash=b8b18d05) for method System.Runtime.Intrinsics.Vector512:Dot[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):ubyte (FullOpts)
451 (58.12 % of base) - System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; partially interruptible
+; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 19 single block inlinees; 23 inlinees without PGO data
+; 0 inlinees with PGO data; 23 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T27] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V01 arg1         [V01,T28] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V00 arg0         [V00,T32] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V01 arg1         [V01,T33] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T12] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V04 tmp2         [V04,T23] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V05 tmp3         [V05,T24] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V06 tmp4         [V06,T13] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V07 tmp5         [V07,T04] (  8,  8   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
-;* V08 tmp6         [V08,T19] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V09 tmp7         [V09    ] (  9, 18   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V10 tmp8         [V10    ] (  9, 18   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V11 tmp9         [V11    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V12 tmp10        [V12,T00] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V13 tmp11        [V13    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V14 tmp12        [V14    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V15 tmp13        [V15    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V16 tmp14        [V16    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V17 tmp15        [V17    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V18 tmp16        [V18    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V19 tmp17        [V19,T08] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V20 tmp18        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V21 tmp19        [V21    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V22 tmp20        [V22    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V23 tmp21        [V23    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V24 tmp22        [V24    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V25 tmp23        [V25    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V26 tmp24        [V26,T05] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V27 tmp25        [V27,T20] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V28 tmp26        [V28    ] (  9, 18   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V29 tmp27        [V29    ] (  9, 18   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V03 tmp1         [V03,T12] (  2,  4   )     int  ->  rbx         "impAppendStmt"
+;  V04 tmp2         [V04,T28] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V05 tmp3         [V05,T29] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V06 tmp4         [V06,T13] (  2,  4   )     int  ->  rbx         "impAppendStmt"
+;* V07 tmp5         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V08 tmp6         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V09 tmp7         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V10 tmp8         [V10    ] (  2,  5   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V11 tmp9         [V11,T04] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
+;  V12 tmp10        [V12    ] (  2, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V13 tmp11        [V13    ] (  2, 10   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V14 tmp12        [V14    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V15 tmp13        [V15    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V16 tmp14        [V16    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V17 tmp15        [V17    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V18 tmp16        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V19 tmp17        [V19,T00] ( 16, 16   )   ubyte  ->  rbx         ld-addr-op "Inline ldloca(s) first use temp"
+;* V20 tmp18        [V20,T24] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V21 tmp19        [V21    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V22 tmp20        [V22    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V23 tmp21        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V24 tmp22        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V25 tmp23        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V26 tmp24        [V26    ] (  2,  5   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V27 tmp25        [V27,T05] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V28 tmp26        [V28    ] (  2, 10   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V29 tmp27        [V29    ] (  2, 10   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V30 tmp28        [V30    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V31 tmp29        [V31,T01] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V32 tmp30        [V32    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V33 tmp31        [V33    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V34 tmp32        [V34    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V35 tmp33        [V35    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V36 tmp34        [V36    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V37 tmp35        [V37    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V38 tmp36        [V38,T09] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V39 tmp37        [V39    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V40 tmp38        [V40    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V41 tmp39        [V41    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V42 tmp40        [V42    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V43 tmp41        [V43    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V44 tmp42        [V44    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V45 tmp43        [V45    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V46 tmp44        [V46    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V47 tmp45        [V47    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V48 tmp46        [V48,T25] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V49 tmp47        [V49,T26] (  3,  6   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V50 tmp48        [V50,T14] (  2,  4   )     int  ->  rcx         "impAppendStmt"
-;  V51 tmp49        [V51,T06] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V52 tmp50        [V52,T21] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V53 tmp51        [V53    ] (  9, 18   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V54 tmp52        [V54    ] (  9, 18   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V55 tmp53        [V55    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V56 tmp54        [V56,T02] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V57 tmp55        [V57    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V58 tmp56        [V58    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V59 tmp57        [V59    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V60 tmp58        [V60    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V61 tmp59        [V61    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V62 tmp60        [V62    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V63 tmp61        [V63,T10] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V64 tmp62        [V64    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V65 tmp63        [V65    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V66 tmp64        [V66    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V67 tmp65        [V67    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V68 tmp66        [V68    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V69 tmp67        [V69    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V70 tmp68        [V70,T07] (  8,  8   )   ubyte  ->  rdx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V71 tmp69        [V71,T22] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V72 tmp70        [V72    ] (  9, 18   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V73 tmp71        [V73    ] (  9, 18   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V74 tmp72        [V74    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V75 tmp73        [V75,T03] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V76 tmp74        [V76    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V77 tmp75        [V77    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V78 tmp76        [V78    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V79 tmp77        [V79    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V80 tmp78        [V80    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V81 tmp79        [V81    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V82 tmp80        [V82,T11] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V83 tmp81        [V83    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V84 tmp82        [V84    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V85 tmp83        [V85    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V86 tmp84        [V86    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V87 tmp85        [V87    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V88 tmp86        [V88    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V89 tmp87        [V89    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V90 tmp88        [V90    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V91 tmp89        [V91    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V92 tmp90        [V92    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V93 tmp91        [V93    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V94 tmp92        [V94    ] (  9, 17   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V95 tmp93        [V95    ] (  9, 17   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-;  V96 tmp94        [V96    ] (  9, 17   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
-;  V97 tmp95        [V97    ] (  9, 17   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
-;  V98 tmp96        [V98    ] (  9, 17   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V53._00 (fldOffset=0x0)" P-DEP
-;  V99 tmp97        [V99    ] (  9, 17   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V54._00 (fldOffset=0x0)" P-DEP
-;  V100 tmp98       [V100    ] (  9, 17   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
-;  V101 tmp99       [V101    ] (  9, 17   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-;  V102 cse0        [V102,T15] (  2,  2   )     int  ->  rcx         "CSE #01: moderate"
-;  V103 cse1        [V103,T16] (  2,  2   )     int  ->  rdx         "CSE #02: moderate"
-;  V104 cse2        [V104,T17] (  2,  2   )     int  ->  rdx         "CSE #03: moderate"
-;  V105 cse3        [V105,T18] (  2,  2   )     int  ->  rdi         "CSE #04: moderate"
+;* V31 tmp29        [V31    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V32 tmp30        [V32,T01] ( 16, 16   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
+;* V33 tmp31        [V33,T25] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V34 tmp32        [V34    ] (  9, 18   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V35 tmp33        [V35    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;  V36 tmp34        [V36,T30] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V37 tmp35        [V37,T31] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V38 tmp36        [V38,T14] (  2,  4   )     int  ->  r15         "impAppendStmt"
+;* V39 tmp37        [V39    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V40 tmp38        [V40    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V41 tmp39        [V41    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V42 tmp40        [V42    ] (  2,  5   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V43 tmp41        [V43,T06] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V44 tmp42        [V44    ] (  2, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V45 tmp43        [V45    ] (  2, 10   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V46 tmp44        [V46    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V47 tmp45        [V47    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V48 tmp46        [V48,T02] ( 16, 16   )   ubyte  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V49 tmp47        [V49,T26] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V50 tmp48        [V50    ] (  9, 18   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V51 tmp49        [V51    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V52 tmp50        [V52    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V53 tmp51        [V53    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V54 tmp52        [V54    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V55 tmp53        [V55    ] (  2,  5   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V56 tmp54        [V56,T07] (  5, 17   )     int  ->  r14         "Inline stloc first use temp"
+;  V57 tmp55        [V57    ] (  2, 10   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V58 tmp56        [V58    ] (  2, 10   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V59 tmp57        [V59    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V60 tmp58        [V60    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V61 tmp59        [V61,T03] ( 16, 16   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
+;* V62 tmp60        [V62,T27] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V63 tmp61        [V63    ] (  9, 18   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V64 tmp62        [V64    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;  V65 tmp63        [V65,T16] (  2,  2   )    long  ->  rsi         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V66 tmp64        [V66,T17] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;* V67 tmp65        [V67    ] (  0,  0   )    long  ->  zero-ref    "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V68 tmp66        [V68    ] (  2,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V69 tmp67        [V69    ] (  2,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V70 tmp68        [V70    ] (  2,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V71 tmp69        [V71    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
+;  V72 tmp70        [V72,T18] (  2,  2   )    long  ->  rdi         "field V23._00 (fldOffset=0x0)" P-INDEP
+;  V73 tmp71        [V73,T19] (  2,  2   )    long  ->  rsi         "field V24._00 (fldOffset=0x0)" P-INDEP
+;* V74 tmp72        [V74    ] (  0,  0   )    long  ->  zero-ref    "field V25._00 (fldOffset=0x0)" P-INDEP
+;  V75 tmp73        [V75    ] (  2,  5   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
+;  V76 tmp74        [V76    ] (  2,  9   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
+;  V77 tmp75        [V77    ] (  2,  9   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V78 tmp76        [V78    ] (  9, 17   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
+;  V79 tmp77        [V79,T20] (  2,  2   )    long  ->  rdi         "field V39._00 (fldOffset=0x0)" P-INDEP
+;  V80 tmp78        [V80,T21] (  2,  2   )    long  ->  rsi         "field V40._00 (fldOffset=0x0)" P-INDEP
+;* V81 tmp79        [V81    ] (  0,  0   )    long  ->  zero-ref    "field V41._00 (fldOffset=0x0)" P-INDEP
+;  V82 tmp80        [V82    ] (  2,  5   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
+;  V83 tmp81        [V83    ] (  2,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
+;  V84 tmp82        [V84    ] (  2,  9   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+;  V85 tmp83        [V85    ] (  9, 17   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V50._00 (fldOffset=0x0)" P-DEP
+;  V86 tmp84        [V86,T22] (  2,  2   )    long  ->  rdi         "field V52._00 (fldOffset=0x0)" P-INDEP
+;  V87 tmp85        [V87,T23] (  2,  2   )    long  ->  rsi         "field V53._00 (fldOffset=0x0)" P-INDEP
+;* V88 tmp86        [V88    ] (  0,  0   )    long  ->  zero-ref    "field V54._00 (fldOffset=0x0)" P-INDEP
+;  V89 tmp87        [V89    ] (  2,  5   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
+;  V90 tmp88        [V90    ] (  2,  9   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+;  V91 tmp89        [V91    ] (  2,  9   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V58._00 (fldOffset=0x0)" P-DEP
+;  V92 tmp90        [V92    ] (  9, 17   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
+;  V93 tmp91        [V93,T15] (  2,  4   )     int  ->  rsi         "argument with side effect"
+;  V94 cse0         [V94,T08] (  4, 16   )    long  ->  rax         "CSE #01: aggressive"
+;  V95 cse1         [V95,T09] (  4, 16   )    long  ->  r14         "CSE #02: aggressive"
+;  V96 cse2         [V96,T10] (  4, 16   )    long  ->  r14         "CSE #03: aggressive"
+;  V97 cse3         [V97,T11] (  4, 16   )    long  ->  r13         "CSE #04: aggressive"
 ;
-; Lcl frame size = 128
+; Lcl frame size = 192
 
 G_M42821_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     r13
+       push     rbx
+       sub      rsp, 192
+       lea      rbp, [rsp+0xE0]
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x30]
-						;; size=26 bbWeight=1 PerfScore 9.75
+						;; size=33 bbWeight=1 PerfScore 13.75
 G_M42821_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x28], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x30], rax
-       movzx    rax, byte  ptr [rbp-0x28]
-       movzx    rcx, byte  ptr [rbp-0x30]
-       imul     eax, ecx
-       movzx    rax, al
-       movzx    rcx, byte  ptr [rbp-0x27]
-       movzx    rdx, byte  ptr [rbp-0x2F]
-       imul     ecx, edx
-       movzx    rcx, cl
-       add      eax, ecx
-       movzx    rcx, al
-       movzx    rax, cl
-       movzx    rcx, byte  ptr [rbp-0x26]
-       movzx    rdx, byte  ptr [rbp-0x2E]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x25]
-       movzx    rdi, byte  ptr [rbp-0x2D]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      eax, ecx
-       movzx    rax, al
-       movzx    rdx, byte  ptr [rbp-0x24]
-       movzx    rcx, byte  ptr [rbp-0x2C]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x23]
-       movzx    rdi, byte  ptr [rbp-0x2B]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       movzx    rdx, byte  ptr [rbp-0x22]
-       movzx    rcx, byte  ptr [rbp-0x2A]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x21]
-       movzx    rdi, byte  ptr [rbp-0x29]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x38], rcx
-       mov      rcx, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x40], rcx
-       movzx    rcx, byte  ptr [rbp-0x38]
-       movzx    rdx, byte  ptr [rbp-0x40]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x37]
-       movzx    rdi, byte  ptr [rbp-0x3F]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0x36]
-       movzx    rdi, byte  ptr [rbp-0x3E]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x35]
-						;; size=251 bbWeight=1 PerfScore 62.00
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rsi, qword ptr [rbp-0x30]
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rsi
+       mov      qword ptr [rbp-0x58], rdi
+       xor      esi, esi
+       align    [0 bytes for IG03]
+						;; size=46 bbWeight=1 PerfScore 8.75
 G_M42821_IG03:
-       movzx    rsi, byte  ptr [rbp-0x3D]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0x34]
-       movzx    rdx, byte  ptr [rbp-0x3C]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x33]
-       movzx    rsi, byte  ptr [rbp-0x3B]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       movzx    rdi, byte  ptr [rbp-0x32]
-       movzx    rdx, byte  ptr [rbp-0x3A]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x31]
-       movzx    rsi, byte  ptr [rbp-0x39]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       add      eax, ecx
-       movzx    rax, al
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x60], xmm0
-       mov      rcx, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x68], rcx
-       mov      rcx, qword ptr [rbp-0x60]
-       mov      qword ptr [rbp-0x70], rcx
-       movzx    rcx, byte  ptr [rbp-0x68]
-       movzx    rdx, byte  ptr [rbp-0x70]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x67]
-       movzx    rdi, byte  ptr [rbp-0x6F]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0x66]
-       movzx    rdi, byte  ptr [rbp-0x6E]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x65]
-       movzx    rsi, byte  ptr [rbp-0x6D]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0x64]
-       movzx    rdx, byte  ptr [rbp-0x6C]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x63]
-       movzx    rsi, byte  ptr [rbp-0x6B]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       movzx    rdi, byte  ptr [rbp-0x62]
-       movzx    rdx, byte  ptr [rbp-0x6A]
-       imul     edx, edi
-						;; size=267 bbWeight=1 PerfScore 64.75
+       lea      rdi, [rbp-0x50]
+       movsxd   rax, esi
+       movzx    rdi, byte  ptr [rdi+rax]
+       lea      rcx, [rbp-0x58]
+       movzx    rcx, byte  ptr [rcx+rax]
+       imul     edi, ecx
+       lea      rcx, [rbp-0x48]
+       mov      byte  ptr [rcx+rax], dil
+       inc      esi
+       cmp      esi, 8
+       jl       SHORT G_M42821_IG03
+						;; size=38 bbWeight=4 PerfScore 41.00
 G_M42821_IG04:
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x61]
-       movzx    rsi, byte  ptr [rbp-0x69]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       mov      rdx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x78], rdx
-       mov      rdx, qword ptr [rbp-0x58]
-       mov      qword ptr [rbp-0x80], rdx
-       movzx    rdx, byte  ptr [rbp-0x78]
-       movzx    rdi, byte  ptr [rbp-0x80]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x77]
+       mov      rsi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x60], rsi
+       movzx    rsi, byte  ptr [rbp-0x60]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5F]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5E]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5D]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5C]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5B]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x5A]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x59]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      rsi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rdi
+       mov      qword ptr [rbp-0x78], rsi
+       xor      r15d, r15d
+						;; size=195 bbWeight=1 PerfScore 44.25
+G_M42821_IG05:
+       lea      rdi, [rbp-0x70]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0x78]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0x68]
+       mov      byte  ptr [rsi+r14], al
+       inc      r15d
+       cmp      r15d, 8
+       jl       SHORT G_M42821_IG05
+						;; size=50 bbWeight=4 PerfScore 46.00
+G_M42821_IG06:
+       mov      rsi, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0x80], rsi
+       movzx    rsi, byte  ptr [rbp-0x80]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
        movzx    rsi, byte  ptr [rbp-0x7F]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       movzx    rdi, dl
-       movzx    rdx, dil
-       movzx    rdi, byte  ptr [rbp-0x76]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
        movzx    rsi, byte  ptr [rbp-0x7E]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x75]
-       movzx    r8, byte  ptr [rbp-0x7D]
-       imul     esi, r8d
-       movzx    rsi, sil
-       add      edi, esi
-       add      edx, edi
-       movzx    rdx, dl
-       movzx    rsi, byte  ptr [rbp-0x74]
-       movzx    rdi, byte  ptr [rbp-0x7C]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x73]
-       movzx    r8, byte  ptr [rbp-0x7B]
-       imul     esi, r8d
-       movzx    rsi, sil
-       add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       movzx    rsi, byte  ptr [rbp-0x72]
-       movzx    rdi, byte  ptr [rbp-0x7A]
-       imul     edi, esi
-       movzx    rdi, dil
-       movzx    rsi, byte  ptr [rbp-0x71]
-       movzx    r8, byte  ptr [rbp-0x79]
-       imul     esi, r8d
-       movzx    rsi, sil
-       add      edi, esi
-       add      edi, edx
-       movzx    rdx, dil
-       add      ecx, edx
-       add      eax, ecx
-       movzx    rax, al
-						;; size=220 bbWeight=1 PerfScore 47.00
-G_M42821_IG05:
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x7D]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x7C]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x7B]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x7A]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x79]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rdi, bl
+       mov      esi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
+       vextractf128 xmm0, ymm0, 1
+       vmovaps  xmmword ptr [rbp-0x90], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vextractf128 xmm0, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0xA0], xmm0
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      rsi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB0], rdi
+       mov      qword ptr [rbp-0xB8], rsi
+       xor      r15d, r15d
+						;; size=249 bbWeight=1 PerfScore 60.25
+G_M42821_IG07:
+       lea      rdi, [rbp-0xB0]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0xB8]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0xA8]
+       mov      byte  ptr [rsi+r14], al
+       inc      r15d
+       cmp      r15d, 8
+       jl       SHORT G_M42821_IG07
+						;; size=59 bbWeight=4 PerfScore 46.00
+G_M42821_IG08:
+       mov      rsi, qword ptr [rbp-0xA8]
+       mov      qword ptr [rbp-0xC0], rsi
+       movzx    rsi, byte  ptr [rbp-0xC0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBF]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBE]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBD]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBC]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBB]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xBA]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       movzx    rsi, byte  ptr [rbp-0xB9]
+       mov      edi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      r15d, eax
+       mov      rdi, qword ptr [rbp-0x88]
+       mov      rsi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD0], rdi
+       mov      qword ptr [rbp-0xD8], rsi
+       xor      r14d, r14d
+						;; size=252 bbWeight=1 PerfScore 44.25
+G_M42821_IG09:
+       lea      rdi, [rbp-0xD0]
+       movsxd   r13, r14d
+       movzx    rdi, byte  ptr [rdi+r13]
+       lea      rsi, [rbp-0xD8]
+       movzx    rsi, byte  ptr [rsi+r13]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0xC8]
+       mov      byte  ptr [rsi+r13], al
+       inc      r14d
+       cmp      r14d, 8
+       jl       SHORT G_M42821_IG09
+						;; size=59 bbWeight=4 PerfScore 46.00
+G_M42821_IG10:
+       mov      rsi, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0xE0], rsi
+       movzx    rsi, byte  ptr [rbp-0xE0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDF]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDE]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDD]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDC]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDB]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xDA]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0xD9]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rdi, r15b
+       mov      esi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      esi, eax
+       movzx    rdi, bl
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       nop      
+						;; size=227 bbWeight=1 PerfScore 45.75
+G_M42821_IG11:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 192
+       pop      rbx
+       pop      r13
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=19 bbWeight=1 PerfScore 4.75
 
-; Total bytes of code 776, prolog size 16, PerfScore 186.25, instruction count 214, allocated bytes for code 776 (MethodHash=d63158ba) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts)
+; Total bytes of code 1227, prolog size 33, PerfScore 400.75, instruction count 263, allocated bytes for code 1227 (MethodHash=d63158ba) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts)
411 (78.59 % of base) - System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftLeft(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftLeft(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 10 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V02 arg1         [V02,T02] (  3,  3   )     int  ->  rsi         single-def
+;  V00 RetBuf       [V00,T02] (  4,  4   )   byref  ->  r15         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V02 arg1         [V02,T00] ( 27, 27   )     int  ->  rbx         single-def
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V09 tmp6         [V09    ] (  9,  9   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V09 tmp6         [V09    ] (  9,  9   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V10 tmp7         [V10,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  9, 18   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V11 tmp8         [V11    ] (  9, 18   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V12 tmp9         [V12    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V13 tmp10        [V13    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V14 tmp11        [V14    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  9,  9   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V15 tmp12        [V15    ] (  9,  9   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V16 tmp13        [V16,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  9, 18   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V17 tmp14        [V17    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V18 tmp15        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T14] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V22 tmp19        [V22,T12] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V25 tmp22        [V25    ] (  9,  9   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V28 tmp25        [V28    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  9,  9   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V32 tmp29        [V32,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  9, 18   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V34 tmp31        [V34    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T15] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V39 tmp36        [V39,T03] (  2,  2   )    long  ->  rax         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T04] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  9,  9   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  9, 17   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  9,  9   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  9, 17   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T05] (  2,  2   )    long  ->  rax         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T06] (  2,  2   )    long  ->  rcx         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  9,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  9,  9   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  9, 17   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V51 cse0         [V51,T00] ( 33, 33   )     int  ->  rsi         "CSE #01: aggressive"
+;  V19 tmp16        [V19,T14] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V20 tmp17        [V20,T12] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V21 tmp18        [V21    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V22 tmp19        [V22    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V23 tmp20        [V23    ] (  9,  9   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V24 tmp21        [V24,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  9, 18   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V26 tmp23        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  9,  9   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V28 tmp25        [V28,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  9, 18   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V30 tmp27        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V31 tmp28        [V31,T15] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V32 tmp29        [V32    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V33 tmp30        [V33,T03] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V34 tmp31        [V34,T04] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V35 tmp32        [V35    ] (  9,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  9, 17   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37    ] (  9,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V38 tmp35        [V38    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V39 tmp36        [V39,T05] (  2,  2   )    long  ->  r14         "field V21._00 (fldOffset=0x0)" P-INDEP
+;  V40 tmp37        [V40,T06] (  2,  2   )    long  ->  rax         "field V22._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41    ] (  9,  9   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  9, 17   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  9,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  9, 17   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V45 cse0         [V45,T01] (  9,  9   )     int  ->  rsi         "CSE #01: aggressive"
 ;
-; Lcl frame size = 128
+; Lcl frame size = 152
 
 G_M41805_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 152
+       lea      rbp, [rsp+0xB0]
+       mov      r15, rdi
+       mov      ebx, esi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+						;; size=31 bbWeight=1 PerfScore 9.25
 G_M41805_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       movzx    rax, byte  ptr [rbp-0x20]
+       vmovaps  xmmword ptr [rbp-0x40], xmm1
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       movzx    rdi, byte  ptr [rbp-0x50]
+       mov      esi, ebx
        and      esi, 7
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x18], al
-       movzx    rax, byte  ptr [rbp-0x1F]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x17], al
-       movzx    rax, byte  ptr [rbp-0x1E]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x16], al
-       movzx    rax, byte  ptr [rbp-0x1D]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x15], al
-       movzx    rax, byte  ptr [rbp-0x1C]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x14], al
-       movzx    rax, byte  ptr [rbp-0x1B]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x13], al
-       movzx    rax, byte  ptr [rbp-0x1A]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x12], al
-       movzx    rax, byte  ptr [rbp-0x19]
-       shlx     eax, eax, esi
-       mov      byte  ptr [rbp-0x11], al
-       mov      rax, qword ptr [rbp-0x18]
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rcx
-       movzx    rcx, byte  ptr [rbp-0x30]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x28], cl
-       movzx    rcx, byte  ptr [rbp-0x2F]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x27], cl
-       movzx    rcx, byte  ptr [rbp-0x2E]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x26], cl
-       movzx    rcx, byte  ptr [rbp-0x2D]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x25], cl
-       movzx    rcx, byte  ptr [rbp-0x2C]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x24], cl
-       movzx    rcx, byte  ptr [rbp-0x2B]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x23], cl
-       movzx    rcx, byte  ptr [rbp-0x2A]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x22], cl
-       movzx    rcx, byte  ptr [rbp-0x29]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x21], cl
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rax
-       mov      qword ptr [rbp-0x38], rcx
-						;; size=236 bbWeight=1 PerfScore 49.50
-G_M41805_IG03:
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       movzx    rax, byte  ptr [rbp-0x60]
-       shlx     eax, eax, esi
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x48], dil
+       movzx    rdi, byte  ptr [rbp-0x4F]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x47], dil
+       movzx    rdi, byte  ptr [rbp-0x4E]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x46], dil
+       movzx    rdi, byte  ptr [rbp-0x4D]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x45], dil
+       movzx    rdi, byte  ptr [rbp-0x4C]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x44], dil
+       movzx    rdi, byte  ptr [rbp-0x4B]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x43], dil
+       movzx    rdi, byte  ptr [rbp-0x4A]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x42], dil
+       movzx    rdi, byte  ptr [rbp-0x49]
+       shlx     edi, edi, esi
+       mov      byte  ptr [rbp-0x41], dil
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       movzx    rdi, byte  ptr [rbp-0x60]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x58], al
-       movzx    rax, byte  ptr [rbp-0x5F]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x57], al
-       movzx    rax, byte  ptr [rbp-0x5E]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x56], al
-       movzx    rax, byte  ptr [rbp-0x5D]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5D]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+						;; size=236 bbWeight=1 PerfScore 48.75
+G_M41805_IG03:
        mov      byte  ptr [rbp-0x55], al
-       movzx    rax, byte  ptr [rbp-0x5C]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x54], al
-       movzx    rax, byte  ptr [rbp-0x5B]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x53], al
-       movzx    rax, byte  ptr [rbp-0x5A]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x52], al
-       movzx    rax, byte  ptr [rbp-0x59]
-       shlx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x59]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x51], al
-       mov      rax, qword ptr [rbp-0x58]
-       mov      rcx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rcx
-       movzx    rcx, byte  ptr [rbp-0x70]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x68], cl
-       movzx    rcx, byte  ptr [rbp-0x6F]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x67], cl
-       movzx    rcx, byte  ptr [rbp-0x6E]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x66], cl
-       movzx    rcx, byte  ptr [rbp-0x6D]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x65], cl
-       movzx    rcx, byte  ptr [rbp-0x6C]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x64], cl
-       movzx    rcx, byte  ptr [rbp-0x6B]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x63], cl
-       movzx    rcx, byte  ptr [rbp-0x6A]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x62], cl
-       movzx    rcx, byte  ptr [rbp-0x69]
-       shlx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x61], cl
-       mov      rcx, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rax
-       mov      qword ptr [rbp-0x78], rcx
-						;; size=240 bbWeight=1 PerfScore 54.00
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       movzx    rdi, byte  ptr [rbp-0x90]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x88], al
+       movzx    rdi, byte  ptr [rbp-0x8F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x87], al
+       movzx    rdi, byte  ptr [rbp-0x8E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x86], al
+       movzx    rdi, byte  ptr [rbp-0x8D]
+       mov      esi, ebx
+						;; size=234 bbWeight=1 PerfScore 56.75
 G_M41805_IG04:
-       vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=14 bbWeight=1 PerfScore 6.25
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x85], al
+       movzx    rdi, byte  ptr [rbp-0x8C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x84], al
+       movzx    rdi, byte  ptr [rbp-0x8B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x83], al
+       movzx    rdi, byte  ptr [rbp-0x8A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x82], al
+       movzx    rdi, byte  ptr [rbp-0x89]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x81], al
+       mov      r14, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       movzx    rdi, byte  ptr [rbp-0xA0]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x98], al
+       movzx    rdi, byte  ptr [rbp-0x9F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x97], al
+       movzx    rdi, byte  ptr [rbp-0x9E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x96], al
+       movzx    rdi, byte  ptr [rbp-0x9D]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+						;; size=252 bbWeight=1 PerfScore 47.25
 G_M41805_IG05:
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x95], al
+       movzx    rdi, byte  ptr [rbp-0x9C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x94], al
+       movzx    rdi, byte  ptr [rbp-0x9B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x93], al
+       movzx    rdi, byte  ptr [rbp-0x9A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x92], al
+       movzx    rdi, byte  ptr [rbp-0x99]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftLeft(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x91], al
+       mov      rax, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vinserti128 ymm0, ymm0, xmmword ptr [rbp-0xB0], 1
+       vmovups  ymmword ptr [r15], ymm0
+       mov      rax, r15
+						;; size=164 bbWeight=1 PerfScore 38.25
+G_M41805_IG06:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 152
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 523, prolog size 16, PerfScore 118.25, instruction count 129, allocated bytes for code 523 (MethodHash=5ba45cb2) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftLeft(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
+; Total bytes of code 934, prolog size 21, PerfScore 204.50, instruction count 190, allocated bytes for code 934 (MethodHash=5ba45cb2) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftLeft(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
411 (78.59 % of base) - System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 10 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V02 arg1         [V02,T02] (  3,  3   )     int  ->  rsi         single-def
+;  V00 RetBuf       [V00,T02] (  4,  4   )   byref  ->  r15         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V02 arg1         [V02,T00] ( 27, 27   )     int  ->  rbx         single-def
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V09 tmp6         [V09    ] (  9,  9   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V09 tmp6         [V09    ] (  9,  9   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V10 tmp7         [V10,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  9, 18   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V11 tmp8         [V11    ] (  9, 18   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V12 tmp9         [V12    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V13 tmp10        [V13    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V14 tmp11        [V14    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  9,  9   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V15 tmp12        [V15    ] (  9,  9   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V16 tmp13        [V16,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  9, 18   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V17 tmp14        [V17    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V18 tmp15        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T14] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V22 tmp19        [V22,T12] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V25 tmp22        [V25    ] (  9,  9   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  9, 18   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V28 tmp25        [V28    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  9,  9   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V32 tmp29        [V32,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  9, 18   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V34 tmp31        [V34    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T15] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V39 tmp36        [V39,T03] (  2,  2   )    long  ->  rax         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T04] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  9,  9   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  9, 17   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  9,  9   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  9, 17   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T05] (  2,  2   )    long  ->  rax         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T06] (  2,  2   )    long  ->  rcx         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  9,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  9,  9   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  9, 17   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V51 cse0         [V51,T00] ( 33, 33   )     int  ->  rsi         "CSE #01: aggressive"
+;  V19 tmp16        [V19,T14] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V20 tmp17        [V20,T12] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V21 tmp18        [V21    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V22 tmp19        [V22    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V23 tmp20        [V23    ] (  9,  9   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V24 tmp21        [V24,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  9, 18   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V26 tmp23        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  9,  9   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V28 tmp25        [V28,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  9, 18   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V30 tmp27        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V31 tmp28        [V31,T15] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V32 tmp29        [V32    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V33 tmp30        [V33,T03] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V34 tmp31        [V34,T04] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V35 tmp32        [V35    ] (  9,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  9, 17   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37    ] (  9,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V38 tmp35        [V38    ] (  9, 17   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V39 tmp36        [V39,T05] (  2,  2   )    long  ->  r14         "field V21._00 (fldOffset=0x0)" P-INDEP
+;  V40 tmp37        [V40,T06] (  2,  2   )    long  ->  rax         "field V22._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41    ] (  9,  9   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  9, 17   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  9,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  9, 17   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V45 cse0         [V45,T01] (  9,  9   )     int  ->  rsi         "CSE #01: aggressive"
 ;
-; Lcl frame size = 128
+; Lcl frame size = 152
 
 G_M45350_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 152
+       lea      rbp, [rsp+0xB0]
+       mov      r15, rdi
+       mov      ebx, esi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+						;; size=31 bbWeight=1 PerfScore 9.25
 G_M45350_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       movzx    rax, byte  ptr [rbp-0x20]
+       vmovaps  xmmword ptr [rbp-0x40], xmm1
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       movzx    rdi, byte  ptr [rbp-0x50]
+       mov      esi, ebx
        and      esi, 7
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x18], al
-       movzx    rax, byte  ptr [rbp-0x1F]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x17], al
-       movzx    rax, byte  ptr [rbp-0x1E]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x16], al
-       movzx    rax, byte  ptr [rbp-0x1D]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x15], al
-       movzx    rax, byte  ptr [rbp-0x1C]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x14], al
-       movzx    rax, byte  ptr [rbp-0x1B]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x13], al
-       movzx    rax, byte  ptr [rbp-0x1A]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x12], al
-       movzx    rax, byte  ptr [rbp-0x19]
-       sarx     eax, eax, esi
-       mov      byte  ptr [rbp-0x11], al
-       mov      rax, qword ptr [rbp-0x18]
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rcx
-       movzx    rcx, byte  ptr [rbp-0x30]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x28], cl
-       movzx    rcx, byte  ptr [rbp-0x2F]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x27], cl
-       movzx    rcx, byte  ptr [rbp-0x2E]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x26], cl
-       movzx    rcx, byte  ptr [rbp-0x2D]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x25], cl
-       movzx    rcx, byte  ptr [rbp-0x2C]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x24], cl
-       movzx    rcx, byte  ptr [rbp-0x2B]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x23], cl
-       movzx    rcx, byte  ptr [rbp-0x2A]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x22], cl
-       movzx    rcx, byte  ptr [rbp-0x29]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x21], cl
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rax
-       mov      qword ptr [rbp-0x38], rcx
-						;; size=236 bbWeight=1 PerfScore 49.50
-G_M45350_IG03:
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       movzx    rax, byte  ptr [rbp-0x60]
-       sarx     eax, eax, esi
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x48], dil
+       movzx    rdi, byte  ptr [rbp-0x4F]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x47], dil
+       movzx    rdi, byte  ptr [rbp-0x4E]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x46], dil
+       movzx    rdi, byte  ptr [rbp-0x4D]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x45], dil
+       movzx    rdi, byte  ptr [rbp-0x4C]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x44], dil
+       movzx    rdi, byte  ptr [rbp-0x4B]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x43], dil
+       movzx    rdi, byte  ptr [rbp-0x4A]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x42], dil
+       movzx    rdi, byte  ptr [rbp-0x49]
+       sarx     edi, edi, esi
+       mov      byte  ptr [rbp-0x41], dil
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       movzx    rdi, byte  ptr [rbp-0x60]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x58], al
-       movzx    rax, byte  ptr [rbp-0x5F]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x57], al
-       movzx    rax, byte  ptr [rbp-0x5E]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x56], al
-       movzx    rax, byte  ptr [rbp-0x5D]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5D]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+						;; size=236 bbWeight=1 PerfScore 48.75
+G_M45350_IG03:
        mov      byte  ptr [rbp-0x55], al
-       movzx    rax, byte  ptr [rbp-0x5C]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x54], al
-       movzx    rax, byte  ptr [rbp-0x5B]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x53], al
-       movzx    rax, byte  ptr [rbp-0x5A]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x5A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x52], al
-       movzx    rax, byte  ptr [rbp-0x59]
-       sarx     eax, eax, esi
+       movzx    rdi, byte  ptr [rbp-0x59]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
        mov      byte  ptr [rbp-0x51], al
-       mov      rax, qword ptr [rbp-0x58]
-       mov      rcx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rcx
-       movzx    rcx, byte  ptr [rbp-0x70]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x68], cl
-       movzx    rcx, byte  ptr [rbp-0x6F]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x67], cl
-       movzx    rcx, byte  ptr [rbp-0x6E]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x66], cl
-       movzx    rcx, byte  ptr [rbp-0x6D]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x65], cl
-       movzx    rcx, byte  ptr [rbp-0x6C]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x64], cl
-       movzx    rcx, byte  ptr [rbp-0x6B]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x63], cl
-       movzx    rcx, byte  ptr [rbp-0x6A]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x62], cl
-       movzx    rcx, byte  ptr [rbp-0x69]
-       sarx     ecx, ecx, esi
-       mov      byte  ptr [rbp-0x61], cl
-       mov      rcx, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rax
-       mov      qword ptr [rbp-0x78], rcx
-						;; size=240 bbWeight=1 PerfScore 54.00
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       movzx    rdi, byte  ptr [rbp-0x90]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x88], al
+       movzx    rdi, byte  ptr [rbp-0x8F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x87], al
+       movzx    rdi, byte  ptr [rbp-0x8E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x86], al
+       movzx    rdi, byte  ptr [rbp-0x8D]
+       mov      esi, ebx
+						;; size=234 bbWeight=1 PerfScore 56.75
 G_M45350_IG04:
-       vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=14 bbWeight=1 PerfScore 6.25
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x85], al
+       movzx    rdi, byte  ptr [rbp-0x8C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x84], al
+       movzx    rdi, byte  ptr [rbp-0x8B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x83], al
+       movzx    rdi, byte  ptr [rbp-0x8A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x82], al
+       movzx    rdi, byte  ptr [rbp-0x89]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x81], al
+       mov      r14, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       movzx    rdi, byte  ptr [rbp-0xA0]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x98], al
+       movzx    rdi, byte  ptr [rbp-0x9F]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x97], al
+       movzx    rdi, byte  ptr [rbp-0x9E]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x96], al
+       movzx    rdi, byte  ptr [rbp-0x9D]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+						;; size=252 bbWeight=1 PerfScore 47.25
 G_M45350_IG05:
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x95], al
+       movzx    rdi, byte  ptr [rbp-0x9C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x94], al
+       movzx    rdi, byte  ptr [rbp-0x9B]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x93], al
+       movzx    rdi, byte  ptr [rbp-0x9A]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x92], al
+       movzx    rdi, byte  ptr [rbp-0x99]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:ShiftRightArithmetic(ubyte,int):ubyte
+       mov      byte  ptr [rbp-0x91], al
+       mov      rax, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vinserti128 ymm0, ymm0, xmmword ptr [rbp-0xB0], 1
+       vmovups  ymmword ptr [r15], ymm0
+       mov      rax, r15
+						;; size=164 bbWeight=1 PerfScore 38.25
+G_M45350_IG06:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 152
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 523, prolog size 16, PerfScore 118.25, instruction count 129, allocated bytes for code 523 (MethodHash=9dbe4ed9) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
+; Total bytes of code 934, prolog size 21, PerfScore 204.50, instruction count 190, allocated bytes for code 934 (MethodHash=9dbe4ed9) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[ubyte],int):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
406 (82.35 % of base) - System.Runtime.Intrinsics.Vector512:Dot[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):long
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Dot[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):long (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
-; rsp based frame
+; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 22 single block inlinees; 23 inlinees without PGO data
+; 0 inlinees with PGO data; 43 single block inlinees; 24 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;* V00 arg0         [V00    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[long]>
 ;* V01 arg1         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[long]>
-;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V03 tmp1         [V03    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V04 tmp2         [V04    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;* V05 tmp3         [V05    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V06 tmp4         [V06,T00] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;  V07 tmp5         [V07,T08] (  3,  6   )  simd16  ->  [rsp+0xF0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V08 tmp6         [V08,T09] (  3,  6   )  simd16  ->  [rsp+0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V09 tmp7         [V09,T01] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;* V10 tmp8         [V10    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V11 tmp9         [V11    ] (  2,  4   )  struct ( 8) [rsp+0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V12 tmp10        [V12    ] (  2,  4   )  struct ( 8) [rsp+0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V13 tmp11        [V13    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V14 tmp12        [V14    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V15 tmp13        [V15    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V16 tmp14        [V16    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V17 tmp15        [V17    ] (  2,  4   )  struct ( 8) [rsp+0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V18 tmp16        [V18    ] (  2,  4   )  struct ( 8) [rsp+0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V19 tmp17        [V19    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V20 tmp18        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V21 tmp19        [V21    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V22 tmp20        [V22    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V23 tmp21        [V23    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V24 tmp22        [V24,T10] (  3,  6   )  simd16  ->  [rsp+0xB0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V25 tmp23        [V25,T11] (  3,  6   )  simd16  ->  [rsp+0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V26 tmp24        [V26,T02] (  2,  4   )    long  ->  rcx         "impAppendStmt"
-;* V27 tmp25        [V27    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V28 tmp26        [V28    ] (  2,  4   )  struct ( 8) [rsp+0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V29 tmp27        [V29    ] (  2,  4   )  struct ( 8) [rsp+0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V30 tmp28        [V30    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V31 tmp29        [V31    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V32 tmp30        [V32    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V33 tmp31        [V33    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V34 tmp32        [V34    ] (  2,  4   )  struct ( 8) [rsp+0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V35 tmp33        [V35    ] (  2,  4   )  struct ( 8) [rsp+0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V36 tmp34        [V36    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V37 tmp35        [V37    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V38 tmp36        [V38    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V39 tmp37        [V39    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V40 tmp38        [V40    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V41 tmp39        [V41,T06] (  2,  2   )    long  ->  rax         "Inline return value spill temp"
-;* V42 tmp40        [V42    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;* V43 tmp41        [V43    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V44 tmp42        [V44,T03] (  2,  4   )    long  ->  rcx         "impAppendStmt"
-;  V45 tmp43        [V45,T12] (  3,  6   )  simd16  ->  [rsp+0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V46 tmp44        [V46,T13] (  3,  6   )  simd16  ->  [rsp+0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V47 tmp45        [V47,T04] (  2,  4   )    long  ->  rcx         "impAppendStmt"
-;* V48 tmp46        [V48    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V49 tmp47        [V49    ] (  2,  4   )  struct ( 8) [rsp+0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V50 tmp48        [V50    ] (  2,  4   )  struct ( 8) [rsp+0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V51 tmp49        [V51    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V52 tmp50        [V52    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V53 tmp51        [V53    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V54 tmp52        [V54    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V55 tmp53        [V55    ] (  2,  4   )  struct ( 8) [rsp+0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V56 tmp54        [V56    ] (  2,  4   )  struct ( 8) [rsp+0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V57 tmp55        [V57    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V02 OutArgs      [V02    ] (  1,  1   )  struct (16) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V03 tmp1         [V03,T00] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;  V04 tmp2         [V04,T50] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V05 tmp3         [V05,T51] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V06 tmp4         [V06,T01] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;  V07 tmp5         [V07,T52] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V08 tmp6         [V08,T53] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V09 tmp7         [V09,T02] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;* V10 tmp8         [V10    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V11 tmp9         [V11    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V12 tmp10        [V12    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V13 tmp11        [V13    ] (  2,  2   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V14 tmp12        [V14,T35] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V15 tmp13        [V15    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V16 tmp14        [V16    ] (  2,  4   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V17 tmp15        [V17    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V18 tmp16        [V18    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V19 tmp17        [V19    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V20 tmp18        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V21 tmp19        [V21,T09] (  2,  2   )    long  ->  rbx         ld-addr-op "Inline ldloca(s) first use temp"
+;* V22 tmp20        [V22,T36] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V23 tmp21        [V23    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V24 tmp22        [V24    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V25 tmp23        [V25    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V26 tmp24        [V26    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V27 tmp25        [V27    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V28 tmp26        [V28    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V29 tmp27        [V29    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V30 tmp28        [V30    ] (  2,  2   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V31 tmp29        [V31,T37] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V32 tmp30        [V32    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V33 tmp31        [V33    ] (  2,  4   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V34 tmp32        [V34    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V35 tmp33        [V35    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V36 tmp34        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V37 tmp35        [V37    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V38 tmp36        [V38,T10] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V39 tmp37        [V39,T38] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V40 tmp38        [V40    ] (  2,  4   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V41 tmp39        [V41    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V42 tmp40        [V42    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V43 tmp41        [V43    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V44 tmp42        [V44    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V45 tmp43        [V45    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V46 tmp44        [V46,T54] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V47 tmp45        [V47,T55] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V48 tmp46        [V48,T03] (  2,  4   )    long  ->  r15         "impAppendStmt"
+;* V49 tmp47        [V49    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V50 tmp48        [V50    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V51 tmp49        [V51    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V52 tmp50        [V52    ] (  2,  2   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V53 tmp51        [V53,T39] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V54 tmp52        [V54    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V55 tmp53        [V55    ] (  2,  4   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V56 tmp54        [V56    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V57 tmp55        [V57    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V58 tmp56        [V58    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V59 tmp57        [V59    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V60 tmp58        [V60    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V61 tmp59        [V61    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V62 tmp60        [V62,T14] (  3,  6   )  simd16  ->  [rsp+0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V63 tmp61        [V63,T15] (  3,  6   )  simd16  ->  [rsp+0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V64 tmp62        [V64,T05] (  2,  4   )    long  ->  rdx         "impAppendStmt"
-;* V65 tmp63        [V65    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V66 tmp64        [V66    ] (  2,  4   )  struct ( 8) [rsp+0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V67 tmp65        [V67    ] (  2,  4   )  struct ( 8) [rsp+0x10]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V68 tmp66        [V68    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V69 tmp67        [V69    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V70 tmp68        [V70    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V71 tmp69        [V71    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V72 tmp70        [V72    ] (  2,  4   )  struct ( 8) [rsp+0x08]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V73 tmp71        [V73    ] (  2,  4   )  struct ( 8) [rsp+0x00]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V74 tmp72        [V74    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V60 tmp58        [V60,T11] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V61 tmp59        [V61,T40] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V62 tmp60        [V62    ] (  2,  4   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V63 tmp61        [V63    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V64 tmp62        [V64    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V65 tmp63        [V65    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V66 tmp64        [V66    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V67 tmp65        [V67    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V68 tmp66        [V68    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V69 tmp67        [V69    ] (  2,  2   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V70 tmp68        [V70,T41] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V71 tmp69        [V71    ] (  2,  4   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V72 tmp70        [V72    ] (  2,  4   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V73 tmp71        [V73    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V74 tmp72        [V74    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V75 tmp73        [V75    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V76 tmp74        [V76    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V77 tmp75        [V77    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V78 tmp76        [V78    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V79 tmp77        [V79,T07] (  2,  2   )    long  ->  rcx         "Inline return value spill temp"
-;* V80 tmp78        [V80    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;  V81 tmp79        [V81,T16] (  2,  2   )  simd32  ->  mm0         single-def "field V00._lower (fldOffset=0x0)" P-INDEP
-;  V82 tmp80        [V82,T17] (  2,  2   )  simd32  ->  mm1         single-def "field V00._upper (fldOffset=0x20)" P-INDEP
-;  V83 tmp81        [V83,T18] (  2,  2   )  simd32  ->  mm2         single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V84 tmp82        [V84,T19] (  2,  2   )  simd32  ->  mm3         single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V85 tmp83        [V85    ] (  2,  3   )    long  ->  [rsp+0xD8]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V86 tmp84        [V86    ] (  2,  3   )    long  ->  [rsp+0xD0]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V87 tmp85        [V87    ] (  2,  3   )    long  ->  [rsp+0xC8]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V88 tmp86        [V88    ] (  2,  3   )    long  ->  [rsp+0xC0]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
-;  V89 tmp87        [V89    ] (  2,  3   )    long  ->  [rsp+0x98]  do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
-;  V90 tmp88        [V90    ] (  2,  3   )    long  ->  [rsp+0x90]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
-;  V91 tmp89        [V91    ] (  2,  3   )    long  ->  [rsp+0x88]  do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
-;  V92 tmp90        [V92    ] (  2,  3   )    long  ->  [rsp+0x80]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-;  V93 tmp91        [V93    ] (  2,  3   )    long  ->  [rsp+0x58]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
-;  V94 tmp92        [V94    ] (  2,  3   )    long  ->  [rsp+0x50]  do-not-enreg[X] addr-exposed "field V50._00 (fldOffset=0x0)" P-DEP
-;  V95 tmp93        [V95    ] (  2,  3   )    long  ->  [rsp+0x48]  do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
-;  V96 tmp94        [V96    ] (  2,  3   )    long  ->  [rsp+0x40]  do-not-enreg[X] addr-exposed "field V56._00 (fldOffset=0x0)" P-DEP
-;  V97 tmp95        [V97    ] (  2,  3   )    long  ->  [rsp+0x18]  do-not-enreg[X] addr-exposed "field V66._00 (fldOffset=0x0)" P-DEP
-;  V98 tmp96        [V98    ] (  2,  3   )    long  ->  [rsp+0x10]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
-;  V99 tmp97        [V99    ] (  2,  3   )    long  ->  [rsp+0x08]  do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
-;  V100 tmp98       [V100    ] (  2,  3   )    long  ->  [rsp+0x00]  do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
+;  V77 tmp75        [V77,T12] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V78 tmp76        [V78,T42] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V79 tmp77        [V79    ] (  2,  4   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V80 tmp78        [V80    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V81 tmp79        [V81,T56] (  3,  6   )  simd32  ->  [rbp-0x110]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V82 tmp80        [V82,T57] (  3,  6   )  simd32  ->  [rbp-0x130]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V83 tmp81        [V83,T04] (  2,  4   )    long  ->  r15         "impAppendStmt"
+;  V84 tmp82        [V84,T58] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V85 tmp83        [V85,T59] (  3,  6   )  simd16  ->  [rbp-0x150]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V86 tmp84        [V86,T05] (  2,  4   )    long  ->  r15         "impAppendStmt"
+;* V87 tmp85        [V87    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V88 tmp86        [V88    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V89 tmp87        [V89    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V90 tmp88        [V90    ] (  2,  2   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V91 tmp89        [V91,T43] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V92 tmp90        [V92    ] (  2,  4   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V93 tmp91        [V93    ] (  2,  4   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V94 tmp92        [V94    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V95 tmp93        [V95    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V96 tmp94        [V96,T13] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V97 tmp95        [V97,T44] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V98 tmp96        [V98    ] (  2,  4   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V99 tmp97        [V99    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V100 tmp98       [V100    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V101 tmp99       [V101    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V102 tmp100      [V102    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V103 tmp101      [V103    ] (  2,  2   )  struct ( 8) [rbp-0x178]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V104 tmp102      [V104,T45] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V105 tmp103      [V105    ] (  2,  4   )  struct ( 8) [rbp-0x180]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V106 tmp104      [V106    ] (  2,  4   )  struct ( 8) [rbp-0x188]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V107 tmp105      [V107    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V108 tmp106      [V108    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V109 tmp107      [V109,T14] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V110 tmp108      [V110,T46] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V111 tmp109      [V111    ] (  2,  4   )  struct ( 8) [rbp-0x190]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V112 tmp110      [V112    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V113 tmp111      [V113,T60] (  3,  6   )  simd16  ->  [rbp-0x1A0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V114 tmp112      [V114,T61] (  3,  6   )  simd16  ->  [rbp-0x1B0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V115 tmp113      [V115,T06] (  2,  4   )    long  ->  r14         "impAppendStmt"
+;* V116 tmp114      [V116    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V117 tmp115      [V117    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V118 tmp116      [V118    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V119 tmp117      [V119    ] (  2,  2   )  struct ( 8) [rbp-0x1B8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V120 tmp118      [V120,T47] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V121 tmp119      [V121    ] (  2,  4   )  struct ( 8) [rbp-0x1C0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V122 tmp120      [V122    ] (  2,  4   )  struct ( 8) [rbp-0x1C8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V123 tmp121      [V123    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V124 tmp122      [V124    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V125 tmp123      [V125,T15] (  2,  2   )    long  ->  r14         ld-addr-op "Inline ldloca(s) first use temp"
+;* V126 tmp124      [V126,T48] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V127 tmp125      [V127    ] (  2,  4   )  struct ( 8) [rbp-0x1D0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V128 tmp126      [V128    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V129 tmp127      [V129    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V130 tmp128      [V130    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V131 tmp129      [V131    ] (  2,  4   )  struct ( 8) [rbp-0x1D8]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V132 tmp130      [V132,T16] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V133 tmp131      [V133,T49] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V134 tmp132      [V134    ] (  2,  4   )  struct ( 8) [rbp-0x1E0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V135 tmp133      [V135    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V136 tmp134      [V136    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V137 tmp135      [V137    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V138 tmp136      [V138,T62] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V00._lower (fldOffset=0x0)" P-INDEP
+;  V139 tmp137      [V139,T63] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V00._upper (fldOffset=0x20)" P-INDEP
+;  V140 tmp138      [V140,T64] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V141 tmp139      [V141,T65] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V142 tmp140      [V142,T18] (  2,  2   )    long  ->  rsi         "field V10._00 (fldOffset=0x0)" P-INDEP
+;  V143 tmp141      [V143,T19] (  2,  2   )    long  ->  rdi         "field V11._00 (fldOffset=0x0)" P-INDEP
+;* V144 tmp142      [V144    ] (  0,  0   )    long  ->  zero-ref    "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V145 tmp143      [V145    ] (  2,  2   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V146 tmp144      [V146    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V147 tmp145      [V147    ] (  2,  3   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V148 tmp146      [V148    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V149 tmp147      [V149,T20] (  2,  2   )    long  ->  rsi         "field V27._00 (fldOffset=0x0)" P-INDEP
+;  V150 tmp148      [V150,T21] (  2,  2   )    long  ->  rdi         "field V28._00 (fldOffset=0x0)" P-INDEP
+;* V151 tmp149      [V151    ] (  0,  0   )    long  ->  zero-ref    "field V29._00 (fldOffset=0x0)" P-INDEP
+;  V152 tmp150      [V152    ] (  2,  2   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V153 tmp151      [V153    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V32._00 (fldOffset=0x0)" P-DEP
+;  V154 tmp152      [V154    ] (  2,  3   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V155 tmp153      [V155    ] (  2,  3   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V40._00 (fldOffset=0x0)" P-DEP
+;  V156 tmp154      [V156,T22] (  2,  2   )    long  ->  rsi         "field V49._00 (fldOffset=0x0)" P-INDEP
+;  V157 tmp155      [V157,T23] (  2,  2   )    long  ->  rdi         "field V50._00 (fldOffset=0x0)" P-INDEP
+;* V158 tmp156      [V158    ] (  0,  0   )    long  ->  zero-ref    "field V51._00 (fldOffset=0x0)" P-INDEP
+;  V159 tmp157      [V159    ] (  2,  2   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V52._00 (fldOffset=0x0)" P-DEP
+;  V160 tmp158      [V160    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V54._00 (fldOffset=0x0)" P-DEP
+;  V161 tmp159      [V161    ] (  2,  3   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
+;  V162 tmp160      [V162    ] (  2,  3   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
+;  V163 tmp161      [V163,T24] (  2,  2   )    long  ->  rsi         "field V66._00 (fldOffset=0x0)" P-INDEP
+;  V164 tmp162      [V164,T25] (  2,  2   )    long  ->  rdi         "field V67._00 (fldOffset=0x0)" P-INDEP
+;* V165 tmp163      [V165    ] (  0,  0   )    long  ->  zero-ref    "field V68._00 (fldOffset=0x0)" P-INDEP
+;  V166 tmp164      [V166    ] (  2,  2   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
+;  V167 tmp165      [V167    ] (  2,  3   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
+;  V168 tmp166      [V168    ] (  2,  3   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
+;  V169 tmp167      [V169    ] (  2,  3   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V79._00 (fldOffset=0x0)" P-DEP
+;  V170 tmp168      [V170,T26] (  2,  2   )    long  ->  rdi         "field V87._00 (fldOffset=0x0)" P-INDEP
+;  V171 tmp169      [V171,T27] (  2,  2   )    long  ->  rsi         "field V88._00 (fldOffset=0x0)" P-INDEP
+;* V172 tmp170      [V172    ] (  0,  0   )    long  ->  zero-ref    "field V89._00 (fldOffset=0x0)" P-INDEP
+;  V173 tmp171      [V173    ] (  2,  2   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V90._00 (fldOffset=0x0)" P-DEP
+;  V174 tmp172      [V174    ] (  2,  3   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V92._00 (fldOffset=0x0)" P-DEP
+;  V175 tmp173      [V175    ] (  2,  3   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
+;  V176 tmp174      [V176    ] (  2,  3   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V98._00 (fldOffset=0x0)" P-DEP
+;  V177 tmp175      [V177,T28] (  2,  2   )    long  ->  rdi         "field V100._00 (fldOffset=0x0)" P-INDEP
+;  V178 tmp176      [V178,T29] (  2,  2   )    long  ->  rsi         "field V101._00 (fldOffset=0x0)" P-INDEP
+;* V179 tmp177      [V179    ] (  0,  0   )    long  ->  zero-ref    "field V102._00 (fldOffset=0x0)" P-INDEP
+;  V180 tmp178      [V180    ] (  2,  2   )    long  ->  [rbp-0x178]  do-not-enreg[X] addr-exposed "field V103._00 (fldOffset=0x0)" P-DEP
+;  V181 tmp179      [V181    ] (  2,  3   )    long  ->  [rbp-0x180]  do-not-enreg[X] addr-exposed "field V105._00 (fldOffset=0x0)" P-DEP
+;  V182 tmp180      [V182    ] (  2,  3   )    long  ->  [rbp-0x188]  do-not-enreg[X] addr-exposed "field V106._00 (fldOffset=0x0)" P-DEP
+;  V183 tmp181      [V183    ] (  2,  3   )    long  ->  [rbp-0x190]  do-not-enreg[X] addr-exposed "field V111._00 (fldOffset=0x0)" P-DEP
+;  V184 tmp182      [V184,T30] (  2,  2   )    long  ->  rdi         "field V116._00 (fldOffset=0x0)" P-INDEP
+;  V185 tmp183      [V185,T31] (  2,  2   )    long  ->  rsi         "field V117._00 (fldOffset=0x0)" P-INDEP
+;* V186 tmp184      [V186    ] (  0,  0   )    long  ->  zero-ref    "field V118._00 (fldOffset=0x0)" P-INDEP
+;  V187 tmp185      [V187    ] (  2,  2   )    long  ->  [rbp-0x1B8]  do-not-enreg[X] addr-exposed "field V119._00 (fldOffset=0x0)" P-DEP
+;  V188 tmp186      [V188    ] (  2,  3   )    long  ->  [rbp-0x1C0]  do-not-enreg[X] addr-exposed "field V121._00 (fldOffset=0x0)" P-DEP
+;  V189 tmp187      [V189    ] (  2,  3   )    long  ->  [rbp-0x1C8]  do-not-enreg[X] addr-exposed "field V122._00 (fldOffset=0x0)" P-DEP
+;  V190 tmp188      [V190    ] (  2,  3   )    long  ->  [rbp-0x1D0]  do-not-enreg[X] addr-exposed "field V127._00 (fldOffset=0x0)" P-DEP
+;* V191 tmp189      [V191    ] (  0,  0   )    long  ->  zero-ref    "field V129._00 (fldOffset=0x0)" P-INDEP
+;* V192 tmp190      [V192    ] (  0,  0   )    long  ->  zero-ref    "field V130._00 (fldOffset=0x0)" P-INDEP
+;  V193 tmp191      [V193,T17] (  2,  3   )    long  ->  [rbp-0x1D8]  do-not-enreg[H] hidden-struct-arg "field V131._00 (fldOffset=0x0)" P-DEP
+;  V194 tmp192      [V194    ] (  2,  3   )    long  ->  [rbp-0x1E0]  do-not-enreg[X] addr-exposed "field V134._00 (fldOffset=0x0)" P-DEP
+;  V195 tmp193      [V195,T07] (  2,  4   )    long  ->  rsi         "argument with side effect"
+;  V196 tmp194      [V196,T08] (  2,  4   )    long  ->  rsi         "argument with side effect"
+;  V197 cse0        [V197,T32] (  2,  2   )    long  ->  rbx         "CSE #01: conservative"
+;  V198 cse1        [V198,T33] (  2,  2   )    long  ->  r15         "CSE #02: conservative"
+;  V199 cse2        [V199,T34] (  2,  2   )    long  ->  r15         "CSE #03: conservative"
 ;
-; Lcl frame size = 264
+; Lcl frame size = 472
 
 G_M64954_IG01:
-       sub      rsp, 264
-       vmovups  ymm0, ymmword ptr [rsp+0x110]
-       vmovups  ymm1, ymmword ptr [rsp+0x130]
-       vmovups  ymm2, ymmword ptr [rsp+0x150]
-       vmovups  ymm3, ymmword ptr [rsp+0x170]
-						;; size=43 bbWeight=1 PerfScore 16.25
+       push     rbp
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 472
+       lea      rbp, [rsp+0x1F0]
+						;; size=21 bbWeight=1 PerfScore 4.75
 G_M64954_IG02:
-       vmovaps  ymm4, ymm0
-       vmovaps  xmmword ptr [rsp+0xF0], xmm4
-       vmovaps  ymm4, ymm2
-       vmovaps  xmmword ptr [rsp+0xE0], xmm4
-       mov      rax, qword ptr [rsp+0xF0]
-       mov      qword ptr [rsp+0xD8], rax
-       mov      rax, qword ptr [rsp+0xE0]
-       mov      qword ptr [rsp+0xD0], rax
-       mov      rax, qword ptr [rsp+0xD8]
-       imul     rax, qword ptr [rsp+0xD0]
-       mov      rcx, qword ptr [rsp+0xF8]
-       mov      qword ptr [rsp+0xC8], rcx
-       mov      rcx, qword ptr [rsp+0xE8]
-       mov      qword ptr [rsp+0xC0], rcx
-       mov      rcx, qword ptr [rsp+0xC8]
-       imul     rcx, qword ptr [rsp+0xC0]
-       add      rax, rcx
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
+       vmovups  ymm1, ymmword ptr [rbp+0x50]
+       vmovaps  ymm2, ymm0
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovaps  ymm2, ymm1
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rsi, qword ptr [rbp-0x30]
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rsi
+       mov      qword ptr [rbp-0x58], rdi
+       mov      rsi, qword ptr [rbp-0x50]
+       imul     rsi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x48], rsi
+       mov      rsi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x60], rsi
+       mov      rbx, qword ptr [rbp-0x60]
+       mov      rsi, qword ptr [rbp-0x28]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rsi
+       mov      qword ptr [rbp-0x78], rdi
+       mov      rsi, qword ptr [rbp-0x70]
+       imul     rsi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0x68], rsi
+       mov      rsi, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0x80], rsi
+       mov      r15, qword ptr [rbp-0x80]
+       add      rbx, r15
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rsp+0xB0], xmm0
-       vextractf128 xmm0, ymm2, 1
-       vmovaps  xmmword ptr [rsp+0xA0], xmm0
-       mov      rcx, qword ptr [rsp+0xB0]
-       mov      qword ptr [rsp+0x98], rcx
-       mov      rcx, qword ptr [rsp+0xA0]
-       mov      qword ptr [rsp+0x90], rcx
-       mov      rcx, qword ptr [rsp+0x98]
-       imul     rcx, qword ptr [rsp+0x90]
-       mov      rdx, qword ptr [rsp+0xB8]
-       mov      qword ptr [rsp+0x88], rdx
-       mov      rdx, qword ptr [rsp+0xA8]
-       mov      qword ptr [rsp+0x80], rdx
-       add      rax, rcx
-       mov      rcx, qword ptr [rsp+0x88]
-       imul     rcx, qword ptr [rsp+0x80]
-       add      rax, rcx
-       vmovaps  ymm0, ymm1
-       vmovaps  xmmword ptr [rsp+0x70], xmm0
-       vmovaps  ymm0, ymm3
-       vmovaps  xmmword ptr [rsp+0x60], xmm0
-       mov      rcx, qword ptr [rsp+0x70]
-       mov      qword ptr [rsp+0x58], rcx
-       mov      rcx, qword ptr [rsp+0x60]
-       mov      qword ptr [rsp+0x50], rcx
-       mov      rcx, qword ptr [rsp+0x58]
-       imul     rcx, qword ptr [rsp+0x50]
-       mov      rdx, qword ptr [rsp+0x78]
-       mov      qword ptr [rsp+0x48], rdx
-       mov      rdx, qword ptr [rsp+0x68]
-       mov      qword ptr [rsp+0x40], rdx
-       mov      rdx, qword ptr [rsp+0x48]
-       imul     rdx, qword ptr [rsp+0x40]
-       add      rcx, rdx
+       vmovaps  xmmword ptr [rbp-0x90], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rsp+0x30], xmm0
-       vextractf128 xmm0, ymm3, 1
-       vmovaps  xmmword ptr [rsp+0x20], xmm0
-       mov      rdx, qword ptr [rsp+0x30]
-       mov      qword ptr [rsp+0x18], rdx
-       mov      rdx, qword ptr [rsp+0x20]
-       mov      qword ptr [rsp+0x10], rdx
-       mov      rdx, qword ptr [rsp+0x18]
-       imul     rdx, qword ptr [rsp+0x10]
-       mov      rdi, qword ptr [rsp+0x38]
-						;; size=406 bbWeight=1 PerfScore 82.00
+       vmovaps  xmmword ptr [rbp-0xA0], xmm0
+       mov      rsi, qword ptr [rbp-0x90]
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB0], rsi
+       mov      qword ptr [rbp-0xB8], rdi
+       mov      rsi, qword ptr [rbp-0xB0]
+       imul     rsi, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xA8], rsi
+       mov      rsi, qword ptr [rbp-0xA8]
+       mov      qword ptr [rbp-0xC0], rsi
+       mov      r15, qword ptr [rbp-0xC0]
+       mov      rsi, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD0], rsi
+       mov      qword ptr [rbp-0xD8], rdi
+       mov      rsi, qword ptr [rbp-0xD0]
+       imul     rsi, qword ptr [rbp-0xD8]
+       mov      qword ptr [rbp-0xC8], rsi
+       mov      rsi, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0xE0], rsi
+       mov      rsi, qword ptr [rbp-0xE0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+						;; size=331 bbWeight=1 PerfScore 76.75
 G_M64954_IG03:
-       mov      qword ptr [rsp+0x08], rdi
-       mov      rdi, qword ptr [rsp+0x28]
-       mov      qword ptr [rsp], rdi
-       add      rcx, rdx
-       mov      rdx, qword ptr [rsp+0x08]
-       imul     rdx, qword ptr [rsp]
-       add      rcx, rdx
-       add      rax, rcx
-						;; size=33 bbWeight=1 PerfScore 8.75
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rbx, rax
+       vmovups  ymm0, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x110], ymm0
+       vmovups  ymm1, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x130], ymm1
+       vmovaps  ymm2, ymm0
+       vmovaps  xmmword ptr [rbp-0x140], xmm2
+       vmovaps  ymm2, ymm1
+       vmovaps  xmmword ptr [rbp-0x150], xmm2
+       mov      rdi, qword ptr [rbp-0x140]
+       mov      rsi, qword ptr [rbp-0x150]
+       mov      qword ptr [rbp-0x160], rdi
+       mov      qword ptr [rbp-0x168], rsi
+       mov      rdi, qword ptr [rbp-0x160]
+       mov      rsi, qword ptr [rbp-0x168]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x158], rax
+       mov      rsi, qword ptr [rbp-0x158]
+       mov      qword ptr [rbp-0x170], rsi
+       mov      rsi, qword ptr [rbp-0x170]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      r15, rax
+       mov      rdi, qword ptr [rbp-0x138]
+       mov      rsi, qword ptr [rbp-0x148]
+       mov      qword ptr [rbp-0x180], rdi
+       mov      qword ptr [rbp-0x188], rsi
+       mov      rdi, qword ptr [rbp-0x180]
+       mov      rsi, qword ptr [rbp-0x188]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x178], rax
+       mov      rsi, qword ptr [rbp-0x178]
+       mov      qword ptr [rbp-0x190], rsi
+       mov      rsi, qword ptr [rbp-0x190]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      r15, rax
+       vmovups  ymm0, ymmword ptr [rbp-0x110]
+       vextractf128 xmm0, ymm0, 1
+       vmovaps  xmmword ptr [rbp-0x1A0], xmm0
+       vmovups  ymm1, ymmword ptr [rbp-0x130]
+       vextractf128 xmm0, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x1B0], xmm0
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      rsi, qword ptr [rbp-0x1B0]
+       mov      qword ptr [rbp-0x1C0], rdi
+       mov      qword ptr [rbp-0x1C8], rsi
+       mov      rdi, qword ptr [rbp-0x1C0]
+       mov      rsi, qword ptr [rbp-0x1C8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+						;; size=369 bbWeight=1 PerfScore 76.75
 G_M64954_IG04:
+       mov      qword ptr [rbp-0x1B8], rax
+       mov      rsi, qword ptr [rbp-0x1B8]
+       mov      qword ptr [rbp-0x1D0], rsi
+       mov      rsi, qword ptr [rbp-0x1D0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      r14, rax
+       mov      rdi, qword ptr [rbp-0x198]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1A8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x1D8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[long]:op_Multiply(System.Runtime.Intrinsics.Vector64`1[long],System.Runtime.Intrinsics.Vector64`1[long]):System.Runtime.Intrinsics.Vector64`1[long]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[long]:op_Multiply(System.Runtime.Intrinsics.Vector64`1[long],System.Runtime.Intrinsics.Vector64`1[long]):System.Runtime.Intrinsics.Vector64`1[long]
+       mov      rsi, qword ptr [rbp-0x1D8]
+       mov      qword ptr [rbp-0x1E0], rsi
+       mov      rsi, qword ptr [rbp-0x1E0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r14
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       add      rax, rbx
+						;; size=161 bbWeight=1 PerfScore 29.75
+G_M64954_IG05:
        vzeroupper 
-       add      rsp, 264
+       add      rsp, 472
+       pop      rbx
+       pop      r14
+       pop      r15
+       pop      rbp
        ret      
-						;; size=11 bbWeight=1 PerfScore 2.25
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 493, prolog size 7, PerfScore 109.25, instruction count 79, allocated bytes for code 493 (MethodHash=fe850245) for method System.Runtime.Intrinsics.Vector512:Dot[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):long (FullOpts)
+; Total bytes of code 899, prolog size 21, PerfScore 192.25, instruction count 164, allocated bytes for code 899 (MethodHash=fe850245) for method System.Runtime.Intrinsics.Vector512:Dot[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):long (FullOpts)
294 (88.55 % of base) - System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],short):System.Runtime.Intrinsics.Vector256`1[short]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],short):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rbx         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[short]>
 ;  V02 arg1         [V02,T02] (  3,  3   )   short  ->  rsi         single-def
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V09 tmp6         [V09    ] (  5,  5   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V09 tmp6         [V09    ] (  5,  5   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V10 tmp7         [V10,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  5, 10   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V11 tmp8         [V11    ] (  5, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V12 tmp9         [V12    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;* V13 tmp10        [V13    ] (  0,  0   )   short  ->  zero-ref    "Inline return value spill temp"
-;* V14 tmp11        [V14    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  5,  5   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V16 tmp13        [V16,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  5, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V18 tmp15        [V18    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )   short  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T14] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V22 tmp19        [V22,T12] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V25 tmp22        [V25    ] (  5,  5   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  5, 10   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V13 tmp10        [V13    ] (  5,  5   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V14 tmp11        [V14,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V15 tmp12        [V15    ] (  5, 10   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V16 tmp13        [V16    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V17 tmp14        [V17,T14] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V18 tmp15        [V18,T12] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;* V19 tmp16        [V19    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V20 tmp17        [V20    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V21 tmp18        [V21    ] (  5,  5   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V22 tmp19        [V22,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V23 tmp20        [V23    ] (  5, 10   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V24 tmp21        [V24    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  5,  5   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V26 tmp23        [V26,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  5, 10   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V28 tmp25        [V28    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )   short  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  5,  5   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V32 tmp29        [V32,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  5, 10   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V34 tmp31        [V34    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )   short  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T15] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V39 tmp36        [V39,T03] (  2,  2   )    long  ->  rsi         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T04] (  2,  2   )    long  ->  rax         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  5,  5   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  5,  9   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  5,  5   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  5,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T05] (  2,  2   )    long  ->  rsi         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T06] (  2,  2   )    long  ->  rax         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  5,  5   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  5,  9   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  5,  5   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  5,  9   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V51 cse0         [V51,T00] ( 17, 17   )     int  ->  rcx         "CSE #01: aggressive"
+;  V29 tmp26        [V29,T15] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;* V30 tmp27        [V30    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V31 tmp28        [V31,T03] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V32 tmp29        [V32,T04] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V33 tmp30        [V33    ] (  5,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V34 tmp31        [V34    ] (  5,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V35 tmp32        [V35    ] (  5,  5   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  5,  9   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37,T05] (  2,  2   )    long  ->  r14         "field V19._00 (fldOffset=0x0)" P-INDEP
+;  V38 tmp35        [V38,T06] (  2,  2   )    long  ->  rax         "field V20._00 (fldOffset=0x0)" P-INDEP
+;  V39 tmp36        [V39    ] (  5,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
+;  V40 tmp37        [V40    ] (  5,  9   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V41 tmp38        [V41    ] (  5,  5   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  5,  9   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V43 cse0         [V43,T00] ( 17, 17   )     int  ->  r15         "CSE #01: aggressive"
 ;
-; Lcl frame size = 128
+; Lcl frame size = 152
 
 G_M25609_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 152
+       lea      rbp, [rsp+0xB0]
+       mov      rbx, rdi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+						;; size=29 bbWeight=1 PerfScore 9.00
 G_M25609_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       movsx    rax, word  ptr [rbp-0x20]
-       movsx    rcx, si
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x18], ax
-       movsx    rax, word  ptr [rbp-0x1E]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x16], ax
-       movsx    rax, word  ptr [rbp-0x1C]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x14], ax
-       movsx    rax, word  ptr [rbp-0x1A]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x12], ax
-       mov      rsi, qword ptr [rbp-0x18]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rax
-       movsx    rax, word  ptr [rbp-0x30]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x28], ax
-       movsx    rax, word  ptr [rbp-0x2E]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x26], ax
-       movsx    rax, word  ptr [rbp-0x2C]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x24], ax
-       movsx    rax, word  ptr [rbp-0x2A]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x22], ax
-       mov      rax, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rsi
-       mov      qword ptr [rbp-0x38], rax
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       movsx    rax, word  ptr [rbp-0x60]
-       cdq      
-       idiv     edx:eax, ecx
+       vmovaps  xmmword ptr [rbp-0x40], xmm1
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       movsx    rdi, word  ptr [rbp-0x50]
+       movsx    r15, si
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x48], ax
+       movsx    rdi, word  ptr [rbp-0x4E]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x46], ax
+       movsx    rdi, word  ptr [rbp-0x4C]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x44], ax
+       movsx    rdi, word  ptr [rbp-0x4A]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x42], ax
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       movsx    rdi, word  ptr [rbp-0x60]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
        mov      word  ptr [rbp-0x58], ax
-       movsx    rax, word  ptr [rbp-0x5E]
-       cdq      
-       idiv     edx:eax, ecx
+       movsx    rdi, word  ptr [rbp-0x5E]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
        mov      word  ptr [rbp-0x56], ax
-       movsx    rax, word  ptr [rbp-0x5C]
-       cdq      
-       idiv     edx:eax, ecx
+       movsx    rdi, word  ptr [rbp-0x5C]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
        mov      word  ptr [rbp-0x54], ax
-       movsx    rax, word  ptr [rbp-0x5A]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x52], ax
-						;; size=213 bbWeight=1 PerfScore 371.50
+       movsx    rdi, word  ptr [rbp-0x5A]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+						;; size=224 bbWeight=1 PerfScore 63.50
 G_M25609_IG03:
-       mov      rsi, qword ptr [rbp-0x58]
-       mov      rax, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rax
-       movsx    rax, word  ptr [rbp-0x70]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x68], ax
-       movsx    rax, word  ptr [rbp-0x6E]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x66], ax
-       movsx    rax, word  ptr [rbp-0x6C]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x64], ax
-       movsx    rax, word  ptr [rbp-0x6A]
-       cdq      
-       idiv     edx:eax, ecx
-       mov      word  ptr [rbp-0x62], ax
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rsi
-       mov      qword ptr [rbp-0x78], rax
-       vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=86 bbWeight=1 PerfScore 130.25
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x52], ax
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       movsx    rdi, word  ptr [rbp-0x90]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x88], ax
+       movsx    rdi, word  ptr [rbp-0x8E]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x86], ax
+       movsx    rdi, word  ptr [rbp-0x8C]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x84], ax
+       movsx    rdi, word  ptr [rbp-0x8A]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x82], ax
+       mov      r14, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       movsx    rdi, word  ptr [rbp-0xA0]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x98], ax
+       movsx    rdi, word  ptr [rbp-0x9E]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x96], ax
+       movsx    rdi, word  ptr [rbp-0x9C]
+       mov      esi, r15d
+						;; size=264 bbWeight=1 PerfScore 71.25
 G_M25609_IG04:
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x94], ax
+       movsx    rdi, word  ptr [rbp-0x9A]
+       mov      esi, r15d
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x92], ax
+       mov      rax, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vinserti128 ymm0, ymm0, xmmword ptr [rbp-0xB0], 1
+       vmovups  ymmword ptr [rbx], ymm0
+       mov      rax, rbx
+						;; size=92 bbWeight=1 PerfScore 24.00
+G_M25609_IG05:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 152
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 332, prolog size 16, PerfScore 510.25, instruction count 97, allocated bytes for code 332 (MethodHash=57b79bf6) for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],short):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
+; Total bytes of code 626, prolog size 21, PerfScore 172.00, instruction count 124, allocated bytes for code 626 (MethodHash=57b79bf6) for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],short):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
267 (126.54 % of base) - System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
-; rsp based frame
+; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 11 single block inlinees; 11 inlinees without PGO data
+; 0 inlinees with PGO data; 23 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V01 arg1         [V01,T09] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V00 arg0         [V00,T28] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V01 arg1         [V01,T29] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[long]>
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T00] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;  V04 tmp2         [V04,T04] (  3,  6   )  simd16  ->  [rsp+0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V05 tmp3         [V05,T05] (  3,  6   )  simd16  ->  [rsp+0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V06 tmp4         [V06,T01] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;* V07 tmp5         [V07    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V08 tmp6         [V08    ] (  2,  4   )  struct ( 8) [rsp+0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V09 tmp7         [V09    ] (  2,  4   )  struct ( 8) [rsp+0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V10 tmp8         [V10    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V11 tmp9         [V11    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V12 tmp10        [V12    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V13 tmp11        [V13    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V14 tmp12        [V14    ] (  2,  4   )  struct ( 8) [rsp+0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V15 tmp13        [V15    ] (  2,  4   )  struct ( 8) [rsp+0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V16 tmp14        [V16    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V17 tmp15        [V17    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V18 tmp16        [V18    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V19 tmp17        [V19    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp18        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V21 tmp19        [V21,T06] (  3,  6   )  simd16  ->  [rsp+0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V22 tmp20        [V22,T07] (  3,  6   )  simd16  ->  [rsp+0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V23 tmp21        [V23,T02] (  2,  4   )    long  ->  rcx         "impAppendStmt"
-;* V24 tmp22        [V24    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V25 tmp23        [V25    ] (  2,  4   )  struct ( 8) [rsp+0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V26 tmp24        [V26    ] (  2,  4   )  struct ( 8) [rsp+0x10]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V27 tmp25        [V27    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V28 tmp26        [V28    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V29 tmp27        [V29    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V30 tmp28        [V30    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V31 tmp29        [V31    ] (  2,  4   )  struct ( 8) [rsp+0x08]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V32 tmp30        [V32    ] (  2,  4   )  struct ( 8) [rsp+0x00]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V33 tmp31        [V33    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V34 tmp32        [V34    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V35 tmp33        [V35    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V36 tmp34        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V37 tmp35        [V37    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V38 tmp36        [V38,T03] (  2,  2   )    long  ->  rax         "Inline return value spill temp"
-;  V39 tmp37        [V39    ] (  2,  3   )    long  ->  [rsp+0x58]  do-not-enreg[X] addr-exposed "field V08._00 (fldOffset=0x0)" P-DEP
-;  V40 tmp38        [V40    ] (  2,  3   )    long  ->  [rsp+0x50]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V41 tmp39        [V41    ] (  2,  3   )    long  ->  [rsp+0x48]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp40        [V42    ] (  2,  3   )    long  ->  [rsp+0x40]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp41        [V43    ] (  2,  3   )    long  ->  [rsp+0x18]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp42        [V44    ] (  2,  3   )    long  ->  [rsp+0x10]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp43        [V45    ] (  2,  3   )    long  ->  [rsp+0x08]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V46 tmp44        [V46    ] (  2,  3   )    long  ->  [rsp+0x00]  do-not-enreg[X] addr-exposed "field V32._00 (fldOffset=0x0)" P-DEP
+;  V03 tmp1         [V03,T00] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;  V04 tmp2         [V04,T24] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V05 tmp3         [V05,T25] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V06 tmp4         [V06,T01] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;* V07 tmp5         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V08 tmp6         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V09 tmp7         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V10 tmp8         [V10    ] (  2,  2   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V11 tmp9         [V11,T16] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V12 tmp10        [V12    ] (  2,  4   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V13 tmp11        [V13    ] (  2,  4   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V14 tmp12        [V14    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V15 tmp13        [V15    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V16 tmp14        [V16    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V17 tmp15        [V17    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V18 tmp16        [V18,T04] (  2,  2   )    long  ->  rbx         ld-addr-op "Inline ldloca(s) first use temp"
+;* V19 tmp17        [V19,T17] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V20 tmp18        [V20    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V21 tmp19        [V21    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V22 tmp20        [V22    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V23 tmp21        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V24 tmp22        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V25 tmp23        [V25    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V26 tmp24        [V26,T18] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp25        [V27    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V28 tmp26        [V28    ] (  2,  4   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V29 tmp27        [V29    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V30 tmp28        [V30    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V31 tmp29        [V31,T05] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V32 tmp30        [V32,T19] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V33 tmp31        [V33    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V34 tmp32        [V34    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V35 tmp33        [V35,T26] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V36 tmp34        [V36,T27] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V37 tmp35        [V37,T02] (  2,  4   )    long  ->  r15         "impAppendStmt"
+;* V38 tmp36        [V38    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V39 tmp37        [V39    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V40 tmp38        [V40    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V41 tmp39        [V41    ] (  2,  2   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V42 tmp40        [V42,T20] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V43 tmp41        [V43    ] (  2,  4   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V44 tmp42        [V44    ] (  2,  4   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V45 tmp43        [V45    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V46 tmp44        [V46    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V47 tmp45        [V47,T06] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V48 tmp46        [V48,T21] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V49 tmp47        [V49    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V50 tmp48        [V50    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V51 tmp49        [V51    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V52 tmp50        [V52    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V53 tmp51        [V53    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V54 tmp52        [V54    ] (  2,  2   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V55 tmp53        [V55,T22] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V56 tmp54        [V56    ] (  2,  4   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V57 tmp55        [V57    ] (  2,  4   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V58 tmp56        [V58    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V59 tmp57        [V59    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V60 tmp58        [V60,T07] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V61 tmp59        [V61,T23] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V62 tmp60        [V62    ] (  2,  4   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V63 tmp61        [V63    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V64 tmp62        [V64,T08] (  2,  2   )    long  ->  rsi         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V65 tmp63        [V65,T09] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;* V66 tmp64        [V66    ] (  0,  0   )    long  ->  zero-ref    "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V67 tmp65        [V67    ] (  2,  2   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V68 tmp66        [V68    ] (  2,  3   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V69 tmp67        [V69    ] (  2,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V70 tmp68        [V70    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V20._00 (fldOffset=0x0)" P-DEP
+;  V71 tmp69        [V71,T10] (  2,  2   )    long  ->  rdi         "field V22._00 (fldOffset=0x0)" P-INDEP
+;  V72 tmp70        [V72,T11] (  2,  2   )    long  ->  rsi         "field V23._00 (fldOffset=0x0)" P-INDEP
+;* V73 tmp71        [V73    ] (  0,  0   )    long  ->  zero-ref    "field V24._00 (fldOffset=0x0)" P-INDEP
+;  V74 tmp72        [V74    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V75 tmp73        [V75    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V76 tmp74        [V76    ] (  2,  3   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
+;  V77 tmp75        [V77    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V78 tmp76        [V78,T12] (  2,  2   )    long  ->  rdi         "field V38._00 (fldOffset=0x0)" P-INDEP
+;  V79 tmp77        [V79,T13] (  2,  2   )    long  ->  rsi         "field V39._00 (fldOffset=0x0)" P-INDEP
+;* V80 tmp78        [V80    ] (  0,  0   )    long  ->  zero-ref    "field V40._00 (fldOffset=0x0)" P-INDEP
+;  V81 tmp79        [V81    ] (  2,  2   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V41._00 (fldOffset=0x0)" P-DEP
+;  V82 tmp80        [V82    ] (  2,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+;  V83 tmp81        [V83    ] (  2,  3   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
+;  V84 tmp82        [V84    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
+;  V85 tmp83        [V85,T14] (  2,  2   )    long  ->  rdi         "field V51._00 (fldOffset=0x0)" P-INDEP
+;  V86 tmp84        [V86,T15] (  2,  2   )    long  ->  rsi         "field V52._00 (fldOffset=0x0)" P-INDEP
+;* V87 tmp85        [V87    ] (  0,  0   )    long  ->  zero-ref    "field V53._00 (fldOffset=0x0)" P-INDEP
+;  V88 tmp86        [V88    ] (  2,  2   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V54._00 (fldOffset=0x0)" P-DEP
+;  V89 tmp87        [V89    ] (  2,  3   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V56._00 (fldOffset=0x0)" P-DEP
+;  V90 tmp88        [V90    ] (  2,  3   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+;  V91 tmp89        [V91    ] (  2,  3   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
+;  V92 tmp90        [V92,T03] (  2,  4   )    long  ->  rsi         "argument with side effect"
 ;
-; Lcl frame size = 136
+; Lcl frame size = 192
 
 G_M64869_IG01:
-       sub      rsp, 136
-       vmovups  ymm0, ymmword ptr [rsp+0x90]
-       vmovups  ymm1, ymmword ptr [rsp+0xB0]
-						;; size=25 bbWeight=1 PerfScore 8.25
+       push     rbp
+       push     r15
+       push     rbx
+       sub      rsp, 192
+       lea      rbp, [rsp+0xD0]
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+						;; size=29 bbWeight=1 PerfScore 11.75
 G_M64869_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rsp+0x70], xmm2
+       vmovaps  xmmword ptr [rbp-0x20], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rsp+0x60], xmm2
-       mov      rax, qword ptr [rsp+0x70]
-       mov      qword ptr [rsp+0x58], rax
-       mov      rax, qword ptr [rsp+0x60]
-       mov      qword ptr [rsp+0x50], rax
-       mov      rax, qword ptr [rsp+0x58]
-       imul     rax, qword ptr [rsp+0x50]
-       mov      rcx, qword ptr [rsp+0x78]
-       mov      qword ptr [rsp+0x48], rcx
-       mov      rcx, qword ptr [rsp+0x68]
-       mov      qword ptr [rsp+0x40], rcx
-       mov      rcx, qword ptr [rsp+0x48]
-       imul     rcx, qword ptr [rsp+0x40]
-       add      rax, rcx
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       mov      rsi, qword ptr [rbp-0x20]
+       mov      rdi, qword ptr [rbp-0x30]
+       mov      qword ptr [rbp-0x40], rsi
+       mov      qword ptr [rbp-0x48], rdi
+       mov      rsi, qword ptr [rbp-0x40]
+       imul     rsi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x38], rsi
+       mov      rsi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x50], rsi
+       mov      rsi, qword ptr [rbp-0x50]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rbx, rax
+       mov      rdi, qword ptr [rbp-0x18]
+       mov      rsi, qword ptr [rbp-0x28]
+       mov      qword ptr [rbp-0x60], rdi
+       mov      qword ptr [rbp-0x68], rsi
+       mov      rdi, qword ptr [rbp-0x60]
+       mov      rsi, qword ptr [rbp-0x68]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x58], rax
+       mov      rsi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], rsi
+       mov      rsi, qword ptr [rbp-0x70]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rbx, rax
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rsp+0x30], xmm0
+       vmovaps  xmmword ptr [rbp-0x80], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rsp+0x20], xmm0
-       mov      rcx, qword ptr [rsp+0x30]
-       mov      qword ptr [rsp+0x18], rcx
-       mov      rcx, qword ptr [rsp+0x20]
-       mov      qword ptr [rsp+0x10], rcx
-       mov      rcx, qword ptr [rsp+0x18]
-       imul     rcx, qword ptr [rsp+0x10]
-       mov      rdx, qword ptr [rsp+0x38]
-       mov      qword ptr [rsp+0x08], rdx
-       mov      rdx, qword ptr [rsp+0x28]
-       mov      qword ptr [rsp], rdx
-       add      rax, rcx
-       mov      rcx, qword ptr [rsp+0x08]
-       imul     rcx, qword ptr [rsp]
-       add      rax, rcx
-						;; size=175 bbWeight=1 PerfScore 45.25
+       vmovaps  xmmword ptr [rbp-0x90], xmm0
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      rsi, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xA0], rdi
+       mov      qword ptr [rbp-0xA8], rsi
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      rsi, qword ptr [rbp-0xA8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x98], rax
+       mov      rsi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], rsi
+       mov      rsi, qword ptr [rbp-0xB0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+						;; size=299 bbWeight=1 PerfScore 69.75
 G_M64869_IG03:
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      r15, rax
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      rsi, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC0], rdi
+       mov      qword ptr [rbp-0xC8], rsi
+       mov      rdi, qword ptr [rbp-0xC0]
+       mov      rsi, qword ptr [rbp-0xC8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0xB8], rax
+       mov      rsi, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xD0], rsi
+       mov      rsi, qword ptr [rbp-0xD0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       nop      
+						;; size=135 bbWeight=1 PerfScore 27.75
+G_M64869_IG04:
        vzeroupper 
-       add      rsp, 136
+       add      rsp, 192
+       pop      rbx
+       pop      r15
+       pop      rbp
        ret      
-						;; size=11 bbWeight=1 PerfScore 2.25
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 211, prolog size 7, PerfScore 55.75, instruction count 41, allocated bytes for code 211 (MethodHash=91e4029a) for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
+; Total bytes of code 478, prolog size 19, PerfScore 113.00, instruction count 99, allocated bytes for code 478 (MethodHash=91e4029a) for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
266 (35.90 % of base) - System.Runtime.Intrinsics.Vector512:Divide[int](System.Runtime.Intrinsics.Vector512`1[int],System.Runtime.Intrinsics.Vector512`1[int]):System.Runtime.Intrinsics.Vector512`1[int]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Divide[int](System.Runtime.Intrinsics.Vector512`1[int],System.Runtime.Intrinsics.Vector512`1[int]):System.Runtime.Intrinsics.Vector512`1[int] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 43 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 38 single block inlinees; 21 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T00] (  5,  5   )   byref  ->  rdi         single-def
+;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rbx         single-def
 ;* V01 arg0         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[int]>
 ;* V02 arg1         [V02    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[int]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T29] (  2,  4   )  simd32  ->  mm0         "impAppendStmt"
-;  V05 tmp2         [V05,T30] (  2,  4   )  simd32  ->  mm1         "spilled call-like call argument"
-;  V06 tmp3         [V06,T17] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V07 tmp4         [V07,T18] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V08 tmp5         [V08,T31] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T28] (  2,  4   )  simd32  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
+;  V05 tmp2         [V05,T29] (  2,  4   )  simd32  ->  [rbp-0x50]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument"
+;  V06 tmp3         [V06,T16] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V07 tmp4         [V07,T17] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V08 tmp5         [V08,T30] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
 ;* V09 tmp6         [V09    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V10 tmp7         [V10,T19] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V11 tmp8         [V11,T20] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V10 tmp7         [V10,T18] (  3,  6   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V11 tmp8         [V11,T19] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V12 tmp9         [V12    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V13 tmp10        [V13    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V14 tmp11        [V14    ] (  3,  3   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V14 tmp11        [V14    ] (  3,  3   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V15 tmp12        [V15,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16    ] (  3,  6   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V17 tmp14        [V17    ] (  3,  6   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V16 tmp13        [V16    ] (  3,  6   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V17 tmp14        [V17    ] (  3,  6   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V18 tmp15        [V18    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V19 tmp16        [V19    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
 ;* V20 tmp17        [V20    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
 ;* V21 tmp18        [V21    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V22 tmp19        [V22    ] (  3,  3   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V22 tmp19        [V22    ] (  3,  3   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V23 tmp20        [V23,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V24 tmp21        [V24    ] (  3,  6   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V25 tmp22        [V25    ] (  3,  6   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V24 tmp21        [V24    ] (  3,  6   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V25 tmp22        [V25    ] (  3,  6   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V26 tmp23        [V26    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V27 tmp24        [V27    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
 ;* V28 tmp25        [V28    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
 ;* V29 tmp26        [V29    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V30 tmp27        [V30,T33] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V31 tmp28        [V31,T21] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V32 tmp29        [V32,T22] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V30 tmp27        [V30,T32] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V31 tmp28        [V31,T20] (  3,  6   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V32 tmp29        [V32,T21] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V33 tmp30        [V33    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V34 tmp31        [V34    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V35 tmp32        [V35    ] (  3,  3   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V35 tmp32        [V35    ] (  3,  3   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V36 tmp33        [V36,T11] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V37 tmp34        [V37    ] (  3,  6   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V38 tmp35        [V38    ] (  3,  6   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V37 tmp34        [V37    ] (  3,  6   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V38 tmp35        [V38    ] (  3,  6   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V39 tmp36        [V39    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V40 tmp37        [V40    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
 ;* V41 tmp38        [V41    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
 ;* V42 tmp39        [V42    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V43 tmp40        [V43    ] (  3,  3   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V43 tmp40        [V43    ] (  3,  3   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V44 tmp41        [V44,T12] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V45 tmp42        [V45    ] (  3,  6   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V46 tmp43        [V46    ] (  3,  6   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V45 tmp42        [V45    ] (  3,  6   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V46 tmp43        [V46    ] (  3,  6   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V47 tmp44        [V47    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V48 tmp45        [V48    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
 ;* V49 tmp46        [V49    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
 ;* V50 tmp47        [V50    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V51 tmp48        [V51,T34] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V51 tmp48        [V51,T33] (  3,  3   )  simd16  ->  [rbp-0x110]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V52 tmp49        [V52    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V53 tmp50        [V53,T23] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V54 tmp51        [V54,T24] (  3,  6   )  simd32  ->  mm2         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V55 tmp52        [V55,T32] (  2,  4   )  simd16  ->  mm3         "impAppendStmt"
+;  V53 tmp50        [V53,T22] (  3,  6   )  simd32  ->  [rbp-0x130]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V54 tmp51        [V54,T23] (  3,  6   )  simd32  ->  [rbp-0x150]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V55 tmp52        [V55,T31] (  2,  4   )  simd16  ->  [rbp-0x160]  spill-single-def "impAppendStmt"
 ;* V56 tmp53        [V56    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V57 tmp54        [V57,T25] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V58 tmp55        [V58,T26] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V57 tmp54        [V57,T24] (  3,  6   )  simd16  ->  [rbp-0x170]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V58 tmp55        [V58,T25] (  3,  6   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V59 tmp56        [V59    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V60 tmp57        [V60    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V61 tmp58        [V61    ] (  3,  3   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V61 tmp58        [V61    ] (  3,  3   )  struct ( 8) [rbp-0x188]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V62 tmp59        [V62,T13] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V63 tmp60        [V63    ] (  3,  6   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V64 tmp61        [V64    ] (  3,  6   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V63 tmp60        [V63    ] (  3,  6   )  struct ( 8) [rbp-0x190]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V64 tmp61        [V64    ] (  3,  6   )  struct ( 8) [rbp-0x198]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V65 tmp62        [V65    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V66 tmp63        [V66    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V67 tmp64        [V67    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V68 tmp65        [V68    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V69 tmp66        [V69    ] (  3,  3   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V70 tmp67        [V70,T14] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V71 tmp68        [V71    ] (  3,  6   )  struct ( 8) [rbp-0x108]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V72 tmp69        [V72    ] (  3,  6   )  struct ( 8) [rbp-0x110]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V73 tmp70        [V73    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V74 tmp71        [V74    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V75 tmp72        [V75    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V76 tmp73        [V76    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V77 tmp74        [V77,T35] (  3,  3   )  simd16  ->  [rbp-0x120]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V78 tmp75        [V78,T27] (  3,  6   )  simd16  ->  [rbp-0x130]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V79 tmp76        [V79,T28] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V80 tmp77        [V80    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V81 tmp78        [V81    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V82 tmp79        [V82    ] (  3,  3   )  struct ( 8) [rbp-0x148]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V83 tmp80        [V83,T15] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V84 tmp81        [V84    ] (  3,  6   )  struct ( 8) [rbp-0x150]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V85 tmp82        [V85    ] (  3,  6   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V86 tmp83        [V86    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V87 tmp84        [V87    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V88 tmp85        [V88    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V89 tmp86        [V89    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V90 tmp87        [V90    ] (  3,  3   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V91 tmp88        [V91,T16] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V92 tmp89        [V92    ] (  3,  6   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V93 tmp90        [V93    ] (  3,  6   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V94 tmp91        [V94    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V95 tmp92        [V95    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V96 tmp93        [V96    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V97 tmp94        [V97    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V98 tmp95        [V98,T36] (  3,  3   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V99 tmp96        [V99    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[int]>
-;* V100 tmp97       [V100    ] (  0,  0   )  struct (64) zero-ref    ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[int]>
-;  V101 tmp98       [V101,T39] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V102 tmp99       [V102,T40] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V103 tmp100      [V103,T41] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
-;  V104 tmp101      [V104,T42] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
-;  V105 tmp102      [V105,T01] (  2,  2   )    long  ->  rcx         "field V12._00 (fldOffset=0x0)" P-INDEP
-;  V106 tmp103      [V106,T02] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
-;  V107 tmp104      [V107    ] (  3,  3   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V108 tmp105      [V108    ] (  3,  5   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
-;  V109 tmp106      [V109    ] (  3,  5   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V110 tmp107      [V110    ] (  3,  3   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-;  V111 tmp108      [V111    ] (  3,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-;  V112 tmp109      [V112    ] (  3,  5   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V113 tmp110      [V113,T03] (  2,  2   )    long  ->  rcx         "field V33._00 (fldOffset=0x0)" P-INDEP
-;  V114 tmp111      [V114,T04] (  2,  2   )    long  ->  rax         "field V34._00 (fldOffset=0x0)" P-INDEP
-;  V115 tmp112      [V115    ] (  3,  3   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-;  V116 tmp113      [V116    ] (  3,  5   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;  V117 tmp114      [V117    ] (  3,  5   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V38._00 (fldOffset=0x0)" P-DEP
-;  V118 tmp115      [V118    ] (  3,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
-;  V119 tmp116      [V119    ] (  3,  5   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
-;  V120 tmp117      [V120    ] (  3,  5   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-;  V121 tmp118      [V121,T05] (  2,  2   )    long  ->  rcx         "field V59._00 (fldOffset=0x0)" P-INDEP
-;  V122 tmp119      [V122,T06] (  2,  2   )    long  ->  rax         "field V60._00 (fldOffset=0x0)" P-INDEP
-;  V123 tmp120      [V123    ] (  3,  3   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V61._00 (fldOffset=0x0)" P-DEP
-;  V124 tmp121      [V124    ] (  3,  5   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
-;  V125 tmp122      [V125    ] (  3,  5   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;  V126 tmp123      [V126    ] (  3,  3   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
-;  V127 tmp124      [V127    ] (  3,  5   )    long  ->  [rbp-0x108]  do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-;  V128 tmp125      [V128    ] (  3,  5   )    long  ->  [rbp-0x110]  do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
-;  V129 tmp126      [V129,T07] (  2,  2   )    long  ->  rcx         "field V80._00 (fldOffset=0x0)" P-INDEP
-;  V130 tmp127      [V130,T08] (  2,  2   )    long  ->  rax         "field V81._00 (fldOffset=0x0)" P-INDEP
-;  V131 tmp128      [V131    ] (  3,  3   )    long  ->  [rbp-0x148]  do-not-enreg[X] addr-exposed "field V82._00 (fldOffset=0x0)" P-DEP
-;  V132 tmp129      [V132    ] (  3,  5   )    long  ->  [rbp-0x150]  do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-;  V133 tmp130      [V133    ] (  3,  5   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V85._00 (fldOffset=0x0)" P-DEP
-;  V134 tmp131      [V134    ] (  3,  3   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V90._00 (fldOffset=0x0)" P-DEP
-;  V135 tmp132      [V135    ] (  3,  5   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V92._00 (fldOffset=0x0)" P-DEP
-;  V136 tmp133      [V136    ] (  3,  5   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-;  V137 tmp134      [V137,T37] (  2,  2   )  simd32  ->  mm0         "field V100._lower (fldOffset=0x0)" P-INDEP
-;  V138 tmp135      [V138,T38] (  2,  2   )  simd32  ->  mm1         "field V100._upper (fldOffset=0x20)" P-INDEP
+;  V67 tmp64        [V67    ] (  3,  3   )  struct ( 8) [rbp-0x1A0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V68 tmp65        [V68,T14] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V69 tmp66        [V69    ] (  3,  6   )  struct ( 8) [rbp-0x1A8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V70 tmp67        [V70    ] (  3,  6   )  struct ( 8) [rbp-0x1B0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V71 tmp68        [V71    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V72 tmp69        [V72    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V73 tmp70        [V73,T34] (  3,  3   )  simd16  ->  [rbp-0x1C0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V74 tmp71        [V74,T26] (  3,  6   )  simd16  ->  [rbp-0x1D0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V75 tmp72        [V75,T27] (  3,  6   )  simd16  ->  [rbp-0x1E0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V76 tmp73        [V76    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V77 tmp74        [V77    ] (  2,  4   )  struct ( 8) [rbp-0x1E8]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V78 tmp75        [V78    ] (  3,  3   )  struct ( 8) [rbp-0x1F0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V79 tmp76        [V79,T15] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V80 tmp77        [V80    ] (  3,  6   )  struct ( 8) [rbp-0x1F8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V81 tmp78        [V81    ] (  3,  6   )  struct ( 8) [rbp-0x200]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V82 tmp79        [V82    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V83 tmp80        [V83    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V84 tmp81        [V84    ] (  3,  3   )  simd16  ->  [rbp-0x210]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V85 tmp82        [V85    ] (  3,  3   )  struct (64) [rbp-0x250]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[int]>
+;  V86 tmp83        [V86,T35] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V87 tmp84        [V87,T36] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V88 tmp85        [V88,T37] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
+;  V89 tmp86        [V89,T38] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
+;  V90 tmp87        [V90,T02] (  2,  2   )    long  ->  rdi         "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V91 tmp88        [V91,T03] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
+;  V92 tmp89        [V92    ] (  3,  3   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
+;  V93 tmp90        [V93    ] (  3,  5   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V94 tmp91        [V94    ] (  3,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V95 tmp92        [V95    ] (  3,  3   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
+;  V96 tmp93        [V96    ] (  3,  5   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
+;  V97 tmp94        [V97    ] (  3,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V98 tmp95        [V98,T04] (  2,  2   )    long  ->  rdi         "field V33._00 (fldOffset=0x0)" P-INDEP
+;  V99 tmp96        [V99,T05] (  2,  2   )    long  ->  rsi         "field V34._00 (fldOffset=0x0)" P-INDEP
+;  V100 tmp97       [V100    ] (  3,  3   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V101 tmp98       [V101    ] (  3,  5   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
+;  V102 tmp99       [V102    ] (  3,  5   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V38._00 (fldOffset=0x0)" P-DEP
+;  V103 tmp100      [V103    ] (  3,  3   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+;  V104 tmp101      [V104    ] (  3,  5   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+;  V105 tmp102      [V105    ] (  3,  5   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
+;  V106 tmp103      [V106,T06] (  2,  2   )    long  ->  r15         "field V59._00 (fldOffset=0x0)" P-INDEP
+;  V107 tmp104      [V107,T07] (  2,  2   )    long  ->  rdi         "field V60._00 (fldOffset=0x0)" P-INDEP
+;  V108 tmp105      [V108    ] (  3,  3   )    long  ->  [rbp-0x188]  do-not-enreg[X] addr-exposed "field V61._00 (fldOffset=0x0)" P-DEP
+;  V109 tmp106      [V109    ] (  3,  5   )    long  ->  [rbp-0x190]  do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
+;  V110 tmp107      [V110    ] (  3,  5   )    long  ->  [rbp-0x198]  do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
+;  V111 tmp108      [V111    ] (  3,  3   )    long  ->  [rbp-0x1A0]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
+;  V112 tmp109      [V112    ] (  3,  5   )    long  ->  [rbp-0x1A8]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
+;  V113 tmp110      [V113    ] (  3,  5   )    long  ->  [rbp-0x1B0]  do-not-enreg[X] addr-exposed "field V70._00 (fldOffset=0x0)" P-DEP
+;  V114 tmp111      [V114,T08] (  2,  2   )    long  ->  r15         "field V76._00 (fldOffset=0x0)" P-INDEP
+;  V115 tmp112      [V115,T01] (  2,  3   )    long  ->  [rbp-0x1E8]  do-not-enreg[H] hidden-struct-arg "field V77._00 (fldOffset=0x0)" P-DEP
+;  V116 tmp113      [V116    ] (  3,  3   )    long  ->  [rbp-0x1F0]  do-not-enreg[X] addr-exposed "field V78._00 (fldOffset=0x0)" P-DEP
+;  V117 tmp114      [V117    ] (  3,  5   )    long  ->  [rbp-0x1F8]  do-not-enreg[X] addr-exposed "field V80._00 (fldOffset=0x0)" P-DEP
+;  V118 tmp115      [V118    ] (  3,  5   )    long  ->  [rbp-0x200]  do-not-enreg[X] addr-exposed "field V81._00 (fldOffset=0x0)" P-DEP
+;  V119 tmp116      [V119    ] (  3,  3   )  simd32  ->  [rbp-0x250]  do-not-enreg[XS] addr-exposed "field V85._lower (fldOffset=0x0)" P-DEP
+;  V120 tmp117      [V120    ] (  2,  2   )  simd32  ->  [rbp-0x230]  do-not-enreg[XS] addr-exposed "field V85._upper (fldOffset=0x20)" P-DEP
 ;
-; Lcl frame size = 384
+; Lcl frame size = 608
 
 G_M33333_IG01:
        push     rbp
-       sub      rsp, 384
-       lea      rbp, [rsp+0x180]
-						;; size=16 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     rbx
+       sub      rsp, 608
+       lea      rbp, [rsp+0x270]
+       mov      rbx, rdi
+						;; size=22 bbWeight=1 PerfScore 4.00
 G_M33333_IG02:
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x50]
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x60], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       mov      eax, dword ptr [rbp-0x30]
+       vmovaps  xmmword ptr [rbp-0x70], xmm2
+       mov      rax, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], rax
+       mov      rax, qword ptr [rbp-0x70]
+       mov      qword ptr [rbp-0x88], rax
+       mov      eax, dword ptr [rbp-0x80]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x38]
-       mov      dword ptr [rbp-0x28], eax
-       mov      eax, dword ptr [rbp-0x2C]
+       idiv     edx:eax, dword ptr [rbp-0x88]
+       mov      dword ptr [rbp-0x78], eax
+       mov      eax, dword ptr [rbp-0x7C]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x34]
-       mov      dword ptr [rbp-0x24], eax
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       mov      eax, dword ptr [rbp-0x48]
+       idiv     edx:eax, dword ptr [rbp-0x84]
+       mov      dword ptr [rbp-0x74], eax
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      rax, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x98], rax
+       mov      rax, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0xA0], rax
+       mov      eax, dword ptr [rbp-0x98]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x50]
-       mov      dword ptr [rbp-0x40], eax
-       mov      eax, dword ptr [rbp-0x44]
+       idiv     edx:eax, dword ptr [rbp-0xA0]
+       mov      dword ptr [rbp-0x90], eax
+       mov      eax, dword ptr [rbp-0x94]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x4C]
-       mov      dword ptr [rbp-0x3C], eax
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
+       idiv     edx:eax, dword ptr [rbp-0x9C]
+       mov      dword ptr [rbp-0x8C], eax
+       mov      rax, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm2, xmmword ptr [rbp-0xB0]
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
+       vmovaps  xmmword ptr [rbp-0xC0], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       mov      eax, dword ptr [rbp-0x90]
+       vmovaps  xmmword ptr [rbp-0xD0], xmm0
+       mov      rax, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], rax
+       mov      rax, qword ptr [rbp-0xD0]
+       mov      qword ptr [rbp-0xE8], rax
+       mov      eax, dword ptr [rbp-0xE0]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x98]
-       mov      dword ptr [rbp-0x88], eax
-       mov      eax, dword ptr [rbp-0x8C]
+       idiv     edx:eax, dword ptr [rbp-0xE8]
+       mov      dword ptr [rbp-0xD8], eax
+       mov      eax, dword ptr [rbp-0xDC]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0x94]
-       mov      dword ptr [rbp-0x84], eax
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       mov      eax, dword ptr [rbp-0xA8]
+       idiv     edx:eax, dword ptr [rbp-0xE4]
+       mov      dword ptr [rbp-0xD4], eax
+       mov      rdi, qword ptr [rbp-0xD8]
+       mov      rax, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xF8], rax
+       mov      rax, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0x100], rax
+       mov      eax, dword ptr [rbp-0xF8]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0xB0]
-       mov      dword ptr [rbp-0xA0], eax
-						;; size=251 bbWeight=1 PerfScore 233.00
+       idiv     edx:eax, dword ptr [rbp-0x100]
+       mov      dword ptr [rbp-0xF0], eax
+						;; size=314 bbWeight=1 PerfScore 233.00
 G_M33333_IG03:
-       mov      eax, dword ptr [rbp-0xA4]
+       mov      eax, dword ptr [rbp-0xF4]
        cdq      
-       idiv     edx:eax, dword ptr [rbp-0xAC]
-       mov      dword ptr [rbp-0x9C], eax
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
+       idiv     edx:eax, dword ptr [rbp-0xFC]
+       mov      dword ptr [rbp-0xEC], eax
+       mov      rsi, qword ptr [rbp-0xF0]
+       mov      qword ptr [rbp-0x110], rdi
+       mov      qword ptr [rbp-0x108], rsi
+       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0x110], 1
+       vmovups  ymmword ptr [rbp-0x30], ymm0
        vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x130], ymm1
        vmovups  ymm2, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x150], ymm2
        vmovaps  ymm3, ymm1
-       vmovaps  xmmword ptr [rbp-0xD0], xmm3
+       vmovaps  xmmword ptr [rbp-0x170], xmm3
        vmovaps  ymm3, ymm2
-       vmovaps  xmmword ptr [rbp-0xE0], xmm3
-       mov      rax, qword ptr [rbp-0xD0]
-       mov      qword ptr [rbp-0xF0], rax
-       mov      rax, qword ptr [rbp-0xE0]
-       mov      qword ptr [rbp-0xF8], rax
-       mov      eax, dword ptr [rbp-0xF0]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0xF8]
-       mov      dword ptr [rbp-0xE8], eax
-       mov      eax, dword ptr [rbp-0xEC]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0xF4]
-       mov      dword ptr [rbp-0xE4], eax
-       mov      rcx, qword ptr [rbp-0xE8]
-       mov      rax, qword ptr [rbp-0xC8]
-       mov      qword ptr [rbp-0x108], rax
-       mov      rax, qword ptr [rbp-0xD8]
-       mov      qword ptr [rbp-0x110], rax
-       mov      eax, dword ptr [rbp-0x108]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x110]
-       mov      dword ptr [rbp-0x100], eax
-       mov      eax, dword ptr [rbp-0x104]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x10C]
-       mov      dword ptr [rbp-0xFC], eax
-       mov      rax, qword ptr [rbp-0x100]
-       mov      qword ptr [rbp-0x120], rcx
-       mov      qword ptr [rbp-0x118], rax
-       vmovaps  xmm3, xmmword ptr [rbp-0x120]
+       vmovaps  xmmword ptr [rbp-0x180], xmm3
+       mov      rdi, qword ptr [rbp-0x170]
+       mov      qword ptr [rbp-0x190], rdi
+       mov      rdi, qword ptr [rbp-0x180]
+       mov      qword ptr [rbp-0x198], rdi
+       mov      edi, dword ptr [rbp-0x190]
+       mov      esi, dword ptr [rbp-0x198]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x188], eax
+       mov      edi, dword ptr [rbp-0x18C]
+       mov      esi, dword ptr [rbp-0x194]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x184], eax
+       mov      r15, qword ptr [rbp-0x188]
+       mov      rdi, qword ptr [rbp-0x168]
+       mov      qword ptr [rbp-0x1A8], rdi
+       mov      rdi, qword ptr [rbp-0x178]
+       mov      qword ptr [rbp-0x1B0], rdi
+       mov      edi, dword ptr [rbp-0x1A8]
+       mov      esi, dword ptr [rbp-0x1B0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x1A0], eax
+       mov      edi, dword ptr [rbp-0x1A4]
+       mov      esi, dword ptr [rbp-0x1AC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x19C], eax
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      qword ptr [rbp-0x1C0], r15
+       mov      qword ptr [rbp-0x1B8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x1C0]
+       vmovaps  xmmword ptr [rbp-0x160], xmm0
+						;; size=325 bbWeight=1 PerfScore 89.00
+G_M33333_IG04:
+       vmovups  ymm1, ymmword ptr [rbp-0x130]
        vextractf128 xmm1, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x130], xmm1
+       vmovaps  xmmword ptr [rbp-0x1D0], xmm1
+       vmovups  ymm2, ymmword ptr [rbp-0x150]
        vextractf128 xmm1, ymm2, 1
-       vmovaps  xmmword ptr [rbp-0x140], xmm1
-       mov      rax, qword ptr [rbp-0x130]
-       mov      qword ptr [rbp-0x150], rax
-       mov      rax, qword ptr [rbp-0x140]
-       mov      qword ptr [rbp-0x158], rax
-       mov      eax, dword ptr [rbp-0x150]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x158]
-       mov      dword ptr [rbp-0x148], eax
-       mov      eax, dword ptr [rbp-0x14C]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x154]
-       mov      dword ptr [rbp-0x144], eax
-       mov      rcx, qword ptr [rbp-0x148]
-						;; size=353 bbWeight=1 PerfScore 236.00
-G_M33333_IG04:
-       mov      rax, qword ptr [rbp-0x128]
-       mov      qword ptr [rbp-0x168], rax
-       mov      rax, qword ptr [rbp-0x138]
-       mov      qword ptr [rbp-0x170], rax
-       mov      eax, dword ptr [rbp-0x168]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x170]
-       mov      dword ptr [rbp-0x160], eax
-       mov      eax, dword ptr [rbp-0x164]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x16C]
-       mov      dword ptr [rbp-0x15C], eax
-       mov      rax, qword ptr [rbp-0x160]
-       mov      qword ptr [rbp-0x180], rcx
-       mov      qword ptr [rbp-0x178], rax
-       vinserti128 ymm1, ymm3, xmmword ptr [rbp-0x180], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       vmovups  ymmword ptr [rdi+0x20], ymm1
-       mov      rax, rdi
-						;; size=109 bbWeight=1 PerfScore 70.25
+       vmovaps  xmmword ptr [rbp-0x1E0], xmm1
+       mov      rdi, qword ptr [rbp-0x1D0]
+       mov      qword ptr [rbp-0x1F8], rdi
+       mov      rdi, qword ptr [rbp-0x1E0]
+       mov      qword ptr [rbp-0x200], rdi
+       mov      edi, dword ptr [rbp-0x1F8]
+       mov      esi, dword ptr [rbp-0x200]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x1F0], eax
+       mov      edi, dword ptr [rbp-0x1F4]
+       mov      esi, dword ptr [rbp-0x1FC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x1EC], eax
+       mov      r15, qword ptr [rbp-0x1F0]
+       mov      rdi, qword ptr [rbp-0x1C8]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1D8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x1E8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[int]:op_Division(System.Runtime.Intrinsics.Vector64`1[int],System.Runtime.Intrinsics.Vector64`1[int]):System.Runtime.Intrinsics.Vector64`1[int]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[int]:op_Division(System.Runtime.Intrinsics.Vector64`1[int],System.Runtime.Intrinsics.Vector64`1[int]):System.Runtime.Intrinsics.Vector64`1[int]
+       mov      qword ptr [rbp-0x210], r15
+       mov      rdi, qword ptr [rbp-0x1E8]
+       mov      qword ptr [rsp], rdi
+       lea      rdi, [rbp-0x210]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[int](byref,System.Runtime.Intrinsics.Vector64`1[int])
+       call     [rax]System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[int](byref,System.Runtime.Intrinsics.Vector64`1[int])
+       vmovaps  xmm0, xmmword ptr [rbp-0x160]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0x210]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       lea      rdi, [rbp-0x50]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[int](System.Runtime.Intrinsics.Vector128`1[int],System.Runtime.Intrinsics.Vector128`1[int]):System.Runtime.Intrinsics.Vector256`1[int]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[int](System.Runtime.Intrinsics.Vector128`1[int],System.Runtime.Intrinsics.Vector128`1[int]):System.Runtime.Intrinsics.Vector256`1[int]
+       vmovups  ymm0, ymmword ptr [rbp-0x30]
+       vmovups  ymmword ptr [rbp-0x250], ymm0
+       vmovups  ymm0, ymmword ptr [rbp-0x50]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x250]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[int](byref,System.Runtime.Intrinsics.Vector256`1[int])
+						;; size=301 bbWeight=1 PerfScore 68.50
 G_M33333_IG05:
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[int](byref,System.Runtime.Intrinsics.Vector256`1[int])
+       vmovdqu  ymm0, ymmword ptr [rbp-0x250]
+       vmovdqu  ymmword ptr [rbx], ymm0
+       vmovdqu  ymm0, ymmword ptr [rbp-0x230]
+       vmovdqu  ymmword ptr [rbx+0x20], ymm0
+       mov      rax, rbx
+						;; size=30 bbWeight=1 PerfScore 15.25
+G_M33333_IG06:
        vzeroupper 
-       add      rsp, 384
+       add      rsp, 608
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 741, prolog size 16, PerfScore 543.75, instruction count 146, allocated bytes for code 741 (MethodHash=c5217dca) for method System.Runtime.Intrinsics.Vector512:Divide[int](System.Runtime.Intrinsics.Vector512`1[int],System.Runtime.Intrinsics.Vector512`1[int]):System.Runtime.Intrinsics.Vector512`1[int] (FullOpts)
+; Total bytes of code 1007, prolog size 19, PerfScore 413.50, instruction count 176, allocated bytes for code 1007 (MethodHash=c5217dca) for method System.Runtime.Intrinsics.Vector512:Divide[int](System.Runtime.Intrinsics.Vector512`1[int],System.Runtime.Intrinsics.Vector512`1[int]):System.Runtime.Intrinsics.Vector512`1[int] (FullOpts)
237 (27.15 % of base) - System.Runtime.Intrinsics.Vector512:Divide[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):System.Runtime.Intrinsics.Vector512`1[ubyte]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Divide[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 43 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 37 single block inlinees; 21 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T16] (  5,  5   )   byref  ->  rdi         single-def
+;  V00 RetBuf       [V00,T14] (  4,  4   )   byref  ->  rbx         single-def
 ;* V01 arg0         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[ubyte]>
 ;* V02 arg1         [V02    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[ubyte]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T37] (  2,  4   )  simd32  ->  mm0         "impAppendStmt"
-;  V05 tmp2         [V05,T38] (  2,  4   )  simd32  ->  mm1         "spilled call-like call argument"
-;  V06 tmp3         [V06,T25] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V07 tmp4         [V07,T26] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V08 tmp5         [V08,T39] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T35] (  2,  4   )  simd32  ->  [rbp-0x50]  spill-single-def "impAppendStmt"
+;  V05 tmp2         [V05,T36] (  2,  4   )  simd32  ->  [rbp-0x70]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument"
+;  V06 tmp3         [V06,T23] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V07 tmp4         [V07,T24] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V08 tmp5         [V08,T37] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
 ;* V09 tmp6         [V09    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V10 tmp7         [V10,T27] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V11 tmp8         [V11,T28] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V10 tmp7         [V10,T25] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V11 tmp8         [V11,T26] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V12 tmp9         [V12    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V13 tmp10        [V13    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V14 tmp11        [V14    ] (  2,  5   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V15 tmp12        [V15,T00] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V16 tmp13        [V16    ] (  2, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V17 tmp14        [V17    ] (  2, 10   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V14 tmp11        [V14    ] (  2,  5   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V15 tmp12        [V15,T00] (  5, 17   )     int  ->  rdi         "Inline stloc first use temp"
+;  V16 tmp13        [V16    ] (  2, 10   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V17 tmp14        [V17    ] (  2, 10   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V18 tmp15        [V18    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V19 tmp16        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V20 tmp17        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V21 tmp18        [V21    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V22 tmp19        [V22    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V23 tmp20        [V23    ] (  2,  5   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V23 tmp20        [V23    ] (  2,  5   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;  V24 tmp21        [V24,T01] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V25 tmp22        [V25    ] (  2, 10   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V26 tmp23        [V26    ] (  2, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V25 tmp22        [V25    ] (  2, 10   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V26 tmp23        [V26    ] (  2, 10   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V27 tmp24        [V27    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V28 tmp25        [V28    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V29 tmp26        [V29    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V30 tmp27        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V31 tmp28        [V31    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V32 tmp29        [V32,T41] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V33 tmp30        [V33,T29] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V34 tmp31        [V34,T30] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V32 tmp29        [V32,T39] (  3,  3   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V33 tmp30        [V33,T27] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V34 tmp31        [V34,T28] (  3,  6   )  simd16  ->  [rbp-0xF0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V35 tmp32        [V35    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V36 tmp33        [V36    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V37 tmp34        [V37    ] (  2,  5   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V38 tmp35        [V38,T02] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V39 tmp36        [V39    ] (  2, 10   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V40 tmp37        [V40    ] (  2, 10   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V37 tmp34        [V37    ] (  2,  5   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V38 tmp35        [V38,T02] (  5, 17   )     int  ->  rdi         "Inline stloc first use temp"
+;  V39 tmp36        [V39    ] (  2, 10   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V40 tmp37        [V40    ] (  2, 10   )  struct ( 8) [rbp-0x108]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V41 tmp38        [V41    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V42 tmp39        [V42    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V43 tmp40        [V43    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V44 tmp41        [V44    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V45 tmp42        [V45    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V46 tmp43        [V46    ] (  2,  5   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V46 tmp43        [V46    ] (  2,  5   )  struct ( 8) [rbp-0x110]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;  V47 tmp44        [V47,T03] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V48 tmp45        [V48    ] (  2, 10   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V49 tmp46        [V49    ] (  2, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V48 tmp45        [V48    ] (  2, 10   )  struct ( 8) [rbp-0x118]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V49 tmp46        [V49    ] (  2, 10   )  struct ( 8) [rbp-0x120]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V50 tmp47        [V50    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V51 tmp48        [V51    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
 ;* V52 tmp49        [V52    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V53 tmp50        [V53    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
 ;* V54 tmp51        [V54    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V55 tmp52        [V55,T42] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V55 tmp52        [V55,T40] (  3,  3   )  simd16  ->  [rbp-0x130]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V56 tmp53        [V56    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V57 tmp54        [V57,T31] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V58 tmp55        [V58,T32] (  3,  6   )  simd32  ->  mm2         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V59 tmp56        [V59,T40] (  2,  4   )  simd16  ->  mm3         "impAppendStmt"
+;  V57 tmp54        [V57,T29] (  3,  6   )  simd32  ->  [rbp-0x150]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V58 tmp55        [V58,T30] (  3,  6   )  simd32  ->  [rbp-0x170]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V59 tmp56        [V59,T38] (  2,  4   )  simd16  ->  [rbp-0x180]  spill-single-def "impAppendStmt"
 ;* V60 tmp57        [V60    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V61 tmp58        [V61,T33] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V62 tmp59        [V62,T34] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V61 tmp58        [V61,T31] (  3,  6   )  simd16  ->  [rbp-0x190]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V62 tmp59        [V62,T32] (  3,  6   )  simd16  ->  [rbp-0x1A0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V63 tmp60        [V63    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V64 tmp61        [V64    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V65 tmp62        [V65    ] (  2,  5   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V66 tmp63        [V66,T04] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V67 tmp64        [V67    ] (  2, 10   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V68 tmp65        [V68    ] (  2, 10   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V65 tmp62        [V65    ] (  2,  5   )  struct ( 8) [rbp-0x1A8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V66 tmp63        [V66,T04] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V67 tmp64        [V67    ] (  2, 10   )  struct ( 8) [rbp-0x1B0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V68 tmp65        [V68    ] (  2, 10   )  struct ( 8) [rbp-0x1B8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V69 tmp66        [V69    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V70 tmp67        [V70    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V71 tmp68        [V71    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V72 tmp69        [V72    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V73 tmp70        [V73    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V74 tmp71        [V74    ] (  2,  5   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V75 tmp72        [V75,T05] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V76 tmp73        [V76    ] (  2, 10   )  struct ( 8) [rbp-0x108]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V77 tmp74        [V77    ] (  2, 10   )  struct ( 8) [rbp-0x110]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V78 tmp75        [V78    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V79 tmp76        [V79    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V80 tmp77        [V80    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V81 tmp78        [V81    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V82 tmp79        [V82    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V83 tmp80        [V83,T43] (  3,  3   )  simd16  ->  [rbp-0x120]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V84 tmp81        [V84,T35] (  3,  6   )  simd16  ->  [rbp-0x130]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V85 tmp82        [V85,T36] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V86 tmp83        [V86    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V87 tmp84        [V87    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V88 tmp85        [V88    ] (  2,  5   )  struct ( 8) [rbp-0x148]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V89 tmp86        [V89,T06] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V90 tmp87        [V90    ] (  2, 10   )  struct ( 8) [rbp-0x150]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V91 tmp88        [V91    ] (  2, 10   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V92 tmp89        [V92    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V93 tmp90        [V93    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V94 tmp91        [V94    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V95 tmp92        [V95    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V96 tmp93        [V96    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V97 tmp94        [V97    ] (  2,  5   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V98 tmp95        [V98,T07] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V99 tmp96        [V99    ] (  2, 10   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V100 tmp97       [V100    ] (  2, 10   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V101 tmp98       [V101    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V102 tmp99       [V102    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V103 tmp100      [V103    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V104 tmp101      [V104    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V105 tmp102      [V105    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V106 tmp103      [V106,T44] (  3,  3   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V107 tmp104      [V107    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;* V108 tmp105      [V108    ] (  0,  0   )  struct (64) zero-ref    ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
-;  V109 tmp106      [V109,T47] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V110 tmp107      [V110,T48] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V111 tmp108      [V111,T49] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
-;  V112 tmp109      [V112,T50] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
-;  V113 tmp110      [V113,T17] (  2,  2   )    long  ->  rcx         "field V12._00 (fldOffset=0x0)" P-INDEP
-;  V114 tmp111      [V114,T18] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
-;  V115 tmp112      [V115    ] (  2,  5   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V116 tmp113      [V116    ] (  2,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
-;  V117 tmp114      [V117    ] (  2,  9   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V118 tmp115      [V118    ] (  2,  5   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
-;  V119 tmp116      [V119    ] (  2,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V120 tmp117      [V120    ] (  2,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
-;  V121 tmp118      [V121,T19] (  2,  2   )    long  ->  rcx         "field V35._00 (fldOffset=0x0)" P-INDEP
-;  V122 tmp119      [V122,T20] (  2,  2   )    long  ->  rax         "field V36._00 (fldOffset=0x0)" P-INDEP
-;  V123 tmp120      [V123    ] (  2,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;  V124 tmp121      [V124    ] (  2,  9   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
-;  V125 tmp122      [V125    ] (  2,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V40._00 (fldOffset=0x0)" P-DEP
-;  V126 tmp123      [V126    ] (  2,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-;  V127 tmp124      [V127    ] (  2,  9   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V48._00 (fldOffset=0x0)" P-DEP
-;  V128 tmp125      [V128    ] (  2,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
-;  V129 tmp126      [V129,T21] (  2,  2   )    long  ->  rcx         "field V63._00 (fldOffset=0x0)" P-INDEP
-;  V130 tmp127      [V130,T22] (  2,  2   )    long  ->  rax         "field V64._00 (fldOffset=0x0)" P-INDEP
-;  V131 tmp128      [V131    ] (  2,  5   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V65._00 (fldOffset=0x0)" P-DEP
-;  V132 tmp129      [V132    ] (  2,  9   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
-;  V133 tmp130      [V133    ] (  2,  9   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
-;  V134 tmp131      [V134    ] (  2,  5   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V74._00 (fldOffset=0x0)" P-DEP
-;  V135 tmp132      [V135    ] (  2,  9   )    long  ->  [rbp-0x108]  do-not-enreg[X] addr-exposed "field V76._00 (fldOffset=0x0)" P-DEP
-;  V136 tmp133      [V136    ] (  2,  9   )    long  ->  [rbp-0x110]  do-not-enreg[X] addr-exposed "field V77._00 (fldOffset=0x0)" P-DEP
-;  V137 tmp134      [V137,T23] (  2,  2   )    long  ->  rcx         "field V86._00 (fldOffset=0x0)" P-INDEP
-;  V138 tmp135      [V138,T24] (  2,  2   )    long  ->  rax         "field V87._00 (fldOffset=0x0)" P-INDEP
-;  V139 tmp136      [V139    ] (  2,  5   )    long  ->  [rbp-0x148]  do-not-enreg[X] addr-exposed "field V88._00 (fldOffset=0x0)" P-DEP
-;  V140 tmp137      [V140    ] (  2,  9   )    long  ->  [rbp-0x150]  do-not-enreg[X] addr-exposed "field V90._00 (fldOffset=0x0)" P-DEP
-;  V141 tmp138      [V141    ] (  2,  9   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V91._00 (fldOffset=0x0)" P-DEP
-;  V142 tmp139      [V142    ] (  2,  5   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V97._00 (fldOffset=0x0)" P-DEP
-;  V143 tmp140      [V143    ] (  2,  9   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V99._00 (fldOffset=0x0)" P-DEP
-;  V144 tmp141      [V144    ] (  2,  9   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V100._00 (fldOffset=0x0)" P-DEP
-;  V145 tmp142      [V145,T45] (  2,  2   )  simd32  ->  mm0         "field V108._lower (fldOffset=0x0)" P-INDEP
-;  V146 tmp143      [V146,T46] (  2,  2   )  simd32  ->  mm1         "field V108._upper (fldOffset=0x20)" P-INDEP
-;  V147 cse0        [V147,T08] (  4, 16   )    long  ->  rsi         "CSE #01: moderate"
-;  V148 cse1        [V148,T09] (  4, 16   )    long  ->   r8         "CSE #02: moderate"
-;  V149 cse2        [V149,T10] (  4, 16   )    long  ->  rsi         "CSE #03: moderate"
-;  V150 cse3        [V150,T11] (  4, 16   )    long  ->   r8         "CSE #04: moderate"
-;  V151 cse4        [V151,T12] (  4, 16   )    long  ->  rsi         "CSE #05: moderate"
-;  V152 cse5        [V152,T13] (  4, 16   )    long  ->   r8         "CSE #06: moderate"
-;  V153 cse6        [V153,T14] (  4, 16   )    long  ->  rsi         "CSE #07: moderate"
-;  V154 cse7        [V154,T15] (  4, 16   )    long  ->   r8         "CSE #08: moderate"
+;  V71 tmp68        [V71    ] (  2,  5   )  struct ( 8) [rbp-0x1C0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V72 tmp69        [V72,T05] (  5, 17   )     int  ->  r14         "Inline stloc first use temp"
+;  V73 tmp70        [V73    ] (  2, 10   )  struct ( 8) [rbp-0x1C8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V74 tmp71        [V74    ] (  2, 10   )  struct ( 8) [rbp-0x1D0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V75 tmp72        [V75    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V76 tmp73        [V76    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V77 tmp74        [V77,T41] (  3,  3   )  simd16  ->  [rbp-0x1E0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V78 tmp75        [V78,T33] (  3,  6   )  simd16  ->  [rbp-0x1F0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V79 tmp76        [V79,T34] (  3,  6   )  simd16  ->  [rbp-0x200]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V80 tmp77        [V80    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V81 tmp78        [V81    ] (  2,  4   )  struct ( 8) [rbp-0x208]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V82 tmp79        [V82    ] (  2,  5   )  struct ( 8) [rbp-0x210]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V83 tmp80        [V83,T06] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V84 tmp81        [V84    ] (  2, 10   )  struct ( 8) [rbp-0x218]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V85 tmp82        [V85    ] (  2, 10   )  struct ( 8) [rbp-0x220]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V86 tmp83        [V86    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V87 tmp84        [V87    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V88 tmp85        [V88    ] (  3,  3   )  simd16  ->  [rbp-0x230]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V89 tmp86        [V89    ] (  3,  3   )  struct (64) [rbp-0x270]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;  V90 tmp87        [V90,T42] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V91 tmp88        [V91,T43] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V92 tmp89        [V92,T44] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
+;  V93 tmp90        [V93,T45] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
+;  V94 tmp91        [V94,T16] (  2,  2   )    long  ->  rdi         "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V95 tmp92        [V95,T17] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
+;  V96 tmp93        [V96    ] (  2,  5   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
+;  V97 tmp94        [V97    ] (  2,  9   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V98 tmp95        [V98    ] (  2,  9   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V99 tmp96        [V99    ] (  2,  5   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V100 tmp97       [V100    ] (  2,  9   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V101 tmp98       [V101    ] (  2,  9   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
+;  V102 tmp99       [V102,T18] (  2,  2   )    long  ->  rdi         "field V35._00 (fldOffset=0x0)" P-INDEP
+;  V103 tmp100      [V103,T19] (  2,  2   )    long  ->  rsi         "field V36._00 (fldOffset=0x0)" P-INDEP
+;  V104 tmp101      [V104    ] (  2,  5   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
+;  V105 tmp102      [V105    ] (  2,  9   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
+;  V106 tmp103      [V106    ] (  2,  9   )    long  ->  [rbp-0x108]  do-not-enreg[X] addr-exposed "field V40._00 (fldOffset=0x0)" P-DEP
+;  V107 tmp104      [V107    ] (  2,  5   )    long  ->  [rbp-0x110]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
+;  V108 tmp105      [V108    ] (  2,  9   )    long  ->  [rbp-0x118]  do-not-enreg[X] addr-exposed "field V48._00 (fldOffset=0x0)" P-DEP
+;  V109 tmp106      [V109    ] (  2,  9   )    long  ->  [rbp-0x120]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
+;  V110 tmp107      [V110,T20] (  2,  2   )    long  ->  r15         "field V63._00 (fldOffset=0x0)" P-INDEP
+;  V111 tmp108      [V111,T21] (  2,  2   )    long  ->  rdi         "field V64._00 (fldOffset=0x0)" P-INDEP
+;  V112 tmp109      [V112    ] (  2,  5   )    long  ->  [rbp-0x1A8]  do-not-enreg[X] addr-exposed "field V65._00 (fldOffset=0x0)" P-DEP
+;  V113 tmp110      [V113    ] (  2,  9   )    long  ->  [rbp-0x1B0]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
+;  V114 tmp111      [V114    ] (  2,  9   )    long  ->  [rbp-0x1B8]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
+;  V115 tmp112      [V115    ] (  2,  5   )    long  ->  [rbp-0x1C0]  do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
+;  V116 tmp113      [V116    ] (  2,  9   )    long  ->  [rbp-0x1C8]  do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
+;  V117 tmp114      [V117    ] (  2,  9   )    long  ->  [rbp-0x1D0]  do-not-enreg[X] addr-exposed "field V74._00 (fldOffset=0x0)" P-DEP
+;  V118 tmp115      [V118,T22] (  2,  2   )    long  ->  r15         "field V80._00 (fldOffset=0x0)" P-INDEP
+;  V119 tmp116      [V119,T15] (  2,  3   )    long  ->  [rbp-0x208]  do-not-enreg[H] hidden-struct-arg "field V81._00 (fldOffset=0x0)" P-DEP
+;  V120 tmp117      [V120    ] (  2,  5   )    long  ->  [rbp-0x210]  do-not-enreg[X] addr-exposed "field V82._00 (fldOffset=0x0)" P-DEP
+;  V121 tmp118      [V121    ] (  2,  9   )    long  ->  [rbp-0x218]  do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
+;  V122 tmp119      [V122    ] (  2,  9   )    long  ->  [rbp-0x220]  do-not-enreg[X] addr-exposed "field V85._00 (fldOffset=0x0)" P-DEP
+;  V123 tmp120      [V123    ] (  3,  3   )  simd32  ->  [rbp-0x270]  do-not-enreg[XS] addr-exposed "field V89._lower (fldOffset=0x0)" P-DEP
+;  V124 tmp121      [V124    ] (  3,  3   )  simd32  ->  [rbp-0x250]  do-not-enreg[XS] addr-exposed "field V89._upper (fldOffset=0x20)" P-DEP
+;  V125 cse0        [V125,T07] (  4, 16   )    long  ->  rsi         "CSE #01: aggressive"
+;  V126 cse1        [V126,T08] (  4, 16   )    long  ->  rcx         "CSE #02: aggressive"
+;  V127 cse2        [V127,T09] (  4, 16   )    long  ->  rsi         "CSE #03: aggressive"
+;  V128 cse3        [V128,T10] (  4, 16   )    long  ->  rcx         "CSE #04: aggressive"
+;  V129 cse4        [V129,T11] (  4, 16   )    long  ->  r14         "CSE #05: aggressive"
+;  V130 cse5        [V130,T12] (  4, 16   )    long  ->  r13         "CSE #06: aggressive"
+;  V131 cse6        [V131,T13] (  4, 16   )    long  ->  r14         "CSE #07: aggressive"
 ;
-; Lcl frame size = 384
+; Lcl frame size = 624
 
 G_M23733_IG01:
        push     rbp
-       sub      rsp, 384
-       lea      rbp, [rsp+0x180]
-						;; size=16 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     r14
+       push     r13
+       push     rbx
+       sub      rsp, 624
+       lea      rbp, [rsp+0x290]
+       mov      rbx, rdi
+						;; size=26 bbWeight=1 PerfScore 6.00
 G_M23733_IG02:
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x50]
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x80], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       xor      ecx, ecx
-       align    [2 bytes for IG03]
-						;; size=48 bbWeight=1 PerfScore 15.00
+       vmovaps  xmmword ptr [rbp-0x90], xmm2
+       mov      rax, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0xA0], rax
+       mov      rax, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xA8], rax
+       xor      edi, edi
+       align    [0 bytes for IG03]
+						;; size=58 bbWeight=1 PerfScore 14.75
 G_M23733_IG03:
-       lea      rax, [rbp-0x30]
-       movsxd   rsi, ecx
+       lea      rax, [rbp-0xA0]
+       movsxd   rsi, edi
        movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0x38]
-       movzx    r8, byte  ptr [rdx+rsi]
+       lea      rdx, [rbp-0xA8]
+       movzx    rcx, byte  ptr [rdx+rsi]
        xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0x28]
+       div      edx:eax, ecx
+       lea      rdx, [rbp-0x98]
        mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       inc      edi
+       cmp      edi, 8
        jl       SHORT G_M23733_IG03
-						;; size=39 bbWeight=4 PerfScore 134.00
+						;; size=46 bbWeight=4 PerfScore 134.00
 G_M23733_IG04:
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      rax, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xB8], rax
+       mov      rax, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC0], rax
        xor      esi, esi
-       align    [3 bytes for IG05]
-						;; size=25 bbWeight=1 PerfScore 5.50
+       align    [0 bytes for IG05]
+						;; size=34 bbWeight=1 PerfScore 5.25
 G_M23733_IG05:
-       lea      rax, [rbp-0x48]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0x50]
-       movzx    r9, byte  ptr [rdx+r8]
+       lea      rax, [rbp-0xB8]
+       movsxd   rcx, esi
+       movzx    rax, byte  ptr [rax+rcx]
+       lea      rdx, [rbp-0xC0]
+       movzx    r8, byte  ptr [rdx+rcx]
        xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0x40]
-       mov      byte  ptr [rdx+r8], al
+       div      edx:eax, r8d
+       lea      rdx, [rbp-0xB0]
+       mov      byte  ptr [rdx+rcx], al
        inc      esi
        cmp      esi, 8
        jl       SHORT G_M23733_IG05
-						;; size=41 bbWeight=4 PerfScore 134.00
+						;; size=48 bbWeight=4 PerfScore 134.00
 G_M23733_IG06:
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
+       mov      rax, qword ptr [rbp-0xB0]
+       mov      qword ptr [rbp-0xD0], rdi
+       mov      qword ptr [rbp-0xC8], rax
+       vmovaps  xmm2, xmmword ptr [rbp-0xD0]
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
+       vmovaps  xmmword ptr [rbp-0xE0], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       xor      ecx, ecx
+       vmovaps  xmmword ptr [rbp-0xF0], xmm0
+       mov      rax, qword ptr [rbp-0xE0]
+       mov      qword ptr [rbp-0x100], rax
+       mov      rax, qword ptr [rbp-0xF0]
+       mov      qword ptr [rbp-0x108], rax
+       xor      edi, edi
        align    [0 bytes for IG07]
-						;; size=63 bbWeight=1 PerfScore 16.25
+						;; size=87 bbWeight=1 PerfScore 16.25
 G_M23733_IG07:
-       lea      rax, [rbp-0x90]
-       movsxd   rsi, ecx
+       lea      rax, [rbp-0x100]
+       movsxd   rsi, edi
        movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0x98]
-       movzx    r8, byte  ptr [rdx+rsi]
+       lea      rdx, [rbp-0x108]
+       movzx    rcx, byte  ptr [rdx+rsi]
        xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0x88]
+       div      edx:eax, ecx
+       lea      rdx, [rbp-0xF8]
        mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       inc      edi
+       cmp      edi, 8
        jl       SHORT G_M23733_IG07
-						;; size=48 bbWeight=4 PerfScore 134.00
+						;; size=46 bbWeight=4 PerfScore 134.00
 G_M23733_IG08:
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
+       mov      rdi, qword ptr [rbp-0xF8]
+       mov      rax, qword ptr [rbp-0xD8]
+       mov      qword ptr [rbp-0x118], rax
+       mov      rax, qword ptr [rbp-0xE8]
+       mov      qword ptr [rbp-0x120], rax
        xor      esi, esi
-       align    [0 bytes for IG09]
-						;; size=31 bbWeight=1 PerfScore 5.25
+       align    [2 bytes for IG09]
+						;; size=39 bbWeight=1 PerfScore 5.50
 G_M23733_IG09:
-       lea      rax, [rbp-0xA8]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0xB0]
-       movzx    r9, byte  ptr [rdx+r8]
+       lea      rax, [rbp-0x118]
+       movsxd   rcx, esi
+       movzx    rax, byte  ptr [rax+rcx]
+       lea      rdx, [rbp-0x120]
+       movzx    r8, byte  ptr [rdx+rcx]
        xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0xA0]
-       mov      byte  ptr [rdx+r8], al
+       div      edx:eax, r8d
+       lea      rdx, [rbp-0x110]
+       mov      byte  ptr [rdx+rcx], al
        inc      esi
        cmp      esi, 8
        jl       SHORT G_M23733_IG09
-						;; size=50 bbWeight=4 PerfScore 134.00
+						;; size=48 bbWeight=4 PerfScore 134.00
 G_M23733_IG10:
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
+       mov      rsi, qword ptr [rbp-0x110]
+       mov      qword ptr [rbp-0x130], rdi
+       mov      qword ptr [rbp-0x128], rsi
+       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0x130], 1
+       vmovups  ymmword ptr [rbp-0x50], ymm0
        vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x150], ymm1
        vmovups  ymm2, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x170], ymm2
        vmovaps  ymm3, ymm1
-       vmovaps  xmmword ptr [rbp-0xD0], xmm3
+       vmovaps  xmmword ptr [rbp-0x190], xmm3
        vmovaps  ymm3, ymm2
-       vmovaps  xmmword ptr [rbp-0xE0], xmm3
-       mov      rax, qword ptr [rbp-0xD0]
-       mov      qword ptr [rbp-0xF0], rax
-       mov      rax, qword ptr [rbp-0xE0]
-       mov      qword ptr [rbp-0xF8], rax
-       xor      ecx, ecx
-       align    [0 bytes for IG11]
-						;; size=95 bbWeight=1 PerfScore 21.75
+       vmovaps  xmmword ptr [rbp-0x1A0], xmm3
+       mov      rdi, qword ptr [rbp-0x190]
+       mov      qword ptr [rbp-0x1B0], rdi
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      qword ptr [rbp-0x1B8], rdi
+       xor      r15d, r15d
+						;; size=117 bbWeight=1 PerfScore 24.75
 G_M23733_IG11:
-       lea      rax, [rbp-0xF0]
-       movsxd   rsi, ecx
-       movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0xF8]
-       movzx    r8, byte  ptr [rdx+rsi]
-       xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0xE8]
-       mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       lea      rdi, [rbp-0x1B0]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0x1B8]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0x1A8]
+       mov      byte  ptr [rdi+r14], al
+       inc      r15d
+       cmp      r15d, 8
        jl       SHORT G_M23733_IG11
-						;; size=48 bbWeight=4 PerfScore 134.00
+						;; size=59 bbWeight=4 PerfScore 46.00
 G_M23733_IG12:
-       mov      rcx, qword ptr [rbp-0xE8]
-       mov      rax, qword ptr [rbp-0xC8]
-       mov      qword ptr [rbp-0x108], rax
-       mov      rax, qword ptr [rbp-0xD8]
-       mov      qword ptr [rbp-0x110], rax
-       xor      esi, esi
-       align    [3 bytes for IG13]
-						;; size=40 bbWeight=1 PerfScore 5.50
+       mov      r15, qword ptr [rbp-0x1A8]
+       mov      rdi, qword ptr [rbp-0x188]
+       mov      qword ptr [rbp-0x1C8], rdi
+       mov      rdi, qword ptr [rbp-0x198]
+       mov      qword ptr [rbp-0x1D0], rdi
+       xor      r14d, r14d
+						;; size=38 bbWeight=1 PerfScore 5.25
 G_M23733_IG13:
-       lea      rax, [rbp-0x108]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0x110]
-       movzx    r9, byte  ptr [rdx+r8]
-       xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0x100]
-       mov      byte  ptr [rdx+r8], al
-       inc      esi
-       cmp      esi, 8
+       lea      rdi, [rbp-0x1C8]
+       movsxd   r13, r14d
+       movzx    rdi, byte  ptr [rdi+r13]
+       lea      rsi, [rbp-0x1D0]
+       movzx    rsi, byte  ptr [rsi+r13]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0x1C0]
+       mov      byte  ptr [rdi+r13], al
+       inc      r14d
+       cmp      r14d, 8
        jl       SHORT G_M23733_IG13
-						;; size=50 bbWeight=4 PerfScore 134.00
+						;; size=59 bbWeight=4 PerfScore 46.00
 G_M23733_IG14:
-       mov      rax, qword ptr [rbp-0x100]
-       mov      qword ptr [rbp-0x120], rcx
-       mov      qword ptr [rbp-0x118], rax
-       vmovaps  xmm3, xmmword ptr [rbp-0x120]
+       mov      rdi, qword ptr [rbp-0x1C0]
+       mov      qword ptr [rbp-0x1E0], r15
+       mov      qword ptr [rbp-0x1D8], rdi
+       vmovaps  xmm3, xmmword ptr [rbp-0x1E0]
+       vmovaps  xmmword ptr [rbp-0x180], xmm3
+       vmovups  ymm1, ymmword ptr [rbp-0x150]
        vextractf128 xmm1, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x130], xmm1
+       vmovaps  xmmword ptr [rbp-0x1F0], xmm1
+       vmovups  ymm2, ymmword ptr [rbp-0x170]
        vextractf128 xmm1, ymm2, 1
-       vmovaps  xmmword ptr [rbp-0x140], xmm1
-       mov      rax, qword ptr [rbp-0x130]
-       mov      qword ptr [rbp-0x150], rax
-       mov      rax, qword ptr [rbp-0x140]
-       mov      qword ptr [rbp-0x158], rax
-       xor      ecx, ecx
-       align    [0 bytes for IG15]
-						;; size=87 bbWeight=1 PerfScore 16.25
+       vmovaps  xmmword ptr [rbp-0x200], xmm1
+       mov      rdi, qword ptr [rbp-0x1F0]
+       mov      qword ptr [rbp-0x218], rdi
+       mov      rdi, qword ptr [rbp-0x200]
+       mov      qword ptr [rbp-0x220], rdi
+       xor      r15d, r15d
+						;; size=112 bbWeight=1 PerfScore 25.25
 G_M23733_IG15:
-       lea      rax, [rbp-0x150]
-       movsxd   rsi, ecx
-       movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0x158]
-       movzx    r8, byte  ptr [rdx+rsi]
-       xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0x148]
-       mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       lea      rdi, [rbp-0x218]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0x220]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0x210]
+       mov      byte  ptr [rdi+r14], al
+       inc      r15d
+       cmp      r15d, 8
        jl       SHORT G_M23733_IG15
-						;; size=48 bbWeight=4 PerfScore 134.00
+						;; size=59 bbWeight=4 PerfScore 46.00
 G_M23733_IG16:
-       mov      rcx, qword ptr [rbp-0x148]
-       mov      rax, qword ptr [rbp-0x128]
-       mov      qword ptr [rbp-0x168], rax
-       mov      rax, qword ptr [rbp-0x138]
-       mov      qword ptr [rbp-0x170], rax
-       xor      esi, esi
-       align    [2 bytes for IG17]
-						;; size=39 bbWeight=1 PerfScore 5.50
+       mov      r15, qword ptr [rbp-0x210]
+       mov      rdi, qword ptr [rbp-0x1E8]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1F8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x208]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[ubyte]:op_Division(System.Runtime.Intrinsics.Vector64`1[ubyte],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[ubyte]:op_Division(System.Runtime.Intrinsics.Vector64`1[ubyte],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte]
+       mov      qword ptr [rbp-0x230], r15
+       mov      rdi, qword ptr [rbp-0x208]
+       mov      qword ptr [rsp], rdi
+       lea      rdi, [rbp-0x230]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector64`1[ubyte])
+       call     [rax]System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector64`1[ubyte])
+       vmovaps  xmm3, xmmword ptr [rbp-0x180]
+       vmovups  xmmword ptr [rsp], xmm3
+       vmovaps  xmm0, xmmword ptr [rbp-0x230]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       lea      rdi, [rbp-0x70]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[ubyte](System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[ubyte](System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte]
+       vmovups  ymm0, ymmword ptr [rbp-0x50]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x270]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetLowerUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector256`1[ubyte])
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetLowerUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector256`1[ubyte])
+       vmovups  ymm0, ymmword ptr [rbp-0x70]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x270]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector256`1[ubyte])
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[ubyte](byref,System.Runtime.Intrinsics.Vector256`1[ubyte])
+       vmovdqu  ymm0, ymmword ptr [rbp-0x270]
+       vmovdqu  ymmword ptr [rbx], ymm0
+       vmovdqu  ymm0, ymmword ptr [rbp-0x250]
+       vmovdqu  ymmword ptr [rbx+0x20], ymm0
+       mov      rax, rbx
+						;; size=215 bbWeight=1 PerfScore 57.00
 G_M23733_IG17:
-       lea      rax, [rbp-0x168]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0x170]
-       movzx    r9, byte  ptr [rdx+r8]
-       xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0x160]
-       mov      byte  ptr [rdx+r8], al
-       inc      esi
-       cmp      esi, 8
-       jl       SHORT G_M23733_IG17
-						;; size=50 bbWeight=4 PerfScore 134.00
-G_M23733_IG18:
-       mov      rax, qword ptr [rbp-0x160]
-       mov      qword ptr [rbp-0x180], rcx
-       mov      qword ptr [rbp-0x178], rax
-       vinserti128 ymm1, ymm3, xmmword ptr [rbp-0x180], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       vmovups  ymmword ptr [rdi+0x20], ymm1
-       mov      rax, rdi
-						;; size=43 bbWeight=1 PerfScore 11.25
-G_M23733_IG19:
        vzeroupper 
-       add      rsp, 384
+       add      rsp, 624
+       pop      rbx
+       pop      r13
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=19 bbWeight=1 PerfScore 4.75
 
-; Total bytes of code 873, prolog size 16, PerfScore 1178.75, instruction count 194, allocated bytes for code 873 (MethodHash=f113a34a) for method System.Runtime.Intrinsics.Vector512:Divide[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
+; Total bytes of code 1110, prolog size 26, PerfScore 838.75, instruction count 216, allocated bytes for code 1110 (MethodHash=f113a34a) for method System.Runtime.Intrinsics.Vector512:Divide[ubyte](System.Runtime.Intrinsics.Vector512`1[ubyte],System.Runtime.Intrinsics.Vector512`1[ubyte]):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
233 (36.46 % of base) - System.Runtime.Intrinsics.Vector512:Divide[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):System.Runtime.Intrinsics.Vector512`1[long]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Divide[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):System.Runtime.Intrinsics.Vector512`1[long] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 43 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 38 single block inlinees; 21 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T00] (  5,  5   )   byref  ->  rdi         single-def
+;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rbx         single-def
 ;* V01 arg0         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[long]>
 ;* V02 arg1         [V02    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[long]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T29] (  2,  4   )  simd32  ->  mm0         "impAppendStmt"
-;  V05 tmp2         [V05,T30] (  2,  4   )  simd32  ->  mm1         "spilled call-like call argument"
-;  V06 tmp3         [V06,T17] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V07 tmp4         [V07,T18] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V08 tmp5         [V08,T31] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T28] (  2,  4   )  simd32  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
+;  V05 tmp2         [V05,T29] (  2,  4   )  simd32  ->  [rbp-0x50]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument"
+;  V06 tmp3         [V06,T16] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V07 tmp4         [V07,T17] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V08 tmp5         [V08,T30] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
 ;* V09 tmp6         [V09    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V10 tmp7         [V10,T19] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V11 tmp8         [V11,T20] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V10 tmp7         [V10,T18] (  3,  6   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V11 tmp8         [V11,T19] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V12 tmp9         [V12    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V13 tmp10        [V13    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V14 tmp11        [V14    ] (  2,  2   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V14 tmp11        [V14    ] (  2,  2   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V15 tmp12        [V15,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16    ] (  2,  4   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V17 tmp14        [V17    ] (  2,  4   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V16 tmp13        [V16    ] (  2,  4   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V17 tmp14        [V17    ] (  2,  4   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V18 tmp15        [V18    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V19 tmp16        [V19    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V20 tmp17        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V21 tmp18        [V21    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V22 tmp19        [V22    ] (  2,  2   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V22 tmp19        [V22    ] (  2,  2   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V23 tmp20        [V23,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V24 tmp21        [V24    ] (  2,  4   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V25 tmp22        [V25    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V24 tmp21        [V24    ] (  2,  4   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V25 tmp22        [V25    ] (  2,  4   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V26 tmp23        [V26    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V27 tmp24        [V27    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V28 tmp25        [V28    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V29 tmp26        [V29    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V30 tmp27        [V30,T33] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V31 tmp28        [V31,T21] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V32 tmp29        [V32,T22] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V30 tmp27        [V30,T32] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V31 tmp28        [V31,T20] (  3,  6   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V32 tmp29        [V32,T21] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V33 tmp30        [V33    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V34 tmp31        [V34    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V35 tmp32        [V35    ] (  2,  2   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V35 tmp32        [V35    ] (  2,  2   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V36 tmp33        [V36,T11] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V37 tmp34        [V37    ] (  2,  4   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V38 tmp35        [V38    ] (  2,  4   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V37 tmp34        [V37    ] (  2,  4   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V38 tmp35        [V38    ] (  2,  4   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V39 tmp36        [V39    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V40 tmp37        [V40    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V41 tmp38        [V41    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V42 tmp39        [V42    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V43 tmp40        [V43    ] (  2,  2   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V43 tmp40        [V43    ] (  2,  2   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V44 tmp41        [V44,T12] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V45 tmp42        [V45    ] (  2,  4   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V46 tmp43        [V46    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V45 tmp42        [V45    ] (  2,  4   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V46 tmp43        [V46    ] (  2,  4   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V47 tmp44        [V47    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V48 tmp45        [V48    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
 ;* V49 tmp46        [V49    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
 ;* V50 tmp47        [V50    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V51 tmp48        [V51,T34] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V51 tmp48        [V51,T33] (  3,  3   )  simd16  ->  [rbp-0x110]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V52 tmp49        [V52    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V53 tmp50        [V53,T23] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V54 tmp51        [V54,T24] (  3,  6   )  simd32  ->  mm2         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V55 tmp52        [V55,T32] (  2,  4   )  simd16  ->  mm3         "impAppendStmt"
+;  V53 tmp50        [V53,T22] (  3,  6   )  simd32  ->  [rbp-0x130]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V54 tmp51        [V54,T23] (  3,  6   )  simd32  ->  [rbp-0x150]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V55 tmp52        [V55,T31] (  2,  4   )  simd16  ->  [rbp-0x160]  spill-single-def "impAppendStmt"
 ;* V56 tmp53        [V56    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V57 tmp54        [V57,T25] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V58 tmp55        [V58,T26] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V57 tmp54        [V57,T24] (  3,  6   )  simd16  ->  [rbp-0x170]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V58 tmp55        [V58,T25] (  3,  6   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V59 tmp56        [V59    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V60 tmp57        [V60    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V61 tmp58        [V61    ] (  2,  2   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V61 tmp58        [V61    ] (  2,  2   )  struct ( 8) [rbp-0x188]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V62 tmp59        [V62,T13] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V63 tmp60        [V63    ] (  2,  4   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V64 tmp61        [V64    ] (  2,  4   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V63 tmp60        [V63    ] (  2,  4   )  struct ( 8) [rbp-0x190]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V64 tmp61        [V64    ] (  2,  4   )  struct ( 8) [rbp-0x198]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V65 tmp62        [V65    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V66 tmp63        [V66    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V67 tmp64        [V67    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V68 tmp65        [V68    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V69 tmp66        [V69    ] (  2,  2   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V70 tmp67        [V70,T14] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V71 tmp68        [V71    ] (  2,  4   )  struct ( 8) [rbp-0x108]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V72 tmp69        [V72    ] (  2,  4   )  struct ( 8) [rbp-0x110]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V73 tmp70        [V73    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V74 tmp71        [V74    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V75 tmp72        [V75    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V76 tmp73        [V76    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V77 tmp74        [V77,T35] (  3,  3   )  simd16  ->  [rbp-0x120]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V78 tmp75        [V78,T27] (  3,  6   )  simd16  ->  [rbp-0x130]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V79 tmp76        [V79,T28] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V80 tmp77        [V80    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V81 tmp78        [V81    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V82 tmp79        [V82    ] (  2,  2   )  struct ( 8) [rbp-0x148]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V83 tmp80        [V83,T15] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V84 tmp81        [V84    ] (  2,  4   )  struct ( 8) [rbp-0x150]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V85 tmp82        [V85    ] (  2,  4   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V86 tmp83        [V86    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V87 tmp84        [V87    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V88 tmp85        [V88    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V89 tmp86        [V89    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V90 tmp87        [V90    ] (  2,  2   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V91 tmp88        [V91,T16] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V92 tmp89        [V92    ] (  2,  4   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V93 tmp90        [V93    ] (  2,  4   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V94 tmp91        [V94    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V95 tmp92        [V95    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V96 tmp93        [V96    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V97 tmp94        [V97    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V98 tmp95        [V98,T36] (  3,  3   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V99 tmp96        [V99    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[long]>
-;* V100 tmp97       [V100    ] (  0,  0   )  struct (64) zero-ref    ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[long]>
-;  V101 tmp98       [V101,T39] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V102 tmp99       [V102,T40] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V103 tmp100      [V103,T41] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
-;  V104 tmp101      [V104,T42] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
-;  V105 tmp102      [V105,T01] (  2,  2   )    long  ->  rcx         "field V12._00 (fldOffset=0x0)" P-INDEP
-;  V106 tmp103      [V106,T02] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
-;  V107 tmp104      [V107    ] (  2,  2   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V108 tmp105      [V108    ] (  2,  3   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
-;  V109 tmp106      [V109    ] (  2,  3   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V110 tmp107      [V110    ] (  2,  2   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-;  V111 tmp108      [V111    ] (  2,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-;  V112 tmp109      [V112    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V113 tmp110      [V113,T03] (  2,  2   )    long  ->  rcx         "field V33._00 (fldOffset=0x0)" P-INDEP
-;  V114 tmp111      [V114,T04] (  2,  2   )    long  ->  rax         "field V34._00 (fldOffset=0x0)" P-INDEP
-;  V115 tmp112      [V115    ] (  2,  2   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-;  V116 tmp113      [V116    ] (  2,  3   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;  V117 tmp114      [V117    ] (  2,  3   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V38._00 (fldOffset=0x0)" P-DEP
-;  V118 tmp115      [V118    ] (  2,  2   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
-;  V119 tmp116      [V119    ] (  2,  3   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
-;  V120 tmp117      [V120    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-;  V121 tmp118      [V121,T05] (  2,  2   )    long  ->  rcx         "field V59._00 (fldOffset=0x0)" P-INDEP
-;  V122 tmp119      [V122,T06] (  2,  2   )    long  ->  rax         "field V60._00 (fldOffset=0x0)" P-INDEP
-;  V123 tmp120      [V123    ] (  2,  2   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V61._00 (fldOffset=0x0)" P-DEP
-;  V124 tmp121      [V124    ] (  2,  3   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
-;  V125 tmp122      [V125    ] (  2,  3   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;  V126 tmp123      [V126    ] (  2,  2   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
-;  V127 tmp124      [V127    ] (  2,  3   )    long  ->  [rbp-0x108]  do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-;  V128 tmp125      [V128    ] (  2,  3   )    long  ->  [rbp-0x110]  do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
-;  V129 tmp126      [V129,T07] (  2,  2   )    long  ->  rcx         "field V80._00 (fldOffset=0x0)" P-INDEP
-;  V130 tmp127      [V130,T08] (  2,  2   )    long  ->  rax         "field V81._00 (fldOffset=0x0)" P-INDEP
-;  V131 tmp128      [V131    ] (  2,  2   )    long  ->  [rbp-0x148]  do-not-enreg[X] addr-exposed "field V82._00 (fldOffset=0x0)" P-DEP
-;  V132 tmp129      [V132    ] (  2,  3   )    long  ->  [rbp-0x150]  do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-;  V133 tmp130      [V133    ] (  2,  3   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V85._00 (fldOffset=0x0)" P-DEP
-;  V134 tmp131      [V134    ] (  2,  2   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V90._00 (fldOffset=0x0)" P-DEP
-;  V135 tmp132      [V135    ] (  2,  3   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V92._00 (fldOffset=0x0)" P-DEP
-;  V136 tmp133      [V136    ] (  2,  3   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-;  V137 tmp134      [V137,T37] (  2,  2   )  simd32  ->  mm0         "field V100._lower (fldOffset=0x0)" P-INDEP
-;  V138 tmp135      [V138,T38] (  2,  2   )  simd32  ->  mm1         "field V100._upper (fldOffset=0x20)" P-INDEP
+;  V67 tmp64        [V67    ] (  2,  2   )  struct ( 8) [rbp-0x1A0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V68 tmp65        [V68,T14] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V69 tmp66        [V69    ] (  2,  4   )  struct ( 8) [rbp-0x1A8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V70 tmp67        [V70    ] (  2,  4   )  struct ( 8) [rbp-0x1B0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V71 tmp68        [V71    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V72 tmp69        [V72    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V73 tmp70        [V73,T34] (  3,  3   )  simd16  ->  [rbp-0x1C0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V74 tmp71        [V74,T26] (  3,  6   )  simd16  ->  [rbp-0x1D0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V75 tmp72        [V75,T27] (  3,  6   )  simd16  ->  [rbp-0x1E0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V76 tmp73        [V76    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V77 tmp74        [V77    ] (  2,  4   )  struct ( 8) [rbp-0x1E8]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V78 tmp75        [V78    ] (  2,  2   )  struct ( 8) [rbp-0x1F0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V79 tmp76        [V79,T15] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V80 tmp77        [V80    ] (  2,  4   )  struct ( 8) [rbp-0x1F8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V81 tmp78        [V81    ] (  2,  4   )  struct ( 8) [rbp-0x200]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V82 tmp79        [V82    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V83 tmp80        [V83    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V84 tmp81        [V84    ] (  3,  3   )  simd16  ->  [rbp-0x210]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V85 tmp82        [V85    ] (  3,  3   )  struct (64) [rbp-0x250]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[long]>
+;  V86 tmp83        [V86,T35] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V87 tmp84        [V87,T36] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V88 tmp85        [V88,T37] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
+;  V89 tmp86        [V89,T38] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
+;  V90 tmp87        [V90,T02] (  2,  2   )    long  ->  rdi         "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V91 tmp88        [V91,T03] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
+;  V92 tmp89        [V92    ] (  2,  2   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
+;  V93 tmp90        [V93    ] (  2,  3   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V94 tmp91        [V94    ] (  2,  3   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V95 tmp92        [V95    ] (  2,  2   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
+;  V96 tmp93        [V96    ] (  2,  3   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
+;  V97 tmp94        [V97    ] (  2,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V98 tmp95        [V98,T04] (  2,  2   )    long  ->  rdi         "field V33._00 (fldOffset=0x0)" P-INDEP
+;  V99 tmp96        [V99,T05] (  2,  2   )    long  ->  rsi         "field V34._00 (fldOffset=0x0)" P-INDEP
+;  V100 tmp97       [V100    ] (  2,  2   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V101 tmp98       [V101    ] (  2,  3   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
+;  V102 tmp99       [V102    ] (  2,  3   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V38._00 (fldOffset=0x0)" P-DEP
+;  V103 tmp100      [V103    ] (  2,  2   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V43._00 (fldOffset=0x0)" P-DEP
+;  V104 tmp101      [V104    ] (  2,  3   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+;  V105 tmp102      [V105    ] (  2,  3   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
+;  V106 tmp103      [V106,T06] (  2,  2   )    long  ->  r15         "field V59._00 (fldOffset=0x0)" P-INDEP
+;  V107 tmp104      [V107,T07] (  2,  2   )    long  ->  rdi         "field V60._00 (fldOffset=0x0)" P-INDEP
+;  V108 tmp105      [V108    ] (  2,  2   )    long  ->  [rbp-0x188]  do-not-enreg[X] addr-exposed "field V61._00 (fldOffset=0x0)" P-DEP
+;  V109 tmp106      [V109    ] (  2,  3   )    long  ->  [rbp-0x190]  do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
+;  V110 tmp107      [V110    ] (  2,  3   )    long  ->  [rbp-0x198]  do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
+;  V111 tmp108      [V111    ] (  2,  2   )    long  ->  [rbp-0x1A0]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
+;  V112 tmp109      [V112    ] (  2,  3   )    long  ->  [rbp-0x1A8]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
+;  V113 tmp110      [V113    ] (  2,  3   )    long  ->  [rbp-0x1B0]  do-not-enreg[X] addr-exposed "field V70._00 (fldOffset=0x0)" P-DEP
+;  V114 tmp111      [V114,T08] (  2,  2   )    long  ->  r15         "field V76._00 (fldOffset=0x0)" P-INDEP
+;  V115 tmp112      [V115,T01] (  2,  3   )    long  ->  [rbp-0x1E8]  do-not-enreg[H] hidden-struct-arg "field V77._00 (fldOffset=0x0)" P-DEP
+;  V116 tmp113      [V116    ] (  2,  2   )    long  ->  [rbp-0x1F0]  do-not-enreg[X] addr-exposed "field V78._00 (fldOffset=0x0)" P-DEP
+;  V117 tmp114      [V117    ] (  2,  3   )    long  ->  [rbp-0x1F8]  do-not-enreg[X] addr-exposed "field V80._00 (fldOffset=0x0)" P-DEP
+;  V118 tmp115      [V118    ] (  2,  3   )    long  ->  [rbp-0x200]  do-not-enreg[X] addr-exposed "field V81._00 (fldOffset=0x0)" P-DEP
+;  V119 tmp116      [V119    ] (  3,  3   )  simd32  ->  [rbp-0x250]  do-not-enreg[XS] addr-exposed "field V85._lower (fldOffset=0x0)" P-DEP
+;  V120 tmp117      [V120    ] (  2,  2   )  simd32  ->  [rbp-0x230]  do-not-enreg[XS] addr-exposed "field V85._upper (fldOffset=0x20)" P-DEP
 ;
-; Lcl frame size = 384
+; Lcl frame size = 608
 
 G_M43701_IG01:
        push     rbp
-       sub      rsp, 384
-       lea      rbp, [rsp+0x180]
-						;; size=16 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     rbx
+       sub      rsp, 608
+       lea      rbp, [rsp+0x270]
+       mov      rbx, rdi
+						;; size=22 bbWeight=1 PerfScore 4.00
 G_M43701_IG02:
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x50]
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x60], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       mov      rax, qword ptr [rbp-0x30]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x38]
-       mov      qword ptr [rbp-0x28], rax
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       mov      rax, qword ptr [rbp-0x48]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x40], rax
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
+       vmovaps  xmmword ptr [rbp-0x70], xmm2
+       mov      rax, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], rax
        mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
+       mov      qword ptr [rbp-0x88], rax
        mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       mov      rax, qword ptr [rbp-0x90]
        cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x98]
-       mov      qword ptr [rbp-0x88], rax
-       mov      rcx, qword ptr [rbp-0x88]
+       idiv     rdx:rax, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0x78], rax
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      rax, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x98], rax
        mov      rax, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0xA0], rax
+       mov      rax, qword ptr [rbp-0x98]
+       cqo      
+       idiv     rdx:rax, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0x90], rax
+       mov      rax, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
        mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       mov      rax, qword ptr [rbp-0xA8]
+       vmovaps  xmm2, xmmword ptr [rbp-0xB0]
+       vextractf128 xmm0, ymm0, 1
+       vmovaps  xmmword ptr [rbp-0xC0], xmm0
+       vextractf128 xmm0, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0xD0], xmm0
+       mov      rax, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], rax
+       mov      rax, qword ptr [rbp-0xD0]
+       mov      qword ptr [rbp-0xE8], rax
+       mov      rax, qword ptr [rbp-0xE0]
        cqo      
-       idiv     rdx:rax, qword ptr [rbp-0xB0]
-       mov      qword ptr [rbp-0xA0], rax
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
+       idiv     rdx:rax, qword ptr [rbp-0xE8]
+       mov      qword ptr [rbp-0xD8], rax
+       mov      rdi, qword ptr [rbp-0xD8]
+       mov      rax, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xF8], rax
+       mov      rax, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0x100], rax
+       mov      rax, qword ptr [rbp-0xF8]
+       cqo      
+       idiv     rdx:rax, qword ptr [rbp-0x100]
+       mov      qword ptr [rbp-0xF0], rax
+       mov      rsi, qword ptr [rbp-0xF0]
+       mov      qword ptr [rbp-0x110], rdi
+       mov      qword ptr [rbp-0x108], rsi
+       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0x110], 1
+       vmovups  ymmword ptr [rbp-0x30], ymm0
        vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x130], ymm1
        vmovups  ymm2, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x150], ymm2
        vmovaps  ymm3, ymm1
-       vmovaps  xmmword ptr [rbp-0xD0], xmm3
+       vmovaps  xmmword ptr [rbp-0x170], xmm3
        vmovaps  ymm3, ymm2
-       vmovaps  xmmword ptr [rbp-0xE0], xmm3
-       mov      rax, qword ptr [rbp-0xD0]
-       mov      qword ptr [rbp-0xF0], rax
-       mov      rax, qword ptr [rbp-0xE0]
-						;; size=314 bbWeight=1 PerfScore 343.00
+       vmovaps  xmmword ptr [rbp-0x180], xmm3
+						;; size=365 bbWeight=1 PerfScore 343.00
 G_M43701_IG03:
-       mov      qword ptr [rbp-0xF8], rax
-       mov      rax, qword ptr [rbp-0xF0]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0xF8]
-       mov      qword ptr [rbp-0xE8], rax
-       mov      rcx, qword ptr [rbp-0xE8]
-       mov      rax, qword ptr [rbp-0xC8]
-       mov      qword ptr [rbp-0x108], rax
-       mov      rax, qword ptr [rbp-0xD8]
-       mov      qword ptr [rbp-0x110], rax
-       mov      rax, qword ptr [rbp-0x108]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x110]
-       mov      qword ptr [rbp-0x100], rax
-       mov      rax, qword ptr [rbp-0x100]
-       mov      qword ptr [rbp-0x120], rcx
-       mov      qword ptr [rbp-0x118], rax
-       vmovaps  xmm3, xmmword ptr [rbp-0x120]
+       mov      rdi, qword ptr [rbp-0x170]
+       mov      qword ptr [rbp-0x190], rdi
+       mov      rdi, qword ptr [rbp-0x180]
+       mov      qword ptr [rbp-0x198], rdi
+       mov      rdi, qword ptr [rbp-0x190]
+       mov      rsi, qword ptr [rbp-0x198]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       mov      qword ptr [rbp-0x188], rax
+       mov      r15, qword ptr [rbp-0x188]
+       mov      rdi, qword ptr [rbp-0x168]
+       mov      qword ptr [rbp-0x1A8], rdi
+       mov      rdi, qword ptr [rbp-0x178]
+       mov      qword ptr [rbp-0x1B0], rdi
+       mov      rdi, qword ptr [rbp-0x1A8]
+       mov      rsi, qword ptr [rbp-0x1B0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       mov      qword ptr [rbp-0x1A0], rax
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      qword ptr [rbp-0x1C0], r15
+       mov      qword ptr [rbp-0x1B8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x1C0]
+       vmovaps  xmmword ptr [rbp-0x160], xmm0
+       vmovups  ymm1, ymmword ptr [rbp-0x130]
        vextractf128 xmm1, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x130], xmm1
+       vmovaps  xmmword ptr [rbp-0x1D0], xmm1
+       vmovups  ymm2, ymmword ptr [rbp-0x150]
        vextractf128 xmm1, ymm2, 1
-       vmovaps  xmmword ptr [rbp-0x140], xmm1
-       mov      rax, qword ptr [rbp-0x130]
-       mov      qword ptr [rbp-0x150], rax
-       mov      rax, qword ptr [rbp-0x140]
-       mov      qword ptr [rbp-0x158], rax
-       mov      rax, qword ptr [rbp-0x150]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x158]
-       mov      qword ptr [rbp-0x148], rax
-       mov      rcx, qword ptr [rbp-0x148]
-       mov      rax, qword ptr [rbp-0x128]
-       mov      qword ptr [rbp-0x168], rax
-       mov      rax, qword ptr [rbp-0x138]
-       mov      qword ptr [rbp-0x170], rax
-       mov      rax, qword ptr [rbp-0x168]
-       cqo      
-       idiv     rdx:rax, qword ptr [rbp-0x170]
-       mov      qword ptr [rbp-0x160], rax
-       mov      rax, qword ptr [rbp-0x160]
-       mov      qword ptr [rbp-0x180], rcx
-       mov      qword ptr [rbp-0x178], rax
-       vinserti128 ymm1, ymm3, xmmword ptr [rbp-0x180], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       vmovups  ymmword ptr [rdi+0x20], ymm1
-       mov      rax, rdi
-						;; size=297 bbWeight=1 PerfScore 320.25
+       vmovaps  xmmword ptr [rbp-0x1E0], xmm1
+       mov      rdi, qword ptr [rbp-0x1D0]
+       mov      qword ptr [rbp-0x1F8], rdi
+       mov      rdi, qword ptr [rbp-0x1E0]
+       mov      qword ptr [rbp-0x200], rdi
+       mov      rdi, qword ptr [rbp-0x1F8]
+       mov      rsi, qword ptr [rbp-0x200]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Divide(long,long):long
+       mov      qword ptr [rbp-0x1F0], rax
+       mov      r15, qword ptr [rbp-0x1F0]
+       mov      rdi, qword ptr [rbp-0x1C8]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1D8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x1E8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[long]:op_Division(System.Runtime.Intrinsics.Vector64`1[long],System.Runtime.Intrinsics.Vector64`1[long]):System.Runtime.Intrinsics.Vector64`1[long]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[long]:op_Division(System.Runtime.Intrinsics.Vector64`1[long],System.Runtime.Intrinsics.Vector64`1[long]):System.Runtime.Intrinsics.Vector64`1[long]
+       mov      qword ptr [rbp-0x210], r15
+       mov      rdi, qword ptr [rbp-0x1E8]
+       mov      qword ptr [rsp], rdi
+       lea      rdi, [rbp-0x210]
+						;; size=345 bbWeight=1 PerfScore 65.00
 G_M43701_IG04:
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[long](byref,System.Runtime.Intrinsics.Vector64`1[long])
+       call     [rax]System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[long](byref,System.Runtime.Intrinsics.Vector64`1[long])
+       vmovaps  xmm0, xmmword ptr [rbp-0x160]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0x210]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       lea      rdi, [rbp-0x50]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[long](System.Runtime.Intrinsics.Vector128`1[long],System.Runtime.Intrinsics.Vector128`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[long](System.Runtime.Intrinsics.Vector128`1[long],System.Runtime.Intrinsics.Vector128`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
+       vmovups  ymm0, ymmword ptr [rbp-0x30]
+       vmovups  ymmword ptr [rbp-0x250], ymm0
+       vmovups  ymm0, ymmword ptr [rbp-0x50]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x250]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[long](byref,System.Runtime.Intrinsics.Vector256`1[long])
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[long](byref,System.Runtime.Intrinsics.Vector256`1[long])
+       vmovdqu  ymm0, ymmword ptr [rbp-0x250]
+       vmovdqu  ymmword ptr [rbx], ymm0
+       vmovdqu  ymm0, ymmword ptr [rbp-0x230]
+       vmovdqu  ymmword ptr [rbx+0x20], ymm0
+       mov      rax, rbx
+						;; size=125 bbWeight=1 PerfScore 41.00
+G_M43701_IG05:
        vzeroupper 
-       add      rsp, 384
+       add      rsp, 608
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 639, prolog size 16, PerfScore 667.75, instruction count 114, allocated bytes for code 639 (MethodHash=8284554a) for method System.Runtime.Intrinsics.Vector512:Divide[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):System.Runtime.Intrinsics.Vector512`1[long] (FullOpts)
+; Total bytes of code 872, prolog size 19, PerfScore 456.75, instruction count 145, allocated bytes for code 872 (MethodHash=8284554a) for method System.Runtime.Intrinsics.Vector512:Divide[long](System.Runtime.Intrinsics.Vector512`1[long],System.Runtime.Intrinsics.Vector512`1[long]):System.Runtime.Intrinsics.Vector512`1[long] (FullOpts)
218 (16.02 % of base) - System.Runtime.Intrinsics.Vector512:Divide[short](System.Runtime.Intrinsics.Vector512`1[short],System.Runtime.Intrinsics.Vector512`1[short]):System.Runtime.Intrinsics.Vector512`1[short]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector512:Divide[short](System.Runtime.Intrinsics.Vector512`1[short],System.Runtime.Intrinsics.Vector512`1[short]):System.Runtime.Intrinsics.Vector512`1[short] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 43 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 37 single block inlinees; 21 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T08] (  5,  5   )   byref  ->  rdi         single-def
+;  V00 RetBuf       [V00,T04] (  4,  4   )   byref  ->  rbx         single-def
 ;* V01 arg0         [V01    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[short]>
 ;* V02 arg1         [V02    ] (  0,  0   )  struct (64) zero-ref    single-def <System.Runtime.Intrinsics.Vector512`1[short]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T37] (  2,  4   )  simd32  ->  mm0         "impAppendStmt"
-;  V05 tmp2         [V05,T38] (  2,  4   )  simd32  ->  mm1         "spilled call-like call argument"
-;  V06 tmp3         [V06,T25] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V07 tmp4         [V07,T26] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V08 tmp5         [V08,T39] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T32] (  2,  4   )  simd32  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
+;  V05 tmp2         [V05,T33] (  2,  4   )  simd32  ->  [rbp-0x50]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument"
+;  V06 tmp3         [V06,T20] (  3,  6   )  simd32  ->  mm0         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V07 tmp4         [V07,T21] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V08 tmp5         [V08,T34] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
 ;* V09 tmp6         [V09    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V10 tmp7         [V10,T27] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V11 tmp8         [V11,T28] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V10 tmp7         [V10,T22] (  3,  6   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V11 tmp8         [V11,T23] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V12 tmp9         [V12    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V13 tmp10        [V13    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V14 tmp11        [V14    ] (  5,  5   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V15 tmp12        [V15,T17] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16    ] (  5, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V17 tmp14        [V17    ] (  5, 10   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V14 tmp11        [V14    ] (  5,  5   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V15 tmp12        [V15,T13] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V16 tmp13        [V16    ] (  5, 10   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V17 tmp14        [V17    ] (  5, 10   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V18 tmp15        [V18    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V19 tmp16        [V19    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V20 tmp17        [V20,T00] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
+;  V20 tmp17        [V20,T00] (  8,  8   )   short  ->  rdi         "Inline return value spill temp"
 ;* V21 tmp18        [V21    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
 ;* V22 tmp19        [V22    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V23 tmp20        [V23    ] (  5,  5   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V24 tmp21        [V24,T18] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V25 tmp22        [V25    ] (  5, 10   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V26 tmp23        [V26    ] (  5, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V23 tmp20        [V23    ] (  5,  5   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V24 tmp21        [V24,T14] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  5, 10   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V26 tmp23        [V26    ] (  5, 10   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V27 tmp24        [V27    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V28 tmp25        [V28    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
 ;  V29 tmp26        [V29,T01] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
 ;* V30 tmp27        [V30    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
 ;* V31 tmp28        [V31    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V32 tmp29        [V32,T41] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V33 tmp30        [V33,T29] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V34 tmp31        [V34,T30] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V32 tmp29        [V32,T36] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V33 tmp30        [V33,T24] (  3,  6   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V34 tmp31        [V34,T25] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V35 tmp32        [V35    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V36 tmp33        [V36    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V37 tmp34        [V37    ] (  5,  5   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V38 tmp35        [V38,T19] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V39 tmp36        [V39    ] (  5, 10   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V40 tmp37        [V40    ] (  5, 10   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V37 tmp34        [V37    ] (  5,  5   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V38 tmp35        [V38,T15] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V39 tmp36        [V39    ] (  5, 10   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V40 tmp37        [V40    ] (  5, 10   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V41 tmp38        [V41    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V42 tmp39        [V42    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V43 tmp40        [V43,T02] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
+;  V43 tmp40        [V43,T02] (  8,  8   )   short  ->  rdi         "Inline return value spill temp"
 ;* V44 tmp41        [V44    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
 ;* V45 tmp42        [V45    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V46 tmp43        [V46    ] (  5,  5   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V47 tmp44        [V47,T20] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V48 tmp45        [V48    ] (  5, 10   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V49 tmp46        [V49    ] (  5, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V46 tmp43        [V46    ] (  5,  5   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V47 tmp44        [V47,T16] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V48 tmp45        [V48    ] (  5, 10   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V49 tmp46        [V49    ] (  5, 10   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V50 tmp47        [V50    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V51 tmp48        [V51    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
 ;  V52 tmp49        [V52,T03] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
 ;* V53 tmp50        [V53    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
 ;* V54 tmp51        [V54    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V55 tmp52        [V55,T42] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V55 tmp52        [V55,T37] (  3,  3   )  simd16  ->  [rbp-0x110]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V56 tmp53        [V56    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V57 tmp54        [V57,T31] (  3,  6   )  simd32  ->  mm1         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V58 tmp55        [V58,T32] (  3,  6   )  simd32  ->  mm2         "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V59 tmp56        [V59,T40] (  2,  4   )  simd16  ->  mm3         "impAppendStmt"
+;  V57 tmp54        [V57,T26] (  3,  6   )  simd32  ->  [rbp-0x130]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V58 tmp55        [V58,T27] (  3,  6   )  simd32  ->  [rbp-0x150]  spill-single-def "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V59 tmp56        [V59,T35] (  2,  4   )  simd16  ->  [rbp-0x160]  spill-single-def "impAppendStmt"
 ;* V60 tmp57        [V60    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V61 tmp58        [V61,T33] (  3,  6   )  simd16  ->  [rbp-0xD0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V62 tmp59        [V62,T34] (  3,  6   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V61 tmp58        [V61,T28] (  3,  6   )  simd16  ->  [rbp-0x170]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V62 tmp59        [V62,T29] (  3,  6   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V63 tmp60        [V63    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V64 tmp61        [V64    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V65 tmp62        [V65    ] (  5,  5   )  struct ( 8) [rbp-0xE8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V66 tmp63        [V66,T21] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V67 tmp64        [V67    ] (  5, 10   )  struct ( 8) [rbp-0xF0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V68 tmp65        [V68    ] (  5, 10   )  struct ( 8) [rbp-0xF8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V65 tmp62        [V65    ] (  5,  5   )  struct ( 8) [rbp-0x188]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V66 tmp63        [V66,T17] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V67 tmp64        [V67    ] (  5, 10   )  struct ( 8) [rbp-0x190]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V68 tmp65        [V68    ] (  5, 10   )  struct ( 8) [rbp-0x198]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V69 tmp66        [V69    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V70 tmp67        [V70    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V71 tmp68        [V71,T04] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
-;* V72 tmp69        [V72    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V73 tmp70        [V73    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V74 tmp71        [V74    ] (  5,  5   )  struct ( 8) [rbp-0x100]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V75 tmp72        [V75,T22] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V76 tmp73        [V76    ] (  5, 10   )  struct ( 8) [rbp-0x108]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V77 tmp74        [V77    ] (  5, 10   )  struct ( 8) [rbp-0x110]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V78 tmp75        [V78    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V79 tmp76        [V79    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V80 tmp77        [V80,T05] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
-;* V81 tmp78        [V81    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V82 tmp79        [V82    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V83 tmp80        [V83,T43] (  3,  3   )  simd16  ->  [rbp-0x120]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V84 tmp81        [V84,T35] (  3,  6   )  simd16  ->  [rbp-0x130]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V85 tmp82        [V85,T36] (  3,  6   )  simd16  ->  [rbp-0x140]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V86 tmp83        [V86    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V87 tmp84        [V87    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V88 tmp85        [V88    ] (  5,  5   )  struct ( 8) [rbp-0x148]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V89 tmp86        [V89,T23] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V90 tmp87        [V90    ] (  5, 10   )  struct ( 8) [rbp-0x150]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V91 tmp88        [V91    ] (  5, 10   )  struct ( 8) [rbp-0x158]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V92 tmp89        [V92    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V93 tmp90        [V93    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V94 tmp91        [V94,T06] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
-;* V95 tmp92        [V95    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V96 tmp93        [V96    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V97 tmp94        [V97    ] (  5,  5   )  struct ( 8) [rbp-0x160]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V98 tmp95        [V98,T24] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V99 tmp96        [V99    ] (  5, 10   )  struct ( 8) [rbp-0x168]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V100 tmp97       [V100    ] (  5, 10   )  struct ( 8) [rbp-0x170]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V101 tmp98       [V101    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V102 tmp99       [V102    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V103 tmp100      [V103,T07] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
-;* V104 tmp101      [V104    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V105 tmp102      [V105    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V106 tmp103      [V106,T44] (  3,  3   )  simd16  ->  [rbp-0x180]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V107 tmp104      [V107    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[short]>
-;* V108 tmp105      [V108    ] (  0,  0   )  struct (64) zero-ref    ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[short]>
-;  V109 tmp106      [V109,T47] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-;  V110 tmp107      [V110,T48] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-;  V111 tmp108      [V111,T49] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
-;  V112 tmp109      [V112,T50] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
-;  V113 tmp110      [V113,T09] (  2,  2   )    long  ->  rcx         "field V12._00 (fldOffset=0x0)" P-INDEP
-;  V114 tmp111      [V114,T10] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
-;  V115 tmp112      [V115    ] (  5,  5   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V116 tmp113      [V116    ] (  5,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
-;  V117 tmp114      [V117    ] (  5,  9   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V118 tmp115      [V118    ] (  5,  5   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
-;  V119 tmp116      [V119    ] (  5,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V120 tmp117      [V120    ] (  5,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
-;  V121 tmp118      [V121,T11] (  2,  2   )    long  ->  rcx         "field V35._00 (fldOffset=0x0)" P-INDEP
-;  V122 tmp119      [V122,T12] (  2,  2   )    long  ->  rax         "field V36._00 (fldOffset=0x0)" P-INDEP
-;  V123 tmp120      [V123    ] (  5,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;  V124 tmp121      [V124    ] (  5,  9   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
-;  V125 tmp122      [V125    ] (  5,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V40._00 (fldOffset=0x0)" P-DEP
-;  V126 tmp123      [V126    ] (  5,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-;  V127 tmp124      [V127    ] (  5,  9   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V48._00 (fldOffset=0x0)" P-DEP
-;  V128 tmp125      [V128    ] (  5,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
-;  V129 tmp126      [V129,T13] (  2,  2   )    long  ->  rcx         "field V63._00 (fldOffset=0x0)" P-INDEP
-;  V130 tmp127      [V130,T14] (  2,  2   )    long  ->  rax         "field V64._00 (fldOffset=0x0)" P-INDEP
-;  V131 tmp128      [V131    ] (  5,  5   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V65._00 (fldOffset=0x0)" P-DEP
-;  V132 tmp129      [V132    ] (  5,  9   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
-;  V133 tmp130      [V133    ] (  5,  9   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
-;  V134 tmp131      [V134    ] (  5,  5   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V74._00 (fldOffset=0x0)" P-DEP
-;  V135 tmp132      [V135    ] (  5,  9   )    long  ->  [rbp-0x108]  do-not-enreg[X] addr-exposed "field V76._00 (fldOffset=0x0)" P-DEP
-;  V136 tmp133      [V136    ] (  5,  9   )    long  ->  [rbp-0x110]  do-not-enreg[X] addr-exposed "field V77._00 (fldOffset=0x0)" P-DEP
-;  V137 tmp134      [V137,T15] (  2,  2   )    long  ->  rcx         "field V86._00 (fldOffset=0x0)" P-INDEP
-;  V138 tmp135      [V138,T16] (  2,  2   )    long  ->  rax         "field V87._00 (fldOffset=0x0)" P-INDEP
-;  V139 tmp136      [V139    ] (  5,  5   )    long  ->  [rbp-0x148]  do-not-enreg[X] addr-exposed "field V88._00 (fldOffset=0x0)" P-DEP
-;  V140 tmp137      [V140    ] (  5,  9   )    long  ->  [rbp-0x150]  do-not-enreg[X] addr-exposed "field V90._00 (fldOffset=0x0)" P-DEP
-;  V141 tmp138      [V141    ] (  5,  9   )    long  ->  [rbp-0x158]  do-not-enreg[X] addr-exposed "field V91._00 (fldOffset=0x0)" P-DEP
-;  V142 tmp139      [V142    ] (  5,  5   )    long  ->  [rbp-0x160]  do-not-enreg[X] addr-exposed "field V97._00 (fldOffset=0x0)" P-DEP
-;  V143 tmp140      [V143    ] (  5,  9   )    long  ->  [rbp-0x168]  do-not-enreg[X] addr-exposed "field V99._00 (fldOffset=0x0)" P-DEP
-;  V144 tmp141      [V144    ] (  5,  9   )    long  ->  [rbp-0x170]  do-not-enreg[X] addr-exposed "field V100._00 (fldOffset=0x0)" P-DEP
-;  V145 tmp142      [V145,T45] (  2,  2   )  simd32  ->  mm0         "field V108._lower (fldOffset=0x0)" P-INDEP
-;  V146 tmp143      [V146,T46] (  2,  2   )  simd32  ->  mm1         "field V108._upper (fldOffset=0x20)" P-INDEP
+;  V71 tmp68        [V71    ] (  5,  5   )  struct ( 8) [rbp-0x1A0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V72 tmp69        [V72,T18] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V73 tmp70        [V73    ] (  5, 10   )  struct ( 8) [rbp-0x1A8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V74 tmp71        [V74    ] (  5, 10   )  struct ( 8) [rbp-0x1B0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V75 tmp72        [V75    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V76 tmp73        [V76    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V77 tmp74        [V77,T38] (  3,  3   )  simd16  ->  [rbp-0x1C0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V78 tmp75        [V78,T30] (  3,  6   )  simd16  ->  [rbp-0x1D0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V79 tmp76        [V79,T31] (  3,  6   )  simd16  ->  [rbp-0x1E0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;* V80 tmp77        [V80    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V81 tmp78        [V81    ] (  2,  4   )  struct ( 8) [rbp-0x1E8]  do-not-enreg[HS] hidden-struct-arg "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V82 tmp79        [V82    ] (  5,  5   )  struct ( 8) [rbp-0x1F0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V83 tmp80        [V83,T19] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V84 tmp81        [V84    ] (  5, 10   )  struct ( 8) [rbp-0x1F8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V85 tmp82        [V85    ] (  5, 10   )  struct ( 8) [rbp-0x200]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V86 tmp83        [V86    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V87 tmp84        [V87    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V88 tmp85        [V88    ] (  3,  3   )  simd16  ->  [rbp-0x210]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V89 tmp86        [V89    ] (  3,  3   )  struct (64) [rbp-0x250]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector512`1[short]>
+;  V90 tmp87        [V90,T39] (  1,  1   )  simd32  ->  [rbp+0x10]  single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+;  V91 tmp88        [V91,T40] (  1,  1   )  simd32  ->  [rbp+0x30]  single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+;  V92 tmp89        [V92,T41] (  1,  1   )  simd32  ->  [rbp+0x50]  single-def "field V02._lower (fldOffset=0x0)" P-INDEP
+;  V93 tmp90        [V93,T42] (  1,  1   )  simd32  ->  [rbp+0x70]  single-def "field V02._upper (fldOffset=0x20)" P-INDEP
+;  V94 tmp91        [V94,T06] (  2,  2   )    long  ->  rdi         "field V12._00 (fldOffset=0x0)" P-INDEP
+;  V95 tmp92        [V95,T07] (  2,  2   )    long  ->  rax         "field V13._00 (fldOffset=0x0)" P-INDEP
+;  V96 tmp93        [V96    ] (  5,  5   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
+;  V97 tmp94        [V97    ] (  5,  9   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V98 tmp95        [V98    ] (  5,  9   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V99 tmp96        [V99    ] (  5,  5   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V100 tmp97       [V100    ] (  5,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V101 tmp98       [V101    ] (  5,  9   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
+;  V102 tmp99       [V102,T08] (  2,  2   )    long  ->  rdi         "field V35._00 (fldOffset=0x0)" P-INDEP
+;  V103 tmp100      [V103,T09] (  2,  2   )    long  ->  rsi         "field V36._00 (fldOffset=0x0)" P-INDEP
+;  V104 tmp101      [V104    ] (  5,  5   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
+;  V105 tmp102      [V105    ] (  5,  9   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
+;  V106 tmp103      [V106    ] (  5,  9   )    long  ->  [rbp-0xE8]  do-not-enreg[X] addr-exposed "field V40._00 (fldOffset=0x0)" P-DEP
+;  V107 tmp104      [V107    ] (  5,  5   )    long  ->  [rbp-0xF0]  do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
+;  V108 tmp105      [V108    ] (  5,  9   )    long  ->  [rbp-0xF8]  do-not-enreg[X] addr-exposed "field V48._00 (fldOffset=0x0)" P-DEP
+;  V109 tmp106      [V109    ] (  5,  9   )    long  ->  [rbp-0x100]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
+;  V110 tmp107      [V110,T10] (  2,  2   )    long  ->  r15         "field V63._00 (fldOffset=0x0)" P-INDEP
+;  V111 tmp108      [V111,T11] (  2,  2   )    long  ->  rdi         "field V64._00 (fldOffset=0x0)" P-INDEP
+;  V112 tmp109      [V112    ] (  5,  5   )    long  ->  [rbp-0x188]  do-not-enreg[X] addr-exposed "field V65._00 (fldOffset=0x0)" P-DEP
+;  V113 tmp110      [V113    ] (  5,  9   )    long  ->  [rbp-0x190]  do-not-enreg[X] addr-exposed "field V67._00 (fldOffset=0x0)" P-DEP
+;  V114 tmp111      [V114    ] (  5,  9   )    long  ->  [rbp-0x198]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
+;  V115 tmp112      [V115    ] (  5,  5   )    long  ->  [rbp-0x1A0]  do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
+;  V116 tmp113      [V116    ] (  5,  9   )    long  ->  [rbp-0x1A8]  do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
+;  V117 tmp114      [V117    ] (  5,  9   )    long  ->  [rbp-0x1B0]  do-not-enreg[X] addr-exposed "field V74._00 (fldOffset=0x0)" P-DEP
+;  V118 tmp115      [V118,T12] (  2,  2   )    long  ->  r15         "field V80._00 (fldOffset=0x0)" P-INDEP
+;  V119 tmp116      [V119,T05] (  2,  3   )    long  ->  [rbp-0x1E8]  do-not-enreg[H] hidden-struct-arg "field V81._00 (fldOffset=0x0)" P-DEP
+;  V120 tmp117      [V120    ] (  5,  5   )    long  ->  [rbp-0x1F0]  do-not-enreg[X] addr-exposed "field V82._00 (fldOffset=0x0)" P-DEP
+;  V121 tmp118      [V121    ] (  5,  9   )    long  ->  [rbp-0x1F8]  do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
+;  V122 tmp119      [V122    ] (  5,  9   )    long  ->  [rbp-0x200]  do-not-enreg[X] addr-exposed "field V85._00 (fldOffset=0x0)" P-DEP
+;  V123 tmp120      [V123    ] (  3,  3   )  simd32  ->  [rbp-0x250]  do-not-enreg[XS] addr-exposed "field V89._lower (fldOffset=0x0)" P-DEP
+;  V124 tmp121      [V124    ] (  3,  3   )  simd32  ->  [rbp-0x230]  do-not-enreg[XS] addr-exposed "field V89._upper (fldOffset=0x20)" P-DEP
 ;
-; Lcl frame size = 384
+; Lcl frame size = 608
 
 G_M40213_IG01:
        push     rbp
-       sub      rsp, 384
-       lea      rbp, [rsp+0x180]
-						;; size=16 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     rbx
+       sub      rsp, 608
+       lea      rbp, [rsp+0x270]
+       mov      rbx, rdi
+						;; size=22 bbWeight=1 PerfScore 4.00
 G_M40213_IG02:
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x50]
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x60], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       movsx    rax, word  ptr [rbp-0x30]
-       movsx    rcx, word  ptr [rbp-0x38]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x28], cx
-       movsx    rax, word  ptr [rbp-0x2E]
-       movsx    rcx, word  ptr [rbp-0x36]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x26], cx
-       movsx    rax, word  ptr [rbp-0x2C]
-       movsx    rcx, word  ptr [rbp-0x34]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x24], cx
-       movsx    rax, word  ptr [rbp-0x2A]
-       movsx    rcx, word  ptr [rbp-0x32]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x22], cx
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       movsx    rax, word  ptr [rbp-0x48]
-       movsx    rsi, word  ptr [rbp-0x50]
+       vmovaps  xmmword ptr [rbp-0x70], xmm2
+       mov      rax, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], rax
+       mov      rax, qword ptr [rbp-0x70]
+       mov      qword ptr [rbp-0x88], rax
+       movsx    rax, word  ptr [rbp-0x80]
+       movsx    rdi, word  ptr [rbp-0x88]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0x78], di
+       movsx    rax, word  ptr [rbp-0x7E]
+       movsx    rdi, word  ptr [rbp-0x86]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0x76], di
+       movsx    rax, word  ptr [rbp-0x7C]
+       movsx    rdi, word  ptr [rbp-0x84]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0x74], di
+       movsx    rax, word  ptr [rbp-0x7A]
+       movsx    rdi, word  ptr [rbp-0x82]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0x72], di
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      rax, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x98], rax
+       mov      rax, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0xA0], rax
+       movsx    rax, word  ptr [rbp-0x98]
+       movsx    rsi, word  ptr [rbp-0xA0]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x40], si
-       movsx    rax, word  ptr [rbp-0x46]
-       movsx    rsi, word  ptr [rbp-0x4E]
+       mov      word  ptr [rbp-0x90], si
+       movsx    rax, word  ptr [rbp-0x96]
+       movsx    rsi, word  ptr [rbp-0x9E]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x3E], si
-       movsx    rax, word  ptr [rbp-0x44]
-       movsx    rsi, word  ptr [rbp-0x4C]
+       mov      word  ptr [rbp-0x8E], si
+       movsx    rax, word  ptr [rbp-0x94]
+       movsx    rsi, word  ptr [rbp-0x9C]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x3C], si
-       movsx    rax, word  ptr [rbp-0x42]
-       movsx    rsi, word  ptr [rbp-0x4A]
+       mov      word  ptr [rbp-0x8C], si
+       movsx    rax, word  ptr [rbp-0x92]
+       movsx    rsi, word  ptr [rbp-0x9A]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x3A], si
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-						;; size=240 bbWeight=1 PerfScore 283.50
+       mov      word  ptr [rbp-0x8A], si
+       mov      rax, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
+						;; size=303 bbWeight=1 PerfScore 283.50
 G_M40213_IG03:
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm2, xmmword ptr [rbp-0xB0]
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
+       vmovaps  xmmword ptr [rbp-0xC0], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       movsx    rax, word  ptr [rbp-0x90]
-       movsx    rcx, word  ptr [rbp-0x98]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x88], cx
-       movsx    rax, word  ptr [rbp-0x8E]
-       movsx    rcx, word  ptr [rbp-0x96]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x86], cx
-       movsx    rax, word  ptr [rbp-0x8C]
-       movsx    rcx, word  ptr [rbp-0x94]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x84], cx
-       movsx    rax, word  ptr [rbp-0x8A]
-       movsx    rcx, word  ptr [rbp-0x92]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x82], cx
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       movsx    rax, word  ptr [rbp-0xA8]
-       movsx    rsi, word  ptr [rbp-0xB0]
+       vmovaps  xmmword ptr [rbp-0xD0], xmm0
+       mov      rax, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], rax
+       mov      rax, qword ptr [rbp-0xD0]
+       mov      qword ptr [rbp-0xE8], rax
+       movsx    rax, word  ptr [rbp-0xE0]
+       movsx    rdi, word  ptr [rbp-0xE8]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0xD8], di
+       movsx    rax, word  ptr [rbp-0xDE]
+       movsx    rdi, word  ptr [rbp-0xE6]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0xD6], di
+       movsx    rax, word  ptr [rbp-0xDC]
+       movsx    rdi, word  ptr [rbp-0xE4]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0xD4], di
+       movsx    rax, word  ptr [rbp-0xDA]
+       movsx    rdi, word  ptr [rbp-0xE2]
+       cdq      
+       idiv     edx:eax, edi
+       movsx    rdi, ax
+       mov      word  ptr [rbp-0xD2], di
+       mov      rdi, qword ptr [rbp-0xD8]
+       mov      rax, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xF8], rax
+       mov      rax, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0x100], rax
+       movsx    rax, word  ptr [rbp-0xF8]
+       movsx    rsi, word  ptr [rbp-0x100]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0xA0], si
-       movsx    rax, word  ptr [rbp-0xA6]
-       movsx    rsi, word  ptr [rbp-0xAE]
+       mov      word  ptr [rbp-0xF0], si
+       movsx    rax, word  ptr [rbp-0xF6]
+       movsx    rsi, word  ptr [rbp-0xFE]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x9E], si
-       movsx    rax, word  ptr [rbp-0xA4]
-       movsx    rsi, word  ptr [rbp-0xAC]
+       mov      word  ptr [rbp-0xEE], si
+       movsx    rax, word  ptr [rbp-0xF4]
+       movsx    rsi, word  ptr [rbp-0xFC]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x9C], si
-       movsx    rax, word  ptr [rbp-0xA2]
-       movsx    rsi, word  ptr [rbp-0xAA]
+       mov      word  ptr [rbp-0xEC], si
+       movsx    rax, word  ptr [rbp-0xF2]
+       movsx    rsi, word  ptr [rbp-0xFA]
        cdq      
        idiv     edx:eax, esi
        movsx    rsi, ax
-       mov      word  ptr [rbp-0x9A], si
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-						;; size=336 bbWeight=1 PerfScore 283.00
+       mov      word  ptr [rbp-0xEA], si
+       mov      rsi, qword ptr [rbp-0xF0]
+       mov      qword ptr [rbp-0x110], rdi
+						;; size=360 bbWeight=1 PerfScore 283.00
 G_M40213_IG04:
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
+       mov      qword ptr [rbp-0x108], rsi
+       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0x110], 1
+       vmovups  ymmword ptr [rbp-0x30], ymm0
        vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vmovups  ymmword ptr [rbp-0x130], ymm1
        vmovups  ymm2, ymmword ptr [rbp+0x70]
+       vmovups  ymmword ptr [rbp-0x150], ymm2
        vmovaps  ymm3, ymm1
-       vmovaps  xmmword ptr [rbp-0xD0], xmm3
+       vmovaps  xmmword ptr [rbp-0x170], xmm3
        vmovaps  ymm3, ymm2
-       vmovaps  xmmword ptr [rbp-0xE0], xmm3
-       mov      rax, qword ptr [rbp-0xD0]
-       mov      qword ptr [rbp-0xF0], rax
-       mov      rax, qword ptr [rbp-0xE0]
-       mov      qword ptr [rbp-0xF8], rax
-       movsx    rax, word  ptr [rbp-0xF0]
-       movsx    rcx, word  ptr [rbp-0xF8]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0xE8], cx
-       movsx    rax, word  ptr [rbp-0xEE]
-       movsx    rcx, word  ptr [rbp-0xF6]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0xE6], cx
-       movsx    rax, word  ptr [rbp-0xEC]
-       movsx    rcx, word  ptr [rbp-0xF4]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0xE4], cx
-       movsx    rax, word  ptr [rbp-0xEA]
-       movsx    rcx, word  ptr [rbp-0xF2]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0xE2], cx
-       mov      rcx, qword ptr [rbp-0xE8]
-       mov      rax, qword ptr [rbp-0xC8]
-       mov      qword ptr [rbp-0x108], rax
-       mov      rax, qword ptr [rbp-0xD8]
-       mov      qword ptr [rbp-0x110], rax
-       movsx    rax, word  ptr [rbp-0x108]
-       movsx    rsi, word  ptr [rbp-0x110]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x100], si
-       movsx    rax, word  ptr [rbp-0x106]
-       movsx    rsi, word  ptr [rbp-0x10E]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0xFE], si
-       movsx    rax, word  ptr [rbp-0x104]
-       movsx    rsi, word  ptr [rbp-0x10C]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0xFC], si
-       movsx    rax, word  ptr [rbp-0x102]
-       movsx    rsi, word  ptr [rbp-0x10A]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0xFA], si
-						;; size=354 bbWeight=1 PerfScore 286.50
+       vmovaps  xmmword ptr [rbp-0x180], xmm3
+       mov      rdi, qword ptr [rbp-0x170]
+       mov      qword ptr [rbp-0x190], rdi
+       mov      rdi, qword ptr [rbp-0x180]
+       mov      qword ptr [rbp-0x198], rdi
+       movsx    rdi, word  ptr [rbp-0x190]
+       movsx    rsi, word  ptr [rbp-0x198]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x188], ax
+       movsx    rdi, word  ptr [rbp-0x18E]
+       movsx    rsi, word  ptr [rbp-0x196]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x186], ax
+       movsx    rdi, word  ptr [rbp-0x18C]
+       movsx    rsi, word  ptr [rbp-0x194]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x184], ax
+       movsx    rdi, word  ptr [rbp-0x18A]
+       movsx    rsi, word  ptr [rbp-0x192]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x182], ax
+       mov      r15, qword ptr [rbp-0x188]
+       mov      rdi, qword ptr [rbp-0x168]
+       mov      qword ptr [rbp-0x1A8], rdi
+       mov      rdi, qword ptr [rbp-0x178]
+       mov      qword ptr [rbp-0x1B0], rdi
+       movsx    rdi, word  ptr [rbp-0x1A8]
+       movsx    rsi, word  ptr [rbp-0x1B0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x1A0], ax
+       movsx    rdi, word  ptr [rbp-0x1A6]
+       movsx    rsi, word  ptr [rbp-0x1AE]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+						;; size=336 bbWeight=1 PerfScore 85.00
 G_M40213_IG05:
-       mov      rax, qword ptr [rbp-0x100]
-       mov      qword ptr [rbp-0x120], rcx
-       mov      qword ptr [rbp-0x118], rax
-       vmovaps  xmm3, xmmword ptr [rbp-0x120]
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x19E], ax
+       movsx    rdi, word  ptr [rbp-0x1A4]
+       movsx    rsi, word  ptr [rbp-0x1AC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x19C], ax
+       movsx    rdi, word  ptr [rbp-0x1A2]
+       movsx    rsi, word  ptr [rbp-0x1AA]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x19A], ax
+       mov      rdi, qword ptr [rbp-0x1A0]
+       mov      qword ptr [rbp-0x1C0], r15
+       mov      qword ptr [rbp-0x1B8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x1C0]
+       vmovaps  xmmword ptr [rbp-0x160], xmm0
+       vmovups  ymm1, ymmword ptr [rbp-0x130]
        vextractf128 xmm1, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x130], xmm1
+       vmovaps  xmmword ptr [rbp-0x1D0], xmm1
+       vmovups  ymm2, ymmword ptr [rbp-0x150]
        vextractf128 xmm1, ymm2, 1
-       vmovaps  xmmword ptr [rbp-0x140], xmm1
-       mov      rax, qword ptr [rbp-0x130]
-       mov      qword ptr [rbp-0x150], rax
-       mov      rax, qword ptr [rbp-0x140]
-       mov      qword ptr [rbp-0x158], rax
-       movsx    rax, word  ptr [rbp-0x150]
-       movsx    rcx, word  ptr [rbp-0x158]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x148], cx
-       movsx    rax, word  ptr [rbp-0x14E]
-       movsx    rcx, word  ptr [rbp-0x156]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x146], cx
-       movsx    rax, word  ptr [rbp-0x14C]
-       movsx    rcx, word  ptr [rbp-0x154]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x144], cx
-       movsx    rax, word  ptr [rbp-0x14A]
-       movsx    rcx, word  ptr [rbp-0x152]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x142], cx
-       mov      rcx, qword ptr [rbp-0x148]
-       mov      rax, qword ptr [rbp-0x128]
-       mov      qword ptr [rbp-0x168], rax
-       mov      rax, qword ptr [rbp-0x138]
-       mov      qword ptr [rbp-0x170], rax
-       movsx    rax, word  ptr [rbp-0x168]
-       movsx    rsi, word  ptr [rbp-0x170]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x160], si
-       movsx    rax, word  ptr [rbp-0x166]
-       movsx    rsi, word  ptr [rbp-0x16E]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x15E], si
-       movsx    rax, word  ptr [rbp-0x164]
-       movsx    rsi, word  ptr [rbp-0x16C]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x15C], si
-       movsx    rax, word  ptr [rbp-0x162]
-       movsx    rsi, word  ptr [rbp-0x16A]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x15A], si
-						;; size=360 bbWeight=1 PerfScore 283.00
+       vmovaps  xmmword ptr [rbp-0x1E0], xmm1
+       mov      rdi, qword ptr [rbp-0x1D0]
+       mov      qword ptr [rbp-0x1F8], rdi
+       mov      rdi, qword ptr [rbp-0x1E0]
+       mov      qword ptr [rbp-0x200], rdi
+       movsx    rdi, word  ptr [rbp-0x1F8]
+       movsx    rsi, word  ptr [rbp-0x200]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x1F0], ax
+       movsx    rdi, word  ptr [rbp-0x1F6]
+       movsx    rsi, word  ptr [rbp-0x1FE]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x1EE], ax
+       movsx    rdi, word  ptr [rbp-0x1F4]
+       movsx    rsi, word  ptr [rbp-0x1FC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x1EC], ax
+       movsx    rdi, word  ptr [rbp-0x1F2]
+       movsx    rsi, word  ptr [rbp-0x1FA]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+						;; size=321 bbWeight=1 PerfScore 89.50
 G_M40213_IG06:
-       mov      rax, qword ptr [rbp-0x160]
-       mov      qword ptr [rbp-0x180], rcx
-       mov      qword ptr [rbp-0x178], rax
-       vinserti128 ymm1, ymm3, xmmword ptr [rbp-0x180], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       vmovups  ymmword ptr [rdi+0x20], ymm1
-       mov      rax, rdi
-						;; size=43 bbWeight=1 PerfScore 11.25
+       mov      word  ptr [rbp-0x1EA], ax
+       mov      r15, qword ptr [rbp-0x1F0]
+       mov      rdi, qword ptr [rbp-0x1C8]
+       mov      qword ptr [rsp], rdi
+       mov      rdi, qword ptr [rbp-0x1D8]
+       mov      qword ptr [rsp+0x08], rdi
+       lea      rdi, [rbp-0x1E8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector64`1[short]:op_Division(System.Runtime.Intrinsics.Vector64`1[short],System.Runtime.Intrinsics.Vector64`1[short]):System.Runtime.Intrinsics.Vector64`1[short]
+       call     [rax]System.Runtime.Intrinsics.Vector64`1[short]:op_Division(System.Runtime.Intrinsics.Vector64`1[short],System.Runtime.Intrinsics.Vector64`1[short]):System.Runtime.Intrinsics.Vector64`1[short]
+       mov      qword ptr [rbp-0x210], r15
+       mov      rdi, qword ptr [rbp-0x1E8]
+       mov      qword ptr [rsp], rdi
+       lea      rdi, [rbp-0x210]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[short](byref,System.Runtime.Intrinsics.Vector64`1[short])
+       call     [rax]System.Runtime.Intrinsics.Vector128:SetUpperUnsafe[short](byref,System.Runtime.Intrinsics.Vector64`1[short])
+       vmovaps  xmm0, xmmword ptr [rbp-0x160]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0x210]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       lea      rdi, [rbp-0x50]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[short](System.Runtime.Intrinsics.Vector128`1[short],System.Runtime.Intrinsics.Vector128`1[short]):System.Runtime.Intrinsics.Vector256`1[short]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[short](System.Runtime.Intrinsics.Vector128`1[short],System.Runtime.Intrinsics.Vector128`1[short]):System.Runtime.Intrinsics.Vector256`1[short]
+       vmovups  ymm0, ymmword ptr [rbp-0x30]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x250]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetLowerUnsafe[short](byref,System.Runtime.Intrinsics.Vector256`1[short])
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetLowerUnsafe[short](byref,System.Runtime.Intrinsics.Vector256`1[short])
+       vmovups  ymm0, ymmword ptr [rbp-0x50]
+       vmovups  ymmword ptr [rsp], ymm0
+       lea      rdi, [rbp-0x250]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[short](byref,System.Runtime.Intrinsics.Vector256`1[short])
+       call     [rax]System.Runtime.Intrinsics.Vector512:SetUpperUnsafe[short](byref,System.Runtime.Intrinsics.Vector256`1[short])
+       vmovdqu  ymm0, ymmword ptr [rbp-0x250]
+       vmovdqu  ymmword ptr [rbx], ymm0
+       vmovdqu  ymm0, ymmword ptr [rbp-0x230]
+       vmovdqu  ymmword ptr [rbx+0x20], ymm0
+       mov      rax, rbx
+						;; size=222 bbWeight=1 PerfScore 58.00
 G_M40213_IG07:
        vzeroupper 
-       add      rsp, 384
+       add      rsp, 608
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 1361, prolog size 16, PerfScore 1151.75, instruction count 274, allocated bytes for code 1361 (MethodHash=1f9d62ea) for method System.Runtime.Intrinsics.Vector512:Divide[short](System.Runtime.Intrinsics.Vector512`1[short],System.Runtime.Intrinsics.Vector512`1[short]):System.Runtime.Intrinsics.Vector512`1[short] (FullOpts)
+; Total bytes of code 1579, prolog size 19, PerfScore 806.75, instruction count 273, allocated bytes for code 1579 (MethodHash=1f9d62ea) for method System.Runtime.Intrinsics.Vector512:Divide[short](System.Runtime.Intrinsics.Vector512`1[short],System.Runtime.Intrinsics.Vector512`1[short]):System.Runtime.Intrinsics.Vector512`1[short] (FullOpts)
169 (81.25 % of base) - System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],int):System.Runtime.Intrinsics.Vector256`1[int]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],int):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T15] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V02 arg1         [V02,T00] ( 10, 10   )     int  ->  rsi         single-def
+;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  r15         single-def
+;  V01 arg0         [V01,T15] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V02 arg1         [V02,T00] ( 10, 10   )     int  ->  rbx         single-def
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T12] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V04 tmp1         [V04,T12] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T10] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V06 tmp3         [V06,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V09 tmp6         [V09    ] (  3,  3   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V09 tmp6         [V09    ] (  3,  3   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V10 tmp7         [V10,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  3,  6   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V11 tmp8         [V11    ] (  3,  6   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V12 tmp9         [V12    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V13 tmp10        [V13    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V14 tmp11        [V14    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  3,  3   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V16 tmp13        [V16,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  3,  6   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V18 tmp15        [V18    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T13] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V22 tmp19        [V22,T11] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V25 tmp22        [V25    ] (  3,  3   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V26 tmp23        [V26,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  3,  6   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V13 tmp10        [V13    ] (  3,  3   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V14 tmp11        [V14,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V15 tmp12        [V15    ] (  3,  6   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V16 tmp13        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V17 tmp14        [V17,T13] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V18 tmp15        [V18,T11] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V19 tmp16        [V19    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V20 tmp17        [V20    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V21 tmp18        [V21    ] (  3,  3   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V22 tmp19        [V22,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V23 tmp20        [V23    ] (  3,  6   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V24 tmp21        [V24    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  3,  3   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  3,  6   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V28 tmp25        [V28    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  3,  3   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V32 tmp29        [V32,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  3,  6   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V34 tmp31        [V34    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T14] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V39 tmp36        [V39,T02] (  2,  2   )    long  ->  rcx         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T03] (  2,  2   )    long  ->  rax         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  3,  3   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  3,  5   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  3,  3   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  3,  5   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T04] (  2,  2   )    long  ->  rcx         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T05] (  2,  2   )    long  ->  rax         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  3,  3   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  3,  5   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  3,  3   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  3,  5   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V29 tmp26        [V29,T14] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V30 tmp27        [V30    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V31 tmp28        [V31,T02] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V32 tmp29        [V32,T03] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V33 tmp30        [V33    ] (  3,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V34 tmp31        [V34    ] (  3,  5   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V35 tmp32        [V35    ] (  3,  3   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  3,  5   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37,T04] (  2,  2   )    long  ->  r14         "field V19._00 (fldOffset=0x0)" P-INDEP
+;  V38 tmp35        [V38,T05] (  2,  2   )    long  ->  rax         "field V20._00 (fldOffset=0x0)" P-INDEP
+;  V39 tmp36        [V39    ] (  3,  3   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
+;  V40 tmp37        [V40    ] (  3,  5   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V41 tmp38        [V41    ] (  3,  3   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  3,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 128
+; Lcl frame size = 152
 
 G_M30697_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 152
+       lea      rbp, [rsp+0xB0]
+       mov      r15, rdi
+       mov      ebx, esi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+						;; size=31 bbWeight=1 PerfScore 9.25
 G_M30697_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       mov      eax, dword ptr [rbp-0x20]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x18], eax
-       mov      eax, dword ptr [rbp-0x1C]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x14], eax
-       mov      rcx, qword ptr [rbp-0x18]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rax
-       mov      eax, dword ptr [rbp-0x30]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x28], eax
-       mov      eax, dword ptr [rbp-0x2C]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x24], eax
-       mov      rax, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rcx
-       mov      qword ptr [rbp-0x38], rax
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       mov      eax, dword ptr [rbp-0x60]
-       cdq      
-       idiv     edx:eax, esi
+       vmovaps  xmmword ptr [rbp-0x40], xmm1
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      edi, dword ptr [rbp-0x50]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x48], eax
+       mov      edi, dword ptr [rbp-0x4C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x44], eax
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       mov      edi, dword ptr [rbp-0x60]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
        mov      dword ptr [rbp-0x58], eax
-       mov      eax, dword ptr [rbp-0x5C]
-       cdq      
-       idiv     edx:eax, esi
+       mov      edi, dword ptr [rbp-0x5C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
        mov      dword ptr [rbp-0x54], eax
-       mov      rcx, qword ptr [rbp-0x58]
-       mov      rax, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rax
-       mov      eax, dword ptr [rbp-0x70]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x68], eax
-       mov      eax, dword ptr [rbp-0x6C]
-       cdq      
-       idiv     edx:eax, esi
-       mov      dword ptr [rbp-0x64], eax
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rcx
-       mov      qword ptr [rbp-0x78], rax
-       vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=175 bbWeight=1 PerfScore 249.50
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       mov      edi, dword ptr [rbp-0x90]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x88], eax
+       mov      edi, dword ptr [rbp-0x8C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x84], eax
+       mov      r14, qword ptr [rbp-0x88]
+						;; size=222 bbWeight=1 PerfScore 57.25
 G_M30697_IG03:
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       mov      edi, dword ptr [rbp-0xA0]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x98], eax
+       mov      edi, dword ptr [rbp-0x9C]
+       mov      esi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x94], eax
+       mov      rax, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vinserti128 ymm0, ymm0, xmmword ptr [rbp-0xB0], 1
+       vmovups  ymmword ptr [r15], ymm0
+       mov      rax, r15
+						;; size=107 bbWeight=1 PerfScore 25.25
+G_M30697_IG04:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 152
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 208, prolog size 16, PerfScore 258.00, instruction count 64, allocated bytes for code 208 (MethodHash=d2e48816) for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],int):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
+; Total bytes of code 377, prolog size 21, PerfScore 96.00, instruction count 84, allocated bytes for code 377 (MethodHash=d2e48816) for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],int):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
165 (49.11 % of base) - System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector256`1[int]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 20 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 20 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V02 arg1         [V02,T17] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[int]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rbx         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V02 arg1         [V02,T17] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[int]>
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x20]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T09] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V07 tmp4         [V07,T10] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V06 tmp3         [V06,T09] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V07 tmp4         [V07,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V09 tmp6         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V10 tmp7         [V10    ] (  3,  3   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V10 tmp7         [V10    ] (  3,  3   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V11 tmp8         [V11,T05] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V12 tmp9         [V12    ] (  3,  6   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V13 tmp10        [V13    ] (  3,  6   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V12 tmp9         [V12    ] (  3,  6   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V13 tmp10        [V13    ] (  3,  6   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
 ;* V14 tmp11        [V14    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V15 tmp12        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V16 tmp13        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V17 tmp14        [V17    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V18 tmp15        [V18    ] (  3,  3   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V19 tmp16        [V19,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V20 tmp17        [V20    ] (  3,  6   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V21 tmp18        [V21    ] (  3,  6   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V22 tmp19        [V22    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V23 tmp20        [V23    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V24 tmp21        [V24    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V25 tmp22        [V25    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V26 tmp23        [V26,T14] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V27 tmp24        [V27,T11] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;  V28 tmp25        [V28,T12] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V29 tmp26        [V29    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V30 tmp27        [V30    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V31 tmp28        [V31    ] (  3,  3   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V32 tmp29        [V32,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  3,  6   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V34 tmp31        [V34    ] (  3,  6   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V35 tmp32        [V35    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V36 tmp33        [V36    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V37 tmp34        [V37    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V38 tmp35        [V38    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V39 tmp36        [V39    ] (  3,  3   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V40 tmp37        [V40,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V41 tmp38        [V41    ] (  3,  6   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;  V42 tmp39        [V42    ] (  3,  6   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
-;* V43 tmp40        [V43    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V44 tmp41        [V44    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V45 tmp42        [V45    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V46 tmp43        [V46    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V47 tmp44        [V47,T15] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
-;* V48 tmp45        [V48    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[int]>
-;  V49 tmp46        [V49,T01] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V50 tmp47        [V50,T02] (  2,  2   )    long  ->  rax         "field V09._00 (fldOffset=0x0)" P-INDEP
-;  V51 tmp48        [V51    ] (  3,  3   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-;  V52 tmp49        [V52    ] (  3,  5   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V53 tmp50        [V53    ] (  3,  5   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-;  V54 tmp51        [V54    ] (  3,  3   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
-;  V55 tmp52        [V55    ] (  3,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V20._00 (fldOffset=0x0)" P-DEP
-;  V56 tmp53        [V56    ] (  3,  5   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
-;  V57 tmp54        [V57,T03] (  2,  2   )    long  ->  rcx         "field V29._00 (fldOffset=0x0)" P-INDEP
-;  V58 tmp55        [V58,T04] (  2,  2   )    long  ->  rax         "field V30._00 (fldOffset=0x0)" P-INDEP
-;  V59 tmp56        [V59    ] (  3,  3   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V60 tmp57        [V60    ] (  3,  5   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V61 tmp58        [V61    ] (  3,  5   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
-;  V62 tmp59        [V62    ] (  3,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
-;  V63 tmp60        [V63    ] (  3,  5   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V41._00 (fldOffset=0x0)" P-DEP
-;  V64 tmp61        [V64    ] (  3,  5   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
+;  V16 tmp13        [V16    ] (  3,  3   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V17 tmp14        [V17,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V18 tmp15        [V18    ] (  3,  6   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V19 tmp16        [V19    ] (  3,  6   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V20 tmp17        [V20    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V21 tmp18        [V21    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V22 tmp19        [V22,T14] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V23 tmp20        [V23,T11] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V24 tmp21        [V24,T12] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[int]>
+;* V25 tmp22        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V26 tmp23        [V26    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V27 tmp24        [V27    ] (  3,  3   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V28 tmp25        [V28,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  3,  6   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V30 tmp27        [V30    ] (  3,  6   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V31 tmp28        [V31    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V32 tmp29        [V32    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V33 tmp30        [V33    ] (  3,  3   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V34 tmp31        [V34,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V35 tmp32        [V35    ] (  3,  6   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;  V36 tmp33        [V36    ] (  3,  6   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[int]>
+;* V37 tmp34        [V37    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V38 tmp35        [V38    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V39 tmp36        [V39,T15] (  3,  3   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[int]>
+;  V40 tmp37        [V40,T01] (  2,  2   )    long  ->  r15         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41,T02] (  2,  2   )    long  ->  rdi         "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V42 tmp39        [V42    ] (  3,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  3,  5   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  3,  5   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V45 tmp42        [V45    ] (  3,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V46 tmp43        [V46    ] (  3,  5   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
+;  V47 tmp44        [V47    ] (  3,  5   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
+;  V48 tmp45        [V48,T03] (  2,  2   )    long  ->  r15         "field V25._00 (fldOffset=0x0)" P-INDEP
+;  V49 tmp46        [V49,T04] (  2,  2   )    long  ->  rdi         "field V26._00 (fldOffset=0x0)" P-INDEP
+;  V50 tmp47        [V50    ] (  3,  3   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V51 tmp48        [V51    ] (  3,  5   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V52 tmp49        [V52    ] (  3,  5   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V53 tmp50        [V53    ] (  3,  3   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V54 tmp51        [V54    ] (  3,  5   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V55 tmp52        [V55    ] (  3,  5   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 192
+; Lcl frame size = 240
 
 G_M12621_IG01:
        push     rbp
-       sub      rsp, 192
-       lea      rbp, [rsp+0xC0]
+       push     r15
+       push     rbx
+       sub      rsp, 240
+       lea      rbp, [rsp+0x100]
+       mov      rbx, rdi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x30]
-						;; size=26 bbWeight=1 PerfScore 9.75
+						;; size=32 bbWeight=1 PerfScore 12.00
 G_M12621_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       mov      eax, dword ptr [rbp-0x30]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x38]
-       mov      dword ptr [rbp-0x28], eax
-       mov      eax, dword ptr [rbp-0x2C]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x34]
-       mov      dword ptr [rbp-0x24], eax
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       mov      eax, dword ptr [rbp-0x48]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x50]
-       mov      dword ptr [rbp-0x40], eax
-       mov      eax, dword ptr [rbp-0x44]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x4C]
-       mov      dword ptr [rbp-0x3C], eax
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       mov      eax, dword ptr [rbp-0x90]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x98]
-       mov      dword ptr [rbp-0x88], eax
-       mov      eax, dword ptr [rbp-0x8C]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0x94]
-       mov      dword ptr [rbp-0x84], eax
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       mov      eax, dword ptr [rbp-0xA8]
-       cdq      
-       idiv     edx:eax, dword ptr [rbp-0xB0]
-       mov      dword ptr [rbp-0xA0], eax
-       mov      eax, dword ptr [rbp-0xA4]
-       cdq      
-						;; size=248 bbWeight=1 PerfScore 226.50
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rdi, qword ptr [rbp-0x30]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x58], rdi
+       mov      edi, dword ptr [rbp-0x50]
+       mov      esi, dword ptr [rbp-0x58]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x48], eax
+       mov      edi, dword ptr [rbp-0x4C]
+       mov      esi, dword ptr [rbp-0x54]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x44], eax
+       mov      r15, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      qword ptr [rbp-0x68], rdi
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rdi
+       mov      edi, dword ptr [rbp-0x68]
+       mov      esi, dword ptr [rbp-0x70]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x60], eax
+       mov      edi, dword ptr [rbp-0x64]
+       mov      esi, dword ptr [rbp-0x6C]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0x5C], eax
+       mov      rdi, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], r15
+       mov      qword ptr [rbp-0x78], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x80]
+       vmovaps  xmmword ptr [rbp-0x20], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x90], xmm1
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0xA0], xmm1
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB8], rdi
+       mov      edi, dword ptr [rbp-0xB0]
+       mov      esi, dword ptr [rbp-0xB8]
+						;; size=248 bbWeight=1 PerfScore 65.50
 G_M12621_IG03:
-       idiv     edx:eax, dword ptr [rbp-0xAC]
-       mov      dword ptr [rbp-0x9C], eax
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=50 bbWeight=1 PerfScore 35.25
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0xA8], eax
+       mov      edi, dword ptr [rbp-0xAC]
+       mov      esi, dword ptr [rbp-0xB4]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0xA4], eax
+       mov      r15, qword ptr [rbp-0xA8]
+       mov      rdi, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC8], rdi
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD0], rdi
+       mov      edi, dword ptr [rbp-0xC8]
+       mov      esi, dword ptr [rbp-0xD0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0xC0], eax
+       mov      edi, dword ptr [rbp-0xC4]
+       mov      esi, dword ptr [rbp-0xCC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[int]:Divide(int,int):int
+       mov      dword ptr [rbp-0xBC], eax
+       mov      rdi, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], r15
+       mov      qword ptr [rbp-0xD8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x20]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0xE0]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[int](System.Runtime.Intrinsics.Vector128`1[int],System.Runtime.Intrinsics.Vector128`1[int]):System.Runtime.Intrinsics.Vector256`1[int]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[int](System.Runtime.Intrinsics.Vector128`1[int],System.Runtime.Intrinsics.Vector128`1[int]):System.Runtime.Intrinsics.Vector256`1[int]
+       mov      rax, rbx
+						;; size=206 bbWeight=1 PerfScore 42.75
 G_M12621_IG04:
        vzeroupper 
-       add      rsp, 192
+       add      rsp, 240
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 336, prolog size 16, PerfScore 274.25, instruction count 77, allocated bytes for code 336 (MethodHash=e489ceb2) for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
+; Total bytes of code 501, prolog size 19, PerfScore 124.00, instruction count 100, allocated bytes for code 501 (MethodHash=e489ceb2) for method System.Runtime.Intrinsics.Vector256`1[int]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[int],System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector256`1[int] (FullOpts)
157 (25.00 % of base) - System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],System.Runtime.Intrinsics.Vector256`1[short]):System.Runtime.Intrinsics.Vector256`1[short]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],System.Runtime.Intrinsics.Vector256`1[short]):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 20 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 20 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T04] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T20] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V02 arg1         [V02,T21] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[short]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T17] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rbx         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V02 arg1         [V02,T17] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[short]>
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x20]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T13] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V07 tmp4         [V07,T14] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V06 tmp3         [V06,T09] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V07 tmp4         [V07,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V09 tmp6         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V10 tmp7         [V10    ] (  5,  5   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V11 tmp8         [V11,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V12 tmp9         [V12    ] (  5, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V13 tmp10        [V13    ] (  5, 10   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V10 tmp7         [V10    ] (  5,  5   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V11 tmp8         [V11,T05] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V12 tmp9         [V12    ] (  5, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V13 tmp10        [V13    ] (  5, 10   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V14 tmp11        [V14    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V15 tmp12        [V15    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V16 tmp13        [V16,T00] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
-;* V17 tmp14        [V17    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V18 tmp15        [V18    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V19 tmp16        [V19    ] (  5,  5   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V20 tmp17        [V20,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V21 tmp18        [V21    ] (  5, 10   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V22 tmp19        [V22    ] (  5, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V23 tmp20        [V23    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V24 tmp21        [V24    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V25 tmp22        [V25,T01] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
-;* V26 tmp23        [V26    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V27 tmp24        [V27    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V28 tmp25        [V28,T18] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V29 tmp26        [V29,T15] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;  V30 tmp27        [V30,T16] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V31 tmp28        [V31    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V32 tmp29        [V32    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V33 tmp30        [V33    ] (  5,  5   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V34 tmp31        [V34,T11] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V35 tmp32        [V35    ] (  5, 10   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V36 tmp33        [V36    ] (  5, 10   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V16 tmp13        [V16    ] (  5,  5   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V17 tmp14        [V17,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V18 tmp15        [V18    ] (  5, 10   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V19 tmp16        [V19    ] (  5, 10   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V20 tmp17        [V20    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V21 tmp18        [V21    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V22 tmp19        [V22,T14] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V23 tmp20        [V23,T11] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V24 tmp21        [V24,T12] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[short]>
+;* V25 tmp22        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V26 tmp23        [V26    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V27 tmp24        [V27    ] (  5,  5   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V28 tmp25        [V28,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  5, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V30 tmp27        [V30    ] (  5, 10   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V31 tmp28        [V31    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V32 tmp29        [V32    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
+;  V33 tmp30        [V33    ] (  5,  5   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
+;* V34 tmp31        [V34,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V35 tmp32        [V35    ] (  5, 10   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
+;  V36 tmp33        [V36    ] (  5, 10   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
 ;* V37 tmp34        [V37    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V38 tmp35        [V38    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V39 tmp36        [V39,T02] (  8,  8   )   short  ->  rcx         "Inline return value spill temp"
-;* V40 tmp37        [V40    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V41 tmp38        [V41    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V42 tmp39        [V42    ] (  5,  5   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V43 tmp40        [V43,T12] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V44 tmp41        [V44    ] (  5, 10   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;  V45 tmp42        [V45    ] (  5, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[short]>
-;* V46 tmp43        [V46    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V47 tmp44        [V47    ] (  0,  0   )   short  ->  zero-ref    "Inline stloc first use temp"
-;  V48 tmp45        [V48,T03] (  8,  8   )   short  ->  rsi         "Inline return value spill temp"
-;* V49 tmp46        [V49    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;* V50 tmp47        [V50    ] (  0,  0   )   short  ->  zero-ref    "Inlining Arg"
-;  V51 tmp48        [V51,T19] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
-;* V52 tmp49        [V52    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[short]>
-;  V53 tmp50        [V53,T05] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V54 tmp51        [V54,T06] (  2,  2   )    long  ->  rax         "field V09._00 (fldOffset=0x0)" P-INDEP
-;  V55 tmp52        [V55    ] (  5,  5   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-;  V56 tmp53        [V56    ] (  5,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V57 tmp54        [V57    ] (  5,  9   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-;  V58 tmp55        [V58    ] (  5,  5   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
-;  V59 tmp56        [V59    ] (  5,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
-;  V60 tmp57        [V60    ] (  5,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-;  V61 tmp58        [V61,T07] (  2,  2   )    long  ->  rcx         "field V31._00 (fldOffset=0x0)" P-INDEP
-;  V62 tmp59        [V62,T08] (  2,  2   )    long  ->  rax         "field V32._00 (fldOffset=0x0)" P-INDEP
-;  V63 tmp60        [V63    ] (  5,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V64 tmp61        [V64    ] (  5,  9   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-;  V65 tmp62        [V65    ] (  5,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
-;  V66 tmp63        [V66    ] (  5,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
-;  V67 tmp64        [V67    ] (  5,  9   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-;  V68 tmp65        [V68    ] (  5,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+;  V39 tmp36        [V39,T15] (  3,  3   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[short]>
+;  V40 tmp37        [V40,T01] (  2,  2   )    long  ->  r15         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41,T02] (  2,  2   )    long  ->  rdi         "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V42 tmp39        [V42    ] (  5,  5   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  5,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  5,  9   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V45 tmp42        [V45    ] (  5,  5   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V46 tmp43        [V46    ] (  5,  9   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
+;  V47 tmp44        [V47    ] (  5,  9   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
+;  V48 tmp45        [V48,T03] (  2,  2   )    long  ->  r15         "field V25._00 (fldOffset=0x0)" P-INDEP
+;  V49 tmp46        [V49,T04] (  2,  2   )    long  ->  rdi         "field V26._00 (fldOffset=0x0)" P-INDEP
+;  V50 tmp47        [V50    ] (  5,  5   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V51 tmp48        [V51    ] (  5,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V52 tmp49        [V52    ] (  5,  9   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V53 tmp50        [V53    ] (  5,  5   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V54 tmp51        [V54    ] (  5,  9   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V55 tmp52        [V55    ] (  5,  9   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 192
+; Lcl frame size = 240
 
 G_M52493_IG01:
        push     rbp
-       sub      rsp, 192
-       lea      rbp, [rsp+0xC0]
+       push     r15
+       push     rbx
+       sub      rsp, 240
+       lea      rbp, [rsp+0x100]
+       mov      rbx, rdi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x30]
-						;; size=26 bbWeight=1 PerfScore 9.75
+						;; size=32 bbWeight=1 PerfScore 12.00
 G_M52493_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       movsx    rax, word  ptr [rbp-0x30]
-       movsx    rcx, word  ptr [rbp-0x38]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x28], cx
-       movsx    rax, word  ptr [rbp-0x2E]
-       movsx    rcx, word  ptr [rbp-0x36]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x26], cx
-       movsx    rax, word  ptr [rbp-0x2C]
-       movsx    rcx, word  ptr [rbp-0x34]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x24], cx
-       movsx    rax, word  ptr [rbp-0x2A]
-       movsx    rcx, word  ptr [rbp-0x32]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x22], cx
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       movsx    rax, word  ptr [rbp-0x48]
-       movsx    rsi, word  ptr [rbp-0x50]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x40], si
-       movsx    rax, word  ptr [rbp-0x46]
-       movsx    rsi, word  ptr [rbp-0x4E]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x3E], si
-       movsx    rax, word  ptr [rbp-0x44]
-       movsx    rsi, word  ptr [rbp-0x4C]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x3C], si
-       movsx    rax, word  ptr [rbp-0x42]
-       movsx    rsi, word  ptr [rbp-0x4A]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x3A], si
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
-						;; size=239 bbWeight=1 PerfScore 279.50
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rdi, qword ptr [rbp-0x30]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x58], rdi
+       movsx    rdi, word  ptr [rbp-0x50]
+       movsx    rsi, word  ptr [rbp-0x58]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x48], ax
+       movsx    rdi, word  ptr [rbp-0x4E]
+       movsx    rsi, word  ptr [rbp-0x56]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x46], ax
+       movsx    rdi, word  ptr [rbp-0x4C]
+       movsx    rsi, word  ptr [rbp-0x54]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x44], ax
+       movsx    rdi, word  ptr [rbp-0x4A]
+       movsx    rsi, word  ptr [rbp-0x52]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x42], ax
+       mov      r15, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      qword ptr [rbp-0x68], rdi
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rdi
+       movsx    rdi, word  ptr [rbp-0x68]
+       movsx    rsi, word  ptr [rbp-0x70]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x60], ax
+       movsx    rdi, word  ptr [rbp-0x66]
+       movsx    rsi, word  ptr [rbp-0x6E]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x5E], ax
+       movsx    rdi, word  ptr [rbp-0x64]
+       movsx    rsi, word  ptr [rbp-0x6C]
+						;; size=230 bbWeight=1 PerfScore 81.00
 G_M52493_IG03:
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       movsx    rax, word  ptr [rbp-0x90]
-       movsx    rcx, word  ptr [rbp-0x98]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x88], cx
-       movsx    rax, word  ptr [rbp-0x8E]
-       movsx    rcx, word  ptr [rbp-0x96]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x86], cx
-       movsx    rax, word  ptr [rbp-0x8C]
-       movsx    rcx, word  ptr [rbp-0x94]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x84], cx
-       movsx    rax, word  ptr [rbp-0x8A]
-       movsx    rcx, word  ptr [rbp-0x92]
-       cdq      
-       idiv     edx:eax, ecx
-       movsx    rcx, ax
-       mov      word  ptr [rbp-0x82], cx
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       movsx    rax, word  ptr [rbp-0xA8]
-       movsx    rsi, word  ptr [rbp-0xB0]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0xA0], si
-       movsx    rax, word  ptr [rbp-0xA6]
-       movsx    rsi, word  ptr [rbp-0xAE]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x9E], si
-       movsx    rax, word  ptr [rbp-0xA4]
-       movsx    rsi, word  ptr [rbp-0xAC]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x9C], si
-       movsx    rax, word  ptr [rbp-0xA2]
-       movsx    rsi, word  ptr [rbp-0xAA]
-       cdq      
-       idiv     edx:eax, esi
-       movsx    rsi, ax
-       mov      word  ptr [rbp-0x9A], si
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
-						;; size=344 bbWeight=1 PerfScore 284.00
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x5C], ax
+       movsx    rdi, word  ptr [rbp-0x62]
+       movsx    rsi, word  ptr [rbp-0x6A]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0x5A], ax
+       mov      rdi, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], r15
+       mov      qword ptr [rbp-0x78], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x80]
+       vmovaps  xmmword ptr [rbp-0x20], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x90], xmm1
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0xA0], xmm1
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB8], rdi
+       movsx    rdi, word  ptr [rbp-0xB0]
+       movsx    rsi, word  ptr [rbp-0xB8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xA8], ax
+       movsx    rdi, word  ptr [rbp-0xAE]
+       movsx    rsi, word  ptr [rbp-0xB6]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xA6], ax
+       movsx    rdi, word  ptr [rbp-0xAC]
+       movsx    rsi, word  ptr [rbp-0xB4]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xA4], ax
+       movsx    rdi, word  ptr [rbp-0xAA]
+       movsx    rsi, word  ptr [rbp-0xB2]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xA2], ax
+       mov      r15, qword ptr [rbp-0xA8]
+       mov      rdi, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC8], rdi
+       mov      rdi, qword ptr [rbp-0x98]
+						;; size=298 bbWeight=1 PerfScore 84.50
 G_M52493_IG04:
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=7 bbWeight=1 PerfScore 2.25
+       mov      qword ptr [rbp-0xD0], rdi
+       movsx    rdi, word  ptr [rbp-0xC8]
+       movsx    rsi, word  ptr [rbp-0xD0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xC0], ax
+       movsx    rdi, word  ptr [rbp-0xC6]
+       movsx    rsi, word  ptr [rbp-0xCE]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xBE], ax
+       movsx    rdi, word  ptr [rbp-0xC4]
+       movsx    rsi, word  ptr [rbp-0xCC]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xBC], ax
+       movsx    rdi, word  ptr [rbp-0xC2]
+       movsx    rsi, word  ptr [rbp-0xCA]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[short]:Divide(short,short):short
+       mov      word  ptr [rbp-0xBA], ax
+       mov      rdi, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], r15
+       mov      qword ptr [rbp-0xD8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x20]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0xE0]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[short](System.Runtime.Intrinsics.Vector128`1[short],System.Runtime.Intrinsics.Vector128`1[short]):System.Runtime.Intrinsics.Vector256`1[short]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[short](System.Runtime.Intrinsics.Vector128`1[short],System.Runtime.Intrinsics.Vector128`1[short]):System.Runtime.Intrinsics.Vector256`1[short]
+       mov      rax, rbx
+						;; size=210 bbWeight=1 PerfScore 56.75
 G_M52493_IG05:
        vzeroupper 
-       add      rsp, 192
+       add      rsp, 240
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 628, prolog size 16, PerfScore 578.25, instruction count 141, allocated bytes for code 628 (MethodHash=eb5b32f2) for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],System.Runtime.Intrinsics.Vector256`1[short]):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
+; Total bytes of code 785, prolog size 19, PerfScore 238.00, instruction count 140, allocated bytes for code 785 (MethodHash=eb5b32f2) for method System.Runtime.Intrinsics.Vector256`1[short]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[short],System.Runtime.Intrinsics.Vector256`1[short]):System.Runtime.Intrinsics.Vector256`1[short] (FullOpts)
155 (44.16 % of base) - System.Runtime.Intrinsics.Vector128`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.Dot(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):ubyte
 ; Assembly listing for method System.Runtime.Intrinsics.Vector128`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.Dot(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):ubyte (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; partially interruptible
+; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 9 single block inlinees; 11 inlinees without PGO data
+; 0 inlinees with PGO data; 11 single block inlinees; 5 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T11] (  2,  2   )  simd16  ->  [rbp+0x10]  do-not-enreg[SF] single-def <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V01 arg1         [V01,T12] (  2,  2   )  simd16  ->  [rbp+0x20]  do-not-enreg[SF] single-def <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V00 arg0         [V00,T13] (  2,  2   )  simd16  ->  [rbp+0x10]  do-not-enreg[SF] single-def <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V01 arg1         [V01,T14] (  2,  2   )  simd16  ->  [rbp+0x20]  do-not-enreg[SF] single-def <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T06] (  2,  4   )     int  ->  rax         "impAppendStmt"
-;  V04 tmp2         [V04,T02] (  8,  8   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
-;* V05 tmp3         [V05,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V06 tmp4         [V06    ] (  9, 18   )  struct ( 8) [rbp-0x08]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V07 tmp5         [V07    ] (  9, 18   )  struct ( 8) [rbp-0x10]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V08 tmp6         [V08    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V09 tmp7         [V09,T00] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V10 tmp8         [V10    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V11 tmp9         [V11    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V12 tmp10        [V12    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V13 tmp11        [V13    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V14 tmp12        [V14    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V15 tmp13        [V15    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V16 tmp14        [V16,T04] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V17 tmp15        [V17    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V18 tmp16        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V19 tmp17        [V19    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp18        [V20    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V21 tmp19        [V21    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V22 tmp20        [V22    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V23 tmp21        [V23,T03] (  8,  8   )   ubyte  ->  rcx         ld-addr-op "Inline ldloca(s) first use temp"
-;* V24 tmp22        [V24,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V25 tmp23        [V25    ] (  9, 18   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V26 tmp24        [V26    ] (  9, 18   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V03 tmp1         [V03,T06] (  2,  4   )     int  ->  rbx         "impAppendStmt"
+;* V04 tmp2         [V04    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V05 tmp3         [V05    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V06 tmp4         [V06    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V07 tmp5         [V07    ] (  2,  5   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V08 tmp6         [V08,T02] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
+;  V09 tmp7         [V09    ] (  2, 10   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V10 tmp8         [V10    ] (  2, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V11 tmp9         [V11    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V12 tmp10        [V12    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;* V13 tmp11        [V13    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V14 tmp12        [V14    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;* V15 tmp13        [V15    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
+;  V16 tmp14        [V16,T00] ( 16, 16   )   ubyte  ->  rbx         ld-addr-op "Inline ldloca(s) first use temp"
+;* V17 tmp15        [V17,T11] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V18 tmp16        [V18    ] (  9, 18   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V19 tmp17        [V19    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V20 tmp18        [V20    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V21 tmp19        [V21    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V22 tmp20        [V22    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V23 tmp21        [V23    ] (  2,  5   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V24 tmp22        [V24,T03] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V25 tmp23        [V25    ] (  2, 10   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V26 tmp24        [V26    ] (  2, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V27 tmp25        [V27    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;  V28 tmp26        [V28,T01] (  8, 16   )     int  ->  registers   "impAppendStmt"
-;* V29 tmp27        [V29    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V30 tmp28        [V30    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V31 tmp29        [V31    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V32 tmp30        [V32    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V33 tmp31        [V33    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;* V34 tmp32        [V34    ] (  0,  0   )     int  ->  zero-ref    "Inlining Arg"
-;  V35 tmp33        [V35,T05] (  8,  8   )   ubyte  ->  registers   "Inline return value spill temp"
-;* V36 tmp34        [V36    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V37 tmp35        [V37    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V38 tmp36        [V38    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V39 tmp37        [V39    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V40 tmp38        [V40    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V41 tmp39        [V41    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V42 tmp40        [V42    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V43 tmp41        [V43    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V44 tmp42        [V44    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V45 tmp43        [V45    ] (  9, 17   )    long  ->  [rbp-0x08]  do-not-enreg[X] addr-exposed "field V06._00 (fldOffset=0x0)" P-DEP
-;  V46 tmp44        [V46    ] (  9, 17   )    long  ->  [rbp-0x10]  do-not-enreg[X] addr-exposed "field V07._00 (fldOffset=0x0)" P-DEP
-;  V47 tmp45        [V47    ] (  9, 17   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp46        [V48    ] (  9, 17   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
-;  V49 cse0         [V49,T07] (  2,  2   )     int  ->  rcx         "CSE #01: moderate"
-;  V50 cse1         [V50,T08] (  2,  2   )     int  ->  rdx         "CSE #02: moderate"
+;* V28 tmp26        [V28    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp27        [V29,T01] ( 16, 16   )   ubyte  ->  rax         ld-addr-op "Inline ldloca(s) first use temp"
+;* V30 tmp28        [V30,T12] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V31 tmp29        [V31    ] (  9, 18   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V32 tmp30        [V32    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;  V33 tmp31        [V33,T07] (  2,  2   )    long  ->  rsi         "field V04._00 (fldOffset=0x0)" P-INDEP
+;  V34 tmp32        [V34,T08] (  2,  2   )    long  ->  rdi         "field V05._00 (fldOffset=0x0)" P-INDEP
+;* V35 tmp33        [V35    ] (  0,  0   )    long  ->  zero-ref    "field V06._00 (fldOffset=0x0)" P-INDEP
+;  V36 tmp34        [V36    ] (  2,  5   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V07._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp35        [V37    ] (  2,  9   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V38 tmp36        [V38    ] (  2,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V39 tmp37        [V39    ] (  9, 17   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
+;  V40 tmp38        [V40,T09] (  2,  2   )    long  ->  rdi         "field V20._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp39        [V41,T10] (  2,  2   )    long  ->  rsi         "field V21._00 (fldOffset=0x0)" P-INDEP
+;* V42 tmp40        [V42    ] (  0,  0   )    long  ->  zero-ref    "field V22._00 (fldOffset=0x0)" P-INDEP
+;  V43 tmp41        [V43    ] (  2,  5   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp42        [V44    ] (  2,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V45 tmp43        [V45    ] (  2,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
+;  V46 tmp44        [V46    ] (  9, 17   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
+;  V47 cse0         [V47,T04] (  4, 16   )    long  ->  rax         "CSE #01: aggressive"
+;  V48 cse1         [V48,T05] (  4, 16   )    long  ->  r14         "CSE #02: aggressive"
 ;
-; Lcl frame size = 32
+; Lcl frame size = 72
 
 G_M52421_IG01:
        push     rbp
-       sub      rsp, 32
-       lea      rbp, [rsp+0x20]
-						;; size=10 bbWeight=1 PerfScore 1.75
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 72
+       lea      rbp, [rsp+0x60]
+						;; size=15 bbWeight=1 PerfScore 4.75
 G_M52421_IG02:
-       mov      rax, qword ptr [rbp+0x10]
-       mov      qword ptr [rbp-0x08], rax
-       mov      rax, qword ptr [rbp+0x20]
-       mov      qword ptr [rbp-0x10], rax
-       movzx    rax, byte  ptr [rbp-0x08]
-       movzx    rcx, byte  ptr [rbp-0x10]
-       imul     eax, ecx
-       movzx    rax, al
-       movzx    rcx, byte  ptr [rbp-0x07]
-       movzx    rdx, byte  ptr [rbp-0x0F]
-       imul     ecx, edx
-       movzx    rcx, cl
-       add      eax, ecx
-       movzx    rcx, al
-       movzx    rax, cl
-       movzx    rcx, byte  ptr [rbp-0x06]
-       movzx    rdx, byte  ptr [rbp-0x0E]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x05]
-       movzx    rdi, byte  ptr [rbp-0x0D]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      eax, ecx
-       movzx    rax, al
-       movzx    rdx, byte  ptr [rbp-0x04]
-       movzx    rcx, byte  ptr [rbp-0x0C]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x03]
-       movzx    rdi, byte  ptr [rbp-0x0B]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       movzx    rdx, byte  ptr [rbp-0x02]
-       movzx    rcx, byte  ptr [rbp-0x0A]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x01]
-       movzx    rdi, byte  ptr [rbp-0x09]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       add      ecx, eax
-       movzx    rax, cl
-       mov      rcx, qword ptr [rbp+0x18]
-       mov      qword ptr [rbp-0x18], rcx
-       mov      rcx, qword ptr [rbp+0x28]
-       mov      qword ptr [rbp-0x20], rcx
-       movzx    rcx, byte  ptr [rbp-0x18]
-       movzx    rdx, byte  ptr [rbp-0x20]
-       imul     ecx, edx
-       movzx    rcx, cl
-       movzx    rdx, byte  ptr [rbp-0x17]
-       movzx    rdi, byte  ptr [rbp-0x1F]
-       imul     edx, edi
-       movzx    rdx, dl
-       add      ecx, edx
-       movzx    rdx, cl
-       movzx    rcx, dl
-       movzx    rdx, byte  ptr [rbp-0x16]
-       movzx    rdi, byte  ptr [rbp-0x1E]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x15]
-       movzx    rsi, byte  ptr [rbp-0x1D]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-						;; size=247 bbWeight=1 PerfScore 63.00
+       mov      rsi, qword ptr [rbp+0x10]
+       mov      rdi, qword ptr [rbp+0x20]
+       mov      qword ptr [rbp-0x28], rsi
+       mov      qword ptr [rbp-0x30], rdi
+       xor      esi, esi
+       align    [0 bytes for IG03]
+						;; size=18 bbWeight=1 PerfScore 4.25
 G_M52421_IG03:
-       add      ecx, edx
-       movzx    rcx, cl
-       movzx    rdi, byte  ptr [rbp-0x14]
-       movzx    rdx, byte  ptr [rbp-0x1C]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x13]
-       movzx    rsi, byte  ptr [rbp-0x1B]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       movzx    rdi, byte  ptr [rbp-0x12]
-       movzx    rdx, byte  ptr [rbp-0x1A]
-       imul     edx, edi
-       movzx    rdx, dl
-       movzx    rdi, byte  ptr [rbp-0x11]
-       movzx    rsi, byte  ptr [rbp-0x19]
-       imul     edi, esi
-       movzx    rdi, dil
-       add      edx, edi
-       add      edx, ecx
-       movzx    rcx, dl
-       add      eax, ecx
-       movzx    rax, al
-						;; size=88 bbWeight=1 PerfScore 19.50
+       lea      rdi, [rbp-0x28]
+       movsxd   rax, esi
+       movzx    rdi, byte  ptr [rdi+rax]
+       lea      rcx, [rbp-0x30]
+       movzx    rcx, byte  ptr [rcx+rax]
+       imul     edi, ecx
+       lea      rcx, [rbp-0x20]
+       mov      byte  ptr [rcx+rax], dil
+       inc      esi
+       cmp      esi, 8
+       jl       SHORT G_M52421_IG03
+						;; size=38 bbWeight=4 PerfScore 41.00
 G_M52421_IG04:
-       add      rsp, 32
+       mov      rsi, qword ptr [rbp-0x20]
+       mov      qword ptr [rbp-0x38], rsi
+       movzx    rsi, byte  ptr [rbp-0x38]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x37]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x36]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x35]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x34]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x33]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x32]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       movzx    rsi, byte  ptr [rbp-0x31]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       mov      ebx, eax
+       mov      rdi, qword ptr [rbp+0x18]
+       mov      rsi, qword ptr [rbp+0x28]
+       mov      qword ptr [rbp-0x48], rdi
+       mov      qword ptr [rbp-0x50], rsi
+       xor      r15d, r15d
+						;; size=195 bbWeight=1 PerfScore 44.25
+G_M52421_IG05:
+       lea      rdi, [rbp-0x48]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0x50]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+       lea      rsi, [rbp-0x40]
+       mov      byte  ptr [rsi+r14], al
+       inc      r15d
+       cmp      r15d, 8
+       jl       SHORT G_M52421_IG05
+						;; size=50 bbWeight=4 PerfScore 46.00
+G_M52421_IG06:
+       mov      rsi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x58], rsi
+       movzx    rsi, byte  ptr [rbp-0x58]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x57]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x56]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x55]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x54]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x53]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x52]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rsi, byte  ptr [rbp-0x51]
+       mov      edi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       movzx    rdi, bl
+       mov      esi, eax
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+       nop      
+						;; size=179 bbWeight=1 PerfScore 42.00
+G_M52421_IG07:
+       add      rsp, 72
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=6 bbWeight=1 PerfScore 1.75
+						;; size=11 bbWeight=1 PerfScore 3.25
 
-; Total bytes of code 351, prolog size 10, PerfScore 86.00, instruction count 104, allocated bytes for code 351 (MethodHash=ae36333a) for method System.Runtime.Intrinsics.Vector128`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.Dot(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):ubyte (FullOpts)
+; Total bytes of code 506, prolog size 15, PerfScore 185.50, instruction count 127, allocated bytes for code 506 (MethodHash=ae36333a) for method System.Runtime.Intrinsics.Vector128`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector128<T>,T>.Dot(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):ubyte (FullOpts)
133 (32.36 % of base) - System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
-; fully interruptible
+; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 20 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 20 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T08] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T20] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V02 arg1         [V02,T21] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T17] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V00 RetBuf       [V00,T08] (  4,  4   )   byref  ->  rbx         single-def
+;  V01 arg0         [V01,T20] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V02 arg1         [V02,T21] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T17] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T13] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V07 tmp4         [V07,T14] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V06 tmp3         [V06,T13] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V07 tmp4         [V07,T14] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V09 tmp6         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V10 tmp7         [V10    ] (  2,  5   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V11 tmp8         [V11,T00] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V12 tmp9         [V12    ] (  2, 10   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V13 tmp10        [V13    ] (  2, 10   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V10 tmp7         [V10    ] (  2,  5   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V11 tmp8         [V11,T00] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V12 tmp9         [V12    ] (  2, 10   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V13 tmp10        [V13    ] (  2, 10   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V14 tmp11        [V14    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V15 tmp12        [V15    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V16 tmp13        [V16    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V17 tmp14        [V17    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V18 tmp15        [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V19 tmp16        [V19    ] (  2,  5   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V20 tmp17        [V20,T01] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V21 tmp18        [V21    ] (  2, 10   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V22 tmp19        [V22    ] (  2, 10   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V23 tmp20        [V23    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V24 tmp21        [V24    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V25 tmp22        [V25    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V26 tmp23        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V27 tmp24        [V27    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V28 tmp25        [V28,T18] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V29 tmp26        [V29,T15] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;  V30 tmp27        [V30,T16] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V31 tmp28        [V31    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V32 tmp29        [V32    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V33 tmp30        [V33    ] (  2,  5   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V34 tmp31        [V34,T02] (  5, 17   )     int  ->  rcx         "Inline stloc first use temp"
-;  V35 tmp32        [V35    ] (  2, 10   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V36 tmp33        [V36    ] (  2, 10   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V16 tmp13        [V16    ] (  2,  5   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V17 tmp14        [V17,T01] (  5, 17   )     int  ->  r14         "Inline stloc first use temp"
+;  V18 tmp15        [V18    ] (  2, 10   )  struct ( 8) [rbp-0x78]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V19 tmp16        [V19    ] (  2, 10   )  struct ( 8) [rbp-0x80]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V20 tmp17        [V20    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V21 tmp18        [V21    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V22 tmp19        [V22,T18] (  3,  3   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V23 tmp20        [V23,T15] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V24 tmp21        [V24,T16] (  3,  6   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V25 tmp22        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V26 tmp23        [V26    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V27 tmp24        [V27    ] (  2,  5   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V28 tmp25        [V28,T02] (  5, 17   )     int  ->  r15         "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  2, 10   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V30 tmp27        [V30    ] (  2, 10   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V31 tmp28        [V31    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
+;* V32 tmp29        [V32    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
+;  V33 tmp30        [V33    ] (  2,  5   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V34 tmp31        [V34,T03] (  5, 17   )     int  ->  r14         "Inline stloc first use temp"
+;  V35 tmp32        [V35    ] (  2, 10   )  struct ( 8) [rbp-0xD8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;  V36 tmp33        [V36    ] (  2, 10   )  struct ( 8) [rbp-0xE0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
 ;* V37 tmp34        [V37    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
 ;* V38 tmp35        [V38    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V39 tmp36        [V39    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V40 tmp37        [V40    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V41 tmp38        [V41    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V42 tmp39        [V42    ] (  2,  5   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V43 tmp40        [V43,T03] (  5, 17   )     int  ->  rsi         "Inline stloc first use temp"
-;  V44 tmp41        [V44    ] (  2, 10   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;  V45 tmp42        [V45    ] (  2, 10   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V46 tmp43        [V46    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
-;* V47 tmp44        [V47    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline stloc first use temp"
-;* V48 tmp45        [V48    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V49 tmp46        [V49    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;* V50 tmp47        [V50    ] (  0,  0   )   ubyte  ->  zero-ref    "Inlining Arg"
-;  V51 tmp48        [V51,T19] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V52 tmp49        [V52    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-;  V53 tmp50        [V53,T09] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V54 tmp51        [V54,T10] (  2,  2   )    long  ->  rax         "field V09._00 (fldOffset=0x0)" P-INDEP
-;  V55 tmp52        [V55    ] (  2,  5   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-;  V56 tmp53        [V56    ] (  2,  9   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V57 tmp54        [V57    ] (  2,  9   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-;  V58 tmp55        [V58    ] (  2,  5   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
-;  V59 tmp56        [V59    ] (  2,  9   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
-;  V60 tmp57        [V60    ] (  2,  9   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-;  V61 tmp58        [V61,T11] (  2,  2   )    long  ->  rcx         "field V31._00 (fldOffset=0x0)" P-INDEP
-;  V62 tmp59        [V62,T12] (  2,  2   )    long  ->  rax         "field V32._00 (fldOffset=0x0)" P-INDEP
-;  V63 tmp60        [V63    ] (  2,  5   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V64 tmp61        [V64    ] (  2,  9   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-;  V65 tmp62        [V65    ] (  2,  9   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
-;  V66 tmp63        [V66    ] (  2,  5   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
-;  V67 tmp64        [V67    ] (  2,  9   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-;  V68 tmp65        [V68    ] (  2,  9   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
-;  V69 cse0         [V69,T04] (  4, 16   )    long  ->  rsi         "CSE #01: aggressive"
-;  V70 cse1         [V70,T05] (  4, 16   )    long  ->   r8         "CSE #02: aggressive"
-;  V71 cse2         [V71,T06] (  4, 16   )    long  ->  rsi         "CSE #03: aggressive"
-;  V72 cse3         [V72,T07] (  4, 16   )    long  ->   r8         "CSE #04: aggressive"
+;  V39 tmp36        [V39,T19] (  3,  3   )  simd16  ->  [rbp-0xF0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;  V40 tmp37        [V40,T09] (  2,  2   )    long  ->  r15         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41,T10] (  2,  2   )    long  ->  rdi         "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V42 tmp39        [V42    ] (  2,  5   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  2,  9   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  2,  9   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V45 tmp42        [V45    ] (  2,  5   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V46 tmp43        [V46    ] (  2,  9   )    long  ->  [rbp-0x78]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
+;  V47 tmp44        [V47    ] (  2,  9   )    long  ->  [rbp-0x80]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
+;  V48 tmp45        [V48,T11] (  2,  2   )    long  ->  r15         "field V25._00 (fldOffset=0x0)" P-INDEP
+;  V49 tmp46        [V49,T12] (  2,  2   )    long  ->  rdi         "field V26._00 (fldOffset=0x0)" P-INDEP
+;  V50 tmp47        [V50    ] (  2,  5   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V51 tmp48        [V51    ] (  2,  9   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V52 tmp49        [V52    ] (  2,  9   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V53 tmp50        [V53    ] (  2,  5   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V54 tmp51        [V54    ] (  2,  9   )    long  ->  [rbp-0xD8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V55 tmp52        [V55    ] (  2,  9   )    long  ->  [rbp-0xE0]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
+;  V56 cse0         [V56,T04] (  4, 16   )    long  ->  r14         "CSE #01: aggressive"
+;  V57 cse1         [V57,T05] (  4, 16   )    long  ->  r13         "CSE #02: aggressive"
+;  V58 cse2         [V58,T06] (  4, 16   )    long  ->  r14         "CSE #03: aggressive"
+;  V59 cse3         [V59,T07] (  4, 16   )    long  ->  r13         "CSE #04: aggressive"
 ;
-; Lcl frame size = 192
+; Lcl frame size = 240
 
 G_M49741_IG01:
        push     rbp
-       sub      rsp, 192
-       lea      rbp, [rsp+0xC0]
+       push     r15
+       push     r14
+       push     r13
+       push     rbx
+       sub      rsp, 240
+       lea      rbp, [rsp+0x110]
+       mov      rbx, rdi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x30]
-						;; size=26 bbWeight=1 PerfScore 9.75
+						;; size=36 bbWeight=1 PerfScore 14.00
 G_M49741_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       xor      ecx, ecx
-       align    [2 bytes for IG03]
-						;; size=38 bbWeight=1 PerfScore 7.00
+       vmovaps  xmmword ptr [rbp-0x50], xmm2
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x60], rdi
+       mov      rdi, qword ptr [rbp-0x50]
+       mov      qword ptr [rbp-0x68], rdi
+       xor      r15d, r15d
+						;; size=47 bbWeight=1 PerfScore 8.75
 G_M49741_IG03:
-       lea      rax, [rbp-0x30]
-       movsxd   rsi, ecx
-       movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0x38]
-       movzx    r8, byte  ptr [rdx+rsi]
-       xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0x28]
-       mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       lea      rdi, [rbp-0x60]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0x68]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0x58]
+       mov      byte  ptr [rdi+r14], al
+       inc      r15d
+       cmp      r15d, 8
        jl       SHORT G_M49741_IG03
-						;; size=39 bbWeight=4 PerfScore 134.00
+						;; size=50 bbWeight=4 PerfScore 46.00
 G_M49741_IG04:
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      rax, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rax
-       xor      esi, esi
-       align    [3 bytes for IG05]
-						;; size=25 bbWeight=1 PerfScore 5.50
+       mov      r15, qword ptr [rbp-0x58]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x78], rdi
+       mov      rdi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x80], rdi
+       xor      r14d, r14d
+						;; size=23 bbWeight=1 PerfScore 5.25
 G_M49741_IG05:
-       lea      rax, [rbp-0x48]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0x50]
-       movzx    r9, byte  ptr [rdx+r8]
-       xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0x40]
-       mov      byte  ptr [rdx+r8], al
-       inc      esi
-       cmp      esi, 8
+       lea      rdi, [rbp-0x78]
+       movsxd   r13, r14d
+       movzx    rdi, byte  ptr [rdi+r13]
+       lea      rsi, [rbp-0x80]
+       movzx    rsi, byte  ptr [rsi+r13]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0x70]
+       mov      byte  ptr [rdi+r13], al
+       inc      r14d
+       cmp      r14d, 8
        jl       SHORT G_M49741_IG05
-						;; size=41 bbWeight=4 PerfScore 134.00
+						;; size=50 bbWeight=4 PerfScore 46.00
 G_M49741_IG06:
-       mov      rax, qword ptr [rbp-0x40]
-       mov      qword ptr [rbp-0x60], rcx
-       mov      qword ptr [rbp-0x58], rax
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
+       mov      rdi, qword ptr [rbp-0x70]
+       mov      qword ptr [rbp-0x90], r15
+       mov      qword ptr [rbp-0x88], rdi
+       vmovaps  xmm2, xmmword ptr [rbp-0x90]
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
+       vmovaps  xmmword ptr [rbp-0xA0], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       xor      ecx, ecx
-       align    [0 bytes for IG07]
-						;; size=63 bbWeight=1 PerfScore 16.25
+       vmovaps  xmmword ptr [rbp-0xB0], xmm0
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xC0], rdi
+       mov      rdi, qword ptr [rbp-0xB0]
+       mov      qword ptr [rbp-0xC8], rdi
+       xor      r15d, r15d
+						;; size=100 bbWeight=1 PerfScore 25.25
 G_M49741_IG07:
-       lea      rax, [rbp-0x90]
-       movsxd   rsi, ecx
-       movzx    rax, byte  ptr [rax+rsi]
-       lea      rdx, [rbp-0x98]
-       movzx    r8, byte  ptr [rdx+rsi]
-       xor      edx, edx
-       div      edx:eax, r8d
-       lea      rdx, [rbp-0x88]
-       mov      byte  ptr [rdx+rsi], al
-       inc      ecx
-       cmp      ecx, 8
+       lea      rdi, [rbp-0xC0]
+       movsxd   r14, r15d
+       movzx    rdi, byte  ptr [rdi+r14]
+       lea      rsi, [rbp-0xC8]
+       movzx    rsi, byte  ptr [rsi+r14]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0xB8]
+       mov      byte  ptr [rdi+r14], al
+       inc      r15d
+       cmp      r15d, 8
        jl       SHORT G_M49741_IG07
-						;; size=48 bbWeight=4 PerfScore 134.00
+						;; size=59 bbWeight=4 PerfScore 46.00
 G_M49741_IG08:
-       mov      rcx, qword ptr [rbp-0x88]
-       mov      rax, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rax
-       mov      rax, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rax
-       xor      esi, esi
-       align    [0 bytes for IG09]
-						;; size=31 bbWeight=1 PerfScore 5.25
+       mov      r15, qword ptr [rbp-0xB8]
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD8], rdi
+       mov      rdi, qword ptr [rbp-0xA8]
+       mov      qword ptr [rbp-0xE0], rdi
+       xor      r14d, r14d
+						;; size=38 bbWeight=1 PerfScore 5.25
 G_M49741_IG09:
-       lea      rax, [rbp-0xA8]
-       movsxd   r8, esi
-       movzx    rax, byte  ptr [rax+r8]
-       lea      rdx, [rbp-0xB0]
-       movzx    r9, byte  ptr [rdx+r8]
-       xor      edx, edx
-       div      edx:eax, r9d
-       lea      rdx, [rbp-0xA0]
-       mov      byte  ptr [rdx+r8], al
-       inc      esi
-       cmp      esi, 8
+       lea      rdi, [rbp-0xD8]
+       movsxd   r13, r14d
+       movzx    rdi, byte  ptr [rdi+r13]
+       lea      rsi, [rbp-0xE0]
+       movzx    rsi, byte  ptr [rsi+r13]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Divide(ubyte,ubyte):ubyte
+       lea      rdi, [rbp-0xD0]
+       mov      byte  ptr [rdi+r13], al
+       inc      r14d
+       cmp      r14d, 8
        jl       SHORT G_M49741_IG09
-						;; size=50 bbWeight=4 PerfScore 134.00
+						;; size=59 bbWeight=4 PerfScore 46.00
 G_M49741_IG10:
-       mov      rax, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rcx
-       mov      qword ptr [rbp-0xB8], rax
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=38 bbWeight=1 PerfScore 9.25
+       mov      rdi, qword ptr [rbp-0xD0]
+       mov      qword ptr [rbp-0xF0], r15
+       mov      qword ptr [rbp-0xE8], rdi
+       vmovaps  xmm2, xmmword ptr [rbp-0x30]
+       vmovups  xmmword ptr [rsp], xmm2
+       vmovaps  xmm0, xmmword ptr [rbp-0xF0]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[ubyte](System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[ubyte](System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte]
+       mov      rax, rbx
+						;; size=63 bbWeight=1 PerfScore 14.75
 G_M49741_IG11:
        vzeroupper 
-       add      rsp, 192
+       add      rsp, 240
+       pop      rbx
+       pop      r13
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=19 bbWeight=1 PerfScore 4.75
 
-; Total bytes of code 411, prolog size 26, PerfScore 591.75, instruction count 101, allocated bytes for code 411 (MethodHash=ef7c3db2) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
+; Total bytes of code 544, prolog size 23, PerfScore 262.00, instruction count 116, allocated bytes for code 544 (MethodHash=ef7c3db2) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Divide(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
132 (62.56 % of base) - System.Runtime.Intrinsics.Vector256:Dot[long](System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256:Dot[long](System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
-; rsp based frame
+; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 10 single block inlinees; 11 inlinees without PGO data
+; 0 inlinees with PGO data; 22 single block inlinees; 17 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T08] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V01 arg1         [V01,T09] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V00 arg0         [V00,T30] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V01 arg1         [V01,T31] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V03 tmp1         [V03,T00] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;  V04 tmp2         [V04,T04] (  3,  6   )  simd16  ->  [rsp+0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V05 tmp3         [V05,T05] (  3,  6   )  simd16  ->  [rsp+0x60]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V06 tmp4         [V06,T01] (  2,  4   )    long  ->  rax         "impAppendStmt"
-;* V07 tmp5         [V07    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V08 tmp6         [V08    ] (  2,  4   )  struct ( 8) [rsp+0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V09 tmp7         [V09    ] (  2,  4   )  struct ( 8) [rsp+0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V10 tmp8         [V10    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V11 tmp9         [V11    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V12 tmp10        [V12    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V13 tmp11        [V13    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V14 tmp12        [V14    ] (  2,  4   )  struct ( 8) [rsp+0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V15 tmp13        [V15    ] (  2,  4   )  struct ( 8) [rsp+0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V16 tmp14        [V16    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V17 tmp15        [V17    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V18 tmp16        [V18    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V19 tmp17        [V19    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp18        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V21 tmp19        [V21,T06] (  3,  6   )  simd16  ->  [rsp+0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V22 tmp20        [V22,T07] (  3,  6   )  simd16  ->  [rsp+0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V23 tmp21        [V23,T02] (  2,  4   )    long  ->  rcx         "impAppendStmt"
-;* V24 tmp22        [V24    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V25 tmp23        [V25    ] (  2,  4   )  struct ( 8) [rsp+0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V26 tmp24        [V26    ] (  2,  4   )  struct ( 8) [rsp+0x10]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V27 tmp25        [V27    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V28 tmp26        [V28    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V29 tmp27        [V29    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V30 tmp28        [V30    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op "Inline ldloca(s) first use temp"
-;  V31 tmp29        [V31    ] (  2,  4   )  struct ( 8) [rsp+0x08]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V32 tmp30        [V32    ] (  2,  4   )  struct ( 8) [rsp+0x00]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V33 tmp31        [V33    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V34 tmp32        [V34    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V35 tmp33        [V35    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;* V36 tmp34        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V37 tmp35        [V37    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V38 tmp36        [V38,T03] (  2,  2   )    long  ->  rax         "Inline return value spill temp"
-;  V39 tmp37        [V39    ] (  2,  3   )    long  ->  [rsp+0x58]  do-not-enreg[X] addr-exposed "field V08._00 (fldOffset=0x0)" P-DEP
-;  V40 tmp38        [V40    ] (  2,  3   )    long  ->  [rsp+0x50]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V41 tmp39        [V41    ] (  2,  3   )    long  ->  [rsp+0x48]  do-not-enreg[X] addr-exposed "field V14._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp40        [V42    ] (  2,  3   )    long  ->  [rsp+0x40]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp41        [V43    ] (  2,  3   )    long  ->  [rsp+0x18]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp42        [V44    ] (  2,  3   )    long  ->  [rsp+0x10]  do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp43        [V45    ] (  2,  3   )    long  ->  [rsp+0x08]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V46 tmp44        [V46    ] (  2,  3   )    long  ->  [rsp+0x00]  do-not-enreg[X] addr-exposed "field V32._00 (fldOffset=0x0)" P-DEP
+;  V03 tmp1         [V03,T00] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;  V04 tmp2         [V04,T26] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V05 tmp3         [V05,T27] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V06 tmp4         [V06,T01] (  2,  4   )    long  ->  rbx         "impAppendStmt"
+;* V07 tmp5         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V08 tmp6         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V09 tmp7         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V10 tmp8         [V10    ] (  2,  2   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V11 tmp9         [V11,T18] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V12 tmp10        [V12    ] (  2,  4   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V13 tmp11        [V13    ] (  2,  4   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V14 tmp12        [V14    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V15 tmp13        [V15    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V16 tmp14        [V16    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V17 tmp15        [V17    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V18 tmp16        [V18,T03] (  2,  2   )    long  ->  rbx         ld-addr-op "Inline ldloca(s) first use temp"
+;* V19 tmp17        [V19,T19] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V20 tmp18        [V20    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V21 tmp19        [V21    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V22 tmp20        [V22    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V23 tmp21        [V23    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V24 tmp22        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V25 tmp23        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V26 tmp24        [V26    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V27 tmp25        [V27    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V28 tmp26        [V28,T20] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp27        [V29    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V30 tmp28        [V30    ] (  2,  4   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V31 tmp29        [V31    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V32 tmp30        [V32    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V33 tmp31        [V33    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V34 tmp32        [V34    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V35 tmp33        [V35,T04] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V36 tmp34        [V36,T21] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V37 tmp35        [V37    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V38 tmp36        [V38    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V39 tmp37        [V39    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V40 tmp38        [V40    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V41 tmp39        [V41    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V42 tmp40        [V42    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V43 tmp41        [V43,T28] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V44 tmp42        [V44,T29] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V45 tmp43        [V45,T02] (  2,  4   )    long  ->  r15         "impAppendStmt"
+;* V46 tmp44        [V46    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V47 tmp45        [V47    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V48 tmp46        [V48    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V49 tmp47        [V49    ] (  2,  2   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V50 tmp48        [V50,T22] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V51 tmp49        [V51    ] (  2,  4   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V52 tmp50        [V52    ] (  2,  4   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V53 tmp51        [V53    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V54 tmp52        [V54    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V55 tmp53        [V55    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V56 tmp54        [V56    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V57 tmp55        [V57,T05] (  2,  2   )    long  ->  r15         ld-addr-op "Inline ldloca(s) first use temp"
+;* V58 tmp56        [V58,T23] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V59 tmp57        [V59    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V60 tmp58        [V60    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V61 tmp59        [V61    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V62 tmp60        [V62    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;* V63 tmp61        [V63    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V64 tmp62        [V64    ] (  0,  0   )  struct ( 8) zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V65 tmp63        [V65    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V66 tmp64        [V66    ] (  2,  2   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V67 tmp65        [V67,T24] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V68 tmp66        [V68    ] (  2,  4   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V69 tmp67        [V69    ] (  2,  4   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V70 tmp68        [V70    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V71 tmp69        [V71    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;* V72 tmp70        [V72    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V73 tmp71        [V73    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V74 tmp72        [V74,T06] (  2,  2   )    long  ->  rsi         ld-addr-op "Inline ldloca(s) first use temp"
+;* V75 tmp73        [V75,T25] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V76 tmp74        [V76    ] (  2,  4   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V77 tmp75        [V77    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V78 tmp76        [V78    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
+;* V79 tmp77        [V79    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V80 tmp78        [V80,T07] (  2,  2   )    long  ->  rsi         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V81 tmp79        [V81,T08] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;* V82 tmp80        [V82    ] (  0,  0   )    long  ->  zero-ref    "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V83 tmp81        [V83    ] (  2,  2   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V84 tmp82        [V84    ] (  2,  3   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V85 tmp83        [V85    ] (  2,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V86 tmp84        [V86    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V20._00 (fldOffset=0x0)" P-DEP
+;  V87 tmp85        [V87,T09] (  2,  2   )    long  ->  rsi         "field V24._00 (fldOffset=0x0)" P-INDEP
+;  V88 tmp86        [V88,T10] (  2,  2   )    long  ->  rdi         "field V25._00 (fldOffset=0x0)" P-INDEP
+;* V89 tmp87        [V89    ] (  0,  0   )    long  ->  zero-ref    "field V26._00 (fldOffset=0x0)" P-INDEP
+;  V90 tmp88        [V90    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V91 tmp89        [V91    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V92 tmp90        [V92    ] (  2,  3   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V93 tmp91        [V93    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
+;  V94 tmp92        [V94,T11] (  2,  2   )    long  ->  rsi         "field V46._00 (fldOffset=0x0)" P-INDEP
+;  V95 tmp93        [V95,T12] (  2,  2   )    long  ->  rdi         "field V47._00 (fldOffset=0x0)" P-INDEP
+;* V96 tmp94        [V96    ] (  0,  0   )    long  ->  zero-ref    "field V48._00 (fldOffset=0x0)" P-INDEP
+;  V97 tmp95        [V97    ] (  2,  2   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V49._00 (fldOffset=0x0)" P-DEP
+;  V98 tmp96        [V98    ] (  2,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V51._00 (fldOffset=0x0)" P-DEP
+;  V99 tmp97        [V99    ] (  2,  3   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V52._00 (fldOffset=0x0)" P-DEP
+;  V100 tmp98       [V100    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V59._00 (fldOffset=0x0)" P-DEP
+;  V101 tmp99       [V101,T13] (  2,  2   )    long  ->  rsi         "field V63._00 (fldOffset=0x0)" P-INDEP
+;  V102 tmp100      [V102,T14] (  2,  2   )    long  ->  rdi         "field V64._00 (fldOffset=0x0)" P-INDEP
+;* V103 tmp101      [V103    ] (  0,  0   )    long  ->  zero-ref    "field V65._00 (fldOffset=0x0)" P-INDEP
+;  V104 tmp102      [V104    ] (  2,  2   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V66._00 (fldOffset=0x0)" P-DEP
+;  V105 tmp103      [V105    ] (  2,  3   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V68._00 (fldOffset=0x0)" P-DEP
+;  V106 tmp104      [V106    ] (  2,  3   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V69._00 (fldOffset=0x0)" P-DEP
+;  V107 tmp105      [V107    ] (  2,  3   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V76._00 (fldOffset=0x0)" P-DEP
+;  V108 cse0        [V108,T15] (  2,  2   )    long  ->  rbx         "CSE #01: moderate"
+;  V109 cse1        [V109,T16] (  2,  2   )    long  ->  r15         "CSE #02: moderate"
+;  V110 cse2        [V110,T17] (  2,  2   )    long  ->  r15         "CSE #03: moderate"
 ;
-; Lcl frame size = 136
+; Lcl frame size = 192
 
 G_M27773_IG01:
-       sub      rsp, 136
-       vmovups  ymm0, ymmword ptr [rsp+0x90]
-       vmovups  ymm1, ymmword ptr [rsp+0xB0]
-						;; size=25 bbWeight=1 PerfScore 8.25
+       push     rbp
+       push     r15
+       push     rbx
+       sub      rsp, 192
+       lea      rbp, [rsp+0xD0]
+       vmovups  ymm0, ymmword ptr [rbp+0x10]
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+						;; size=29 bbWeight=1 PerfScore 11.75
 G_M27773_IG02:
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rsp+0x70], xmm2
+       vmovaps  xmmword ptr [rbp-0x20], xmm2
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rsp+0x60], xmm2
-       mov      rax, qword ptr [rsp+0x70]
-       mov      qword ptr [rsp+0x58], rax
-       mov      rax, qword ptr [rsp+0x60]
-       mov      qword ptr [rsp+0x50], rax
-       mov      rax, qword ptr [rsp+0x58]
-       imul     rax, qword ptr [rsp+0x50]
-       mov      rcx, qword ptr [rsp+0x78]
-       mov      qword ptr [rsp+0x48], rcx
-       mov      rcx, qword ptr [rsp+0x68]
-       mov      qword ptr [rsp+0x40], rcx
-       mov      rcx, qword ptr [rsp+0x48]
-       imul     rcx, qword ptr [rsp+0x40]
-       add      rax, rcx
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       mov      rsi, qword ptr [rbp-0x20]
+       mov      rdi, qword ptr [rbp-0x30]
+       mov      qword ptr [rbp-0x40], rsi
+       mov      qword ptr [rbp-0x48], rdi
+       mov      rsi, qword ptr [rbp-0x40]
+       imul     rsi, qword ptr [rbp-0x48]
+       mov      qword ptr [rbp-0x38], rsi
+       mov      rsi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x50], rsi
+       mov      rbx, qword ptr [rbp-0x50]
+       mov      rsi, qword ptr [rbp-0x18]
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      qword ptr [rbp-0x60], rsi
+       mov      qword ptr [rbp-0x68], rdi
+       mov      rsi, qword ptr [rbp-0x60]
+       imul     rsi, qword ptr [rbp-0x68]
+       mov      qword ptr [rbp-0x58], rsi
+       mov      rsi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], rsi
+       mov      r15, qword ptr [rbp-0x70]
+       add      rbx, r15
        vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rsp+0x30], xmm0
+       vmovaps  xmmword ptr [rbp-0x80], xmm0
        vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rsp+0x20], xmm0
-       mov      rcx, qword ptr [rsp+0x30]
-       mov      qword ptr [rsp+0x18], rcx
-       mov      rcx, qword ptr [rsp+0x20]
-       mov      qword ptr [rsp+0x10], rcx
-       mov      rcx, qword ptr [rsp+0x18]
-       imul     rcx, qword ptr [rsp+0x10]
-       mov      rdx, qword ptr [rsp+0x38]
-       mov      qword ptr [rsp+0x08], rdx
-       mov      rdx, qword ptr [rsp+0x28]
-       mov      qword ptr [rsp], rdx
-       add      rax, rcx
-       mov      rcx, qword ptr [rsp+0x08]
-       imul     rcx, qword ptr [rsp]
-       add      rax, rcx
-						;; size=175 bbWeight=1 PerfScore 45.25
+       vmovaps  xmmword ptr [rbp-0x90], xmm0
+       mov      rsi, qword ptr [rbp-0x80]
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xA0], rsi
+       mov      qword ptr [rbp-0xA8], rdi
+       mov      rsi, qword ptr [rbp-0xA0]
+       imul     rsi, qword ptr [rbp-0xA8]
+       mov      qword ptr [rbp-0x98], rsi
+       mov      rsi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], rsi
+       mov      r15, qword ptr [rbp-0xB0]
+       mov      rsi, qword ptr [rbp-0x78]
+       mov      rdi, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC0], rsi
+       mov      qword ptr [rbp-0xC8], rdi
+       mov      rsi, qword ptr [rbp-0xC0]
+       imul     rsi, qword ptr [rbp-0xC8]
+       mov      qword ptr [rbp-0xB8], rsi
+       mov      rsi, qword ptr [rbp-0xB8]
+       mov      qword ptr [rbp-0xD0], rsi
+       mov      rsi, qword ptr [rbp-0xD0]
+       xor      edi, edi
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       mov      rsi, rax
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Add(long,long):long
+       add      rax, rbx
+						;; size=299 bbWeight=1 PerfScore 68.25
 G_M27773_IG03:
        vzeroupper 
-       add      rsp, 136
+       add      rsp, 192
+       pop      rbx
+       pop      r15
+       pop      rbp
        ret      
-						;; size=11 bbWeight=1 PerfScore 2.25
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 211, prolog size 7, PerfScore 55.75, instruction count 41, allocated bytes for code 211 (MethodHash=9cc79382) for method System.Runtime.Intrinsics.Vector256:Dot[long](System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
+; Total bytes of code 343, prolog size 19, PerfScore 83.75, instruction count 70, allocated bytes for code 343 (MethodHash=9cc79382) for method System.Runtime.Intrinsics.Vector256:Dot[long](System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):long (FullOpts)
121 (41.72 % of base) - System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Multiply(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Multiply(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 20 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 20 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V02 arg1         [V02,T17] (  2,  2   )  simd32  ->  mm1         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm2         "impAppendStmt"
+;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rbx         single-def
+;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V02 arg1         [V02,T17] (  2,  2   )  simd32  ->  [rbp+0x30]  single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  [rbp-0x20]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T09] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V07 tmp4         [V07,T10] (  3,  6   )  simd16  ->  [rbp-0x20]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V06 tmp3         [V06,T09] (  3,  6   )  simd16  ->  [rbp-0x30]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V07 tmp4         [V07,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V09 tmp6         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V10 tmp7         [V10    ] (  2,  2   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V10 tmp7         [V10    ] (  2,  2   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V11 tmp8         [V11,T05] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V12 tmp9         [V12    ] (  2,  4   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V13 tmp10        [V13    ] (  2,  4   )  struct ( 8) [rbp-0x38]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V12 tmp9         [V12    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V13 tmp10        [V13    ] (  2,  4   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V14 tmp11        [V14    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
 ;* V15 tmp12        [V15    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V16 tmp13        [V16    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V17 tmp14        [V17    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V18 tmp15        [V18    ] (  2,  2   )  struct ( 8) [rbp-0x40]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V19 tmp16        [V19,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V20 tmp17        [V20    ] (  2,  4   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V21 tmp18        [V21    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V22 tmp19        [V22    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V23 tmp20        [V23    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V24 tmp21        [V24    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V25 tmp22        [V25    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V26 tmp23        [V26,T14] (  3,  3   )  simd16  ->  [rbp-0x60]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V27 tmp24        [V27,T11] (  3,  6   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V28 tmp25        [V28,T12] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V29 tmp26        [V29    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V30 tmp27        [V30    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V31 tmp28        [V31    ] (  2,  2   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V32 tmp29        [V32,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  2,  4   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V34 tmp31        [V34    ] (  2,  4   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V35 tmp32        [V35    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V36 tmp33        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V37 tmp34        [V37    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V38 tmp35        [V38    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V39 tmp36        [V39    ] (  2,  2   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V40 tmp37        [V40,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V41 tmp38        [V41    ] (  2,  4   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V42 tmp39        [V42    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V43 tmp40        [V43    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
-;* V44 tmp41        [V44    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V45 tmp42        [V45    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V46 tmp43        [V46    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V47 tmp44        [V47,T15] (  3,  3   )  simd16  ->  [rbp-0xC0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V48 tmp45        [V48    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V49 tmp46        [V49,T01] (  2,  2   )    long  ->  rax         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V50 tmp47        [V50,T02] (  2,  2   )    long  ->  rcx         "field V09._00 (fldOffset=0x0)" P-INDEP
-;  V51 tmp48        [V51    ] (  2,  2   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-;  V52 tmp49        [V52    ] (  2,  3   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
-;  V53 tmp50        [V53    ] (  2,  3   )    long  ->  [rbp-0x38]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-;  V54 tmp51        [V54    ] (  2,  2   )    long  ->  [rbp-0x40]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
-;  V55 tmp52        [V55    ] (  2,  3   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V20._00 (fldOffset=0x0)" P-DEP
-;  V56 tmp53        [V56    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
-;  V57 tmp54        [V57,T03] (  2,  2   )    long  ->  rax         "field V29._00 (fldOffset=0x0)" P-INDEP
-;  V58 tmp55        [V58,T04] (  2,  2   )    long  ->  rcx         "field V30._00 (fldOffset=0x0)" P-INDEP
-;  V59 tmp56        [V59    ] (  2,  2   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V60 tmp57        [V60    ] (  2,  3   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V61 tmp58        [V61    ] (  2,  3   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
-;  V62 tmp59        [V62    ] (  2,  2   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V39._00 (fldOffset=0x0)" P-DEP
-;  V63 tmp60        [V63    ] (  2,  3   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V41._00 (fldOffset=0x0)" P-DEP
-;  V64 tmp61        [V64    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
+;  V16 tmp13        [V16    ] (  2,  2   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V17 tmp14        [V17,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V18 tmp15        [V18    ] (  2,  4   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V19 tmp16        [V19    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V20 tmp17        [V20    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V21 tmp18        [V21    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V22 tmp19        [V22,T14] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V23 tmp20        [V23,T11] (  3,  6   )  simd16  ->  [rbp-0x90]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V24 tmp21        [V24,T12] (  3,  6   )  simd16  ->  [rbp-0xA0]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V25 tmp22        [V25    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V26 tmp23        [V26    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V27 tmp24        [V27    ] (  2,  2   )  struct ( 8) [rbp-0xA8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V28 tmp25        [V28,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  2,  4   )  struct ( 8) [rbp-0xB0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V30 tmp27        [V30    ] (  2,  4   )  struct ( 8) [rbp-0xB8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V31 tmp28        [V31    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V32 tmp29        [V32    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V33 tmp30        [V33    ] (  2,  2   )  struct ( 8) [rbp-0xC0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V34 tmp31        [V34,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V35 tmp32        [V35    ] (  2,  4   )  struct ( 8) [rbp-0xC8]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V36 tmp33        [V36    ] (  2,  4   )  struct ( 8) [rbp-0xD0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V37 tmp34        [V37    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V38 tmp35        [V38    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V39 tmp36        [V39,T15] (  3,  3   )  simd16  ->  [rbp-0xE0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V40 tmp37        [V40,T01] (  2,  2   )    long  ->  r15         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V41 tmp38        [V41,T02] (  2,  2   )    long  ->  rdi         "field V09._00 (fldOffset=0x0)" P-INDEP
+;  V42 tmp39        [V42    ] (  2,  2   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+;  V44 tmp41        [V44    ] (  2,  3   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V45 tmp42        [V45    ] (  2,  2   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V16._00 (fldOffset=0x0)" P-DEP
+;  V46 tmp43        [V46    ] (  2,  3   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V18._00 (fldOffset=0x0)" P-DEP
+;  V47 tmp44        [V47    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V19._00 (fldOffset=0x0)" P-DEP
+;  V48 tmp45        [V48,T03] (  2,  2   )    long  ->  r15         "field V25._00 (fldOffset=0x0)" P-INDEP
+;  V49 tmp46        [V49,T04] (  2,  2   )    long  ->  rdi         "field V26._00 (fldOffset=0x0)" P-INDEP
+;  V50 tmp47        [V50    ] (  2,  2   )    long  ->  [rbp-0xA8]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V51 tmp48        [V51    ] (  2,  3   )    long  ->  [rbp-0xB0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+;  V52 tmp49        [V52    ] (  2,  3   )    long  ->  [rbp-0xB8]  do-not-enreg[X] addr-exposed "field V30._00 (fldOffset=0x0)" P-DEP
+;  V53 tmp50        [V53    ] (  2,  2   )    long  ->  [rbp-0xC0]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V54 tmp51        [V54    ] (  2,  3   )    long  ->  [rbp-0xC8]  do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
+;  V55 tmp52        [V55    ] (  2,  3   )    long  ->  [rbp-0xD0]  do-not-enreg[X] addr-exposed "field V36._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 192
+; Lcl frame size = 240
 
 G_M16594_IG01:
        push     rbp
-       sub      rsp, 192
-       lea      rbp, [rsp+0xC0]
+       push     r15
+       push     rbx
+       sub      rsp, 240
+       lea      rbp, [rsp+0x100]
+       mov      rbx, rdi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
        vmovups  ymm1, ymmword ptr [rbp+0x30]
-						;; size=26 bbWeight=1 PerfScore 9.75
+						;; size=32 bbWeight=1 PerfScore 12.00
 G_M16594_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm2, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm2
+       vmovaps  xmmword ptr [rbp-0x30], xmm2
+       vmovups  ymmword ptr [rbp+0x30], ymm1
        vmovaps  ymm2, ymm1
-       vmovaps  xmmword ptr [rbp-0x20], xmm2
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x30], rax
-       mov      rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x38], rax
-       mov      rax, qword ptr [rbp-0x30]
-       imul     rax, qword ptr [rbp-0x38]
-       mov      qword ptr [rbp-0x28], rax
-       mov      rax, qword ptr [rbp-0x28]
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x48], rcx
-       mov      rcx, qword ptr [rbp-0x18]
-       mov      qword ptr [rbp-0x50], rcx
-       mov      rcx, qword ptr [rbp-0x48]
-       imul     rcx, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x40], rcx
-       mov      rcx, qword ptr [rbp-0x40]
+       vmovaps  xmmword ptr [rbp-0x40], xmm2
+       mov      rdi, qword ptr [rbp-0x30]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x58], rdi
+       mov      rdi, qword ptr [rbp-0x50]
+       mov      rsi, qword ptr [rbp-0x58]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x48], rax
+       mov      r15, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x28]
+       mov      qword ptr [rbp-0x68], rdi
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x70], rdi
+       mov      rdi, qword ptr [rbp-0x68]
+       mov      rsi, qword ptr [rbp-0x70]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
        mov      qword ptr [rbp-0x60], rax
-       mov      qword ptr [rbp-0x58], rcx
-       vmovaps  xmm2, xmmword ptr [rbp-0x60]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x70], xmm0
-       vextractf128 xmm0, ymm1, 1
-       vmovaps  xmmword ptr [rbp-0x80], xmm0
-       mov      rax, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x90], rax
-       mov      rax, qword ptr [rbp-0x80]
-       mov      qword ptr [rbp-0x98], rax
-       mov      rax, qword ptr [rbp-0x90]
-       imul     rax, qword ptr [rbp-0x98]
-       mov      qword ptr [rbp-0x88], rax
-       mov      rax, qword ptr [rbp-0x88]
-       mov      rcx, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0xA8], rcx
-       mov      rcx, qword ptr [rbp-0x78]
-       mov      qword ptr [rbp-0xB0], rcx
-       mov      rcx, qword ptr [rbp-0xA8]
-       imul     rcx, qword ptr [rbp-0xB0]
-       mov      qword ptr [rbp-0xA0], rcx
-       mov      rcx, qword ptr [rbp-0xA0]
-       mov      qword ptr [rbp-0xC0], rax
-       mov      qword ptr [rbp-0xB8], rcx
-       vinserti128 ymm0, ymm2, xmmword ptr [rbp-0xC0], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=252 bbWeight=1 PerfScore 65.75
+       mov      rdi, qword ptr [rbp-0x60]
+       mov      qword ptr [rbp-0x80], r15
+       mov      qword ptr [rbp-0x78], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x80]
+       vmovaps  xmmword ptr [rbp-0x20], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x90], xmm1
+       vmovups  ymm1, ymmword ptr [rbp+0x30]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0xA0], xmm1
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      qword ptr [rbp-0xB0], rdi
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      qword ptr [rbp-0xB8], rdi
+       mov      rdi, qword ptr [rbp-0xB0]
+       mov      rsi, qword ptr [rbp-0xB8]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0xA8], rax
+       mov      r15, qword ptr [rbp-0xA8]
+       mov      rdi, qword ptr [rbp-0x88]
+       mov      qword ptr [rbp-0xC8], rdi
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xD0], rdi
+       mov      rdi, qword ptr [rbp-0xC8]
+       mov      rsi, qword ptr [rbp-0xD0]
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+						;; size=292 bbWeight=1 PerfScore 64.50
 G_M16594_IG03:
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0xC0], rax
+       mov      rdi, qword ptr [rbp-0xC0]
+       mov      qword ptr [rbp-0xE0], r15
+       mov      qword ptr [rbp-0xD8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x20]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0xE0]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       mov      rdi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[long](System.Runtime.Intrinsics.Vector128`1[long],System.Runtime.Intrinsics.Vector128`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[long](System.Runtime.Intrinsics.Vector128`1[long],System.Runtime.Intrinsics.Vector128`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
+       mov      rax, rbx
+						;; size=72 bbWeight=1 PerfScore 18.75
+G_M16594_IG04:
        vzeroupper 
-       add      rsp, 192
+       add      rsp, 240
+       pop      rbx
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=15 bbWeight=1 PerfScore 3.75
 
-; Total bytes of code 290, prolog size 16, PerfScore 78.25, instruction count 57, allocated bytes for code 290 (MethodHash=093fbf2d) for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Multiply(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
+; Total bytes of code 411, prolog size 19, PerfScore 99.00, instruction count 80, allocated bytes for code 411 (MethodHash=093fbf2d) for method System.Runtime.Intrinsics.Vector256`1[long]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Multiply(System.Runtime.Intrinsics.Vector256`1[long],System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
118 (61.78 % of base) - System.Runtime.Intrinsics.Vector256`1[double]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[double],int):System.Runtime.Intrinsics.Vector256`1[double]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[double]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[double],int):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T00] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T16] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[double]>
-;  V02 arg1         [V02,T02] (  3,  3   )     int  ->  rsi         single-def
-;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T13] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  r15         single-def
+;  V01 arg0         [V01,T15] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[double]>
+;  V02 arg1         [V02,T00] (  6,  6   )     int  ->  rbx         single-def
+;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V04 tmp1         [V04,T12] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T11] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[double]>
+;  V06 tmp3         [V06,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[double]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[double]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[double]>
-;  V09 tmp6         [V09    ] (  2,  2   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V10 tmp7         [V10,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  2,  4   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
+;  V09 tmp6         [V09    ] (  2,  2   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V10 tmp7         [V10,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V11 tmp8         [V11    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
 ;* V12 tmp9         [V12    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
 ;* V13 tmp10        [V13    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
 ;* V14 tmp11        [V14    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  2,  2   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V16 tmp13        [V16,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  2,  4   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
+;  V15 tmp12        [V15    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V16 tmp13        [V16,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V17 tmp14        [V17    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
 ;* V18 tmp15        [V18    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T14] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
-;  V22 tmp19        [V22,T12] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[double]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[double]>
-;  V25 tmp22        [V25    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V28 tmp25        [V28    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  2,  2   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V32 tmp29        [V32,T10] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
-;* V34 tmp31        [V34    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )  double  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T15] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[double]>
-;  V39 tmp36        [V39,T03] (  2,  2   )    long  ->  rax         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T04] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  2,  2   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  2,  3   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  2,  2   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  2,  3   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T05] (  2,  2   )    long  ->  rax         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T06] (  2,  2   )    long  ->  rcx         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  2,  2   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
-;  V51 cse0         [V51,T01] (  5,  5   )     int  ->  rsi         "CSE #01: moderate"
+;  V19 tmp16        [V19,T13] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
+;  V20 tmp17        [V20,T11] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[double]>
+;* V21 tmp18        [V21    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V22 tmp19        [V22    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[double]>
+;  V23 tmp20        [V23    ] (  2,  2   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V24 tmp21        [V24,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  2,  4   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V26 tmp23        [V26    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  2,  2   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V28 tmp25        [V28,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V29 tmp26        [V29    ] (  2,  4   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[double]>
+;* V30 tmp27        [V30    ] (  0,  0   )  double  ->  zero-ref    "Inline stloc first use temp"
+;  V31 tmp28        [V31,T14] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
+;  V32 tmp29        [V32,T02] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V33 tmp30        [V33,T03] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V34 tmp31        [V34    ] (  2,  2   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V35 tmp32        [V35    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
+;  V38 tmp35        [V38,T04] (  2,  2   )    long  ->  r14         "field V21._00 (fldOffset=0x0)" P-INDEP
+;  V39 tmp36        [V39,T05] (  2,  2   )    long  ->  rdi         "field V22._00 (fldOffset=0x0)" P-INDEP
+;  V40 tmp37        [V40    ] (  2,  2   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V41 tmp38        [V41    ] (  2,  3   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  2,  2   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
+;  V43 tmp40        [V43    ] (  2,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 128
+; Lcl frame size = 184
 
 G_M32972_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
-       vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 184
+       lea      rbp, [rsp+0xD0]
+       mov      r15, rdi
+       mov      ebx, esi
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+						;; size=31 bbWeight=1 PerfScore 9.25
 G_M32972_IG02:
-       vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       mov      rax, qword ptr [rbp-0x20]
-       and      esi, 63
-       sarx     rax, rax, rsi
-       mov      qword ptr [rbp-0x18], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rcx
-       mov      rcx, qword ptr [rbp-0x30]
-       sarx     rcx, rcx, rsi
-       mov      qword ptr [rbp-0x28], rcx
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rax
-       mov      qword ptr [rbp-0x38], rcx
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       mov      rax, qword ptr [rbp-0x60]
-       sarx     rax, rax, rsi
-       mov      qword ptr [rbp-0x58], rax
-       mov      rax, qword ptr [rbp-0x58]
-       mov      rcx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rcx
-       mov      rcx, qword ptr [rbp-0x70]
-       sarx     rcx, rcx, rsi
-       mov      qword ptr [rbp-0x68], rcx
-       mov      rcx, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rax
-       mov      qword ptr [rbp-0x78], rcx
-       vinsertf128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=158 bbWeight=1 PerfScore 39.75
+       vmovups  ymmword ptr [rbp+0x10], ymm1
+       vmovaps  ymm0, ymm1
+       vmovaps  xmmword ptr [rbp-0x40], xmm0
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      rdi, qword ptr [rbp-0x50]
+       sarx     rdi, rdi, rbx
+       mov      qword ptr [rbp-0x48], rdi
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       vmovsd   xmm0, qword ptr [rbp-0x60]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       vmovsd   qword ptr [rbp-0x58], xmm0
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       vmovsd   xmm0, qword ptr [rbp-0x90]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       vmovsd   qword ptr [rbp-0x88], xmm0
+       mov      r14, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       vmovsd   xmm0, qword ptr [rbp-0xA0]
+       mov      edi, ebx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[double]:ShiftRightArithmetic(double,int):double
+       vmovsd   qword ptr [rbp-0x98], xmm0
+       mov      rdi, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vmovups  xmmword ptr [rsp], xmm0
+       vmovaps  xmm0, xmmword ptr [rbp-0xB0]
+       vmovups  xmmword ptr [rsp+0x10], xmm0
+       mov      rdi, r15
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Vector256:Create[double](System.Runtime.Intrinsics.Vector128`1[double],System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector256`1[double]
+       call     [rax]System.Runtime.Intrinsics.Vector256:Create[double](System.Runtime.Intrinsics.Vector128`1[double],System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector256`1[double]
+       mov      rax, r15
+						;; size=261 bbWeight=1 PerfScore 66.00
 G_M32972_IG03:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 184
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 191, prolog size 16, PerfScore 48.25, instruction count 45, allocated bytes for code 191 (MethodHash=35477f33) for method System.Runtime.Intrinsics.Vector256`1[double]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[double],int):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
+; Total bytes of code 309, prolog size 21, PerfScore 79.50, instruction count 66, allocated bytes for code 309 (MethodHash=35477f33) for method System.Runtime.Intrinsics.Vector256`1[double]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256`1[double],int):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
117 (64.64 % of base) - System.Runtime.Intrinsics.Vector256:Multiply[long](long,System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long]
 ; Assembly listing for method System.Runtime.Intrinsics.Vector256:Multiply[long](long,System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 16 single block inlinees; 13 inlinees without PGO data
+; 0 inlinees with PGO data; 16 single block inlinees; 9 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  rdi         single-def
-;  V01 arg0         [V01,T00] (  6,  6   )    long  ->  rsi         single-def
-;  V02 arg1         [V02,T15] (  2,  2   )  simd32  ->  mm0         single-def <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V00 RetBuf       [V00,T01] (  4,  4   )   byref  ->  r15         single-def
+;  V01 arg0         [V01,T00] (  6,  6   )    long  ->  rbx         single-def
+;  V02 arg1         [V02,T15] (  2,  2   )  simd32  ->  [rbp+0x10]  single-def <System.Runtime.Intrinsics.Vector256`1[long]>
 ;# V03 OutArgs      [V03    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V04 tmp1         [V04,T12] (  2,  4   )  simd16  ->  mm1         "impAppendStmt"
+;  V04 tmp1         [V04,T12] (  2,  4   )  simd16  ->  [rbp-0x30]  spill-single-def "impAppendStmt"
 ;* V05 tmp2         [V05    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V06 tmp3         [V06,T10] (  3,  6   )  simd16  ->  [rbp-0x10]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V06 tmp3         [V06,T10] (  3,  6   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
 ;* V07 tmp4         [V07    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V08 tmp5         [V08    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V09 tmp6         [V09    ] (  2,  2   )  struct ( 8) [rbp-0x18]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V09 tmp6         [V09    ] (  2,  2   )  struct ( 8) [rbp-0x48]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V10 tmp7         [V10,T06] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V11 tmp8         [V11    ] (  2,  4   )  struct ( 8) [rbp-0x20]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V11 tmp8         [V11    ] (  2,  4   )  struct ( 8) [rbp-0x50]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V12 tmp9         [V12    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V13 tmp10        [V13    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V14 tmp11        [V14    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V15 tmp12        [V15    ] (  2,  2   )  struct ( 8) [rbp-0x28]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V16 tmp13        [V16,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V17 tmp14        [V17    ] (  2,  4   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V18 tmp15        [V18    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V19 tmp16        [V19    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V20 tmp17        [V20    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V21 tmp18        [V21,T13] (  3,  3   )  simd16  ->  [rbp-0x40]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;  V22 tmp19        [V22,T11] (  3,  6   )  simd16  ->  [rbp-0x50]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V23 tmp20        [V23    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V24 tmp21        [V24    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
-;  V25 tmp22        [V25    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V26 tmp23        [V26,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V27 tmp24        [V27    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V13 tmp10        [V13    ] (  2,  2   )  struct ( 8) [rbp-0x58]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V14 tmp11        [V14,T07] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V15 tmp12        [V15    ] (  2,  4   )  struct ( 8) [rbp-0x60]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V16 tmp13        [V16    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V17 tmp14        [V17,T13] (  3,  3   )  simd16  ->  [rbp-0x70]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;  V18 tmp15        [V18,T11] (  3,  6   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V19 tmp16        [V19    ] (  0,  0   )  struct ( 8) zero-ref    "impAppendStmt" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V20 tmp17        [V20    ] (  0,  0   )  struct ( 8) zero-ref    "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[long]>
+;  V21 tmp18        [V21    ] (  2,  2   )  struct ( 8) [rbp-0x88]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V22 tmp19        [V22,T08] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V23 tmp20        [V23    ] (  2,  4   )  struct ( 8) [rbp-0x90]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V24 tmp21        [V24    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V25 tmp22        [V25    ] (  2,  2   )  struct ( 8) [rbp-0x98]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
+;* V26 tmp23        [V26,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;  V27 tmp24        [V27    ] (  2,  4   )  struct ( 8) [rbp-0xA0]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
 ;* V28 tmp25        [V28    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V29 tmp26        [V29    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp27        [V30    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V31 tmp28        [V31    ] (  2,  2   )  struct ( 8) [rbp-0x68]  do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V32 tmp29        [V32,T09] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V33 tmp30        [V33    ] (  2,  4   )  struct ( 8) [rbp-0x70]  do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[long]>
-;* V34 tmp31        [V34    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;* V35 tmp32        [V35    ] (  0,  0   )    long  ->  zero-ref    "Inline return value spill temp"
-;* V36 tmp33        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V37 tmp34        [V37,T14] (  3,  3   )  simd16  ->  [rbp-0x80]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
-;* V38 tmp35        [V38    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[long]>
-;  V39 tmp36        [V39,T02] (  2,  2   )    long  ->  rax         "field V07._00 (fldOffset=0x0)" P-INDEP
-;  V40 tmp37        [V40,T03] (  2,  2   )    long  ->  rcx         "field V08._00 (fldOffset=0x0)" P-INDEP
-;  V41 tmp38        [V41    ] (  2,  2   )    long  ->  [rbp-0x18]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-;  V42 tmp39        [V42    ] (  2,  3   )    long  ->  [rbp-0x20]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
-;  V43 tmp40        [V43    ] (  2,  2   )    long  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;  V44 tmp41        [V44    ] (  2,  3   )    long  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V17._00 (fldOffset=0x0)" P-DEP
-;  V45 tmp42        [V45,T04] (  2,  2   )    long  ->  rax         "field V23._00 (fldOffset=0x0)" P-INDEP
-;  V46 tmp43        [V46,T05] (  2,  2   )    long  ->  rcx         "field V24._00 (fldOffset=0x0)" P-INDEP
-;  V47 tmp44        [V47    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
-;  V48 tmp45        [V48    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
-;  V49 tmp46        [V49    ] (  2,  2   )    long  ->  [rbp-0x68]  do-not-enreg[X] addr-exposed "field V31._00 (fldOffset=0x0)" P-DEP
-;  V50 tmp47        [V50    ] (  2,  3   )    long  ->  [rbp-0x70]  do-not-enreg[X] addr-exposed "field V33._00 (fldOffset=0x0)" P-DEP
+;  V29 tmp26        [V29,T14] (  3,  3   )  simd16  ->  [rbp-0xB0]  do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector128`1[long]>
+;* V30 tmp27        [V30    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[long]>
+;  V31 tmp28        [V31,T02] (  2,  2   )    long  ->  r14         "field V07._00 (fldOffset=0x0)" P-INDEP
+;  V32 tmp29        [V32,T03] (  2,  2   )    long  ->  rdi         "field V08._00 (fldOffset=0x0)" P-INDEP
+;  V33 tmp30        [V33    ] (  2,  2   )    long  ->  [rbp-0x48]  do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
+;  V34 tmp31        [V34    ] (  2,  3   )    long  ->  [rbp-0x50]  do-not-enreg[X] addr-exposed "field V11._00 (fldOffset=0x0)" P-DEP
+;  V35 tmp32        [V35    ] (  2,  2   )    long  ->  [rbp-0x58]  do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+;  V36 tmp33        [V36    ] (  2,  3   )    long  ->  [rbp-0x60]  do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
+;  V37 tmp34        [V37,T04] (  2,  2   )    long  ->  r14         "field V19._00 (fldOffset=0x0)" P-INDEP
+;  V38 tmp35        [V38,T05] (  2,  2   )    long  ->  rax         "field V20._00 (fldOffset=0x0)" P-INDEP
+;  V39 tmp36        [V39    ] (  2,  2   )    long  ->  [rbp-0x88]  do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
+;  V40 tmp37        [V40    ] (  2,  3   )    long  ->  [rbp-0x90]  do-not-enreg[X] addr-exposed "field V23._00 (fldOffset=0x0)" P-DEP
+;  V41 tmp38        [V41    ] (  2,  2   )    long  ->  [rbp-0x98]  do-not-enreg[X] addr-exposed "field V25._00 (fldOffset=0x0)" P-DEP
+;  V42 tmp39        [V42    ] (  2,  3   )    long  ->  [rbp-0xA0]  do-not-enreg[X] addr-exposed "field V27._00 (fldOffset=0x0)" P-DEP
 ;
-; Lcl frame size = 128
+; Lcl frame size = 152
 
 G_M40782_IG01:
        push     rbp
-       sub      rsp, 128
-       lea      rbp, [rsp+0x80]
+       push     r15
+       push     r14
+       push     rbx
+       sub      rsp, 152
+       lea      rbp, [rsp+0xB0]
+       mov      r15, rdi
+       mov      rbx, rsi
        vmovups  ymm0, ymmword ptr [rbp+0x10]
-						;; size=21 bbWeight=1 PerfScore 5.75
+						;; size=32 bbWeight=1 PerfScore 9.25
 G_M40782_IG02:
+       vmovups  ymmword ptr [rbp+0x10], ymm0
        vmovaps  ymm1, ymm0
-       vmovaps  xmmword ptr [rbp-0x10], xmm1
-       mov      rax, qword ptr [rbp-0x10]
-       mov      qword ptr [rbp-0x20], rax
-       mov      rax, rsi
-       imul     rax, qword ptr [rbp-0x20]
-       mov      qword ptr [rbp-0x18], rax
-       mov      rax, qword ptr [rbp-0x18]
-       mov      rcx, qword ptr [rbp-0x08]
-       mov      qword ptr [rbp-0x30], rcx
-       mov      rcx, rsi
-       imul     rcx, qword ptr [rbp-0x30]
-       mov      qword ptr [rbp-0x28], rcx
-       mov      rcx, qword ptr [rbp-0x28]
-       mov      qword ptr [rbp-0x40], rax
-       mov      qword ptr [rbp-0x38], rcx
-       vmovaps  xmm1, xmmword ptr [rbp-0x40]
-       vextractf128 xmm0, ymm0, 1
-       vmovaps  xmmword ptr [rbp-0x50], xmm0
-       mov      rax, qword ptr [rbp-0x50]
-       mov      qword ptr [rbp-0x60], rax
-       mov      rax, rsi
-       imul     rax, qword ptr [rbp-0x60]
+       vmovaps  xmmword ptr [rbp-0x40], xmm1
+       mov      rdi, qword ptr [rbp-0x40]
+       mov      qword ptr [rbp-0x50], rdi
+       mov      rdi, qword ptr [rbp-0x50]
+       mov      rsi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x48], rax
+       mov      r14, qword ptr [rbp-0x48]
+       mov      rdi, qword ptr [rbp-0x38]
+       mov      qword ptr [rbp-0x60], rdi
+       mov      rdi, qword ptr [rbp-0x60]
+       mov      rsi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
        mov      qword ptr [rbp-0x58], rax
-       mov      rax, qword ptr [rbp-0x58]
-       mov      rcx, qword ptr [rbp-0x48]
-       mov      qword ptr [rbp-0x70], rcx
-       imul     rsi, qword ptr [rbp-0x70]
-       mov      qword ptr [rbp-0x68], rsi
-       mov      rcx, qword ptr [rbp-0x68]
-       mov      qword ptr [rbp-0x80], rax
-       mov      qword ptr [rbp-0x78], rcx
-       vinserti128 ymm0, ymm1, xmmword ptr [rbp-0x80], 1
-       vmovups  ymmword ptr [rdi], ymm0
-       mov      rax, rdi
-						;; size=148 bbWeight=1 PerfScore 50.25
+       mov      rdi, qword ptr [rbp-0x58]
+       mov      qword ptr [rbp-0x70], r14
+       mov      qword ptr [rbp-0x68], rdi
+       vmovaps  xmm0, xmmword ptr [rbp-0x70]
+       vmovaps  xmmword ptr [rbp-0x30], xmm0
+       vmovups  ymm1, ymmword ptr [rbp+0x10]
+       vextractf128 xmm1, ymm1, 1
+       vmovaps  xmmword ptr [rbp-0x80], xmm1
+       mov      rdi, qword ptr [rbp-0x80]
+       mov      qword ptr [rbp-0x90], rdi
+       mov      rdi, qword ptr [rbp-0x90]
+       mov      rsi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x88], rax
+       mov      r14, qword ptr [rbp-0x88]
+       mov      rdi, qword ptr [rbp-0x78]
+       mov      qword ptr [rbp-0xA0], rdi
+       mov      rdi, qword ptr [rbp-0xA0]
+       mov      rsi, rbx
+       mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       call     [rax]System.Runtime.Intrinsics.Scalar`1[long]:Multiply(long,long):long
+       mov      qword ptr [rbp-0x98], rax
+       mov      rax, qword ptr [rbp-0x98]
+       mov      qword ptr [rbp-0xB0], r14
+       mov      qword ptr [rbp-0xA8], rax
+       vmovaps  xmm0, xmmword ptr [rbp-0x30]
+       vinserti128 ymm0, ymm0, xmmword ptr [rbp-0xB0], 1
+       vmovups  ymmword ptr [r15], ymm0
+       mov      rax, r15
+						;; size=249 bbWeight=1 PerfScore 60.50
 G_M40782_IG03:
        vzeroupper 
-       add      rsp, 128
+       add      rsp, 152
+       pop      rbx
+       pop      r14
+       pop      r15
        pop      rbp
        ret      
-						;; size=12 bbWeight=1 PerfScore 2.75
+						;; size=17 bbWeight=1 PerfScore 4.25
 
-; Total bytes of code 181, prolog size 16, PerfScore 58.75, instruction count 43, allocated bytes for code 181 (MethodHash=2fe960b1) for method System.Runtime.Intrinsics.Vector256:Multiply[long](long,System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
+; Total bytes of code 298, prolog size 21, PerfScore 74.00, instruction count 64, allocated bytes for code 298 (MethodHash=2fe960b1) for method System.Runtime.Intrinsics.Vector256:Multiply[long](long,System.Runtime.Intrinsics.Vector256`1[long]):System.Runtime.Intrinsics.Vector256`1[long] (FullOpts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment