This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Ryzen 7 3700x vs i7-6700 | |
============= LATENCY ============================================================================== | |
instruction | IPC ( rel[%]), CPI ( rel[%]) | |
------------------------------------------+--------------------------------------------------------- | |
m128 addps | 0.33-0.25 ( 33.3[%]), 3.00-4.00 ( -25.0[%]) | |
m128 aesdec | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 aesdeclast | 0.25-0.25 ( -0.0[%]), 4.00-4.00 ( 0.0[%]) | |
m128 aesenc | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 aesenclast | 0.25-0.25 ( -0.0[%]), 4.00-4.00 ( 0.0[%]) | |
m128 blendps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 blendvps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 cvtps2dq | 0.33-0.25 ( 33.4[%]), 3.00-4.00 ( -25.0[%]) | |
m128 divpd | 0.08-0.08 ( 0.0[%]), 13.00-13.00 ( -0.0[%]) | |
m128 divps | 0.10-0.09 ( 10.0[%]), 10.00-11.00 ( -9.1[%]) | |
m128 dpps | 0.07-0.08 ( -13.3[%]), 15.00-13.00 ( 15.4[%]) | |
m128 haddps | 0.50-0.33 ( 50.0[%]), 2.00-3.00 ( -33.3[%]) | |
m128 loadps->movq | 0.11-0.12 ( -11.1[%]), 9.00-8.00 ( 12.5[%]) | |
m128 movaps [mem] | 0.11-0.12 ( -11.1[%]), 9.00-8.00 ( 12.5[%]) | |
m128 movdqu [mem+1] | 0.10-0.12 ( -20.0[%]), 10.00-8.00 ( 25.0[%]) | |
m128 movdqu [mem+2MB-1] (cross page) | 0.09-0.07 ( 36.4[%]), 11.00-15.00 ( -26.7[%]) | |
m128 movdqu [mem+63] (cross cache) | 0.09-0.07 ( 27.3[%]), 11.00-14.00 ( -21.4[%]) | |
m128 movq->movq | 0.17-0.25 ( -33.3[%]), 6.00-4.00 ( 50.0[%]) | |
m128 mulps | 0.33-0.25 ( 33.4[%]), 3.00-4.00 ( -25.0[%]) | |
m128 padd | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 pclmulqdq | 0.22-0.14 ( 53.0[%]), 4.58-7.00 ( -34.6[%]) | |
m128 pcmpestri->movq | 0.09-0.08 ( 9.1[%]), 11.00-12.00 ( -8.3[%]) | |
m128 pcmpestrm | 0.14-0.11 ( 22.8[%]), 7.33-9.00 ( -18.5[%]) | |
m128 pcmpistri->movq | 0.09-0.08 ( 9.1[%]), 11.00-12.00 ( -8.3[%]) | |
m128 pcmpistrm | 0.14-0.11 ( 26.5[%]), 7.00-8.85 ( -20.9[%]) | |
m128 phaddd | 0.50-0.33 ( 50.1[%]), 2.00-3.00 ( -33.4[%]) | |
m128 pinsrd | 0.56-0.50 ( 11.9[%]), 1.79-2.00 ( -10.6[%]) | |
m128 pinsrd->pextr | 0.12-0.17 ( -25.0[%]), 8.00-6.00 ( 33.3[%]) | |
m128 pmovmskb->movq | 0.17-0.25 ( -33.3[%]), 6.00-4.00 ( 49.9[%]) | |
m128 pmuldq | 0.33-0.20 ( 66.7[%]), 3.00-5.00 ( -40.0[%]) | |
m128 pmullw | 0.33-0.20 ( 66.7[%]), 3.00-5.00 ( -40.0[%]) | |
m128 pshufb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 pxor | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
m128 rcpps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 rsqrtps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 shufps | 1.00-1.00 ( 0.1[%]), 1.00-1.00 ( -0.1[%]) | |
m128 vfmapd | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 vfmaps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 xorps | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
m256 gather32(<ld+ins>x8 + perm) | 0.05-0.06 ( -4.0[%]), 18.75-18.00 ( 4.2[%]) | |
m256 gather64(<ld+ins>x4 + perm) | 0.07-0.08 ( -12.5[%]), 14.86-13.00 ( 14.3[%]) | |
m256 movaps [mem] | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vaddps | 0.33-0.25 ( 33.4[%]), 3.00-4.00 ( -25.0[%]) | |
m256 vdivpd | 0.08-0.08 ( 0.0[%]), 13.00-13.00 ( -0.0[%]) | |
m256 vdivps | 0.10-0.09 ( 10.0[%]), 10.00-11.00 ( -9.1[%]) | |
m256 vfmapd | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m256 vfmaps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m256 vgatherdpd | 0.05-0.05 ( 5.3[%]), 19.00-20.00 ( -5.0[%]) | |
m256 vmovdqu [mem+1] | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vmovdqu [mem+2MB-1] (cross page) | 1.00-0.26 ( 290.3[%]), 1.00-3.90 ( -74.4[%]) | |
m256 vmovdqu [mem+63] (cross cache) | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vmulps | 0.33-0.25 ( 33.4[%]), 3.00-4.00 ( -25.0[%]) | |
m256 vpaddd | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vpblendvb | 1.00-0.50 ( 100.0[%]), 1.00-2.00 ( -50.0[%]) | |
m256 vperm2f128 | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m256 vpermpd | 0.17-0.33 ( -50.0[%]), 6.00-3.00 ( 100.0[%]) | |
m256 vpermps | 0.12-0.33 ( -62.5[%]), 8.00-3.00 ( 166.5[%]) | |
m256 vpgatherdd | 0.04-0.05 ( -8.3[%]), 24.00-22.00 ( 9.1[%]) | |
m256 vpmovsxwd | 0.25-0.33 ( -25.0[%]), 4.00-3.00 ( 33.3[%]) | |
m256 vpshufb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vpxor | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
m256 vrcpps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m256 vrsqrtps | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m256 vsqrtps | 0.07-0.08 ( -14.3[%]), 14.00-12.00 ( 16.6[%]) | |
m256 vxorps | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
reg64 add | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
reg64 crc32 | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
reg64 lea | 1.00-1.00 ( 0.1[%]), 1.00-1.00 ( -0.1[%]) | |
reg64 load | 0.25-0.20 ( 25.0[%]), 4.00-5.00 ( -20.0[%]) | |
reg64 popcnt | 1.00-0.33 ( 200.0[%]), 1.00-3.00 ( -66.7[%]) | |
reg64 store [mem+0]->load[mem+0] | 0.03-0.13 ( -79.4[%]), 37.56-7.75 ( 384.8[%]) | |
reg64 store [mem+0]->load[mem+1] | 0.03-0.05 ( -49.2[%]), 37.38-19.00 ( 96.7[%]) | |
reg64 xor | 3.88-3.94 ( -1.5[%]), 0.26-0.25 ( 1.6[%]) | |
reg64 xor dst,dst | 3.88-3.93 ( -1.3[%]), 0.26-0.25 ( 1.3[%]) | |
============= THROUGHPUT =========================================================================== | |
instruction | IPC ( rel[%]), CPI ( rel[%]) | |
------------------------------------------+--------------------------------------------------------- | |
m128 addps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 aesdec | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m128 aesdeclast | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m128 aesenc | 2.00-1.00 ( 100.4[%]), 0.50-1.00 ( -50.1[%]) | |
m128 aesenclast | 2.00-1.00 ( 100.4[%]), 0.50-1.00 ( -50.1[%]) | |
m128 blendps | 3.00-3.00 ( 0.0[%]), 0.33-0.33 ( -0.0[%]) | |
m128 blendvps | 2.00-1.97 ( 1.4[%]), 0.50-0.51 ( -1.4[%]) | |
m128 cvtps2dq | 1.00-2.00 ( -50.0[%]), 1.00-0.50 ( 100.0[%]) | |
m128 divpd | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 divps | 0.29-0.33 ( -14.3[%]), 3.50-3.00 ( 16.6[%]) | |
m128 dpps | 0.25-0.63 ( -60.0[%]), 4.00-1.60 ( 150.3[%]) | |
m128 haddps | 0.50-0.50 ( -0.0[%]), 2.00-2.00 ( 0.0[%]) | |
m128 loadps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 movaps [mem] | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 movdqu [mem+1] | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 movdqu [mem+2MB-1] (cross page) | 1.00-0.26 ( 289.8[%]), 1.00-3.90 ( -74.3[%]) | |
m128 movdqu [mem+63] (cross cache) | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m128 movq->movq | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 mulps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 padd | 3.00-3.00 ( 0.0[%]), 0.33-0.33 ( -0.0[%]) | |
m128 pclmulqdq | 0.50-1.00 ( -50.0[%]), 2.00-1.00 ( 100.0[%]) | |
m128 pcmpestri | 0.33-0.25 ( 34.3[%]), 3.00-4.03 ( -25.5[%]) | |
m128 pcmpestrm | 0.33-0.20 ( 67.6[%]), 3.00-5.03 ( -40.3[%]) | |
m128 pcmpistri | 0.50-0.33 ( 50.1[%]), 2.00-3.00 ( -33.4[%]) | |
m128 pcmpistrm | 0.50-0.33 ( 50.0[%]), 2.00-3.00 ( -33.3[%]) | |
m128 phaddd | 0.50-0.50 ( 0.0[%]), 2.00-2.00 ( -0.0[%]) | |
m128 pinsrd | 0.78-0.50 ( 55.8[%]), 1.28-2.00 ( -35.8[%]) | |
m128 pmovmskb | 1.00-1.00 ( 0.1[%]), 1.00-1.00 ( -0.1[%]) | |
m128 pmuldq | 1.00-2.00 ( -50.0[%]), 1.00-0.50 ( 100.0[%]) | |
m128 pmullw | 1.00-2.00 ( -50.0[%]), 1.00-0.50 ( 100.0[%]) | |
m128 pshufb | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m128 pxor | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
m128 rcpps | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m128 rsqrtps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 shufps | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m128 vfmapd | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 vfmaps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 xorps | 4.00-3.89 ( 2.7[%]), 0.25-0.26 ( -2.6[%]) | |
m256 gather32(<ld+ins>x8 + perm) | 0.25-0.14 ( 75.0[%]), 4.00-7.00 ( -42.9[%]) | |
m256 gather64(<ld+ins>x4 + perm) | 0.50-0.33 ( 50.0[%]), 2.00-3.00 ( -33.3[%]) | |
m256 movaps [mem] | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vaddps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vdivpd | 0.20-0.12 ( 60.0[%]), 5.00-8.00 ( -37.5[%]) | |
m256 vdivps | 0.29-0.20 ( 42.9[%]), 3.50-5.00 ( -30.0[%]) | |
m256 vfmapd | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vfmaps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vgatherdpd | 0.11-0.25 ( -55.5[%]), 9.00-4.00 ( 124.9[%]) | |
m256 vmovdqu [mem+1] | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vmovdqu [mem+2MB-1] (cross page) | 1.00-0.26 ( 284.5[%]), 1.00-3.85 ( -74.0[%]) | |
m256 vmovdqu [mem+63] (cross cache) | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vmulps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m256 vpaddd | 3.00-3.00 ( 0.0[%]), 0.33-0.33 ( -0.0[%]) | |
m256 vpblendvb | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vperm2f128 | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vpermpd | 0.78-1.00 ( -21.6[%]), 1.28-1.00 ( 27.6[%]) | |
m256 vpermps | 0.50-1.00 ( -50.0[%]), 2.00-1.00 ( 99.9[%]) | |
m256 vpgatherdd | 0.06-0.20 ( -68.7[%]), 16.00-5.00 ( 219.9[%]) | |
m256 vpmovmskb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vpmovsxwd | 0.88-1.00 ( -12.3[%]), 1.14-1.00 ( 14.0[%]) | |
m256 vpshufb | 2.00-1.00 ( 100.1[%]), 0.50-1.00 ( -50.0[%]) | |
m256 vpxor | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
m256 vrcpps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vrsqrtps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vsqrtps | 0.18-0.17 ( 9.1[%]), 5.50-6.00 ( -8.4[%]) | |
m256 vxorps | 4.00-3.89 ( 2.8[%]), 0.25-0.26 ( -2.7[%]) | |
reg64 add | 3.86-3.94 ( -1.8[%]), 0.26-0.25 ( 1.9[%]) | |
reg64 crc32 | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
reg64 lea | 3.86-2.00 ( 93.2[%]), 0.26-0.50 ( -48.2[%]) | |
reg64 load | 1.60-1.60 ( 0.1[%]), 0.63-0.63 ( -0.1[%]) | |
reg64 popcnt | 3.86-1.00 ( 286.4[%]), 0.26-1.00 ( -74.1[%]) | |
reg64 store [mem+0]->load[mem+0] | 0.26-0.74 ( -65.5[%]), 3.91-1.35 ( 190.0[%]) | |
reg64 store [mem+0]->load[mem+1] | 0.07-0.08 ( -7.1[%]), 14.00-13.00 ( 7.7[%]) | |
reg64 xor | 3.88-3.93 ( -1.4[%]), 0.26-0.25 ( 1.4[%]) | |
reg64 xor dst,dst | 3.88-3.94 ( -1.5[%]), 0.26-0.25 ( 1.5[%]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Ryzen 7 3700x vs Ryzen 7 1700 | |
============= LATENCY ============================================================================== | |
instruction | IPC ( rel[%]), CPI ( rel[%]) | |
------------------------------------------+--------------------------------------------------------- | |
m128 addps | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m128 aesdec | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 aesdeclast | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 aesenc | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 aesenclast | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 blendps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 blendvps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 cvtps2dq | 0.33-0.25 ( 33.3[%]), 3.00-4.00 ( -25.0[%]) | |
m128 divpd | 0.08-0.12 ( -38.5[%]), 13.00-8.00 ( 62.5[%]) | |
m128 divps | 0.10-0.10 ( 0.0[%]), 10.00-10.00 ( -0.0[%]) | |
m128 dpps | 0.07-0.07 ( 0.0[%]), 15.00-15.00 ( -0.0[%]) | |
m128 haddps | 0.50-0.50 ( -0.0[%]), 2.00-2.00 ( 0.0[%]) | |
m128 loadps->movq | 0.11-0.11 ( 0.0[%]), 9.00-9.00 ( -0.0[%]) | |
m128 movaps [mem] | 0.11-0.11 ( 0.0[%]), 9.00-9.00 ( -0.0[%]) | |
m128 movdqu [mem+1] | 0.10-0.10 ( 0.0[%]), 10.00-10.00 ( -0.0[%]) | |
m128 movdqu [mem+2MB-1] (cross page) | 0.09-0.09 ( 0.3[%]), 11.00-11.03 ( -0.3[%]) | |
m128 movdqu [mem+63] (cross cache) | 0.09-0.09 ( 0.1[%]), 11.00-11.01 ( -0.1[%]) | |
m128 movq->movq | 0.17-0.17 ( 0.2[%]), 6.00-6.01 ( -0.2[%]) | |
m128 mulps | 0.33-0.33 ( 0.3[%]), 3.00-3.01 ( -0.3[%]) | |
m128 padd | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 pclmulqdq | 0.22-0.22 ( -1.7[%]), 4.58-4.50 ( 1.7[%]) | |
m128 pcmpestri->movq | 0.09-0.09 ( 0.0[%]), 11.00-11.00 ( -0.0[%]) | |
m128 pcmpestrm | 0.14-0.12 ( 15.9[%]), 7.33-8.50 ( -13.7[%]) | |
m128 pcmpistri->movq | 0.09-0.09 ( 0.0[%]), 11.00-11.00 ( -0.0[%]) | |
m128 pcmpistrm | 0.14-0.14 ( 0.0[%]), 7.00-7.00 ( -0.0[%]) | |
m128 phaddd | 0.50-0.50 ( -0.0[%]), 2.00-2.00 ( 0.0[%]) | |
m128 pinsrd | 0.56-0.59 ( -4.7[%]), 1.79-1.70 ( 5.0[%]) | |
m128 pinsrd->pextr | 0.12-0.12 ( 0.0[%]), 8.00-8.00 ( -0.0[%]) | |
m128 pmovmskb->movq | 0.17-0.17 ( 0.0[%]), 6.00-6.00 ( -0.0[%]) | |
m128 pmuldq | 0.33-0.33 ( 0.1[%]), 3.00-3.00 ( -0.1[%]) | |
m128 pmullw | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m128 pshufb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 pxor | 4.00-4.00 ( 0.0[%]), 0.25-0.25 ( -0.0[%]) | |
m128 rcpps | 0.20-0.20 ( 0.1[%]), 5.00-5.01 ( -0.1[%]) | |
m128 rsqrtps | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m128 shufps | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 vfmapd | 0.20-0.20 ( -0.0[%]), 5.00-5.00 ( 0.0[%]) | |
m128 vfmaps | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m128 xorps | 4.00-4.00 ( 0.0[%]), 0.25-0.25 ( -0.0[%]) | |
m256 gather32(<ld+ins>x8 + perm) | 0.05-0.06 ( -7.3[%]), 18.75-17.39 ( 7.8[%]) | |
m256 gather64(<ld+ins>x4 + perm) | 0.07-0.08 ( -12.4[%]), 14.86-13.02 ( 14.2[%]) | |
m256 movaps [mem] | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vaddps | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m256 vdivpd | 0.08-0.12 ( -38.5[%]), 13.00-8.00 ( 62.5[%]) | |
m256 vdivps | 0.10-0.10 ( 0.0[%]), 10.00-10.00 ( -0.0[%]) | |
m256 vfmapd | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m256 vfmaps | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m256 vgatherdpd | 0.05-0.06 ( -4.5[%]), 19.00-18.14 ( 4.7[%]) | |
m256 vmovdqu [mem+1] | 1.00-0.67 ( 50.0[%]), 1.00-1.50 ( -33.3[%]) | |
m256 vmovdqu [mem+2MB-1] (cross page) | 1.00-0.67 ( 50.0[%]), 1.00-1.50 ( -33.3[%]) | |
m256 vmovdqu [mem+63] (cross cache) | 1.00-0.67 ( 50.0[%]), 1.00-1.50 ( -33.3[%]) | |
m256 vmulps | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m256 vpaddd | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m256 vpblendvb | 1.00-0.50 ( 100.1[%]), 1.00-2.00 ( -50.0[%]) | |
m256 vperm2f128 | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m256 vpermpd | 0.17-0.50 ( -66.6[%]), 6.00-2.01 ( 199.1[%]) | |
m256 vpermps | 0.12-0.20 ( -37.5[%]), 8.00-5.00 ( 60.0[%]) | |
m256 vpgatherdd | 0.04-0.05 ( -11.2[%]), 24.00-21.31 ( 12.6[%]) | |
m256 vpmovsxwd | 0.25-0.50 ( -50.0[%]), 4.00-2.00 ( 100.0[%]) | |
m256 vpshufb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vpxor | 4.00-2.00 ( 100.0[%]), 0.25-0.50 ( -50.0[%]) | |
m256 vrcpps | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m256 vrsqrtps | 0.20-0.20 ( 0.0[%]), 5.00-5.00 ( -0.0[%]) | |
m256 vsqrtps | 0.07-0.12 ( -42.8[%]), 14.00-8.00 ( 75.0[%]) | |
m256 vxorps | 4.00-2.00 ( 100.0[%]), 0.25-0.50 ( -50.0[%]) | |
reg64 add | 1.00-0.95 ( 4.9[%]), 1.00-1.05 ( -4.7[%]) | |
reg64 crc32 | 0.33-0.33 ( 0.5[%]), 3.00-3.02 ( -0.5[%]) | |
reg64 lea | 1.00-1.00 ( 0.1[%]), 1.00-1.00 ( -0.1[%]) | |
reg64 load | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
reg64 popcnt | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
reg64 store [mem+0]->load[mem+0] | 0.03-0.04 ( -30.3[%]), 37.56-26.18 ( 43.5[%]) | |
reg64 store [mem+0]->load[mem+1] | 0.03-0.03 ( -16.2[%]), 37.38-31.32 ( 19.3[%]) | |
reg64 xor | 3.88-3.88 ( -0.0[%]), 0.26-0.26 ( 0.0[%]) | |
reg64 xor dst,dst | 3.88-3.88 ( 0.0[%]), 0.26-0.26 ( -0.0[%]) | |
============= THROUGHPUT =========================================================================== | |
instruction | IPC ( rel[%]), CPI ( rel[%]) | |
------------------------------------------+--------------------------------------------------------- | |
m128 addps | 2.00-2.00 ( 0.1[%]), 0.50-0.50 ( -0.1[%]) | |
m128 aesdec | 2.00-1.95 ( 2.3[%]), 0.50-0.51 ( -2.3[%]) | |
m128 aesdeclast | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 aesenc | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 aesenclast | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( 0.0[%]) | |
m128 blendps | 3.00-2.00 ( 50.0[%]), 0.33-0.50 ( -33.3[%]) | |
m128 blendvps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 cvtps2dq | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 divpd | 0.20-0.25 ( -20.0[%]), 5.00-4.00 ( 25.0[%]) | |
m128 divps | 0.29-0.33 ( -14.3[%]), 3.50-3.00 ( 16.7[%]) | |
m128 dpps | 0.25-0.25 ( 0.0[%]), 4.00-4.00 ( -0.0[%]) | |
m128 haddps | 0.50-0.50 ( 0.1[%]), 2.00-2.00 ( -0.1[%]) | |
m128 loadps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 movaps [mem] | 2.00-2.00 ( 0.1[%]), 0.50-0.50 ( -0.1[%]) | |
m128 movdqu [mem+1] | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 movdqu [mem+2MB-1] (cross page) | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 movdqu [mem+63] (cross cache) | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m128 movq->movq | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 mulps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 padd | 3.00-3.00 ( 0.0[%]), 0.33-0.33 ( -0.0[%]) | |
m128 pclmulqdq | 0.50-0.50 ( 0.0[%]), 2.00-2.00 ( -0.0[%]) | |
m128 pcmpestri | 0.33-0.33 ( 0.0[%]), 3.00-3.00 ( -0.0[%]) | |
m128 pcmpestrm | 0.33-0.33 ( 0.4[%]), 3.00-3.01 ( -0.4[%]) | |
m128 pcmpistri | 0.50-0.50 ( -0.0[%]), 2.00-2.00 ( 0.0[%]) | |
m128 pcmpistrm | 0.50-0.50 ( 0.0[%]), 2.00-2.00 ( -0.0[%]) | |
m128 phaddd | 0.50-0.50 ( 0.0[%]), 2.00-2.00 ( -0.0[%]) | |
m128 pinsrd | 0.78-0.76 ( 2.1[%]), 1.28-1.31 ( -2.0[%]) | |
m128 pmovmskb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m128 pmuldq | 1.00-1.00 ( 0.1[%]), 1.00-1.00 ( -0.1[%]) | |
m128 pmullw | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m128 pshufb | 2.00-2.00 ( 0.1[%]), 0.50-0.50 ( -0.1[%]) | |
m128 pxor | 4.00-4.00 ( 0.0[%]), 0.25-0.25 ( -0.0[%]) | |
m128 rcpps | 1.00-0.99 ( 0.9[%]), 1.00-1.01 ( -0.9[%]) | |
m128 rsqrtps | 1.00-1.00 ( -0.0[%]), 1.00-1.00 ( 0.0[%]) | |
m128 shufps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 vfmapd | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 vfmaps | 2.00-2.00 ( 0.0[%]), 0.50-0.50 ( -0.0[%]) | |
m128 xorps | 4.00-4.00 ( -0.1[%]), 0.25-0.25 ( 0.1[%]) | |
m256 gather32(<ld+ins>x8 + perm) | 0.25-0.20 ( 25.7[%]), 4.00-5.03 ( -20.4[%]) | |
m256 gather64(<ld+ins>x4 + perm) | 0.50-0.33 ( 51.4[%]), 2.00-3.03 ( -33.9[%]) | |
m256 movaps [mem] | 2.00-0.99 ( 101.4[%]), 0.50-1.01 ( -50.4[%]) | |
m256 vaddps | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m256 vdivpd | 0.20-0.12 ( 60.1[%]), 5.00-8.01 ( -37.5[%]) | |
m256 vdivps | 0.29-0.17 ( 71.4[%]), 3.50-6.00 ( -41.7[%]) | |
m256 vfmapd | 2.00-1.00 ( 100.0[%]), 0.50-1.00 ( -50.0[%]) | |
m256 vfmaps | 2.00-0.97 ( 106.6[%]), 0.50-1.03 ( -51.6[%]) | |
m256 vgatherdpd | 0.11-0.08 ( 33.3[%]), 9.00-12.00 ( -25.0[%]) | |
m256 vmovdqu [mem+1] | 2.00-0.67 ( 200.0[%]), 0.50-1.50 ( -66.7[%]) | |
m256 vmovdqu [mem+2MB-1] (cross page) | 1.00-0.67 ( 50.0[%]), 1.00-1.50 ( -33.3[%]) | |
m256 vmovdqu [mem+63] (cross cache) | 1.00-0.67 ( 50.0[%]), 1.00-1.50 ( -33.3[%]) | |
m256 vmulps | 2.00-0.99 ( 102.6[%]), 0.50-1.01 ( -50.6[%]) | |
m256 vpaddd | 3.00-1.50 ( 100.0[%]), 0.33-0.67 ( -50.0[%]) | |
m256 vpblendvb | 1.00-0.50 ( 100.0[%]), 1.00-2.00 ( -50.0[%]) | |
m256 vperm2f128 | 1.00-0.31 ( 220.9[%]), 1.00-3.21 ( -68.8[%]) | |
m256 vpermpd | 0.78-0.50 ( 56.8[%]), 1.28-2.00 ( -36.2[%]) | |
m256 vpermps | 0.50-0.25 ( 100.0[%]), 2.00-4.00 ( -50.0[%]) | |
m256 vpgatherdd | 0.06-0.05 ( 25.0[%]), 16.00-20.00 ( -20.0[%]) | |
m256 vpmovmskb | 1.00-1.00 ( 0.0[%]), 1.00-1.00 ( -0.0[%]) | |
m256 vpmovsxwd | 0.88-0.50 ( 75.4[%]), 1.14-2.00 ( -43.0[%]) | |
m256 vpshufb | 2.00-1.00 ( 100.1[%]), 0.50-1.00 ( -50.0[%]) | |
m256 vpxor | 4.00-2.00 ( 100.0[%]), 0.25-0.50 ( -50.0[%]) | |
m256 vrcpps | 1.00-0.50 ( 100.0[%]), 1.00-2.00 ( -50.0[%]) | |
m256 vrsqrtps | 1.00-0.50 ( 100.0[%]), 1.00-2.00 ( -50.0[%]) | |
m256 vsqrtps | 0.18-0.12 ( 45.5[%]), 5.50-8.00 ( -31.2[%]) | |
m256 vxorps | 4.00-2.00 ( 100.1[%]), 0.25-0.50 ( -50.0[%]) | |
reg64 add | 3.86-3.88 ( -0.4[%]), 0.26-0.26 ( 0.4[%]) | |
reg64 crc32 | 1.00-0.33 ( 201.6[%]), 1.00-3.02 ( -66.8[%]) | |
reg64 lea | 3.86-3.88 ( -0.4[%]), 0.26-0.26 ( 0.4[%]) | |
reg64 load | 1.60-1.60 ( 0.0[%]), 0.63-0.63 ( -0.0[%]) | |
reg64 popcnt | 3.86-3.88 ( -0.4[%]), 0.26-0.26 ( 0.4[%]) | |
reg64 store [mem+0]->load[mem+0] | 0.26-0.36 ( -29.9[%]), 3.91-2.75 ( 42.6[%]) | |
reg64 store [mem+0]->load[mem+1] | 0.07-0.06 ( 21.4[%]), 14.00-17.00 ( -17.7[%]) | |
reg64 xor | 3.88-3.69 ( 5.1[%]), 0.26-0.27 ( -4.8[%]) | |
reg64 xor dst,dst | 3.88-3.87 ( 0.1[%]), 0.26-0.26 ( -0.1[%]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment