Skip to content

Instantly share code, notes, and snippets.

@magurosan
Created December 6, 2020 15:43
Show Gist options
  • Save magurosan/87bb3d4e0cabd41a091e9f4224112f97 to your computer and use it in GitHub Desktop.
Save magurosan/87bb3d4e0cabd41a091e9f4224112f97 to your computer and use it in GitHub Desktop.
class inst l/t cpi ipc
reg64 add latency 3.633499e-01 2.752168e+00
reg64 add throughput 7.145596e-02 1.399463e+01
reg64 lea latency 1.119077e-02 8.935934e+01
reg64 lea throughput 1.119077e-02 8.935934e+01
reg64 xor dst,dst latency 4.510581e-02 2.217009e+01
reg64 xor dst,dst throughput 4.510581e-02 2.217009e+01
reg64 xor latency 4.510093e-02 2.217249e+01
reg64 xor throughput 4.511082e-02 2.216763e+01
reg64 load latency 9.582788e-01 1.043538e+00
reg64 load throughput 1.342345e-01 7.449647e+00
reg64 crc32 latency 9.472221e-01 1.055719e+00
reg64 crc32 throughput 3.129443e-01 3.195457e+00
reg64 store [mem+0]->load[mem+0] latency 1.687144e+00 5.927175e-01
reg64 store [mem+0]->load[mem+0] throughput 5.448710e-01 1.835297e+00
reg64 store [mem+0]->load[mem+1] latency 7.018959e+00 1.424713e-01
reg64 store [mem+0]->load[mem+1] throughput 4.747778e+00 2.106248e-01
m128 pxor latency 4.335679e-02 2.306444e+01
m128 pxor throughput 4.335679e-02 2.306444e+01
m128 padd latency 6.302022e-01 1.586792e+00
m128 padd throughput 8.102692e-02 1.234158e+01
m128 pmuldq latency 1.682997e+00 5.941782e-01
m128 pmuldq throughput 2.510370e-01 3.983477e+00
m128 loadps throughput 1.141760e-01 8.758407e+00
m128 loadps->movq latency 3.398277e+00 2.942667e-01
m128 movq->movq latency 3.278414e+00 3.050255e-01
m128 movq->movq throughput 2.713468e-01 3.685320e+00
m128 xorps latency 4.518467e-02 2.213140e+01
m128 xorps throughput 4.649162e-02 2.150925e+01
m128 addps latency 9.679529e-01 1.033108e+00
m128 addps throughput 8.258099e-02 1.210932e+01
m128 mulps latency 1.264175e+00 7.910299e-01
m128 mulps throughput 1.084629e-01 9.219746e+00
m128 divps latency 2.586126e+00 3.866787e-01
m128 divps throughput 3.146066e-01 3.178573e+00
m128 divpd latency 3.137659e+00 3.187089e-01
m128 divpd throughput 3.229953e-01 3.096020e+00
m128 rsqrtps latency 3.154154e+00 3.170422e-01
m128 rsqrtps throughput 6.301138e-01 1.587015e+00
m128 rcpps latency 2.534363e+00 3.945765e-01
m128 rcpps throughput 3.159487e-01 3.165071e+00
m128 blendps latency 1.856126e-02 5.387566e+01
m128 blendps throughput 1.856126e-02 5.387566e+01
m128 blendvps latency 6.315356e-01 1.583442e+00
m128 blendvps throughput 1.603938e-01 6.234654e+00
m128 pshufb latency 1.275875e+00 7.837759e-01
m128 pshufb throughput 2.496772e-01 4.005171e+00
m128 shufps latency 6.248067e-01 1.600495e+00
m128 shufps throughput 7.803366e-02 1.281498e+01
m128 pmullw latency 9.441287e-01 1.059178e+00
m128 pmullw throughput 7.971997e-02 1.254391e+01
m128 phaddd latency 6.258575e-01 1.597808e+00
m128 phaddd throughput 7.803345e-02 1.281502e+01
m128 haddps latency 6.289217e-01 1.590023e+00
m128 haddps throughput 8.076223e-02 1.238203e+01
m128 pinsrd latency 6.544501e-01 1.528000e+00
m128 pinsrd throughput 1.040478e-01 9.610970e+00
m128 pinsrd->pextr latency 3.793549e+00 2.636054e-01
m128 dpps latency 4.425632e+00 2.259564e-01
m128 dpps throughput 5.016681e-01 1.993350e+00
m128 cvtps2dq latency 1.874447e+00 5.334907e-01
m128 cvtps2dq throughput 1.560582e-01 6.407866e+00
m128 pmovmskb throughput 6.174071e-01 1.619677e+00
m128 pmovmskb->movq latency 4.693552e+00 2.130582e-01
m128 movq->movq latency 3.176239e+00 3.148378e-01
m128 movaps [mem] latency 3.159753e+00 3.164805e-01
m128 movaps [mem] throughput 1.040389e-01 9.611792e+00
m128 movdqu [mem+1] latency 3.137933e+00 3.186811e-01
m128 movdqu [mem+1] throughput 1.040476e-01 9.610989e+00
m128 movdqu [mem+63] (cross cache) latency 3.153933e+00 3.170644e-01
m128 movdqu [mem+63] (cross cache) throughput 1.040476e-01 9.610989e+00
m128 movdqu [mem+2MB-1] (cross page) latency 1.161660e+01 8.608370e-02
m128 movdqu [mem+2MB-1] (cross page) throughput 9.724988e+00 1.028279e-01
m128 pcmpistri throughput 2.923561e+02 3.420486e-03
m128 pcmpistri->movq latency 2.967785e+02 3.369517e-03
m128 pcmpistrm throughput 2.921591e+02 3.422793e-03
m128 pcmpistrm latency 2.947030e+02 3.393247e-03
m128 pcmpestri throughput 3.875678e+02 2.580194e-03
m128 pcmpestri->movq latency 3.854777e+02 2.594183e-03
m128 pcmpestrm throughput 3.861277e+02 2.589817e-03
m128 pcmpestrm latency 3.852831e+02 2.595494e-03
reg64 popcnt latency 5.338525e+00 1.873177e-01
reg64 popcnt throughput 6.804913e-01 1.469526e+00
m128 aesenc latency 1.637176e+00 6.108078e-01
m128 aesenc throughput 1.994063e-01 5.014887e+00
m128 aesenclast latency 1.738663e+00 5.751545e-01
m128 aesenclast throughput 2.129873e-01 4.695116e+00
m128 aesdec latency 1.826321e+00 5.475487e-01
m128 aesdec throughput 2.443350e-01 4.092742e+00
m128 aesdeclast latency 1.700251e+00 5.881484e-01
m128 aesdeclast throughput 1.824254e-01 5.481693e+00
m128 pclmulqdq latency 9.710346e-01 1.029829e+00
m128 pclmulqdq throughput 8.076223e-02 1.238203e+01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment