Last active
August 29, 2015 14:23
-
-
Save gut/6d8a4feb1e7992ad6a85 to your computer and use it in GitHub Desktop.
Testing "LD vs LQ" and "STD vs STQ" in POWER8. Why is "LQ" slower than "LD"?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ for i in ld-test.c lq-test.c std-test.c stq-test.c; do | |
/opt/at8.0/bin/gcc -O2 $i -o ${i//.c} | |
echo -n -e "\n$i" | |
time ./${i//.c} | |
done | |
ld-test.c | |
real 0m2.594s | |
user 0m2.550s | |
sys 0m0.000s | |
lq-test.c | |
real 0m3.714s | |
user 0m3.640s | |
sys 0m0.010s | |
std-test.c | |
real 0m3.674s | |
user 0m3.590s | |
sys 0m0.000s | |
stq-test.c | |
real 0m1.839s | |
user 0m1.790s | |
sys 0m0.000s | |
$ # Why was "lq" slower than "ld"? It makes no sense to use it if it's slower than the more flexible "ld" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * ld-test: micro-benchmark for the PowerPC doubleword load ("ld").
 *
 * Allocates a 1024-byte heap buffer and, max_iter times, issues 128
 * back-to-back 8-byte loads covering offsets 0..1016 of that buffer.
 * Every load targets r4; the loaded values are discarded.  The program
 * prints nothing on success and is meant to be timed externally.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void)
{
    void *mem = malloc(1024);
    int loop;
    static const int max_iter = 100000000;

    if (mem == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    for (loop = 0; loop < max_iter; loop++)
    {
        /*
         * "b" (base register) instead of "r": in D-form addressing a base
         * register field of 0 means the literal value 0, not r0, so the
         * buffer pointer must never be allocated to r0.
         * "memory" is listed because the asm reads memory that is not
         * described by any operand.
         */
        __asm__ volatile(
            "ld 4, 0(%0)\n\t"
            "ld 4, 8(%0)\n\t"
            "ld 4, 16(%0)\n\t"
            "ld 4, 24(%0)\n\t"
            "ld 4, 32(%0)\n\t"
            "ld 4, 40(%0)\n\t"
            "ld 4, 48(%0)\n\t"
            "ld 4, 56(%0)\n\t"
            "ld 4, 64(%0)\n\t"
            "ld 4, 72(%0)\n\t"
            "ld 4, 80(%0)\n\t"
            "ld 4, 88(%0)\n\t"
            "ld 4, 96(%0)\n\t"
            "ld 4, 104(%0)\n\t"
            "ld 4, 112(%0)\n\t"
            "ld 4, 120(%0)\n\t"
            "ld 4, 128(%0)\n\t"
            "ld 4, 136(%0)\n\t"
            "ld 4, 144(%0)\n\t"
            "ld 4, 152(%0)\n\t"
            "ld 4, 160(%0)\n\t"
            "ld 4, 168(%0)\n\t"
            "ld 4, 176(%0)\n\t"
            "ld 4, 184(%0)\n\t"
            "ld 4, 192(%0)\n\t"
            "ld 4, 200(%0)\n\t"
            "ld 4, 208(%0)\n\t"
            "ld 4, 216(%0)\n\t"
            "ld 4, 224(%0)\n\t"
            "ld 4, 232(%0)\n\t"
            "ld 4, 240(%0)\n\t"
            "ld 4, 248(%0)\n\t"
            "ld 4, 256(%0)\n\t"
            "ld 4, 264(%0)\n\t"
            "ld 4, 272(%0)\n\t"
            "ld 4, 280(%0)\n\t"
            "ld 4, 288(%0)\n\t"
            "ld 4, 296(%0)\n\t"
            "ld 4, 304(%0)\n\t"
            "ld 4, 312(%0)\n\t"
            "ld 4, 320(%0)\n\t"
            "ld 4, 328(%0)\n\t"
            "ld 4, 336(%0)\n\t"
            "ld 4, 344(%0)\n\t"
            "ld 4, 352(%0)\n\t"
            "ld 4, 360(%0)\n\t"
            "ld 4, 368(%0)\n\t"
            "ld 4, 376(%0)\n\t"
            "ld 4, 384(%0)\n\t"
            "ld 4, 392(%0)\n\t"
            "ld 4, 400(%0)\n\t"
            "ld 4, 408(%0)\n\t"
            "ld 4, 416(%0)\n\t"
            "ld 4, 424(%0)\n\t"
            "ld 4, 432(%0)\n\t"
            "ld 4, 440(%0)\n\t"
            "ld 4, 448(%0)\n\t"
            "ld 4, 456(%0)\n\t"
            "ld 4, 464(%0)\n\t"
            "ld 4, 472(%0)\n\t"
            "ld 4, 480(%0)\n\t"
            "ld 4, 488(%0)\n\t"
            "ld 4, 496(%0)\n\t"
            "ld 4, 504(%0)\n\t"
            "ld 4, 512(%0)\n\t"
            "ld 4, 520(%0)\n\t"
            "ld 4, 528(%0)\n\t"
            "ld 4, 536(%0)\n\t"
            "ld 4, 544(%0)\n\t"
            "ld 4, 552(%0)\n\t"
            "ld 4, 560(%0)\n\t"
            "ld 4, 568(%0)\n\t"
            "ld 4, 576(%0)\n\t"
            "ld 4, 584(%0)\n\t"
            "ld 4, 592(%0)\n\t"
            "ld 4, 600(%0)\n\t"
            "ld 4, 608(%0)\n\t"
            "ld 4, 616(%0)\n\t"
            "ld 4, 624(%0)\n\t"
            "ld 4, 632(%0)\n\t"
            "ld 4, 640(%0)\n\t"
            "ld 4, 648(%0)\n\t"
            "ld 4, 656(%0)\n\t"
            "ld 4, 664(%0)\n\t"
            "ld 4, 672(%0)\n\t"
            "ld 4, 680(%0)\n\t"
            "ld 4, 688(%0)\n\t"
            "ld 4, 696(%0)\n\t"
            "ld 4, 704(%0)\n\t"
            "ld 4, 712(%0)\n\t"
            "ld 4, 720(%0)\n\t"
            "ld 4, 728(%0)\n\t"
            "ld 4, 736(%0)\n\t"
            "ld 4, 744(%0)\n\t"
            "ld 4, 752(%0)\n\t"
            "ld 4, 760(%0)\n\t"
            "ld 4, 768(%0)\n\t"
            "ld 4, 776(%0)\n\t"
            "ld 4, 784(%0)\n\t"
            "ld 4, 792(%0)\n\t"
            "ld 4, 800(%0)\n\t"
            "ld 4, 808(%0)\n\t"
            "ld 4, 816(%0)\n\t"
            "ld 4, 824(%0)\n\t"
            "ld 4, 832(%0)\n\t"
            "ld 4, 840(%0)\n\t"
            "ld 4, 848(%0)\n\t"
            "ld 4, 856(%0)\n\t"
            "ld 4, 864(%0)\n\t"
            "ld 4, 872(%0)\n\t"
            "ld 4, 880(%0)\n\t"
            "ld 4, 888(%0)\n\t"
            "ld 4, 896(%0)\n\t"
            "ld 4, 904(%0)\n\t"
            "ld 4, 912(%0)\n\t"
            "ld 4, 920(%0)\n\t"
            "ld 4, 928(%0)\n\t"
            "ld 4, 936(%0)\n\t"
            "ld 4, 944(%0)\n\t"
            "ld 4, 952(%0)\n\t"
            "ld 4, 960(%0)\n\t"
            "ld 4, 968(%0)\n\t"
            "ld 4, 976(%0)\n\t"
            "ld 4, 984(%0)\n\t"
            "ld 4, 992(%0)\n\t"
            "ld 4, 1000(%0)\n\t"
            "ld 4, 1008(%0)\n\t"
            "ld 4, 1016(%0)\n\t"
            : /* no outputs */
            : "b"(mem)
            : "r4", "memory");
    }

    free(mem);
    return 0;
}
// vim: noai:ts=4:sw=4:sts=4:et: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * lq-test: micro-benchmark for the PowerPC quadword load ("lq").
 *
 * Allocates a 1024-byte heap buffer and, max_iter times, issues 64
 * back-to-back 16-byte loads covering offsets 0..1008 of that buffer.
 * Every load targets the register pair starting at r4 (r4 and r5 are
 * declared clobbered); the loaded values are discarded.  The program
 * prints nothing on success and is meant to be timed externally.
 *
 * NOTE(review): the loads assume the buffer returned by malloc is suitably
 * aligned for quadword access -- confirm for the target C library.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void)
{
    void *mem = malloc(1024);
    int loop;
    static const int max_iter = 100000000;

    if (mem == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    for (loop = 0; loop < max_iter; loop++)
    {
        /*
         * "b" (base register) instead of "r": in D-form/DQ-form addressing
         * a base register field of 0 means the literal value 0, not r0, so
         * the buffer pointer must never be allocated to r0.
         * "memory" is listed because the asm reads memory that is not
         * described by any operand.
         */
        __asm__ volatile(
            "lq 4, 0(%0)\n\t"
            "lq 4, 16(%0)\n\t"
            "lq 4, 32(%0)\n\t"
            "lq 4, 48(%0)\n\t"
            "lq 4, 64(%0)\n\t"
            "lq 4, 80(%0)\n\t"
            "lq 4, 96(%0)\n\t"
            "lq 4, 112(%0)\n\t"
            "lq 4, 128(%0)\n\t"
            "lq 4, 144(%0)\n\t"
            "lq 4, 160(%0)\n\t"
            "lq 4, 176(%0)\n\t"
            "lq 4, 192(%0)\n\t"
            "lq 4, 208(%0)\n\t"
            "lq 4, 224(%0)\n\t"
            "lq 4, 240(%0)\n\t"
            "lq 4, 256(%0)\n\t"
            "lq 4, 272(%0)\n\t"
            "lq 4, 288(%0)\n\t"
            "lq 4, 304(%0)\n\t"
            "lq 4, 320(%0)\n\t"
            "lq 4, 336(%0)\n\t"
            "lq 4, 352(%0)\n\t"
            "lq 4, 368(%0)\n\t"
            "lq 4, 384(%0)\n\t"
            "lq 4, 400(%0)\n\t"
            "lq 4, 416(%0)\n\t"
            "lq 4, 432(%0)\n\t"
            "lq 4, 448(%0)\n\t"
            "lq 4, 464(%0)\n\t"
            "lq 4, 480(%0)\n\t"
            "lq 4, 496(%0)\n\t"
            "lq 4, 512(%0)\n\t"
            "lq 4, 528(%0)\n\t"
            "lq 4, 544(%0)\n\t"
            "lq 4, 560(%0)\n\t"
            "lq 4, 576(%0)\n\t"
            "lq 4, 592(%0)\n\t"
            "lq 4, 608(%0)\n\t"
            "lq 4, 624(%0)\n\t"
            "lq 4, 640(%0)\n\t"
            "lq 4, 656(%0)\n\t"
            "lq 4, 672(%0)\n\t"
            "lq 4, 688(%0)\n\t"
            "lq 4, 704(%0)\n\t"
            "lq 4, 720(%0)\n\t"
            "lq 4, 736(%0)\n\t"
            "lq 4, 752(%0)\n\t"
            "lq 4, 768(%0)\n\t"
            "lq 4, 784(%0)\n\t"
            "lq 4, 800(%0)\n\t"
            "lq 4, 816(%0)\n\t"
            "lq 4, 832(%0)\n\t"
            "lq 4, 848(%0)\n\t"
            "lq 4, 864(%0)\n\t"
            "lq 4, 880(%0)\n\t"
            "lq 4, 896(%0)\n\t"
            "lq 4, 912(%0)\n\t"
            "lq 4, 928(%0)\n\t"
            "lq 4, 944(%0)\n\t"
            "lq 4, 960(%0)\n\t"
            "lq 4, 976(%0)\n\t"
            "lq 4, 992(%0)\n\t"
            "lq 4, 1008(%0)\n\t"
            : /* no outputs */
            : "b"(mem)
            : "r4", "r5", "memory");
    }

    free(mem);
    return 0;
}
// vim: noai:ts=4:sw=4:sts=4:et: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * std-test: micro-benchmark for the PowerPC doubleword store ("std").
 *
 * Allocates a 1024-byte heap buffer and, max_iter times, copies the loop
 * counter into r4 and issues 128 back-to-back 8-byte stores of r4 covering
 * offsets 0..1016 of that buffer.  The program prints nothing on success
 * and is meant to be timed externally.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void)
{
    void *mem = malloc(1024);
    int loop;
    static const int max_iter = 100000000;

    if (mem == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    for (loop = 0; loop < max_iter; loop++)
    {
        /*
         * "mr" instead of "li": operand %1 is a register, and GCC prints it
         * as a bare register number, so "li 4, %1" loaded that *number* as
         * an immediate rather than the value of `loop`.
         * "b" (base register) instead of "r" for the pointer: a base
         * register field of 0 means the literal value 0, not r0.
         * "memory" is listed because the asm writes to the buffer.
         */
        __asm__ volatile(
            "mr 4, %1\n\t"
            "std 4, 0(%0)\n\t"
            "std 4, 8(%0)\n\t"
            "std 4, 16(%0)\n\t"
            "std 4, 24(%0)\n\t"
            "std 4, 32(%0)\n\t"
            "std 4, 40(%0)\n\t"
            "std 4, 48(%0)\n\t"
            "std 4, 56(%0)\n\t"
            "std 4, 64(%0)\n\t"
            "std 4, 72(%0)\n\t"
            "std 4, 80(%0)\n\t"
            "std 4, 88(%0)\n\t"
            "std 4, 96(%0)\n\t"
            "std 4, 104(%0)\n\t"
            "std 4, 112(%0)\n\t"
            "std 4, 120(%0)\n\t"
            "std 4, 128(%0)\n\t"
            "std 4, 136(%0)\n\t"
            "std 4, 144(%0)\n\t"
            "std 4, 152(%0)\n\t"
            "std 4, 160(%0)\n\t"
            "std 4, 168(%0)\n\t"
            "std 4, 176(%0)\n\t"
            "std 4, 184(%0)\n\t"
            "std 4, 192(%0)\n\t"
            "std 4, 200(%0)\n\t"
            "std 4, 208(%0)\n\t"
            "std 4, 216(%0)\n\t"
            "std 4, 224(%0)\n\t"
            "std 4, 232(%0)\n\t"
            "std 4, 240(%0)\n\t"
            "std 4, 248(%0)\n\t"
            "std 4, 256(%0)\n\t"
            "std 4, 264(%0)\n\t"
            "std 4, 272(%0)\n\t"
            "std 4, 280(%0)\n\t"
            "std 4, 288(%0)\n\t"
            "std 4, 296(%0)\n\t"
            "std 4, 304(%0)\n\t"
            "std 4, 312(%0)\n\t"
            "std 4, 320(%0)\n\t"
            "std 4, 328(%0)\n\t"
            "std 4, 336(%0)\n\t"
            "std 4, 344(%0)\n\t"
            "std 4, 352(%0)\n\t"
            "std 4, 360(%0)\n\t"
            "std 4, 368(%0)\n\t"
            "std 4, 376(%0)\n\t"
            "std 4, 384(%0)\n\t"
            "std 4, 392(%0)\n\t"
            "std 4, 400(%0)\n\t"
            "std 4, 408(%0)\n\t"
            "std 4, 416(%0)\n\t"
            "std 4, 424(%0)\n\t"
            "std 4, 432(%0)\n\t"
            "std 4, 440(%0)\n\t"
            "std 4, 448(%0)\n\t"
            "std 4, 456(%0)\n\t"
            "std 4, 464(%0)\n\t"
            "std 4, 472(%0)\n\t"
            "std 4, 480(%0)\n\t"
            "std 4, 488(%0)\n\t"
            "std 4, 496(%0)\n\t"
            "std 4, 504(%0)\n\t"
            "std 4, 512(%0)\n\t"
            "std 4, 520(%0)\n\t"
            "std 4, 528(%0)\n\t"
            "std 4, 536(%0)\n\t"
            "std 4, 544(%0)\n\t"
            "std 4, 552(%0)\n\t"
            "std 4, 560(%0)\n\t"
            "std 4, 568(%0)\n\t"
            "std 4, 576(%0)\n\t"
            "std 4, 584(%0)\n\t"
            "std 4, 592(%0)\n\t"
            "std 4, 600(%0)\n\t"
            "std 4, 608(%0)\n\t"
            "std 4, 616(%0)\n\t"
            "std 4, 624(%0)\n\t"
            "std 4, 632(%0)\n\t"
            "std 4, 640(%0)\n\t"
            "std 4, 648(%0)\n\t"
            "std 4, 656(%0)\n\t"
            "std 4, 664(%0)\n\t"
            "std 4, 672(%0)\n\t"
            "std 4, 680(%0)\n\t"
            "std 4, 688(%0)\n\t"
            "std 4, 696(%0)\n\t"
            "std 4, 704(%0)\n\t"
            "std 4, 712(%0)\n\t"
            "std 4, 720(%0)\n\t"
            "std 4, 728(%0)\n\t"
            "std 4, 736(%0)\n\t"
            "std 4, 744(%0)\n\t"
            "std 4, 752(%0)\n\t"
            "std 4, 760(%0)\n\t"
            "std 4, 768(%0)\n\t"
            "std 4, 776(%0)\n\t"
            "std 4, 784(%0)\n\t"
            "std 4, 792(%0)\n\t"
            "std 4, 800(%0)\n\t"
            "std 4, 808(%0)\n\t"
            "std 4, 816(%0)\n\t"
            "std 4, 824(%0)\n\t"
            "std 4, 832(%0)\n\t"
            "std 4, 840(%0)\n\t"
            "std 4, 848(%0)\n\t"
            "std 4, 856(%0)\n\t"
            "std 4, 864(%0)\n\t"
            "std 4, 872(%0)\n\t"
            "std 4, 880(%0)\n\t"
            "std 4, 888(%0)\n\t"
            "std 4, 896(%0)\n\t"
            "std 4, 904(%0)\n\t"
            "std 4, 912(%0)\n\t"
            "std 4, 920(%0)\n\t"
            "std 4, 928(%0)\n\t"
            "std 4, 936(%0)\n\t"
            "std 4, 944(%0)\n\t"
            "std 4, 952(%0)\n\t"
            "std 4, 960(%0)\n\t"
            "std 4, 968(%0)\n\t"
            "std 4, 976(%0)\n\t"
            "std 4, 984(%0)\n\t"
            "std 4, 992(%0)\n\t"
            "std 4, 1000(%0)\n\t"
            "std 4, 1008(%0)\n\t"
            "std 4, 1016(%0)\n\t"
            : /* no outputs */
            : "b"(mem), "r"(loop)
            : "r4", "memory");
    }

    free(mem);
    return 0;
}
// vim: noai:ts=4:sw=4:sts=4:et: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * stq-test: micro-benchmark for the PowerPC quadword store ("stq").
 *
 * Allocates a 1024-byte heap buffer and, max_iter times, copies the loop
 * counter into r4 and r5 and issues 64 back-to-back 16-byte stores of the
 * register pair starting at r4, covering offsets 0..1008 of that buffer.
 * The program prints nothing on success and is meant to be timed
 * externally.
 *
 * NOTE(review): the stores assume the buffer returned by malloc is suitably
 * aligned for quadword access -- confirm for the target C library.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void)
{
    void *mem = malloc(1024);
    int loop;
    static const int max_iter = 100000000;

    if (mem == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    for (loop = 0; loop < max_iter; loop++)
    {
        /*
         * "mr" instead of "li": operand %1 is a register, and GCC prints it
         * as a bare register number, so "li N, %1" loaded that *number* as
         * an immediate rather than the value of `loop`.
         * "b" (base register) instead of "r" for the pointer: a base
         * register field of 0 means the literal value 0, not r0.
         * "memory" is listed because the asm writes to the buffer.
         */
        __asm__ volatile(
            "mr 4, %1\n\t"
            "mr 5, %1\n\t"
            "stq 4, 0(%0)\n\t"
            "stq 4, 16(%0)\n\t"
            "stq 4, 32(%0)\n\t"
            "stq 4, 48(%0)\n\t"
            "stq 4, 64(%0)\n\t"
            "stq 4, 80(%0)\n\t"
            "stq 4, 96(%0)\n\t"
            "stq 4, 112(%0)\n\t"
            "stq 4, 128(%0)\n\t"
            "stq 4, 144(%0)\n\t"
            "stq 4, 160(%0)\n\t"
            "stq 4, 176(%0)\n\t"
            "stq 4, 192(%0)\n\t"
            "stq 4, 208(%0)\n\t"
            "stq 4, 224(%0)\n\t"
            "stq 4, 240(%0)\n\t"
            "stq 4, 256(%0)\n\t"
            "stq 4, 272(%0)\n\t"
            "stq 4, 288(%0)\n\t"
            "stq 4, 304(%0)\n\t"
            "stq 4, 320(%0)\n\t"
            "stq 4, 336(%0)\n\t"
            "stq 4, 352(%0)\n\t"
            "stq 4, 368(%0)\n\t"
            "stq 4, 384(%0)\n\t"
            "stq 4, 400(%0)\n\t"
            "stq 4, 416(%0)\n\t"
            "stq 4, 432(%0)\n\t"
            "stq 4, 448(%0)\n\t"
            "stq 4, 464(%0)\n\t"
            "stq 4, 480(%0)\n\t"
            "stq 4, 496(%0)\n\t"
            "stq 4, 512(%0)\n\t"
            "stq 4, 528(%0)\n\t"
            "stq 4, 544(%0)\n\t"
            "stq 4, 560(%0)\n\t"
            "stq 4, 576(%0)\n\t"
            "stq 4, 592(%0)\n\t"
            "stq 4, 608(%0)\n\t"
            "stq 4, 624(%0)\n\t"
            "stq 4, 640(%0)\n\t"
            "stq 4, 656(%0)\n\t"
            "stq 4, 672(%0)\n\t"
            "stq 4, 688(%0)\n\t"
            "stq 4, 704(%0)\n\t"
            "stq 4, 720(%0)\n\t"
            "stq 4, 736(%0)\n\t"
            "stq 4, 752(%0)\n\t"
            "stq 4, 768(%0)\n\t"
            "stq 4, 784(%0)\n\t"
            "stq 4, 800(%0)\n\t"
            "stq 4, 816(%0)\n\t"
            "stq 4, 832(%0)\n\t"
            "stq 4, 848(%0)\n\t"
            "stq 4, 864(%0)\n\t"
            "stq 4, 880(%0)\n\t"
            "stq 4, 896(%0)\n\t"
            "stq 4, 912(%0)\n\t"
            "stq 4, 928(%0)\n\t"
            "stq 4, 944(%0)\n\t"
            "stq 4, 960(%0)\n\t"
            "stq 4, 976(%0)\n\t"
            "stq 4, 992(%0)\n\t"
            "stq 4, 1008(%0)\n\t"
            : /* no outputs */
            : "b"(mem), "r"(loop)
            : "r4", "r5", "memory");
    }

    free(mem);
    return 0;
}
// vim: noai:ts=4:sw=4:sts=4:et: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment