Last active
July 22, 2020 04:26
-
-
Save MaskRay/ba7547f6a587176666ff77527cf12c4d to your computer and use it in GitHub Desktop.
PowerPC64 vectorization bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bad: file format elf64-powerpcle | |
Disassembly of section .init: | |
0000000010000440 <00000024.plt_call.__gmon_start__>: | |
10000440: 18 00 41 f8 std r2,24(r1) | |
10000444: 18 81 82 e9 ld r12,-32488(r2) | |
10000448: a6 03 89 7d mtctr r12 | |
1000044c: 20 04 80 4e bctr | |
... | |
0000000010000460 <_init>: | |
10000460: 02 10 40 3c lis r2,4098 | |
10000464: 00 7f 42 38 addi r2,r2,32512 | |
10000468: a6 02 08 7c mflr r0 | |
1000046c: 10 00 01 f8 std r0,16(r1) | |
10000470: a1 ff 21 f8 stdu r1,-96(r1) | |
10000474: 00 00 00 60 nop | |
10000478: 08 80 02 e8 ld r0,-32760(r2) | |
1000047c: 00 00 a0 2f cmpdi cr7,r0,0 | |
10000480: 0c 00 fe 41 beq+ cr7,1000048c <_init+0x2c> | |
10000484: bd ff ff 4b bl 10000440 <00000024.plt_call.__gmon_start__> | |
10000488: 18 00 41 e8 ld r2,24(r1) | |
1000048c: 60 00 21 38 addi r1,r1,96 | |
10000490: 10 00 01 e8 ld r0,16(r1) | |
10000494: a6 03 08 7c mtlr r0 | |
10000498: 20 00 80 4e blr | |
Disassembly of section .text: | |
00000000100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>: | |
100004a0: 18 00 41 f8 std r2,24(r1) | |
100004a4: 10 81 82 e9 ld r12,-32496(r2) | |
100004a8: a6 03 89 7d mtctr r12 | |
100004ac: 20 04 80 4e bctr | |
... | |
00000000100004c0 <_start>: | |
100004c0: 02 10 40 3c lis r2,4098 | |
100004c4: 00 7f 42 38 addi r2,r2,32512 | |
100004c8: 78 0b 29 7c mr r9,r1 | |
100004cc: e4 06 21 78 rldicr r1,r1,0,59 | |
100004d0: 00 00 00 38 li r0,0 | |
100004d4: 81 ff 21 f8 stdu r1,-128(r1) | |
100004d8: a6 03 08 7c mtlr r0 | |
100004dc: 00 00 01 f8 std r0,0(r1) | |
100004e0: 10 80 02 e9 ld r8,-32752(r2) | |
100004e4: bc ff ff 4b b 100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17> | |
100004e8: 00 00 00 60 nop | |
... | |
100004f4: 40 20 0c 00 .long 0xc2040 | |
100004f8: 2c 00 00 00 .long 0x2c | |
100004fc: 06 00 5f 73 andi. r31,r26,6 | |
10000500: 74 61 72 74 andis. r18,r3,24948 | |
10000504: 00 00 00 60 nop | |
10000508: 00 00 00 60 nop | |
1000050c: 00 00 00 60 nop | |
0000000010000510 <deregister_tm_clones>: | |
10000510: 02 10 40 3c lis r2,4098 | |
10000514: 00 7f 42 38 addi r2,r2,32512 | |
10000518: 00 00 00 60 nop | |
1000051c: 00 00 00 60 nop | |
10000520: 30 81 62 38 addi r3,r2,-32464 | |
10000524: 30 81 22 39 addi r9,r2,-32464 | |
10000528: 00 18 a9 7f cmpd cr7,r9,r3 | |
1000052c: 20 00 9e 4d beqlr cr7 | |
10000530: 00 00 00 60 nop | |
10000534: 18 80 22 e9 ld r9,-32744(r2) | |
10000538: 00 00 a9 2f cmpdi cr7,r9,0 | |
1000053c: 20 00 9e 4d beqlr cr7 | |
10000540: a6 02 08 7c mflr r0 | |
10000544: 78 4b 2c 7d mr r12,r9 | |
10000548: a6 03 29 7d mtctr r9 | |
1000054c: 10 00 01 f8 std r0,16(r1) | |
10000550: e1 ff 21 f8 stdu r1,-32(r1) | |
10000554: 18 00 41 f8 std r2,24(r1) | |
10000558: 21 04 80 4e bctrl | |
1000055c: 18 00 41 e8 ld r2,24(r1) | |
10000560: 20 00 21 38 addi r1,r1,32 | |
10000564: 10 00 01 e8 ld r0,16(r1) | |
10000568: a6 03 08 7c mtlr r0 | |
1000056c: 20 00 80 4e blr | |
0000000010000570 <register_tm_clones>: | |
10000570: 02 10 40 3c lis r2,4098 | |
10000574: 00 7f 42 38 addi r2,r2,32512 | |
10000578: 00 00 00 60 nop | |
1000057c: 00 00 00 60 nop | |
10000580: 30 81 62 38 addi r3,r2,-32464 | |
10000584: 30 81 82 38 addi r4,r2,-32464 | |
10000588: 50 20 83 7c subf r4,r3,r4 | |
1000058c: 74 1e 84 7c sradi r4,r4,3 | |
10000590: 74 0e 84 7c sradi r4,r4,1 | |
10000594: 95 01 84 7c addze. r4,r4 | |
10000598: 20 00 82 4d beqlr | |
1000059c: 00 00 00 60 nop | |
100005a0: 20 80 22 e9 ld r9,-32736(r2) | |
100005a4: 00 00 a9 2f cmpdi cr7,r9,0 | |
100005a8: 20 00 9e 4d beqlr cr7 | |
100005ac: a6 02 08 7c mflr r0 | |
100005b0: 78 4b 2c 7d mr r12,r9 | |
100005b4: a6 03 29 7d mtctr r9 | |
100005b8: 10 00 01 f8 std r0,16(r1) | |
100005bc: e1 ff 21 f8 stdu r1,-32(r1) | |
100005c0: 18 00 41 f8 std r2,24(r1) | |
100005c4: 21 04 80 4e bctrl | |
100005c8: 18 00 41 e8 ld r2,24(r1) | |
100005cc: 20 00 21 38 addi r1,r1,32 | |
100005d0: 10 00 01 e8 ld r0,16(r1) | |
100005d4: a6 03 08 7c mtlr r0 | |
100005d8: 20 00 80 4e blr | |
100005dc: 00 00 42 60 ori r2,r2,0 | |
00000000100005e0 <__do_global_dtors_aux>: | |
100005e0: 02 10 40 3c lis r2,4098 | |
100005e4: 00 7f 42 38 addi r2,r2,32512 | |
100005e8: f8 ff e1 fb std r31,-8(r1) | |
100005ec: 00 00 00 60 nop | |
100005f0: d1 ff 21 f8 stdu r1,-48(r1) | |
100005f4: 30 81 22 89 lbz r9,-32464(r2) | |
100005f8: 00 00 89 2f cmpwi cr7,r9,0 | |
100005fc: 20 00 9e 40 bne cr7,1000061c <__do_global_dtors_aux+0x3c> | |
10000600: a6 02 08 7c mflr r0 | |
10000604: 40 00 01 f8 std r0,64(r1) | |
10000608: 11 ff ff 4b bl 10000518 <deregister_tm_clones+0x8> | |
1000060c: 40 00 01 e8 ld r0,64(r1) | |
10000610: 01 00 20 39 li r9,1 | |
10000614: 30 81 22 99 stb r9,-32464(r2) | |
10000618: a6 03 08 7c mtlr r0 | |
1000061c: 30 00 21 38 addi r1,r1,48 | |
10000620: f8 ff e1 eb ld r31,-8(r1) | |
10000624: 20 00 80 4e blr | |
10000628: 00 00 00 60 nop | |
1000062c: 00 00 42 60 ori r2,r2,0 | |
0000000010000630 <frame_dummy>: | |
10000630: 02 10 40 3c lis r2,4098 | |
10000634: 00 7f 42 38 addi r2,r2,32512 | |
10000638: 40 ff ff 4b b 10000578 <register_tm_clones+0x8> | |
1000063c: 00 00 00 60 nop | |
0000000010000640 <min(signed char, signed char)>: | |
struct sint8x8 { int8x8 s; }; | |
struct sint8x16 { int8x16 s; }; | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
signed char min(signed char a, signed char b) { | |
10000640: f7 ff 61 98 stb r3,-9(r1) | |
10000644: f6 ff 81 98 stb r4,-10(r1) | |
return a < b ? a : b; | |
10000648: f7 ff 61 88 lbz r3,-9(r1) | |
1000064c: 74 07 63 7c extsb r3,r3 | |
10000650: f6 ff 81 88 lbz r4,-10(r1) | |
10000654: 74 07 84 7c extsb r4,r4 | |
10000658: 00 20 03 7c cmpw r3,r4 | |
1000065c: 10 00 80 40 bge 1000066c <min(signed char, signed char)+0x2c> | |
10000660: f7 ff 61 88 lbz r3,-9(r1) | |
10000664: f0 ff 61 90 stw r3,-16(r1) | |
10000668: 0c 00 00 48 b 10000674 <min(signed char, signed char)+0x34> | |
1000066c: f6 ff 61 88 lbz r3,-10(r1) | |
10000670: f0 ff 61 90 stw r3,-16(r1) | |
10000674: f0 ff 61 80 lwz r3,-16(r1) | |
10000678: 74 07 63 7c extsb r3,r3 | |
1000067c: 20 00 80 4e blr | |
... | |
1000068c: 00 00 00 60 nop | |
0000000010000690 <split_by(sint8x2)>: | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
v.s[i] = min(a.s[i], b.s[i]); | |
return v; | |
} | |
pair<sint8x1> split_by(sint8x2 v) { | |
10000690: f0 ff 81 38 addi r4,r1,-16 | |
10000694: e8 ff 61 b0 sth r3,-24(r1) | |
sint8x1 a, b; | |
a.s[0] = v.s[0]; | |
10000698: e8 ff 61 a0 lhz r3,-24(r1) | |
1000069c: e0 ff 61 98 stb r3,-32(r1) | |
100006a0: e8 ff 61 38 addi r3,r1,-24 | |
b.s[0] = v.s[1]; | |
100006a4: 5b 1e 40 7c lxsihzx vs34,0,r3 | |
100006a8: 4c 12 43 10 vsplth v2,v2,3 | |
100006ac: ec 11 42 10 vsldoi v2,v2,v2,7 | |
100006b0: d8 ff 61 38 addi r3,r1,-40 | |
100006b4: 1b 1f 40 7c stxsibx vs34,0,r3 | |
return {{a, b}}; | |
100006b8: 78 23 83 7c mr r3,r4 | |
100006bc: e0 ff a1 38 addi r5,r1,-32 | |
100006c0: 00 00 a5 88 lbz r5,0(r5) | |
100006c4: 00 00 a3 98 stb r5,0(r3) | |
100006c8: 01 00 64 38 addi r3,r4,1 | |
100006cc: d8 ff 81 38 addi r4,r1,-40 | |
100006d0: 00 00 84 88 lbz r4,0(r4) | |
100006d4: 00 00 83 98 stb r4,0(r3) | |
100006d8: f0 ff 61 a0 lhz r3,-16(r1) | |
100006dc: 20 00 80 4e blr | |
... | |
100006ec: 00 00 00 60 nop | |
00000000100006f0 <split_by(sint8x4)>: | |
} | |
pair<sint8x2> split_by(sint8x4 v) { | |
100006f0: f0 ff 81 38 addi r4,r1,-16 | |
100006f4: e8 ff 61 90 stw r3,-24(r1) | |
sint8x2 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1); | |
100006f8: e8 ff 61 80 lwz r3,-24(r1) | |
100006fc: e0 ff 61 b0 sth r3,-32(r1) | |
10000700: e8 ff 61 38 addi r3,r1,-24 | |
b.s = __builtin_shufflevector(v.s, v.s, 2, 3); | |
10000704: ee 1e 00 7c lfiwzx f0,0,r3 | |
10000708: 81 05 40 f0 xscpsgndp vs34,vs0,vs0 | |
1000070c: 4c 12 42 10 vsplth v2,v2,2 | |
10000710: 2c 12 42 10 vsldoi v2,v2,v2,8 | |
10000714: d8 ff 61 38 addi r3,r1,-40 | |
10000718: 5b 1f 40 7c stxsihx vs34,0,r3 | |
return {{a, b}}; | |
1000071c: 78 23 83 7c mr r3,r4 | |
10000720: e0 ff a1 38 addi r5,r1,-32 | |
10000724: 00 00 a5 a0 lhz r5,0(r5) | |
10000728: 00 00 a3 b0 sth r5,0(r3) | |
1000072c: 02 00 64 38 addi r3,r4,2 | |
10000730: d8 ff 81 38 addi r4,r1,-40 | |
10000734: 00 00 84 a0 lhz r4,0(r4) | |
10000738: 00 00 83 b0 sth r4,0(r3) | |
1000073c: f0 ff 61 80 lwz r3,-16(r1) | |
10000740: 20 00 80 4e blr | |
... | |
0000000010000750 <split_by(sint8x8)>: | |
} | |
pair<sint8x4> split_by(sint8x8 v) { | |
10000750: f0 ff 81 38 addi r4,r1,-16 | |
10000754: e8 ff 61 f8 std r3,-24(r1) | |
sint8x4 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3); | |
10000758: e8 ff 61 e8 ld r3,-24(r1) | |
1000075c: e0 ff 61 90 stw r3,-32(r1) | |
10000760: 0c 00 60 38 li r3,12 | |
10000764: e8 ff a1 38 addi r5,r1,-24 | |
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7); | |
10000768: d8 1a 05 7c lxvwsx vs0,r5,r3 | |
1000076c: 10 02 00 f0 xxsldwi vs0,vs0,vs0,2 | |
10000770: d8 ff 61 38 addi r3,r1,-40 | |
10000774: ae 1f 00 7c stfiwx f0,0,r3 | |
return {{a, b}}; | |
10000778: 78 23 83 7c mr r3,r4 | |
1000077c: e0 ff a1 38 addi r5,r1,-32 | |
10000780: 00 00 a5 80 lwz r5,0(r5) | |
10000784: 00 00 a3 90 stw r5,0(r3) | |
10000788: 04 00 64 38 addi r3,r4,4 | |
1000078c: d8 ff 81 38 addi r4,r1,-40 | |
10000790: 00 00 84 80 lwz r4,0(r4) | |
10000794: 00 00 83 90 stw r4,0(r3) | |
10000798: f0 ff 61 e8 ld r3,-16(r1) | |
1000079c: 20 00 80 4e blr | |
... | |
100007ac: 00 00 00 60 nop | |
00000000100007b0 <split_by(sint8x16)>: | |
} | |
pair<sint8x8> split_by(sint8x16 v) { | |
100007b0: dd ff 41 f4 stxv vs34,-48(r1) | |
sint8x8 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7); | |
100007b4: d0 ff 61 e8 ld r3,-48(r1) | |
100007b8: c8 ff 61 f8 std r3,-56(r1) | |
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15); | |
100007bc: d8 ff 61 38 addi r3,r1,-40 | |
100007c0: 99 1a 40 7c lxvdsx vs34,0,r3 | |
100007c4: 56 12 02 f0 xxswapd vs0,vs34 | |
100007c8: c0 ff 01 d8 stfd f0,-64(r1) | |
return {{a, b}}; | |
100007cc: c8 ff 61 e8 ld r3,-56(r1) | |
100007d0: e8 ff 61 f8 std r3,-24(r1) | |
100007d4: c0 ff 61 e8 ld r3,-64(r1) | |
100007d8: f0 ff 61 f8 std r3,-16(r1) | |
100007dc: e8 ff 61 e8 ld r3,-24(r1) | |
100007e0: f0 ff 81 e8 ld r4,-16(r1) | |
100007e4: 20 00 80 4e blr | |
... | |
100007f4: 00 00 00 60 nop | |
100007f8: 00 00 00 60 nop | |
100007fc: 00 00 00 60 nop | |
0000000010000800 <hmin(sint8x1)>: | |
} | |
signed char hmin(sint8x1 v) { | |
10000800: f0 ff 61 98 stb r3,-16(r1) | |
return v.s[0]; | |
10000804: f0 ff 61 88 lbz r3,-16(r1) | |
10000808: 74 07 63 7c extsb r3,r3 | |
1000080c: 20 00 80 4e blr | |
... | |
1000081c: 00 00 00 60 nop | |
0000000010000820 <hmin(sint8x2)>: | |
} | |
signed char hmin(sint8x2 v) { | |
10000820: 02 10 40 3c lis r2,4098 | |
10000824: 00 7f 42 38 addi r2,r2,32512 | |
10000828: a6 02 08 7c mflr r0 | |
1000082c: f8 ff e1 fb std r31,-8(r1) | |
10000830: 10 00 01 f8 std r0,16(r1) | |
10000834: 61 ff 21 f8 stdu r1,-160(r1) | |
10000838: 78 0b 3f 7c mr r31,r1 | |
1000083c: 90 00 7f b0 sth r3,144(r31) | |
auto a = split_by(v); | |
10000840: 80 00 7f 38 addi r3,r31,128 | |
10000844: 90 00 9f 38 addi r4,r31,144 | |
10000848: 00 00 84 a0 lhz r4,0(r4) | |
1000084c: 00 00 83 b0 sth r4,0(r3) | |
10000850: 80 00 7f a0 lhz r3,128(r31) | |
10000854: 3d fe ff 4b bl 10000690 <split_by(sint8x2)> | |
10000858: 00 00 00 60 nop | |
1000085c: 88 00 7f b0 sth r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
10000860: 88 00 7f 38 addi r3,r31,136 | |
10000864: 00 00 80 38 li r4,0 | |
10000868: 99 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const> | |
1000086c: 00 00 00 60 nop | |
10000870: 70 00 7f 98 stb r3,112(r31) | |
10000874: 88 00 7f 38 addi r3,r31,136 | |
10000878: 01 00 80 38 li r4,1 | |
1000087c: 85 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const> | |
10000880: 00 00 00 60 nop | |
10000884: 68 00 7f 98 stb r3,104(r31) | |
10000888: 70 00 7f 38 addi r3,r31,112 | |
1000088c: 68 00 9f 38 addi r4,r31,104 | |
10000890: e9 02 00 48 bl 10000b78 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x8> | |
10000894: 00 00 00 60 nop | |
10000898: 78 00 7f 98 stb r3,120(r31) | |
1000089c: 78 00 7f 88 lbz r3,120(r31) | |
100008a0: 61 ff ff 4b bl 10000800 <hmin(sint8x1)> | |
100008a4: 00 00 00 60 nop | |
100008a8: 74 07 63 7c extsb r3,r3 | |
100008ac: a0 00 21 38 addi r1,r1,160 | |
100008b0: 10 00 01 e8 ld r0,16(r1) | |
100008b4: f8 ff e1 eb ld r31,-8(r1) | |
100008b8: a6 03 08 7c mtlr r0 | |
100008bc: 20 00 80 4e blr | |
... | |
100008cc: 00 00 00 60 nop | |
00000000100008d0 <hmin(sint8x4)>: | |
} | |
signed char hmin(sint8x4 v) { | |
100008d0: 02 10 40 3c lis r2,4098 | |
100008d4: 00 7f 42 38 addi r2,r2,32512 | |
100008d8: a6 02 08 7c mflr r0 | |
100008dc: f8 ff e1 fb std r31,-8(r1) | |
100008e0: 10 00 01 f8 std r0,16(r1) | |
100008e4: 61 ff 21 f8 stdu r1,-160(r1) | |
100008e8: 78 0b 3f 7c mr r31,r1 | |
100008ec: 90 00 7f 90 stw r3,144(r31) | |
auto a = split_by(v); | |
100008f0: 80 00 7f 38 addi r3,r31,128 | |
100008f4: 90 00 9f 38 addi r4,r31,144 | |
100008f8: 00 00 84 80 lwz r4,0(r4) | |
100008fc: 00 00 83 90 stw r4,0(r3) | |
10000900: 80 00 7f 80 lwz r3,128(r31) | |
10000904: ed fd ff 4b bl 100006f0 <split_by(sint8x4)> | |
10000908: 00 00 00 60 nop | |
1000090c: 88 00 7f 90 stw r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
10000910: 88 00 7f 38 addi r3,r31,136 | |
10000914: 00 00 80 38 li r4,0 | |
10000918: 09 04 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const> | |
1000091c: 00 00 00 60 nop | |
10000920: 70 00 7f b0 sth r3,112(r31) | |
10000924: 88 00 7f 38 addi r3,r31,136 | |
10000928: 01 00 80 38 li r4,1 | |
1000092c: f5 03 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const> | |
10000930: 00 00 00 60 nop | |
10000934: 68 00 7f b0 sth r3,104(r31) | |
10000938: 70 00 7f 38 addi r3,r31,112 | |
1000093c: 68 00 9f 38 addi r4,r31,104 | |
10000940: 09 03 00 48 bl 10000c48 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x8> | |
10000944: 00 00 00 60 nop | |
10000948: 78 00 7f b0 sth r3,120(r31) | |
1000094c: 78 00 7f a0 lhz r3,120(r31) | |
10000950: d9 fe ff 4b bl 10000828 <hmin(sint8x2)+0x8> | |
10000954: 00 00 00 60 nop | |
10000958: 74 07 63 7c extsb r3,r3 | |
1000095c: a0 00 21 38 addi r1,r1,160 | |
10000960: 10 00 01 e8 ld r0,16(r1) | |
10000964: f8 ff e1 eb ld r31,-8(r1) | |
10000968: a6 03 08 7c mtlr r0 | |
1000096c: 20 00 80 4e blr | |
... | |
1000097c: 00 00 00 60 nop | |
0000000010000980 <hmin(sint8x8)>: | |
} | |
signed char hmin(sint8x8 v) { | |
10000980: 02 10 40 3c lis r2,4098 | |
10000984: 00 7f 42 38 addi r2,r2,32512 | |
10000988: a6 02 08 7c mflr r0 | |
1000098c: f8 ff e1 fb std r31,-8(r1) | |
10000990: 10 00 01 f8 std r0,16(r1) | |
10000994: 61 ff 21 f8 stdu r1,-160(r1) | |
10000998: 78 0b 3f 7c mr r31,r1 | |
1000099c: 90 00 7f f8 std r3,144(r31) | |
auto a = split_by(v); | |
100009a0: 80 00 7f 38 addi r3,r31,128 | |
100009a4: 90 00 9f 38 addi r4,r31,144 | |
100009a8: 00 00 84 e8 ld r4,0(r4) | |
100009ac: 00 00 83 f8 std r4,0(r3) | |
100009b0: 80 00 7f e8 ld r3,128(r31) | |
100009b4: 9d fd ff 4b bl 10000750 <split_by(sint8x8)> | |
100009b8: 00 00 00 60 nop | |
100009bc: 88 00 7f f8 std r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
100009c0: 88 00 7f 38 addi r3,r31,136 | |
100009c4: 00 00 80 38 li r4,0 | |
100009c8: 69 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const> | |
100009cc: 00 00 00 60 nop | |
100009d0: 70 00 7f 90 stw r3,112(r31) | |
100009d4: 88 00 7f 38 addi r3,r31,136 | |
100009d8: 01 00 80 38 li r4,1 | |
100009dc: 55 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const> | |
100009e0: 00 00 00 60 nop | |
100009e4: 68 00 7f 90 stw r3,104(r31) | |
100009e8: 70 00 7f 38 addi r3,r31,112 | |
100009ec: 68 00 9f 38 addi r4,r31,104 | |
100009f0: 79 03 00 48 bl 10000d68 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x8> | |
100009f4: 00 00 00 60 nop | |
100009f8: 78 00 7f 90 stw r3,120(r31) | |
100009fc: 78 00 7f 80 lwz r3,120(r31) | |
10000a00: d9 fe ff 4b bl 100008d8 <hmin(sint8x4)+0x8> | |
10000a04: 00 00 00 60 nop | |
10000a08: 74 07 63 7c extsb r3,r3 | |
10000a0c: a0 00 21 38 addi r1,r1,160 | |
10000a10: 10 00 01 e8 ld r0,16(r1) | |
10000a14: f8 ff e1 eb ld r31,-8(r1) | |
10000a18: a6 03 08 7c mtlr r0 | |
10000a1c: 20 00 80 4e blr | |
... | |
10000a2c: 00 00 00 60 nop | |
0000000010000a30 <hmin(sint8x16)>: | |
} | |
signed char hmin(sint8x16 v) { | |
10000a30: 02 10 40 3c lis r2,4098 | |
10000a34: 00 7f 42 38 addi r2,r2,32512 | |
10000a38: a6 02 08 7c mflr r0 | |
10000a3c: f8 ff e1 fb std r31,-8(r1) | |
10000a40: 10 00 01 f8 std r0,16(r1) | |
10000a44: 41 ff 21 f8 stdu r1,-192(r1) | |
10000a48: 78 0b 3f 7c mr r31,r1 | |
10000a4c: ad 00 5f f4 stxv vs34,160(r31) | |
auto a = split_by(v); | |
10000a50: a8 00 7f e8 ld r3,168(r31) | |
10000a54: 88 00 7f f8 std r3,136(r31) | |
10000a58: a0 00 7f e8 ld r3,160(r31) | |
10000a5c: 80 00 7f f8 std r3,128(r31) | |
10000a60: 81 00 1f f4 lxv vs0,128(r31) | |
10000a64: 91 04 40 f0 xxlor vs34,vs0,vs0 | |
10000a68: 49 fd ff 4b bl 100007b0 <split_by(sint8x16)> | |
10000a6c: 90 00 7f f8 std r3,144(r31) | |
10000a70: 98 00 9f f8 std r4,152(r31) | |
return hmin(min(a[0], a[1])); | |
10000a74: 90 00 7f 38 addi r3,r31,144 | |
10000a78: 00 00 80 38 li r4,0 | |
10000a7c: c5 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const> | |
10000a80: 00 00 00 60 nop | |
10000a84: 70 00 7f f8 std r3,112(r31) | |
10000a88: 90 00 7f 38 addi r3,r31,144 | |
10000a8c: 01 00 80 38 li r4,1 | |
10000a90: b1 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const> | |
10000a94: 00 00 00 60 nop | |
10000a98: 68 00 7f f8 std r3,104(r31) | |
10000a9c: 70 00 7f 38 addi r3,r31,112 | |
10000aa0: 68 00 9f 38 addi r4,r31,104 | |
10000aa4: d5 03 00 48 bl 10000e78 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x8> | |
10000aa8: 00 00 00 60 nop | |
10000aac: 78 00 7f f8 std r3,120(r31) | |
10000ab0: 78 00 7f e8 ld r3,120(r31) | |
10000ab4: d5 fe ff 4b bl 10000988 <hmin(sint8x8)+0x8> | |
10000ab8: 00 00 00 60 nop | |
10000abc: 74 07 63 7c extsb r3,r3 | |
10000ac0: c0 00 21 38 addi r1,r1,192 | |
10000ac4: 10 00 01 e8 ld r0,16(r1) | |
10000ac8: f8 ff e1 eb ld r31,-8(r1) | |
10000acc: a6 03 08 7c mtlr r0 | |
10000ad0: 20 00 80 4e blr | |
... | |
0000000010000ae0 <main>: | |
} | |
int main(void) { | |
10000ae0: 02 10 40 3c lis r2,4098 | |
10000ae4: 00 7f 42 38 addi r2,r2,32512 | |
10000ae8: a6 02 08 7c mflr r0 | |
10000aec: f8 ff e1 fb std r31,-8(r1) | |
10000af0: 10 00 01 f8 std r0,16(r1) | |
10000af4: a1 ff 21 f8 stdu r1,-96(r1) | |
10000af8: 78 0b 3f 7c mr r31,r1 | |
10000afc: 00 00 60 38 li r3,0 | |
10000b00: 54 00 7f 90 stw r3,84(r31) | |
const long data[] = {0x00010102464c457f, 0}; | |
10000b04: fe ff 62 3c addis r3,r2,-2 | |
10000b08: f8 91 63 38 addi r3,r3,-28168 | |
10000b0c: 08 00 83 e8 ld r4,8(r3) | |
10000b10: 48 00 9f f8 std r4,72(r31) | |
10000b14: 00 00 63 e8 ld r3,0(r3) | |
10000b18: 40 00 7f f8 std r3,64(r31) | |
sint8x16 v; | |
__builtin_memcpy(&v.s, data, 16); | |
10000b1c: 48 00 7f e8 ld r3,72(r31) | |
10000b20: 38 00 7f f8 std r3,56(r31) | |
10000b24: 40 00 7f e8 ld r3,64(r31) | |
10000b28: 30 00 7f f8 std r3,48(r31) | |
return hmin(v); | |
10000b2c: 38 00 7f e8 ld r3,56(r31) | |
10000b30: 28 00 7f f8 std r3,40(r31) | |
10000b34: 30 00 7f e8 ld r3,48(r31) | |
10000b38: 20 00 7f f8 std r3,32(r31) | |
10000b3c: 21 00 1f f4 lxv vs0,32(r31) | |
10000b40: 91 04 40 f0 xxlor vs34,vs0,vs0 | |
10000b44: f5 fe ff 4b bl 10000a38 <hmin(sint8x16)+0x8> | |
10000b48: 74 07 63 7c extsb r3,r3 | |
10000b4c: b4 07 63 7c extsw r3,r3 | |
10000b50: 60 00 21 38 addi r1,r1,96 | |
10000b54: 10 00 01 e8 ld r0,16(r1) | |
10000b58: f8 ff e1 eb ld r31,-8(r1) | |
10000b5c: a6 03 08 7c mtlr r0 | |
10000b60: 20 00 80 4e blr | |
... | |
0000000010000b70 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)>: | |
T min(const T &a, const T &b) { | |
10000b70: 02 10 40 3c lis r2,4098 | |
10000b74: 00 7f 42 38 addi r2,r2,32512 | |
10000b78: a6 02 08 7c mflr r0 | |
10000b7c: f8 ff e1 fb std r31,-8(r1) | |
10000b80: 10 00 01 f8 std r0,16(r1) | |
10000b84: b1 ff 21 f8 stdu r1,-80(r1) | |
10000b88: 78 0b 3f 7c mr r31,r1 | |
10000b8c: 38 00 7f f8 std r3,56(r31) | |
10000b90: 30 00 9f f8 std r4,48(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000b94: 00 00 60 38 li r3,0 | |
10000b98: 28 00 7f f8 std r3,40(r31) | |
10000b9c: 28 00 7f e8 ld r3,40(r31) | |
10000ba0: 01 00 23 28 cmpldi r3,1 | |
10000ba4: 34 00 80 40 bge 10000bd8 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x68> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000ba8: 38 00 7f e8 ld r3,56(r31) | |
10000bac: 30 00 9f e8 ld r4,48(r31) | |
10000bb0: 00 00 84 88 lbz r4,0(r4) | |
10000bb4: 00 00 63 88 lbz r3,0(r3) | |
10000bb8: 74 07 63 7c extsb r3,r3 | |
10000bbc: 74 07 84 7c extsb r4,r4 | |
10000bc0: 81 fa ff 4b bl 10000640 <min(signed char, signed char)> | |
10000bc4: 40 00 7f 98 stb r3,64(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000bc8: 28 00 7f e8 ld r3,40(r31) | |
10000bcc: 01 00 63 38 addi r3,r3,1 | |
10000bd0: 28 00 7f f8 std r3,40(r31) | |
10000bd4: c8 ff ff 4b b 10000b9c <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x2c> | |
10000bd8: 40 00 7f 88 lbz r3,64(r31) | |
return v; | |
10000bdc: 50 00 21 38 addi r1,r1,80 | |
10000be0: 10 00 01 e8 ld r0,16(r1) | |
10000be4: f8 ff e1 eb ld r31,-8(r1) | |
10000be8: a6 03 08 7c mtlr r0 | |
10000bec: 20 00 80 4e blr | |
... | |
10000bfc: 00 00 00 60 nop | |
0000000010000c00 <pair<sint8x1>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000c00: e8 ff 61 f8 std r3,-24(r1) | |
10000c04: e0 ff 81 f8 std r4,-32(r1) | |
10000c08: e8 ff 61 e8 ld r3,-24(r1) | |
10000c0c: e0 ff 81 e8 ld r4,-32(r1) | |
10000c10: 14 22 63 7c add r3,r3,r4 | |
10000c14: f0 ff 81 38 addi r4,r1,-16 | |
10000c18: 00 00 63 88 lbz r3,0(r3) | |
10000c1c: 00 00 64 98 stb r3,0(r4) | |
10000c20: f0 ff 61 88 lbz r3,-16(r1) | |
10000c24: 20 00 80 4e blr | |
... | |
10000c34: 00 00 00 60 nop | |
10000c38: 00 00 00 60 nop | |
10000c3c: 00 00 00 60 nop | |
0000000010000c40 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)>: | |
T min(const T &a, const T &b) { | |
10000c40: 02 10 40 3c lis r2,4098 | |
10000c44: 00 7f 42 38 addi r2,r2,32512 | |
10000c48: a6 02 08 7c mflr r0 | |
10000c4c: f8 ff e1 fb std r31,-8(r1) | |
10000c50: 10 00 01 f8 std r0,16(r1) | |
10000c54: 91 ff 21 f8 stdu r1,-112(r1) | |
10000c58: 78 0b 3f 7c mr r31,r1 | |
10000c5c: 58 00 7f f8 std r3,88(r31) | |
10000c60: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000c64: 00 00 60 38 li r3,0 | |
10000c68: 48 00 7f f8 std r3,72(r31) | |
10000c6c: 48 00 7f e8 ld r3,72(r31) | |
10000c70: 02 00 23 28 cmpldi r3,2 | |
10000c74: 7c 00 80 40 bge 10000cf0 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0xb0> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000c78: 58 00 7f e8 ld r3,88(r31) | |
10000c7c: 5b 1e 40 7c lxsihzx vs34,0,r3 | |
10000c80: 4c 12 43 10 vsplth v2,v2,3 | |
10000c84: 48 00 7f e8 ld r3,72(r31) | |
10000c88: 50 00 9f e8 ld r4,80(r31) | |
10000c8c: 5b 26 60 7c lxsihzx vs35,0,r4 | |
10000c90: 4c 1a 63 10 vsplth v3,v3,3 | |
10000c94: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000c98: 74 07 84 7c extsb r4,r4 | |
10000c9c: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000ca0: 74 07 63 7c extsb r3,r3 | |
10000ca4: 28 00 7f f8 std r3,40(r31) | |
10000ca8: 78 23 83 7c mr r3,r4 | |
10000cac: 28 00 9f e8 ld r4,40(r31) | |
10000cb0: 91 f9 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000cb4: 48 00 9f e8 ld r4,72(r31) | |
10000cb8: 20 07 84 78 clrldi r4,r4,60 | |
10000cbc: 60 00 bf 38 addi r5,r31,96 | |
10000cc0: 5b 2e 40 7c lxsihzx vs34,0,r5 | |
10000cc4: 4c 12 43 10 vsplth v2,v2,3 | |
10000cc8: 3d 00 5f f4 stxv vs34,48(r31) | |
10000ccc: 30 00 df 38 addi r6,r31,48 | |
10000cd0: ae 21 66 7c stbx r3,r6,r4 | |
10000cd4: 39 00 5f f4 lxv vs34,48(r31) | |
10000cd8: 2c 12 42 10 vsldoi v2,v2,v2,8 | |
10000cdc: 5b 2f 40 7c stxsihx vs34,0,r5 | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000ce0: 48 00 7f e8 ld r3,72(r31) | |
10000ce4: 01 00 63 38 addi r3,r3,1 | |
10000ce8: 48 00 7f f8 std r3,72(r31) | |
10000cec: 80 ff ff 4b b 10000c6c <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x2c> | |
10000cf0: 60 00 7f a0 lhz r3,96(r31) | |
return v; | |
10000cf4: 70 00 21 38 addi r1,r1,112 | |
10000cf8: 10 00 01 e8 ld r0,16(r1) | |
10000cfc: f8 ff e1 eb ld r31,-8(r1) | |
10000d00: a6 03 08 7c mtlr r0 | |
10000d04: 20 00 80 4e blr | |
... | |
10000d14: 00 00 00 60 nop | |
10000d18: 00 00 00 60 nop | |
10000d1c: 00 00 00 60 nop | |
0000000010000d20 <pair<sint8x2>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000d20: e8 ff 61 f8 std r3,-24(r1) | |
10000d24: e0 ff 81 f8 std r4,-32(r1) | |
10000d28: e8 ff 61 e8 ld r3,-24(r1) | |
10000d2c: e0 ff 81 e8 ld r4,-32(r1) | |
10000d30: a4 0f 84 78 rldicr r4,r4,1,62 | |
10000d34: 14 22 63 7c add r3,r3,r4 | |
10000d38: f0 ff 81 38 addi r4,r1,-16 | |
10000d3c: 00 00 63 a0 lhz r3,0(r3) | |
10000d40: 00 00 64 b0 sth r3,0(r4) | |
10000d44: f0 ff 61 a0 lhz r3,-16(r1) | |
10000d48: 20 00 80 4e blr | |
... | |
10000d58: 00 00 00 60 nop | |
10000d5c: 00 00 00 60 nop | |
0000000010000d60 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)>: | |
T min(const T &a, const T &b) { | |
10000d60: 02 10 40 3c lis r2,4098 | |
10000d64: 00 7f 42 38 addi r2,r2,32512 | |
10000d68: a6 02 08 7c mflr r0 | |
10000d6c: f8 ff e1 fb std r31,-8(r1) | |
10000d70: 10 00 01 f8 std r0,16(r1) | |
10000d74: 91 ff 21 f8 stdu r1,-112(r1) | |
10000d78: 78 0b 3f 7c mr r31,r1 | |
10000d7c: 58 00 7f f8 std r3,88(r31) | |
10000d80: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000d84: 00 00 60 38 li r3,0 | |
10000d88: 48 00 7f f8 std r3,72(r31) | |
10000d8c: 48 00 7f e8 ld r3,72(r31) | |
10000d90: 04 00 23 28 cmpldi r3,4 | |
10000d94: 78 00 80 40 bge 10000e0c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0xac> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000d98: 58 00 7f e8 ld r3,88(r31) | |
10000d9c: ee 1e 00 7c lfiwzx f0,0,r3 | |
10000da0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000da4: 48 00 7f e8 ld r3,72(r31) | |
10000da8: 50 00 9f e8 ld r4,80(r31) | |
10000dac: ee 26 00 7c lfiwzx f0,0,r4 | |
10000db0: 51 02 60 f0 xxswapd vs35,vs0 | |
10000db4: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000db8: 74 07 84 7c extsb r4,r4 | |
10000dbc: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000dc0: 74 07 63 7c extsb r3,r3 | |
10000dc4: 28 00 7f f8 std r3,40(r31) | |
10000dc8: 78 23 83 7c mr r3,r4 | |
10000dcc: 28 00 9f e8 ld r4,40(r31) | |
10000dd0: 71 f8 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000dd4: 48 00 9f e8 ld r4,72(r31) | |
10000dd8: 20 07 84 78 clrldi r4,r4,60 | |
10000ddc: 60 00 bf 38 addi r5,r31,96 | |
10000de0: ee 2e 00 7c lfiwzx f0,0,r5 | |
10000de4: 50 02 00 f0 xxswapd vs0,vs0 | |
10000de8: 35 00 1f f4 stxv vs0,48(r31) | |
10000dec: 30 00 bf 38 addi r5,r31,48 | |
10000df0: ae 21 65 7c stbx r3,r5,r4 | |
10000df4: 30 00 7f 80 lwz r3,48(r31) | |
10000df8: 60 00 7f 90 stw r3,96(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000dfc: 48 00 7f e8 ld r3,72(r31) | |
10000e00: 01 00 63 38 addi r3,r3,1 | |
10000e04: 48 00 7f f8 std r3,72(r31) | |
10000e08: 84 ff ff 4b b 10000d8c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x2c> | |
10000e0c: 60 00 7f 80 lwz r3,96(r31) | |
return v; | |
10000e10: 70 00 21 38 addi r1,r1,112 | |
10000e14: 10 00 01 e8 ld r0,16(r1) | |
10000e18: f8 ff e1 eb ld r31,-8(r1) | |
10000e1c: a6 03 08 7c mtlr r0 | |
10000e20: 20 00 80 4e blr | |
... | |
0000000010000e30 <pair<sint8x4>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000e30: e8 ff 61 f8 std r3,-24(r1) | |
10000e34: e0 ff 81 f8 std r4,-32(r1) | |
10000e38: e8 ff 61 e8 ld r3,-24(r1) | |
10000e3c: e0 ff 81 e8 ld r4,-32(r1) | |
10000e40: 64 17 84 78 rldicr r4,r4,2,61 | |
10000e44: 14 22 63 7c add r3,r3,r4 | |
10000e48: f0 ff 81 38 addi r4,r1,-16 | |
10000e4c: 00 00 63 80 lwz r3,0(r3) | |
10000e50: 00 00 64 90 stw r3,0(r4) | |
10000e54: f0 ff 61 80 lwz r3,-16(r1) | |
10000e58: 20 00 80 4e blr | |
... | |
10000e68: 00 00 00 60 nop | |
10000e6c: 00 00 00 60 nop | |
0000000010000e70 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)>: | |
T min(const T &a, const T &b) { | |
10000e70: 02 10 40 3c lis r2,4098 | |
10000e74: 00 7f 42 38 addi r2,r2,32512 | |
10000e78: a6 02 08 7c mflr r0 | |
10000e7c: f8 ff e1 fb std r31,-8(r1) | |
10000e80: 10 00 01 f8 std r0,16(r1) | |
10000e84: 91 ff 21 f8 stdu r1,-112(r1) | |
10000e88: 78 0b 3f 7c mr r31,r1 | |
10000e8c: 58 00 7f f8 std r3,88(r31) | |
10000e90: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000e94: 00 00 60 38 li r3,0 | |
10000e98: 48 00 7f f8 std r3,72(r31) | |
10000e9c: 48 00 7f e8 ld r3,72(r31) | |
10000ea0: 08 00 23 28 cmpldi r3,8 | |
10000ea4: 78 00 80 40 bge 10000f1c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0xac> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000ea8: 58 00 7f e8 ld r3,88(r31) | |
10000eac: 00 00 03 c8 lfd f0,0(r3) | |
10000eb0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000eb4: 48 00 7f e8 ld r3,72(r31) | |
10000eb8: 50 00 9f e8 ld r4,80(r31) | |
10000ebc: 00 00 04 c8 lfd f0,0(r4) | |
10000ec0: 51 02 60 f0 xxswapd vs35,vs0 | |
10000ec4: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000ec8: 74 07 84 7c extsb r4,r4 | |
10000ecc: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000ed0: 74 07 63 7c extsb r3,r3 | |
10000ed4: 28 00 7f f8 std r3,40(r31) | |
10000ed8: 78 23 83 7c mr r3,r4 | |
10000edc: 28 00 9f e8 ld r4,40(r31) | |
10000ee0: 61 f7 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000ee4: 48 00 9f e8 ld r4,72(r31) | |
10000ee8: 20 07 84 78 clrldi r4,r4,60 | |
10000eec: 60 00 1f c8 lfd f0,96(r31) | |
10000ef0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000ef4: 3d 00 5f f4 stxv vs34,48(r31) | |
10000ef8: 30 00 bf 38 addi r5,r31,48 | |
10000efc: ae 21 65 7c stbx r3,r5,r4 | |
10000f00: 31 00 1f f4 lxv vs0,48(r31) | |
10000f04: 50 02 00 f0 xxswapd vs0,vs0 | |
10000f08: 60 00 1f d8 stfd f0,96(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000f0c: 48 00 7f e8 ld r3,72(r31) | |
10000f10: 01 00 63 38 addi r3,r3,1 | |
10000f14: 48 00 7f f8 std r3,72(r31) | |
10000f18: 84 ff ff 4b b 10000e9c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x2c> | |
return v; | |
10000f1c: 60 00 7f e8 ld r3,96(r31) | |
10000f20: 70 00 21 38 addi r1,r1,112 | |
10000f24: 10 00 01 e8 ld r0,16(r1) | |
10000f28: f8 ff e1 eb ld r31,-8(r1) | |
10000f2c: a6 03 08 7c mtlr r0 | |
10000f30: 20 00 80 4e blr | |
... | |
0000000010000f40 <pair<sint8x8>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000f40: e8 ff 61 f8 std r3,-24(r1) | |
10000f44: e0 ff 81 f8 std r4,-32(r1) | |
10000f48: e8 ff 61 e8 ld r3,-24(r1) | |
10000f4c: e0 ff 81 e8 ld r4,-32(r1) | |
10000f50: 24 1f 84 78 rldicr r4,r4,3,60 | |
10000f54: 14 22 63 7c add r3,r3,r4 | |
10000f58: f0 ff 81 38 addi r4,r1,-16 | |
10000f5c: 00 00 63 e8 ld r3,0(r3) | |
10000f60: 00 00 64 f8 std r3,0(r4) | |
10000f64: f0 ff 61 e8 ld r3,-16(r1) | |
10000f68: 20 00 80 4e blr | |
... | |
10000f78: 00 00 00 60 nop | |
10000f7c: 00 00 00 60 nop | |
0000000010000f80 <__libc_csu_init>: | |
10000f80: 02 10 40 3c lis r2,4098 | |
10000f84: 00 7f 42 38 addi r2,r2,32512 | |
10000f88: a6 02 08 7c mflr r0 | |
10000f8c: d0 ff 41 fb std r26,-48(r1) | |
10000f90: d8 ff 61 fb std r27,-40(r1) | |
10000f94: 78 2b ba 7c mr r26,r5 | |
10000f98: e0 ff 81 fb std r28,-32(r1) | |
10000f9c: e8 ff a1 fb std r29,-24(r1) | |
10000fa0: ff ff a2 3f addis r29,r2,-1 | |
10000fa4: 78 1b 7c 7c mr r28,r3 | |
10000fa8: f0 ff c1 fb std r30,-16(r1) | |
10000fac: ff ff c2 3f addis r30,r2,-1 | |
10000fb0: c8 7d bd 3b addi r29,r29,32200 | |
10000fb4: c0 7d de 3b addi r30,r30,32192 | |
10000fb8: 78 23 9b 7c mr r27,r4 | |
10000fbc: 50 e8 be 7f subf r29,r30,r29 | |
10000fc0: 10 00 01 f8 std r0,16(r1) | |
10000fc4: b1 ff 21 f8 stdu r1,-80(r1) | |
10000fc8: a1 f4 ff 4b bl 10000468 <_init+0x8> | |
10000fcc: 00 00 00 60 nop | |
10000fd0: 75 1e bd 7f sradi. r29,r29,3 | |
10000fd4: 4c 00 82 41 beq 10001020 <__libc_csu_init+0xa0> | |
10000fd8: 18 00 41 f8 std r2,24(r1) | |
10000fdc: 48 00 e1 fb std r31,72(r1) | |
10000fe0: f8 ff de 3b addi r30,r30,-8 | |
10000fe4: 00 00 e0 3b li r31,0 | |
10000fe8: 00 00 00 60 nop | |
10000fec: 00 00 42 60 ori r2,r2,0 | |
10000ff0: 09 00 3e e9 ldu r9,8(r30) | |
10000ff4: 78 d3 45 7f mr r5,r26 | |
10000ff8: 78 db 64 7f mr r4,r27 | |
10000ffc: 78 e3 83 7f mr r3,r28 | |
10001000: 01 00 ff 3b addi r31,r31,1 | |
10001004: a6 03 29 7d mtctr r9 | |
10001008: 78 4b 2c 7d mr r12,r9 | |
1000100c: 21 04 80 4e bctrl | |
10001010: 18 00 41 e8 ld r2,24(r1) | |
10001014: 40 f8 bd 7f cmpld cr7,r29,r31 | |
10001018: d8 ff 9e 40 bne cr7,10000ff0 <__libc_csu_init+0x70> | |
1000101c: 48 00 e1 eb ld r31,72(r1) | |
10001020: 50 00 21 38 addi r1,r1,80 | |
10001024: 10 00 01 e8 ld r0,16(r1) | |
10001028: d0 ff 41 eb ld r26,-48(r1) | |
1000102c: d8 ff 61 eb ld r27,-40(r1) | |
10001030: e0 ff 81 eb ld r28,-32(r1) | |
10001034: e8 ff a1 eb ld r29,-24(r1) | |
10001038: f0 ff c1 eb ld r30,-16(r1) | |
1000103c: a6 03 08 7c mtlr r0 | |
10001040: 20 00 80 4e blr | |
10001044: 00 00 00 00 .long 0x0 | |
10001048: 00 00 00 01 .long 0x1000000 | |
1000104c: 80 06 00 00 .long 0x680 | |
0000000010001050 <__libc_csu_fini>: | |
10001050: 20 00 80 4e blr | |
... | |
10001060: 90 ef 01 00 .long 0x1ef90 | |
10001064: 00 00 00 00 .long 0x0 | |
0000000010001068 <__glink_PLTresolve>: | |
10001068: a6 02 08 7c mflr r0 | |
1000106c: 05 00 9f 42 bcl 20,4*cr7+so,10001070 <__glink_PLTresolve+0x8> | |
10001070: a6 02 68 7d mflr r11 | |
10001074: 18 00 41 f8 std r2,24(r1) | |
10001078: f0 ff 4b e8 ld r2,-16(r11) | |
1000107c: a6 03 08 7c mtlr r0 | |
10001080: 50 60 8b 7d subf r12,r11,r12 | |
10001084: 14 5a 62 7d add r11,r2,r11 | |
10001088: d0 ff 0c 38 addi r0,r12,-48 | |
1000108c: 00 00 8b e9 ld r12,0(r11) | |
10001090: 82 f0 00 78 rldicl r0,r0,62,2 | |
10001094: a6 03 89 7d mtctr r12 | |
10001098: 08 00 6b e9 ld r11,8(r11) | |
1000109c: 20 04 80 4e bctr | |
00000000100010a0 <__libc_start_main@plt>: | |
100010a0: c8 ff ff 4b b 10001068 <__glink_PLTresolve> | |
00000000100010a4 <__gmon_start__@plt>: | |
100010a4: c4 ff ff 4b b 10001068 <__glink_PLTresolve> | |
Disassembly of section .fini: | |
00000000100010a8 <_fini>: | |
100010a8: 02 10 40 3c lis r2,4098 | |
100010ac: 00 7f 42 38 addi r2,r2,32512 | |
100010b0: a6 02 08 7c mflr r0 | |
100010b4: 10 00 01 f8 std r0,16(r1) | |
100010b8: a1 ff 21 f8 stdu r1,-96(r1) | |
100010bc: 60 00 21 38 addi r1,r1,96 | |
100010c0: 10 00 01 e8 ld r0,16(r1) | |
100010c4: a6 03 08 7c mtlr r0 | |
100010c8: 20 00 80 4e blr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
good: file format elf64-powerpcle | |
Disassembly of section .init: | |
0000000010000440 <00000024.plt_call.__gmon_start__>: | |
10000440: 18 00 41 f8 std r2,24(r1) | |
10000444: 18 81 82 e9 ld r12,-32488(r2) | |
10000448: a6 03 89 7d mtctr r12 | |
1000044c: 20 04 80 4e bctr | |
... | |
0000000010000460 <_init>: | |
10000460: 02 10 40 3c lis r2,4098 | |
10000464: 00 7f 42 38 addi r2,r2,32512 | |
10000468: a6 02 08 7c mflr r0 | |
1000046c: 10 00 01 f8 std r0,16(r1) | |
10000470: a1 ff 21 f8 stdu r1,-96(r1) | |
10000474: 00 00 00 60 nop | |
10000478: 08 80 02 e8 ld r0,-32760(r2) | |
1000047c: 00 00 a0 2f cmpdi cr7,r0,0 | |
10000480: 0c 00 fe 41 beq+ cr7,1000048c <_init+0x2c> | |
10000484: bd ff ff 4b bl 10000440 <00000024.plt_call.__gmon_start__> | |
10000488: 18 00 41 e8 ld r2,24(r1) | |
1000048c: 60 00 21 38 addi r1,r1,96 | |
10000490: 10 00 01 e8 ld r0,16(r1) | |
10000494: a6 03 08 7c mtlr r0 | |
10000498: 20 00 80 4e blr | |
Disassembly of section .text: | |
00000000100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>: | |
100004a0: 18 00 41 f8 std r2,24(r1) | |
100004a4: 10 81 82 e9 ld r12,-32496(r2) | |
100004a8: a6 03 89 7d mtctr r12 | |
100004ac: 20 04 80 4e bctr | |
... | |
00000000100004c0 <_start>: | |
100004c0: 02 10 40 3c lis r2,4098 | |
100004c4: 00 7f 42 38 addi r2,r2,32512 | |
100004c8: 78 0b 29 7c mr r9,r1 | |
100004cc: e4 06 21 78 rldicr r1,r1,0,59 | |
100004d0: 00 00 00 38 li r0,0 | |
100004d4: 81 ff 21 f8 stdu r1,-128(r1) | |
100004d8: a6 03 08 7c mtlr r0 | |
100004dc: 00 00 01 f8 std r0,0(r1) | |
100004e0: 10 80 02 e9 ld r8,-32752(r2) | |
100004e4: bc ff ff 4b b 100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17> | |
100004e8: 00 00 00 60 nop | |
... | |
100004f4: 40 20 0c 00 .long 0xc2040 | |
100004f8: 2c 00 00 00 .long 0x2c | |
100004fc: 06 00 5f 73 andi. r31,r26,6 | |
10000500: 74 61 72 74 andis. r18,r3,24948 | |
10000504: 00 00 00 60 nop | |
10000508: 00 00 00 60 nop | |
1000050c: 00 00 00 60 nop | |
0000000010000510 <deregister_tm_clones>: | |
10000510: 02 10 40 3c lis r2,4098 | |
10000514: 00 7f 42 38 addi r2,r2,32512 | |
10000518: 00 00 00 60 nop | |
1000051c: 00 00 00 60 nop | |
10000520: 30 81 62 38 addi r3,r2,-32464 | |
10000524: 30 81 22 39 addi r9,r2,-32464 | |
10000528: 00 18 a9 7f cmpd cr7,r9,r3 | |
1000052c: 20 00 9e 4d beqlr cr7 | |
10000530: 00 00 00 60 nop | |
10000534: 18 80 22 e9 ld r9,-32744(r2) | |
10000538: 00 00 a9 2f cmpdi cr7,r9,0 | |
1000053c: 20 00 9e 4d beqlr cr7 | |
10000540: a6 02 08 7c mflr r0 | |
10000544: 78 4b 2c 7d mr r12,r9 | |
10000548: a6 03 29 7d mtctr r9 | |
1000054c: 10 00 01 f8 std r0,16(r1) | |
10000550: e1 ff 21 f8 stdu r1,-32(r1) | |
10000554: 18 00 41 f8 std r2,24(r1) | |
10000558: 21 04 80 4e bctrl | |
1000055c: 18 00 41 e8 ld r2,24(r1) | |
10000560: 20 00 21 38 addi r1,r1,32 | |
10000564: 10 00 01 e8 ld r0,16(r1) | |
10000568: a6 03 08 7c mtlr r0 | |
1000056c: 20 00 80 4e blr | |
0000000010000570 <register_tm_clones>: | |
10000570: 02 10 40 3c lis r2,4098 | |
10000574: 00 7f 42 38 addi r2,r2,32512 | |
10000578: 00 00 00 60 nop | |
1000057c: 00 00 00 60 nop | |
10000580: 30 81 62 38 addi r3,r2,-32464 | |
10000584: 30 81 82 38 addi r4,r2,-32464 | |
10000588: 50 20 83 7c subf r4,r3,r4 | |
1000058c: 74 1e 84 7c sradi r4,r4,3 | |
10000590: 74 0e 84 7c sradi r4,r4,1 | |
10000594: 95 01 84 7c addze. r4,r4 | |
10000598: 20 00 82 4d beqlr | |
1000059c: 00 00 00 60 nop | |
100005a0: 20 80 22 e9 ld r9,-32736(r2) | |
100005a4: 00 00 a9 2f cmpdi cr7,r9,0 | |
100005a8: 20 00 9e 4d beqlr cr7 | |
100005ac: a6 02 08 7c mflr r0 | |
100005b0: 78 4b 2c 7d mr r12,r9 | |
100005b4: a6 03 29 7d mtctr r9 | |
100005b8: 10 00 01 f8 std r0,16(r1) | |
100005bc: e1 ff 21 f8 stdu r1,-32(r1) | |
100005c0: 18 00 41 f8 std r2,24(r1) | |
100005c4: 21 04 80 4e bctrl | |
100005c8: 18 00 41 e8 ld r2,24(r1) | |
100005cc: 20 00 21 38 addi r1,r1,32 | |
100005d0: 10 00 01 e8 ld r0,16(r1) | |
100005d4: a6 03 08 7c mtlr r0 | |
100005d8: 20 00 80 4e blr | |
100005dc: 00 00 42 60 ori r2,r2,0 | |
00000000100005e0 <__do_global_dtors_aux>: | |
100005e0: 02 10 40 3c lis r2,4098 | |
100005e4: 00 7f 42 38 addi r2,r2,32512 | |
100005e8: f8 ff e1 fb std r31,-8(r1) | |
100005ec: 00 00 00 60 nop | |
100005f0: d1 ff 21 f8 stdu r1,-48(r1) | |
100005f4: 30 81 22 89 lbz r9,-32464(r2) | |
100005f8: 00 00 89 2f cmpwi cr7,r9,0 | |
100005fc: 20 00 9e 40 bne cr7,1000061c <__do_global_dtors_aux+0x3c> | |
10000600: a6 02 08 7c mflr r0 | |
10000604: 40 00 01 f8 std r0,64(r1) | |
10000608: 11 ff ff 4b bl 10000518 <deregister_tm_clones+0x8> | |
1000060c: 40 00 01 e8 ld r0,64(r1) | |
10000610: 01 00 20 39 li r9,1 | |
10000614: 30 81 22 99 stb r9,-32464(r2) | |
10000618: a6 03 08 7c mtlr r0 | |
1000061c: 30 00 21 38 addi r1,r1,48 | |
10000620: f8 ff e1 eb ld r31,-8(r1) | |
10000624: 20 00 80 4e blr | |
10000628: 00 00 00 60 nop | |
1000062c: 00 00 42 60 ori r2,r2,0 | |
0000000010000630 <frame_dummy>: | |
10000630: 02 10 40 3c lis r2,4098 | |
10000634: 00 7f 42 38 addi r2,r2,32512 | |
10000638: 40 ff ff 4b b 10000578 <register_tm_clones+0x8> | |
1000063c: 00 00 00 60 nop | |
0000000010000640 <min(signed char, signed char)>: | |
struct sint8x8 { int8x8 s; }; | |
struct sint8x16 { int8x16 s; }; | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
signed char min(signed char a, signed char b) { | |
10000640: f7 ff 61 98 stb r3,-9(r1) | |
10000644: f6 ff 81 98 stb r4,-10(r1) | |
return a < b ? a : b; | |
10000648: f7 ff 61 88 lbz r3,-9(r1) | |
1000064c: 74 07 63 7c extsb r3,r3 | |
10000650: f6 ff 81 88 lbz r4,-10(r1) | |
10000654: 74 07 84 7c extsb r4,r4 | |
10000658: 00 20 03 7c cmpw r3,r4 | |
1000065c: 10 00 80 40 bge 1000066c <min(signed char, signed char)+0x2c> | |
10000660: f7 ff 61 88 lbz r3,-9(r1) | |
10000664: f0 ff 61 90 stw r3,-16(r1) | |
10000668: 0c 00 00 48 b 10000674 <min(signed char, signed char)+0x34> | |
1000066c: f6 ff 61 88 lbz r3,-10(r1) | |
10000670: f0 ff 61 90 stw r3,-16(r1) | |
10000674: f0 ff 61 80 lwz r3,-16(r1) | |
10000678: 74 07 63 7c extsb r3,r3 | |
1000067c: 20 00 80 4e blr | |
... | |
1000068c: 00 00 00 60 nop | |
0000000010000690 <split_by(sint8x2)>: | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
v.s[i] = min(a.s[i], b.s[i]); | |
return v; | |
} | |
pair<sint8x1> split_by(sint8x2 v) { | |
10000690: f0 ff 81 38 addi r4,r1,-16 | |
10000694: e8 ff 61 b0 sth r3,-24(r1) | |
sint8x1 a, b; | |
a.s[0] = v.s[0]; | |
10000698: e8 ff 61 a0 lhz r3,-24(r1) | |
1000069c: e0 ff 61 98 stb r3,-32(r1) | |
100006a0: e8 ff 61 38 addi r3,r1,-24 | |
b.s[0] = v.s[1]; | |
100006a4: 5b 1e 40 7c lxsihzx vs34,0,r3 | |
100006a8: 4c 12 43 10 vsplth v2,v2,3 | |
100006ac: ec 11 42 10 vsldoi v2,v2,v2,7 | |
100006b0: d8 ff 61 38 addi r3,r1,-40 | |
100006b4: 1b 1f 40 7c stxsibx vs34,0,r3 | |
return {{a, b}}; | |
100006b8: 78 23 83 7c mr r3,r4 | |
100006bc: e0 ff a1 38 addi r5,r1,-32 | |
100006c0: 00 00 a5 88 lbz r5,0(r5) | |
100006c4: 00 00 a3 98 stb r5,0(r3) | |
100006c8: 01 00 64 38 addi r3,r4,1 | |
100006cc: d8 ff 81 38 addi r4,r1,-40 | |
100006d0: 00 00 84 88 lbz r4,0(r4) | |
100006d4: 00 00 83 98 stb r4,0(r3) | |
100006d8: f0 ff 61 a0 lhz r3,-16(r1) | |
100006dc: 20 00 80 4e blr | |
... | |
100006ec: 00 00 00 60 nop | |
00000000100006f0 <split_by(sint8x4)>: | |
} | |
pair<sint8x2> split_by(sint8x4 v) { | |
100006f0: f0 ff 81 38 addi r4,r1,-16 | |
100006f4: e8 ff 61 90 stw r3,-24(r1) | |
sint8x2 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1); | |
100006f8: e8 ff 61 80 lwz r3,-24(r1) | |
100006fc: e0 ff 61 b0 sth r3,-32(r1) | |
10000700: e8 ff 61 38 addi r3,r1,-24 | |
b.s = __builtin_shufflevector(v.s, v.s, 2, 3); | |
10000704: ee 1e 00 7c lfiwzx f0,0,r3 | |
10000708: 51 02 40 f0 xxswapd vs34,vs0 | |
1000070c: 4c 12 46 10 vsplth v2,v2,6 | |
10000710: 2c 12 42 10 vsldoi v2,v2,v2,8 | |
10000714: d8 ff 61 38 addi r3,r1,-40 | |
10000718: 5b 1f 40 7c stxsihx vs34,0,r3 | |
return {{a, b}}; | |
1000071c: 78 23 83 7c mr r3,r4 | |
10000720: e0 ff a1 38 addi r5,r1,-32 | |
10000724: 00 00 a5 a0 lhz r5,0(r5) | |
10000728: 00 00 a3 b0 sth r5,0(r3) | |
1000072c: 02 00 64 38 addi r3,r4,2 | |
10000730: d8 ff 81 38 addi r4,r1,-40 | |
10000734: 00 00 84 a0 lhz r4,0(r4) | |
10000738: 00 00 83 b0 sth r4,0(r3) | |
1000073c: f0 ff 61 80 lwz r3,-16(r1) | |
10000740: 20 00 80 4e blr | |
... | |
0000000010000750 <split_by(sint8x8)>: | |
} | |
pair<sint8x4> split_by(sint8x8 v) { | |
10000750: f0 ff 81 38 addi r4,r1,-16 | |
10000754: e8 ff 61 f8 std r3,-24(r1) | |
sint8x4 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3); | |
10000758: e8 ff 61 e8 ld r3,-24(r1) | |
1000075c: e0 ff 61 90 stw r3,-32(r1) | |
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7); | |
10000760: ec ff 61 38 addi r3,r1,-20 | |
10000764: d8 1a 00 7c lxvwsx vs0,0,r3 | |
10000768: 10 02 00 f0 xxsldwi vs0,vs0,vs0,2 | |
1000076c: d8 ff 61 38 addi r3,r1,-40 | |
10000770: ae 1f 00 7c stfiwx f0,0,r3 | |
return {{a, b}}; | |
10000774: 78 23 83 7c mr r3,r4 | |
10000778: e0 ff a1 38 addi r5,r1,-32 | |
1000077c: 00 00 a5 80 lwz r5,0(r5) | |
10000780: 00 00 a3 90 stw r5,0(r3) | |
10000784: 04 00 64 38 addi r3,r4,4 | |
10000788: d8 ff 81 38 addi r4,r1,-40 | |
1000078c: 00 00 84 80 lwz r4,0(r4) | |
10000790: 00 00 83 90 stw r4,0(r3) | |
10000794: f0 ff 61 e8 ld r3,-16(r1) | |
10000798: 20 00 80 4e blr | |
... | |
100007a8: 00 00 00 60 nop | |
100007ac: 00 00 00 60 nop | |
00000000100007b0 <split_by(sint8x16)>: | |
} | |
pair<sint8x8> split_by(sint8x16 v) { | |
100007b0: dd ff 41 f4 stxv vs34,-48(r1) | |
sint8x8 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7); | |
100007b4: d0 ff 61 e8 ld r3,-48(r1) | |
100007b8: c8 ff 61 f8 std r3,-56(r1) | |
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15); | |
100007bc: d8 ff 61 38 addi r3,r1,-40 | |
100007c0: 99 1a 40 7c lxvdsx vs34,0,r3 | |
100007c4: 56 12 02 f0 xxswapd vs0,vs34 | |
100007c8: c0 ff 01 d8 stfd f0,-64(r1) | |
return {{a, b}}; | |
100007cc: c8 ff 61 e8 ld r3,-56(r1) | |
100007d0: e8 ff 61 f8 std r3,-24(r1) | |
100007d4: c0 ff 61 e8 ld r3,-64(r1) | |
100007d8: f0 ff 61 f8 std r3,-16(r1) | |
100007dc: e8 ff 61 e8 ld r3,-24(r1) | |
100007e0: f0 ff 81 e8 ld r4,-16(r1) | |
100007e4: 20 00 80 4e blr | |
... | |
100007f4: 00 00 00 60 nop | |
100007f8: 00 00 00 60 nop | |
100007fc: 00 00 00 60 nop | |
0000000010000800 <hmin(sint8x1)>: | |
} | |
signed char hmin(sint8x1 v) { | |
10000800: f0 ff 61 98 stb r3,-16(r1) | |
return v.s[0]; | |
10000804: f0 ff 61 88 lbz r3,-16(r1) | |
10000808: 74 07 63 7c extsb r3,r3 | |
1000080c: 20 00 80 4e blr | |
... | |
1000081c: 00 00 00 60 nop | |
0000000010000820 <hmin(sint8x2)>: | |
} | |
signed char hmin(sint8x2 v) { | |
10000820: 02 10 40 3c lis r2,4098 | |
10000824: 00 7f 42 38 addi r2,r2,32512 | |
10000828: a6 02 08 7c mflr r0 | |
1000082c: f8 ff e1 fb std r31,-8(r1) | |
10000830: 10 00 01 f8 std r0,16(r1) | |
10000834: 61 ff 21 f8 stdu r1,-160(r1) | |
10000838: 78 0b 3f 7c mr r31,r1 | |
1000083c: 90 00 7f b0 sth r3,144(r31) | |
auto a = split_by(v); | |
10000840: 80 00 7f 38 addi r3,r31,128 | |
10000844: 90 00 9f 38 addi r4,r31,144 | |
10000848: 00 00 84 a0 lhz r4,0(r4) | |
1000084c: 00 00 83 b0 sth r4,0(r3) | |
10000850: 80 00 7f a0 lhz r3,128(r31) | |
10000854: 3d fe ff 4b bl 10000690 <split_by(sint8x2)> | |
10000858: 00 00 00 60 nop | |
1000085c: 88 00 7f b0 sth r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
10000860: 88 00 7f 38 addi r3,r31,136 | |
10000864: 00 00 80 38 li r4,0 | |
10000868: 99 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const> | |
1000086c: 00 00 00 60 nop | |
10000870: 70 00 7f 98 stb r3,112(r31) | |
10000874: 88 00 7f 38 addi r3,r31,136 | |
10000878: 01 00 80 38 li r4,1 | |
1000087c: 85 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const> | |
10000880: 00 00 00 60 nop | |
10000884: 68 00 7f 98 stb r3,104(r31) | |
10000888: 70 00 7f 38 addi r3,r31,112 | |
1000088c: 68 00 9f 38 addi r4,r31,104 | |
10000890: e9 02 00 48 bl 10000b78 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x8> | |
10000894: 00 00 00 60 nop | |
10000898: 78 00 7f 98 stb r3,120(r31) | |
1000089c: 78 00 7f 88 lbz r3,120(r31) | |
100008a0: 61 ff ff 4b bl 10000800 <hmin(sint8x1)> | |
100008a4: 00 00 00 60 nop | |
100008a8: 74 07 63 7c extsb r3,r3 | |
100008ac: a0 00 21 38 addi r1,r1,160 | |
100008b0: 10 00 01 e8 ld r0,16(r1) | |
100008b4: f8 ff e1 eb ld r31,-8(r1) | |
100008b8: a6 03 08 7c mtlr r0 | |
100008bc: 20 00 80 4e blr | |
... | |
100008cc: 00 00 00 60 nop | |
00000000100008d0 <hmin(sint8x4)>: | |
} | |
signed char hmin(sint8x4 v) { | |
100008d0: 02 10 40 3c lis r2,4098 | |
100008d4: 00 7f 42 38 addi r2,r2,32512 | |
100008d8: a6 02 08 7c mflr r0 | |
100008dc: f8 ff e1 fb std r31,-8(r1) | |
100008e0: 10 00 01 f8 std r0,16(r1) | |
100008e4: 61 ff 21 f8 stdu r1,-160(r1) | |
100008e8: 78 0b 3f 7c mr r31,r1 | |
100008ec: 90 00 7f 90 stw r3,144(r31) | |
auto a = split_by(v); | |
100008f0: 80 00 7f 38 addi r3,r31,128 | |
100008f4: 90 00 9f 38 addi r4,r31,144 | |
100008f8: 00 00 84 80 lwz r4,0(r4) | |
100008fc: 00 00 83 90 stw r4,0(r3) | |
10000900: 80 00 7f 80 lwz r3,128(r31) | |
10000904: ed fd ff 4b bl 100006f0 <split_by(sint8x4)> | |
10000908: 00 00 00 60 nop | |
1000090c: 88 00 7f 90 stw r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
10000910: 88 00 7f 38 addi r3,r31,136 | |
10000914: 00 00 80 38 li r4,0 | |
10000918: 09 04 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const> | |
1000091c: 00 00 00 60 nop | |
10000920: 70 00 7f b0 sth r3,112(r31) | |
10000924: 88 00 7f 38 addi r3,r31,136 | |
10000928: 01 00 80 38 li r4,1 | |
1000092c: f5 03 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const> | |
10000930: 00 00 00 60 nop | |
10000934: 68 00 7f b0 sth r3,104(r31) | |
10000938: 70 00 7f 38 addi r3,r31,112 | |
1000093c: 68 00 9f 38 addi r4,r31,104 | |
10000940: 09 03 00 48 bl 10000c48 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x8> | |
10000944: 00 00 00 60 nop | |
10000948: 78 00 7f b0 sth r3,120(r31) | |
1000094c: 78 00 7f a0 lhz r3,120(r31) | |
10000950: d9 fe ff 4b bl 10000828 <hmin(sint8x2)+0x8> | |
10000954: 00 00 00 60 nop | |
10000958: 74 07 63 7c extsb r3,r3 | |
1000095c: a0 00 21 38 addi r1,r1,160 | |
10000960: 10 00 01 e8 ld r0,16(r1) | |
10000964: f8 ff e1 eb ld r31,-8(r1) | |
10000968: a6 03 08 7c mtlr r0 | |
1000096c: 20 00 80 4e blr | |
... | |
1000097c: 00 00 00 60 nop | |
0000000010000980 <hmin(sint8x8)>: | |
} | |
signed char hmin(sint8x8 v) { | |
10000980: 02 10 40 3c lis r2,4098 | |
10000984: 00 7f 42 38 addi r2,r2,32512 | |
10000988: a6 02 08 7c mflr r0 | |
1000098c: f8 ff e1 fb std r31,-8(r1) | |
10000990: 10 00 01 f8 std r0,16(r1) | |
10000994: 61 ff 21 f8 stdu r1,-160(r1) | |
10000998: 78 0b 3f 7c mr r31,r1 | |
1000099c: 90 00 7f f8 std r3,144(r31) | |
auto a = split_by(v); | |
100009a0: 80 00 7f 38 addi r3,r31,128 | |
100009a4: 90 00 9f 38 addi r4,r31,144 | |
100009a8: 00 00 84 e8 ld r4,0(r4) | |
100009ac: 00 00 83 f8 std r4,0(r3) | |
100009b0: 80 00 7f e8 ld r3,128(r31) | |
100009b4: 9d fd ff 4b bl 10000750 <split_by(sint8x8)> | |
100009b8: 00 00 00 60 nop | |
100009bc: 88 00 7f f8 std r3,136(r31) | |
return hmin(min(a[0], a[1])); | |
100009c0: 88 00 7f 38 addi r3,r31,136 | |
100009c4: 00 00 80 38 li r4,0 | |
100009c8: 69 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const> | |
100009cc: 00 00 00 60 nop | |
100009d0: 70 00 7f 90 stw r3,112(r31) | |
100009d4: 88 00 7f 38 addi r3,r31,136 | |
100009d8: 01 00 80 38 li r4,1 | |
100009dc: 55 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const> | |
100009e0: 00 00 00 60 nop | |
100009e4: 68 00 7f 90 stw r3,104(r31) | |
100009e8: 70 00 7f 38 addi r3,r31,112 | |
100009ec: 68 00 9f 38 addi r4,r31,104 | |
100009f0: 79 03 00 48 bl 10000d68 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x8> | |
100009f4: 00 00 00 60 nop | |
100009f8: 78 00 7f 90 stw r3,120(r31) | |
100009fc: 78 00 7f 80 lwz r3,120(r31) | |
10000a00: d9 fe ff 4b bl 100008d8 <hmin(sint8x4)+0x8> | |
10000a04: 00 00 00 60 nop | |
10000a08: 74 07 63 7c extsb r3,r3 | |
10000a0c: a0 00 21 38 addi r1,r1,160 | |
10000a10: 10 00 01 e8 ld r0,16(r1) | |
10000a14: f8 ff e1 eb ld r31,-8(r1) | |
10000a18: a6 03 08 7c mtlr r0 | |
10000a1c: 20 00 80 4e blr | |
... | |
10000a2c: 00 00 00 60 nop | |
0000000010000a30 <hmin(sint8x16)>: | |
} | |
signed char hmin(sint8x16 v) { | |
10000a30: 02 10 40 3c lis r2,4098 | |
10000a34: 00 7f 42 38 addi r2,r2,32512 | |
10000a38: a6 02 08 7c mflr r0 | |
10000a3c: f8 ff e1 fb std r31,-8(r1) | |
10000a40: 10 00 01 f8 std r0,16(r1) | |
10000a44: 41 ff 21 f8 stdu r1,-192(r1) | |
10000a48: 78 0b 3f 7c mr r31,r1 | |
10000a4c: ad 00 5f f4 stxv vs34,160(r31) | |
auto a = split_by(v); | |
10000a50: a8 00 7f e8 ld r3,168(r31) | |
10000a54: 88 00 7f f8 std r3,136(r31) | |
10000a58: a0 00 7f e8 ld r3,160(r31) | |
10000a5c: 80 00 7f f8 std r3,128(r31) | |
10000a60: 81 00 1f f4 lxv vs0,128(r31) | |
10000a64: 91 04 40 f0 xxlor vs34,vs0,vs0 | |
10000a68: 49 fd ff 4b bl 100007b0 <split_by(sint8x16)> | |
10000a6c: 90 00 7f f8 std r3,144(r31) | |
10000a70: 98 00 9f f8 std r4,152(r31) | |
return hmin(min(a[0], a[1])); | |
10000a74: 90 00 7f 38 addi r3,r31,144 | |
10000a78: 00 00 80 38 li r4,0 | |
10000a7c: c5 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const> | |
10000a80: 00 00 00 60 nop | |
10000a84: 70 00 7f f8 std r3,112(r31) | |
10000a88: 90 00 7f 38 addi r3,r31,144 | |
10000a8c: 01 00 80 38 li r4,1 | |
10000a90: b1 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const> | |
10000a94: 00 00 00 60 nop | |
10000a98: 68 00 7f f8 std r3,104(r31) | |
10000a9c: 70 00 7f 38 addi r3,r31,112 | |
10000aa0: 68 00 9f 38 addi r4,r31,104 | |
10000aa4: d5 03 00 48 bl 10000e78 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x8> | |
10000aa8: 00 00 00 60 nop | |
10000aac: 78 00 7f f8 std r3,120(r31) | |
10000ab0: 78 00 7f e8 ld r3,120(r31) | |
10000ab4: d5 fe ff 4b bl 10000988 <hmin(sint8x8)+0x8> | |
10000ab8: 00 00 00 60 nop | |
10000abc: 74 07 63 7c extsb r3,r3 | |
10000ac0: c0 00 21 38 addi r1,r1,192 | |
10000ac4: 10 00 01 e8 ld r0,16(r1) | |
10000ac8: f8 ff e1 eb ld r31,-8(r1) | |
10000acc: a6 03 08 7c mtlr r0 | |
10000ad0: 20 00 80 4e blr | |
... | |
0000000010000ae0 <main>: | |
} | |
int main(void) { | |
10000ae0: 02 10 40 3c lis r2,4098 | |
10000ae4: 00 7f 42 38 addi r2,r2,32512 | |
10000ae8: a6 02 08 7c mflr r0 | |
10000aec: f8 ff e1 fb std r31,-8(r1) | |
10000af0: 10 00 01 f8 std r0,16(r1) | |
10000af4: a1 ff 21 f8 stdu r1,-96(r1) | |
10000af8: 78 0b 3f 7c mr r31,r1 | |
10000afc: 00 00 60 38 li r3,0 | |
10000b00: 54 00 7f 90 stw r3,84(r31) | |
const long data[] = {0x00010102464c457f, 0}; | |
10000b04: fe ff 62 3c addis r3,r2,-2 | |
10000b08: f8 91 63 38 addi r3,r3,-28168 | |
10000b0c: 08 00 83 e8 ld r4,8(r3) | |
10000b10: 48 00 9f f8 std r4,72(r31) | |
10000b14: 00 00 63 e8 ld r3,0(r3) | |
10000b18: 40 00 7f f8 std r3,64(r31) | |
sint8x16 v; | |
__builtin_memcpy(&v.s, data, 16); | |
10000b1c: 48 00 7f e8 ld r3,72(r31) | |
10000b20: 38 00 7f f8 std r3,56(r31) | |
10000b24: 40 00 7f e8 ld r3,64(r31) | |
10000b28: 30 00 7f f8 std r3,48(r31) | |
return hmin(v); | |
10000b2c: 38 00 7f e8 ld r3,56(r31) | |
10000b30: 28 00 7f f8 std r3,40(r31) | |
10000b34: 30 00 7f e8 ld r3,48(r31) | |
10000b38: 20 00 7f f8 std r3,32(r31) | |
10000b3c: 21 00 1f f4 lxv vs0,32(r31) | |
10000b40: 91 04 40 f0 xxlor vs34,vs0,vs0 | |
10000b44: f5 fe ff 4b bl 10000a38 <hmin(sint8x16)+0x8> | |
10000b48: 74 07 63 7c extsb r3,r3 | |
10000b4c: b4 07 63 7c extsw r3,r3 | |
10000b50: 60 00 21 38 addi r1,r1,96 | |
10000b54: 10 00 01 e8 ld r0,16(r1) | |
10000b58: f8 ff e1 eb ld r31,-8(r1) | |
10000b5c: a6 03 08 7c mtlr r0 | |
10000b60: 20 00 80 4e blr | |
... | |
0000000010000b70 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)>: | |
T min(const T &a, const T &b) { | |
10000b70: 02 10 40 3c lis r2,4098 | |
10000b74: 00 7f 42 38 addi r2,r2,32512 | |
10000b78: a6 02 08 7c mflr r0 | |
10000b7c: f8 ff e1 fb std r31,-8(r1) | |
10000b80: 10 00 01 f8 std r0,16(r1) | |
10000b84: b1 ff 21 f8 stdu r1,-80(r1) | |
10000b88: 78 0b 3f 7c mr r31,r1 | |
10000b8c: 38 00 7f f8 std r3,56(r31) | |
10000b90: 30 00 9f f8 std r4,48(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000b94: 00 00 60 38 li r3,0 | |
10000b98: 28 00 7f f8 std r3,40(r31) | |
10000b9c: 28 00 7f e8 ld r3,40(r31) | |
10000ba0: 01 00 23 28 cmpldi r3,1 | |
10000ba4: 34 00 80 40 bge 10000bd8 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x68> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000ba8: 38 00 7f e8 ld r3,56(r31) | |
10000bac: 30 00 9f e8 ld r4,48(r31) | |
10000bb0: 00 00 84 88 lbz r4,0(r4) | |
10000bb4: 00 00 63 88 lbz r3,0(r3) | |
10000bb8: 74 07 63 7c extsb r3,r3 | |
10000bbc: 74 07 84 7c extsb r4,r4 | |
10000bc0: 81 fa ff 4b bl 10000640 <min(signed char, signed char)> | |
10000bc4: 40 00 7f 98 stb r3,64(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000bc8: 28 00 7f e8 ld r3,40(r31) | |
10000bcc: 01 00 63 38 addi r3,r3,1 | |
10000bd0: 28 00 7f f8 std r3,40(r31) | |
10000bd4: c8 ff ff 4b b 10000b9c <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x2c> | |
10000bd8: 40 00 7f 88 lbz r3,64(r31) | |
return v; | |
10000bdc: 50 00 21 38 addi r1,r1,80 | |
10000be0: 10 00 01 e8 ld r0,16(r1) | |
10000be4: f8 ff e1 eb ld r31,-8(r1) | |
10000be8: a6 03 08 7c mtlr r0 | |
10000bec: 20 00 80 4e blr | |
... | |
10000bfc: 00 00 00 60 nop | |
0000000010000c00 <pair<sint8x1>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000c00: e8 ff 61 f8 std r3,-24(r1) | |
10000c04: e0 ff 81 f8 std r4,-32(r1) | |
10000c08: e8 ff 61 e8 ld r3,-24(r1) | |
10000c0c: e0 ff 81 e8 ld r4,-32(r1) | |
10000c10: 14 22 63 7c add r3,r3,r4 | |
10000c14: f0 ff 81 38 addi r4,r1,-16 | |
10000c18: 00 00 63 88 lbz r3,0(r3) | |
10000c1c: 00 00 64 98 stb r3,0(r4) | |
10000c20: f0 ff 61 88 lbz r3,-16(r1) | |
10000c24: 20 00 80 4e blr | |
... | |
10000c34: 00 00 00 60 nop | |
10000c38: 00 00 00 60 nop | |
10000c3c: 00 00 00 60 nop | |
0000000010000c40 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)>: | |
T min(const T &a, const T &b) { | |
10000c40: 02 10 40 3c lis r2,4098 | |
10000c44: 00 7f 42 38 addi r2,r2,32512 | |
10000c48: a6 02 08 7c mflr r0 | |
10000c4c: f8 ff e1 fb std r31,-8(r1) | |
10000c50: 10 00 01 f8 std r0,16(r1) | |
10000c54: 91 ff 21 f8 stdu r1,-112(r1) | |
10000c58: 78 0b 3f 7c mr r31,r1 | |
10000c5c: 58 00 7f f8 std r3,88(r31) | |
10000c60: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000c64: 00 00 60 38 li r3,0 | |
10000c68: 48 00 7f f8 std r3,72(r31) | |
10000c6c: 48 00 7f e8 ld r3,72(r31) | |
10000c70: 02 00 23 28 cmpldi r3,2 | |
10000c74: 7c 00 80 40 bge 10000cf0 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0xb0> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000c78: 58 00 7f e8 ld r3,88(r31) | |
10000c7c: 5b 1e 40 7c lxsihzx vs34,0,r3 | |
10000c80: 4c 12 43 10 vsplth v2,v2,3 | |
10000c84: 48 00 7f e8 ld r3,72(r31) | |
10000c88: 50 00 9f e8 ld r4,80(r31) | |
10000c8c: 5b 26 60 7c lxsihzx vs35,0,r4 | |
10000c90: 4c 1a 63 10 vsplth v3,v3,3 | |
10000c94: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000c98: 74 07 84 7c extsb r4,r4 | |
10000c9c: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000ca0: 74 07 63 7c extsb r3,r3 | |
10000ca4: 28 00 7f f8 std r3,40(r31) | |
10000ca8: 78 23 83 7c mr r3,r4 | |
10000cac: 28 00 9f e8 ld r4,40(r31) | |
10000cb0: 91 f9 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000cb4: 48 00 9f e8 ld r4,72(r31) | |
10000cb8: 20 07 84 78 clrldi r4,r4,60 | |
10000cbc: 60 00 bf 38 addi r5,r31,96 | |
10000cc0: 5b 2e 40 7c lxsihzx vs34,0,r5 | |
10000cc4: 4c 12 43 10 vsplth v2,v2,3 | |
10000cc8: 3d 00 5f f4 stxv vs34,48(r31) | |
10000ccc: 30 00 df 38 addi r6,r31,48 | |
10000cd0: ae 21 66 7c stbx r3,r6,r4 | |
10000cd4: 39 00 5f f4 lxv vs34,48(r31) | |
10000cd8: 2c 12 42 10 vsldoi v2,v2,v2,8 | |
10000cdc: 5b 2f 40 7c stxsihx vs34,0,r5 | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000ce0: 48 00 7f e8 ld r3,72(r31) | |
10000ce4: 01 00 63 38 addi r3,r3,1 | |
10000ce8: 48 00 7f f8 std r3,72(r31) | |
10000cec: 80 ff ff 4b b 10000c6c <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x2c> | |
10000cf0: 60 00 7f a0 lhz r3,96(r31) | |
return v; | |
10000cf4: 70 00 21 38 addi r1,r1,112 | |
10000cf8: 10 00 01 e8 ld r0,16(r1) | |
10000cfc: f8 ff e1 eb ld r31,-8(r1) | |
10000d00: a6 03 08 7c mtlr r0 | |
10000d04: 20 00 80 4e blr | |
... | |
10000d14: 00 00 00 60 nop | |
10000d18: 00 00 00 60 nop | |
10000d1c: 00 00 00 60 nop | |
0000000010000d20 <pair<sint8x2>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000d20: e8 ff 61 f8 std r3,-24(r1) | |
10000d24: e0 ff 81 f8 std r4,-32(r1) | |
10000d28: e8 ff 61 e8 ld r3,-24(r1) | |
10000d2c: e0 ff 81 e8 ld r4,-32(r1) | |
10000d30: a4 0f 84 78 rldicr r4,r4,1,62 | |
10000d34: 14 22 63 7c add r3,r3,r4 | |
10000d38: f0 ff 81 38 addi r4,r1,-16 | |
10000d3c: 00 00 63 a0 lhz r3,0(r3) | |
10000d40: 00 00 64 b0 sth r3,0(r4) | |
10000d44: f0 ff 61 a0 lhz r3,-16(r1) | |
10000d48: 20 00 80 4e blr | |
... | |
10000d58: 00 00 00 60 nop | |
10000d5c: 00 00 00 60 nop | |
0000000010000d60 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)>: | |
T min(const T &a, const T &b) { | |
10000d60: 02 10 40 3c lis r2,4098 | |
10000d64: 00 7f 42 38 addi r2,r2,32512 | |
10000d68: a6 02 08 7c mflr r0 | |
10000d6c: f8 ff e1 fb std r31,-8(r1) | |
10000d70: 10 00 01 f8 std r0,16(r1) | |
10000d74: 91 ff 21 f8 stdu r1,-112(r1) | |
10000d78: 78 0b 3f 7c mr r31,r1 | |
10000d7c: 58 00 7f f8 std r3,88(r31) | |
10000d80: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000d84: 00 00 60 38 li r3,0 | |
10000d88: 48 00 7f f8 std r3,72(r31) | |
10000d8c: 48 00 7f e8 ld r3,72(r31) | |
10000d90: 04 00 23 28 cmpldi r3,4 | |
10000d94: 78 00 80 40 bge 10000e0c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0xac> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000d98: 58 00 7f e8 ld r3,88(r31) | |
10000d9c: ee 1e 00 7c lfiwzx f0,0,r3 | |
10000da0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000da4: 48 00 7f e8 ld r3,72(r31) | |
10000da8: 50 00 9f e8 ld r4,80(r31) | |
10000dac: ee 26 00 7c lfiwzx f0,0,r4 | |
10000db0: 51 02 60 f0 xxswapd vs35,vs0 | |
10000db4: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000db8: 74 07 84 7c extsb r4,r4 | |
10000dbc: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000dc0: 74 07 63 7c extsb r3,r3 | |
10000dc4: 28 00 7f f8 std r3,40(r31) | |
10000dc8: 78 23 83 7c mr r3,r4 | |
10000dcc: 28 00 9f e8 ld r4,40(r31) | |
10000dd0: 71 f8 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000dd4: 48 00 9f e8 ld r4,72(r31) | |
10000dd8: 20 07 84 78 clrldi r4,r4,60 | |
10000ddc: 60 00 bf 38 addi r5,r31,96 | |
10000de0: ee 2e 00 7c lfiwzx f0,0,r5 | |
10000de4: 50 02 00 f0 xxswapd vs0,vs0 | |
10000de8: 35 00 1f f4 stxv vs0,48(r31) | |
10000dec: 30 00 bf 38 addi r5,r31,48 | |
10000df0: ae 21 65 7c stbx r3,r5,r4 | |
10000df4: 30 00 7f 80 lwz r3,48(r31) | |
10000df8: 60 00 7f 90 stw r3,96(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000dfc: 48 00 7f e8 ld r3,72(r31) | |
10000e00: 01 00 63 38 addi r3,r3,1 | |
10000e04: 48 00 7f f8 std r3,72(r31) | |
10000e08: 84 ff ff 4b b 10000d8c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x2c> | |
10000e0c: 60 00 7f 80 lwz r3,96(r31) | |
return v; | |
10000e10: 70 00 21 38 addi r1,r1,112 | |
10000e14: 10 00 01 e8 ld r0,16(r1) | |
10000e18: f8 ff e1 eb ld r31,-8(r1) | |
10000e1c: a6 03 08 7c mtlr r0 | |
10000e20: 20 00 80 4e blr | |
... | |
0000000010000e30 <pair<sint8x4>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000e30: e8 ff 61 f8 std r3,-24(r1) | |
10000e34: e0 ff 81 f8 std r4,-32(r1) | |
10000e38: e8 ff 61 e8 ld r3,-24(r1) | |
10000e3c: e0 ff 81 e8 ld r4,-32(r1) | |
10000e40: 64 17 84 78 rldicr r4,r4,2,61 | |
10000e44: 14 22 63 7c add r3,r3,r4 | |
10000e48: f0 ff 81 38 addi r4,r1,-16 | |
10000e4c: 00 00 63 80 lwz r3,0(r3) | |
10000e50: 00 00 64 90 stw r3,0(r4) | |
10000e54: f0 ff 61 80 lwz r3,-16(r1) | |
10000e58: 20 00 80 4e blr | |
... | |
10000e68: 00 00 00 60 nop | |
10000e6c: 00 00 00 60 nop | |
0000000010000e70 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)>: | |
T min(const T &a, const T &b) { | |
10000e70: 02 10 40 3c lis r2,4098 | |
10000e74: 00 7f 42 38 addi r2,r2,32512 | |
10000e78: a6 02 08 7c mflr r0 | |
10000e7c: f8 ff e1 fb std r31,-8(r1) | |
10000e80: 10 00 01 f8 std r0,16(r1) | |
10000e84: 91 ff 21 f8 stdu r1,-112(r1) | |
10000e88: 78 0b 3f 7c mr r31,r1 | |
10000e8c: 58 00 7f f8 std r3,88(r31) | |
10000e90: 50 00 9f f8 std r4,80(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000e94: 00 00 60 38 li r3,0 | |
10000e98: 48 00 7f f8 std r3,72(r31) | |
10000e9c: 48 00 7f e8 ld r3,72(r31) | |
10000ea0: 08 00 23 28 cmpldi r3,8 | |
10000ea4: 78 00 80 40 bge 10000f1c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0xac> | |
v.s[i] = min(a.s[i], b.s[i]); | |
10000ea8: 58 00 7f e8 ld r3,88(r31) | |
10000eac: 00 00 03 c8 lfd f0,0(r3) | |
10000eb0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000eb4: 48 00 7f e8 ld r3,72(r31) | |
10000eb8: 50 00 9f e8 ld r4,80(r31) | |
10000ebc: 00 00 04 c8 lfd f0,0(r4) | |
10000ec0: 51 02 60 f0 xxswapd vs35,vs0 | |
10000ec4: 0d 17 83 10 vextubrx r4,r3,v2 | |
10000ec8: 74 07 84 7c extsb r4,r4 | |
10000ecc: 0d 1f 63 10 vextubrx r3,r3,v3 | |
10000ed0: 74 07 63 7c extsb r3,r3 | |
10000ed4: 28 00 7f f8 std r3,40(r31) | |
10000ed8: 78 23 83 7c mr r3,r4 | |
10000edc: 28 00 9f e8 ld r4,40(r31) | |
10000ee0: 61 f7 ff 4b bl 10000640 <min(signed char, signed char)> | |
10000ee4: 48 00 9f e8 ld r4,72(r31) | |
10000ee8: 20 07 84 78 clrldi r4,r4,60 | |
10000eec: 60 00 1f c8 lfd f0,96(r31) | |
10000ef0: 51 02 40 f0 xxswapd vs34,vs0 | |
10000ef4: 3d 00 5f f4 stxv vs34,48(r31) | |
10000ef8: 30 00 bf 38 addi r5,r31,48 | |
10000efc: ae 21 65 7c stbx r3,r5,r4 | |
10000f00: 31 00 1f f4 lxv vs0,48(r31) | |
10000f04: 50 02 00 f0 xxswapd vs0,vs0 | |
10000f08: 60 00 1f d8 stfd f0,96(r31) | |
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
10000f0c: 48 00 7f e8 ld r3,72(r31) | |
10000f10: 01 00 63 38 addi r3,r3,1 | |
10000f14: 48 00 7f f8 std r3,72(r31) | |
10000f18: 84 ff ff 4b b 10000e9c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x2c> | |
return v; | |
10000f1c: 60 00 7f e8 ld r3,96(r31) | |
10000f20: 70 00 21 38 addi r1,r1,112 | |
10000f24: 10 00 01 e8 ld r0,16(r1) | |
10000f28: f8 ff e1 eb ld r31,-8(r1) | |
10000f2c: a6 03 08 7c mtlr r0 | |
10000f30: 20 00 80 4e blr | |
... | |
0000000010000f40 <pair<sint8x8>::operator[](unsigned long) const>: | |
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
10000f40: e8 ff 61 f8 std r3,-24(r1) | |
10000f44: e0 ff 81 f8 std r4,-32(r1) | |
10000f48: e8 ff 61 e8 ld r3,-24(r1) | |
10000f4c: e0 ff 81 e8 ld r4,-32(r1) | |
10000f50: 24 1f 84 78 rldicr r4,r4,3,60 | |
10000f54: 14 22 63 7c add r3,r3,r4 | |
10000f58: f0 ff 81 38 addi r4,r1,-16 | |
10000f5c: 00 00 63 e8 ld r3,0(r3) | |
10000f60: 00 00 64 f8 std r3,0(r4) | |
10000f64: f0 ff 61 e8 ld r3,-16(r1) | |
10000f68: 20 00 80 4e blr | |
... | |
10000f78: 00 00 00 60 nop | |
10000f7c: 00 00 00 60 nop | |
0000000010000f80 <__libc_csu_init>: | |
10000f80: 02 10 40 3c lis r2,4098 | |
10000f84: 00 7f 42 38 addi r2,r2,32512 | |
10000f88: a6 02 08 7c mflr r0 | |
10000f8c: d0 ff 41 fb std r26,-48(r1) | |
10000f90: d8 ff 61 fb std r27,-40(r1) | |
10000f94: 78 2b ba 7c mr r26,r5 | |
10000f98: e0 ff 81 fb std r28,-32(r1) | |
10000f9c: e8 ff a1 fb std r29,-24(r1) | |
10000fa0: ff ff a2 3f addis r29,r2,-1 | |
10000fa4: 78 1b 7c 7c mr r28,r3 | |
10000fa8: f0 ff c1 fb std r30,-16(r1) | |
10000fac: ff ff c2 3f addis r30,r2,-1 | |
10000fb0: c8 7d bd 3b addi r29,r29,32200 | |
10000fb4: c0 7d de 3b addi r30,r30,32192 | |
10000fb8: 78 23 9b 7c mr r27,r4 | |
10000fbc: 50 e8 be 7f subf r29,r30,r29 | |
10000fc0: 10 00 01 f8 std r0,16(r1) | |
10000fc4: b1 ff 21 f8 stdu r1,-80(r1) | |
10000fc8: a1 f4 ff 4b bl 10000468 <_init+0x8> | |
10000fcc: 00 00 00 60 nop | |
10000fd0: 75 1e bd 7f sradi. r29,r29,3 | |
10000fd4: 4c 00 82 41 beq 10001020 <__libc_csu_init+0xa0> | |
10000fd8: 18 00 41 f8 std r2,24(r1) | |
10000fdc: 48 00 e1 fb std r31,72(r1) | |
10000fe0: f8 ff de 3b addi r30,r30,-8 | |
10000fe4: 00 00 e0 3b li r31,0 | |
10000fe8: 00 00 00 60 nop | |
10000fec: 00 00 42 60 ori r2,r2,0 | |
10000ff0: 09 00 3e e9 ldu r9,8(r30) | |
10000ff4: 78 d3 45 7f mr r5,r26 | |
10000ff8: 78 db 64 7f mr r4,r27 | |
10000ffc: 78 e3 83 7f mr r3,r28 | |
10001000: 01 00 ff 3b addi r31,r31,1 | |
10001004: a6 03 29 7d mtctr r9 | |
10001008: 78 4b 2c 7d mr r12,r9 | |
1000100c: 21 04 80 4e bctrl | |
10001010: 18 00 41 e8 ld r2,24(r1) | |
10001014: 40 f8 bd 7f cmpld cr7,r29,r31 | |
10001018: d8 ff 9e 40 bne cr7,10000ff0 <__libc_csu_init+0x70> | |
1000101c: 48 00 e1 eb ld r31,72(r1) | |
10001020: 50 00 21 38 addi r1,r1,80 | |
10001024: 10 00 01 e8 ld r0,16(r1) | |
10001028: d0 ff 41 eb ld r26,-48(r1) | |
1000102c: d8 ff 61 eb ld r27,-40(r1) | |
10001030: e0 ff 81 eb ld r28,-32(r1) | |
10001034: e8 ff a1 eb ld r29,-24(r1) | |
10001038: f0 ff c1 eb ld r30,-16(r1) | |
1000103c: a6 03 08 7c mtlr r0 | |
10001040: 20 00 80 4e blr | |
10001044: 00 00 00 00 .long 0x0 | |
10001048: 00 00 00 01 .long 0x1000000 | |
1000104c: 80 06 00 00 .long 0x680 | |
0000000010001050 <__libc_csu_fini>: | |
10001050: 20 00 80 4e blr | |
... | |
10001060: 90 ef 01 00 .long 0x1ef90 | |
10001064: 00 00 00 00 .long 0x0 | |
0000000010001068 <__glink_PLTresolve>: | |
10001068: a6 02 08 7c mflr r0 | |
1000106c: 05 00 9f 42 bcl 20,4*cr7+so,10001070 <__glink_PLTresolve+0x8> | |
10001070: a6 02 68 7d mflr r11 | |
10001074: 18 00 41 f8 std r2,24(r1) | |
10001078: f0 ff 4b e8 ld r2,-16(r11) | |
1000107c: a6 03 08 7c mtlr r0 | |
10001080: 50 60 8b 7d subf r12,r11,r12 | |
10001084: 14 5a 62 7d add r11,r2,r11 | |
10001088: d0 ff 0c 38 addi r0,r12,-48 | |
1000108c: 00 00 8b e9 ld r12,0(r11) | |
10001090: 82 f0 00 78 rldicl r0,r0,62,2 | |
10001094: a6 03 89 7d mtctr r12 | |
10001098: 08 00 6b e9 ld r11,8(r11) | |
1000109c: 20 04 80 4e bctr | |
00000000100010a0 <__libc_start_main@plt>: | |
100010a0: c8 ff ff 4b b 10001068 <__glink_PLTresolve> | |
00000000100010a4 <__gmon_start__@plt>: | |
100010a4: c4 ff ff 4b b 10001068 <__glink_PLTresolve> | |
Disassembly of section .fini: | |
00000000100010a8 <_fini>: | |
100010a8: 02 10 40 3c lis r2,4098 | |
100010ac: 00 7f 42 38 addi r2,r2,32512 | |
100010b0: a6 02 08 7c mflr r0 | |
100010b4: 10 00 01 f8 std r0,16(r1) | |
100010b8: a1 ff 21 f8 stdu r1,-96(r1) | |
100010bc: 60 00 21 38 addi r1,r1,96 | |
100010c0: 10 00 01 e8 ld r0,16(r1) | |
100010c4: a6 03 08 7c mtlr r0 | |
100010c8: 20 00 80 4e blr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// size_t is declared by hand; presumably this keeps the reproducer free of standard headers.
typedef unsigned long size_t; | |
// GCC/Clang vector-extension types: vector_size(N) makes an N-byte vector,
// which for a signed char element type means N lanes.
typedef signed char int8x1 __attribute__((vector_size(1))); | |
typedef signed char int8x2 __attribute__((vector_size(2))); | |
typedef signed char int8x4 __attribute__((vector_size(4))); | |
typedef signed char int8x8 __attribute__((vector_size(8))); | |
typedef signed char int8x16 __attribute__((vector_size(16))); | |
// Struct wrappers that each hold a single vector in member `s`.
// The rest of the file reaches the lanes through `.s`.
struct sint8x1 { int8x1 s; }; | |
struct sint8x2 { int8x2 s; }; | |
struct sint8x4 { int8x4 s; }; | |
struct sint8x8 { int8x8 s; }; | |
struct sint8x16 { int8x16 s; }; | |
// Holds two values of type T. operator[] returns a[i] by value and performs no bounds check.
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } }; | |
// Scalar minimum of two signed chars; returns b when the two compare equal.
signed char min(signed char a, signed char b) { | |
return a < b ? a : b; | |
} | |
// Lane-wise minimum for any T with a subscriptable member `s`.
// Lane i of the result is min(a.s[i], b.s[i]).
template <class T> | |
T min(const T &a, const T &b) { | |
// v is not initialized here; the loop writes every lane before v is returned.
T v; | |
// sizeof(a.s)/sizeof(a.s[0]) is the number of lanes in the vector.
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++) | |
v.s[i] = min(a.s[i], b.s[i]); | |
return v; | |
} | |
// Splits a 2-lane vector into two 1-lane vectors using plain subscripts:
// a receives lane 0, b receives lane 1. Returned as the pair {a, b}.
pair<sint8x1> split_by(sint8x2 v) { | |
sint8x1 a, b; | |
a.s[0] = v.s[0]; | |
b.s[0] = v.s[1]; | |
return {{a, b}}; | |
} | |
// Splits a 4-lane vector into two 2-lane halves: a = lanes 0-1, b = lanes 2-3.
// Both shuffle operands are v.s, so index k selects lane k of v.
pair<sint8x2> split_by(sint8x4 v) { | |
sint8x2 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1); | |
b.s = __builtin_shufflevector(v.s, v.s, 2, 3); | |
return {{a, b}}; | |
} | |
// Splits an 8-lane vector into two 4-lane halves: a = lanes 0-3, b = lanes 4-7.
// Both shuffle operands are v.s, so index k selects lane k of v.
// NOTE(review): the notes accompanying this file name the code generated for
// this overload in bad.s as the incorrect one.
pair<sint8x4> split_by(sint8x8 v) { | |
sint8x4 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3); | |
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7); | |
return {{a, b}}; | |
} | |
// Splits a 16-lane vector into two 8-lane halves: a = lanes 0-7, b = lanes 8-15.
// Both shuffle operands are v.s, so index k selects lane k of v.
pair<sint8x8> split_by(sint8x16 v) { | |
sint8x8 a, b; | |
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7); | |
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15); | |
return {{a, b}}; | |
} | |
// Horizontal minimum of a 1-lane vector: the lane itself. Base case of the hmin overloads.
signed char hmin(sint8x1 v) { | |
return v.s[0]; | |
} | |
// Horizontal minimum of a 2-lane vector: split into two 1-lane halves,
// take their lane-wise min, then reduce the 1-lane result.
signed char hmin(sint8x2 v) { | |
auto a = split_by(v); | |
return hmin(min(a[0], a[1])); | |
} | |
// Horizontal minimum of a 4-lane vector: split into two 2-lane halves,
// take their lane-wise min, then reduce the 2-lane result.
signed char hmin(sint8x4 v) { | |
auto a = split_by(v); | |
return hmin(min(a[0], a[1])); | |
} | |
// Horizontal minimum of an 8-lane vector: split into two 4-lane halves,
// take their lane-wise min, then reduce the 4-lane result.
signed char hmin(sint8x8 v) { | |
auto a = split_by(v); | |
return hmin(min(a[0], a[1])); | |
} | |
// Horizontal minimum of a 16-lane vector: split into two 8-lane halves,
// take their lane-wise min, then reduce the 8-lane result.
signed char hmin(sint8x16 v) { | |
auto a = split_by(v); | |
return hmin(min(a[0], a[1])); | |
} | |
// Copies 16 bytes of constant data into a 16-lane vector and returns its
// horizontal minimum as the program's return value.
int main(void) { | |
// Every byte of these two values lies in 0x00..0x7f and at least one is 0x00,
// so the smallest signed lane is 0.
const long data[] = {0x00010102464c457f, 0}; | |
sint8x16 v; | |
// NOTE(review): the 16-byte copy assumes long is 8 bytes, so that data spans
// 16 bytes — confirm for the target.
__builtin_memcpy(&v.s, data, 16); | |
return hmin(v); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compiled and linked with
clang++ -g a.cc -mcpu=pwr9 -o hmin
(-O0). It should return 0, but the incorrect program returns -1 (shell exit code 255).
Comparing the two assembly outputs with `diff -U good.s bad.s` shows that the code for `split_by(sint8x8)` in `bad.s` is incorrect.