Skip to content

Instantly share code, notes, and snippets.

@MaskRay
Last active July 22, 2020 04:26
Show Gist options
  • Save MaskRay/ba7547f6a587176666ff77527cf12c4d to your computer and use it in GitHub Desktop.
Save MaskRay/ba7547f6a587176666ff77527cf12c4d to your computer and use it in GitHub Desktop.
PowerPC64 vectorization bug
bad: file format elf64-powerpcle
Disassembly of section .init:
0000000010000440 <00000024.plt_call.__gmon_start__>:
10000440: 18 00 41 f8 std r2,24(r1)
10000444: 18 81 82 e9 ld r12,-32488(r2)
10000448: a6 03 89 7d mtctr r12
1000044c: 20 04 80 4e bctr
...
0000000010000460 <_init>:
10000460: 02 10 40 3c lis r2,4098
10000464: 00 7f 42 38 addi r2,r2,32512
10000468: a6 02 08 7c mflr r0
1000046c: 10 00 01 f8 std r0,16(r1)
10000470: a1 ff 21 f8 stdu r1,-96(r1)
10000474: 00 00 00 60 nop
10000478: 08 80 02 e8 ld r0,-32760(r2)
1000047c: 00 00 a0 2f cmpdi cr7,r0,0
10000480: 0c 00 fe 41 beq+ cr7,1000048c <_init+0x2c>
10000484: bd ff ff 4b bl 10000440 <00000024.plt_call.__gmon_start__>
10000488: 18 00 41 e8 ld r2,24(r1)
1000048c: 60 00 21 38 addi r1,r1,96
10000490: 10 00 01 e8 ld r0,16(r1)
10000494: a6 03 08 7c mtlr r0
10000498: 20 00 80 4e blr
Disassembly of section .text:
00000000100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>:
100004a0: 18 00 41 f8 std r2,24(r1)
100004a4: 10 81 82 e9 ld r12,-32496(r2)
100004a8: a6 03 89 7d mtctr r12
100004ac: 20 04 80 4e bctr
...
00000000100004c0 <_start>:
100004c0: 02 10 40 3c lis r2,4098
100004c4: 00 7f 42 38 addi r2,r2,32512
100004c8: 78 0b 29 7c mr r9,r1
100004cc: e4 06 21 78 rldicr r1,r1,0,59
100004d0: 00 00 00 38 li r0,0
100004d4: 81 ff 21 f8 stdu r1,-128(r1)
100004d8: a6 03 08 7c mtlr r0
100004dc: 00 00 01 f8 std r0,0(r1)
100004e0: 10 80 02 e9 ld r8,-32752(r2)
100004e4: bc ff ff 4b b 100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>
100004e8: 00 00 00 60 nop
...
100004f4: 40 20 0c 00 .long 0xc2040
100004f8: 2c 00 00 00 .long 0x2c
100004fc: 06 00 5f 73 andi. r31,r26,6
10000500: 74 61 72 74 andis. r18,r3,24948
10000504: 00 00 00 60 nop
10000508: 00 00 00 60 nop
1000050c: 00 00 00 60 nop
0000000010000510 <deregister_tm_clones>:
10000510: 02 10 40 3c lis r2,4098
10000514: 00 7f 42 38 addi r2,r2,32512
10000518: 00 00 00 60 nop
1000051c: 00 00 00 60 nop
10000520: 30 81 62 38 addi r3,r2,-32464
10000524: 30 81 22 39 addi r9,r2,-32464
10000528: 00 18 a9 7f cmpd cr7,r9,r3
1000052c: 20 00 9e 4d beqlr cr7
10000530: 00 00 00 60 nop
10000534: 18 80 22 e9 ld r9,-32744(r2)
10000538: 00 00 a9 2f cmpdi cr7,r9,0
1000053c: 20 00 9e 4d beqlr cr7
10000540: a6 02 08 7c mflr r0
10000544: 78 4b 2c 7d mr r12,r9
10000548: a6 03 29 7d mtctr r9
1000054c: 10 00 01 f8 std r0,16(r1)
10000550: e1 ff 21 f8 stdu r1,-32(r1)
10000554: 18 00 41 f8 std r2,24(r1)
10000558: 21 04 80 4e bctrl
1000055c: 18 00 41 e8 ld r2,24(r1)
10000560: 20 00 21 38 addi r1,r1,32
10000564: 10 00 01 e8 ld r0,16(r1)
10000568: a6 03 08 7c mtlr r0
1000056c: 20 00 80 4e blr
0000000010000570 <register_tm_clones>:
10000570: 02 10 40 3c lis r2,4098
10000574: 00 7f 42 38 addi r2,r2,32512
10000578: 00 00 00 60 nop
1000057c: 00 00 00 60 nop
10000580: 30 81 62 38 addi r3,r2,-32464
10000584: 30 81 82 38 addi r4,r2,-32464
10000588: 50 20 83 7c subf r4,r3,r4
1000058c: 74 1e 84 7c sradi r4,r4,3
10000590: 74 0e 84 7c sradi r4,r4,1
10000594: 95 01 84 7c addze. r4,r4
10000598: 20 00 82 4d beqlr
1000059c: 00 00 00 60 nop
100005a0: 20 80 22 e9 ld r9,-32736(r2)
100005a4: 00 00 a9 2f cmpdi cr7,r9,0
100005a8: 20 00 9e 4d beqlr cr7
100005ac: a6 02 08 7c mflr r0
100005b0: 78 4b 2c 7d mr r12,r9
100005b4: a6 03 29 7d mtctr r9
100005b8: 10 00 01 f8 std r0,16(r1)
100005bc: e1 ff 21 f8 stdu r1,-32(r1)
100005c0: 18 00 41 f8 std r2,24(r1)
100005c4: 21 04 80 4e bctrl
100005c8: 18 00 41 e8 ld r2,24(r1)
100005cc: 20 00 21 38 addi r1,r1,32
100005d0: 10 00 01 e8 ld r0,16(r1)
100005d4: a6 03 08 7c mtlr r0
100005d8: 20 00 80 4e blr
100005dc: 00 00 42 60 ori r2,r2,0
00000000100005e0 <__do_global_dtors_aux>:
100005e0: 02 10 40 3c lis r2,4098
100005e4: 00 7f 42 38 addi r2,r2,32512
100005e8: f8 ff e1 fb std r31,-8(r1)
100005ec: 00 00 00 60 nop
100005f0: d1 ff 21 f8 stdu r1,-48(r1)
100005f4: 30 81 22 89 lbz r9,-32464(r2)
100005f8: 00 00 89 2f cmpwi cr7,r9,0
100005fc: 20 00 9e 40 bne cr7,1000061c <__do_global_dtors_aux+0x3c>
10000600: a6 02 08 7c mflr r0
10000604: 40 00 01 f8 std r0,64(r1)
10000608: 11 ff ff 4b bl 10000518 <deregister_tm_clones+0x8>
1000060c: 40 00 01 e8 ld r0,64(r1)
10000610: 01 00 20 39 li r9,1
10000614: 30 81 22 99 stb r9,-32464(r2)
10000618: a6 03 08 7c mtlr r0
1000061c: 30 00 21 38 addi r1,r1,48
10000620: f8 ff e1 eb ld r31,-8(r1)
10000624: 20 00 80 4e blr
10000628: 00 00 00 60 nop
1000062c: 00 00 42 60 ori r2,r2,0
0000000010000630 <frame_dummy>:
10000630: 02 10 40 3c lis r2,4098
10000634: 00 7f 42 38 addi r2,r2,32512
10000638: 40 ff ff 4b b 10000578 <register_tm_clones+0x8>
1000063c: 00 00 00 60 nop
0000000010000640 <min(signed char, signed char)>:
struct sint8x8 { int8x8 s; };
struct sint8x16 { int8x16 s; };
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
signed char min(signed char a, signed char b) {
10000640: f7 ff 61 98 stb r3,-9(r1)
10000644: f6 ff 81 98 stb r4,-10(r1)
return a < b ? a : b;
10000648: f7 ff 61 88 lbz r3,-9(r1)
1000064c: 74 07 63 7c extsb r3,r3
10000650: f6 ff 81 88 lbz r4,-10(r1)
10000654: 74 07 84 7c extsb r4,r4
10000658: 00 20 03 7c cmpw r3,r4
1000065c: 10 00 80 40 bge 1000066c <min(signed char, signed char)+0x2c>
10000660: f7 ff 61 88 lbz r3,-9(r1)
10000664: f0 ff 61 90 stw r3,-16(r1)
10000668: 0c 00 00 48 b 10000674 <min(signed char, signed char)+0x34>
1000066c: f6 ff 61 88 lbz r3,-10(r1)
10000670: f0 ff 61 90 stw r3,-16(r1)
10000674: f0 ff 61 80 lwz r3,-16(r1)
10000678: 74 07 63 7c extsb r3,r3
1000067c: 20 00 80 4e blr
...
1000068c: 00 00 00 60 nop
0000000010000690 <split_by(sint8x2)>:
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
v.s[i] = min(a.s[i], b.s[i]);
return v;
}
pair<sint8x1> split_by(sint8x2 v) {
10000690: f0 ff 81 38 addi r4,r1,-16
10000694: e8 ff 61 b0 sth r3,-24(r1)
sint8x1 a, b;
a.s[0] = v.s[0];
10000698: e8 ff 61 a0 lhz r3,-24(r1)
1000069c: e0 ff 61 98 stb r3,-32(r1)
100006a0: e8 ff 61 38 addi r3,r1,-24
b.s[0] = v.s[1];
100006a4: 5b 1e 40 7c lxsihzx vs34,0,r3
100006a8: 4c 12 43 10 vsplth v2,v2,3
100006ac: ec 11 42 10 vsldoi v2,v2,v2,7
100006b0: d8 ff 61 38 addi r3,r1,-40
100006b4: 1b 1f 40 7c stxsibx vs34,0,r3
return {{a, b}};
100006b8: 78 23 83 7c mr r3,r4
100006bc: e0 ff a1 38 addi r5,r1,-32
100006c0: 00 00 a5 88 lbz r5,0(r5)
100006c4: 00 00 a3 98 stb r5,0(r3)
100006c8: 01 00 64 38 addi r3,r4,1
100006cc: d8 ff 81 38 addi r4,r1,-40
100006d0: 00 00 84 88 lbz r4,0(r4)
100006d4: 00 00 83 98 stb r4,0(r3)
100006d8: f0 ff 61 a0 lhz r3,-16(r1)
100006dc: 20 00 80 4e blr
...
100006ec: 00 00 00 60 nop
00000000100006f0 <split_by(sint8x4)>:
}
pair<sint8x2> split_by(sint8x4 v) {
100006f0: f0 ff 81 38 addi r4,r1,-16
100006f4: e8 ff 61 90 stw r3,-24(r1)
sint8x2 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1);
100006f8: e8 ff 61 80 lwz r3,-24(r1)
100006fc: e0 ff 61 b0 sth r3,-32(r1)
10000700: e8 ff 61 38 addi r3,r1,-24
b.s = __builtin_shufflevector(v.s, v.s, 2, 3);
10000704: ee 1e 00 7c lfiwzx f0,0,r3
10000708: 81 05 40 f0 xscpsgndp vs34,vs0,vs0
1000070c: 4c 12 42 10 vsplth v2,v2,2
10000710: 2c 12 42 10 vsldoi v2,v2,v2,8
10000714: d8 ff 61 38 addi r3,r1,-40
10000718: 5b 1f 40 7c stxsihx vs34,0,r3
return {{a, b}};
1000071c: 78 23 83 7c mr r3,r4
10000720: e0 ff a1 38 addi r5,r1,-32
10000724: 00 00 a5 a0 lhz r5,0(r5)
10000728: 00 00 a3 b0 sth r5,0(r3)
1000072c: 02 00 64 38 addi r3,r4,2
10000730: d8 ff 81 38 addi r4,r1,-40
10000734: 00 00 84 a0 lhz r4,0(r4)
10000738: 00 00 83 b0 sth r4,0(r3)
1000073c: f0 ff 61 80 lwz r3,-16(r1)
10000740: 20 00 80 4e blr
...
0000000010000750 <split_by(sint8x8)>:
}
pair<sint8x4> split_by(sint8x8 v) {
10000750: f0 ff 81 38 addi r4,r1,-16
10000754: e8 ff 61 f8 std r3,-24(r1)
sint8x4 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3);
10000758: e8 ff 61 e8 ld r3,-24(r1)
1000075c: e0 ff 61 90 stw r3,-32(r1)
10000760: 0c 00 60 38 li r3,12
10000764: e8 ff a1 38 addi r5,r1,-24
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7);
10000768: d8 1a 05 7c lxvwsx vs0,r5,r3
1000076c: 10 02 00 f0 xxsldwi vs0,vs0,vs0,2
10000770: d8 ff 61 38 addi r3,r1,-40
10000774: ae 1f 00 7c stfiwx f0,0,r3
return {{a, b}};
10000778: 78 23 83 7c mr r3,r4
1000077c: e0 ff a1 38 addi r5,r1,-32
10000780: 00 00 a5 80 lwz r5,0(r5)
10000784: 00 00 a3 90 stw r5,0(r3)
10000788: 04 00 64 38 addi r3,r4,4
1000078c: d8 ff 81 38 addi r4,r1,-40
10000790: 00 00 84 80 lwz r4,0(r4)
10000794: 00 00 83 90 stw r4,0(r3)
10000798: f0 ff 61 e8 ld r3,-16(r1)
1000079c: 20 00 80 4e blr
...
100007ac: 00 00 00 60 nop
00000000100007b0 <split_by(sint8x16)>:
}
pair<sint8x8> split_by(sint8x16 v) {
100007b0: dd ff 41 f4 stxv vs34,-48(r1)
sint8x8 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7);
100007b4: d0 ff 61 e8 ld r3,-48(r1)
100007b8: c8 ff 61 f8 std r3,-56(r1)
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15);
100007bc: d8 ff 61 38 addi r3,r1,-40
100007c0: 99 1a 40 7c lxvdsx vs34,0,r3
100007c4: 56 12 02 f0 xxswapd vs0,vs34
100007c8: c0 ff 01 d8 stfd f0,-64(r1)
return {{a, b}};
100007cc: c8 ff 61 e8 ld r3,-56(r1)
100007d0: e8 ff 61 f8 std r3,-24(r1)
100007d4: c0 ff 61 e8 ld r3,-64(r1)
100007d8: f0 ff 61 f8 std r3,-16(r1)
100007dc: e8 ff 61 e8 ld r3,-24(r1)
100007e0: f0 ff 81 e8 ld r4,-16(r1)
100007e4: 20 00 80 4e blr
...
100007f4: 00 00 00 60 nop
100007f8: 00 00 00 60 nop
100007fc: 00 00 00 60 nop
0000000010000800 <hmin(sint8x1)>:
}
signed char hmin(sint8x1 v) {
10000800: f0 ff 61 98 stb r3,-16(r1)
return v.s[0];
10000804: f0 ff 61 88 lbz r3,-16(r1)
10000808: 74 07 63 7c extsb r3,r3
1000080c: 20 00 80 4e blr
...
1000081c: 00 00 00 60 nop
0000000010000820 <hmin(sint8x2)>:
}
signed char hmin(sint8x2 v) {
10000820: 02 10 40 3c lis r2,4098
10000824: 00 7f 42 38 addi r2,r2,32512
10000828: a6 02 08 7c mflr r0
1000082c: f8 ff e1 fb std r31,-8(r1)
10000830: 10 00 01 f8 std r0,16(r1)
10000834: 61 ff 21 f8 stdu r1,-160(r1)
10000838: 78 0b 3f 7c mr r31,r1
1000083c: 90 00 7f b0 sth r3,144(r31)
auto a = split_by(v);
10000840: 80 00 7f 38 addi r3,r31,128
10000844: 90 00 9f 38 addi r4,r31,144
10000848: 00 00 84 a0 lhz r4,0(r4)
1000084c: 00 00 83 b0 sth r4,0(r3)
10000850: 80 00 7f a0 lhz r3,128(r31)
10000854: 3d fe ff 4b bl 10000690 <split_by(sint8x2)>
10000858: 00 00 00 60 nop
1000085c: 88 00 7f b0 sth r3,136(r31)
return hmin(min(a[0], a[1]));
10000860: 88 00 7f 38 addi r3,r31,136
10000864: 00 00 80 38 li r4,0
10000868: 99 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const>
1000086c: 00 00 00 60 nop
10000870: 70 00 7f 98 stb r3,112(r31)
10000874: 88 00 7f 38 addi r3,r31,136
10000878: 01 00 80 38 li r4,1
1000087c: 85 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const>
10000880: 00 00 00 60 nop
10000884: 68 00 7f 98 stb r3,104(r31)
10000888: 70 00 7f 38 addi r3,r31,112
1000088c: 68 00 9f 38 addi r4,r31,104
10000890: e9 02 00 48 bl 10000b78 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x8>
10000894: 00 00 00 60 nop
10000898: 78 00 7f 98 stb r3,120(r31)
1000089c: 78 00 7f 88 lbz r3,120(r31)
100008a0: 61 ff ff 4b bl 10000800 <hmin(sint8x1)>
100008a4: 00 00 00 60 nop
100008a8: 74 07 63 7c extsb r3,r3
100008ac: a0 00 21 38 addi r1,r1,160
100008b0: 10 00 01 e8 ld r0,16(r1)
100008b4: f8 ff e1 eb ld r31,-8(r1)
100008b8: a6 03 08 7c mtlr r0
100008bc: 20 00 80 4e blr
...
100008cc: 00 00 00 60 nop
00000000100008d0 <hmin(sint8x4)>:
}
signed char hmin(sint8x4 v) {
100008d0: 02 10 40 3c lis r2,4098
100008d4: 00 7f 42 38 addi r2,r2,32512
100008d8: a6 02 08 7c mflr r0
100008dc: f8 ff e1 fb std r31,-8(r1)
100008e0: 10 00 01 f8 std r0,16(r1)
100008e4: 61 ff 21 f8 stdu r1,-160(r1)
100008e8: 78 0b 3f 7c mr r31,r1
100008ec: 90 00 7f 90 stw r3,144(r31)
auto a = split_by(v);
100008f0: 80 00 7f 38 addi r3,r31,128
100008f4: 90 00 9f 38 addi r4,r31,144
100008f8: 00 00 84 80 lwz r4,0(r4)
100008fc: 00 00 83 90 stw r4,0(r3)
10000900: 80 00 7f 80 lwz r3,128(r31)
10000904: ed fd ff 4b bl 100006f0 <split_by(sint8x4)>
10000908: 00 00 00 60 nop
1000090c: 88 00 7f 90 stw r3,136(r31)
return hmin(min(a[0], a[1]));
10000910: 88 00 7f 38 addi r3,r31,136
10000914: 00 00 80 38 li r4,0
10000918: 09 04 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const>
1000091c: 00 00 00 60 nop
10000920: 70 00 7f b0 sth r3,112(r31)
10000924: 88 00 7f 38 addi r3,r31,136
10000928: 01 00 80 38 li r4,1
1000092c: f5 03 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const>
10000930: 00 00 00 60 nop
10000934: 68 00 7f b0 sth r3,104(r31)
10000938: 70 00 7f 38 addi r3,r31,112
1000093c: 68 00 9f 38 addi r4,r31,104
10000940: 09 03 00 48 bl 10000c48 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x8>
10000944: 00 00 00 60 nop
10000948: 78 00 7f b0 sth r3,120(r31)
1000094c: 78 00 7f a0 lhz r3,120(r31)
10000950: d9 fe ff 4b bl 10000828 <hmin(sint8x2)+0x8>
10000954: 00 00 00 60 nop
10000958: 74 07 63 7c extsb r3,r3
1000095c: a0 00 21 38 addi r1,r1,160
10000960: 10 00 01 e8 ld r0,16(r1)
10000964: f8 ff e1 eb ld r31,-8(r1)
10000968: a6 03 08 7c mtlr r0
1000096c: 20 00 80 4e blr
...
1000097c: 00 00 00 60 nop
0000000010000980 <hmin(sint8x8)>:
}
signed char hmin(sint8x8 v) {
10000980: 02 10 40 3c lis r2,4098
10000984: 00 7f 42 38 addi r2,r2,32512
10000988: a6 02 08 7c mflr r0
1000098c: f8 ff e1 fb std r31,-8(r1)
10000990: 10 00 01 f8 std r0,16(r1)
10000994: 61 ff 21 f8 stdu r1,-160(r1)
10000998: 78 0b 3f 7c mr r31,r1
1000099c: 90 00 7f f8 std r3,144(r31)
auto a = split_by(v);
100009a0: 80 00 7f 38 addi r3,r31,128
100009a4: 90 00 9f 38 addi r4,r31,144
100009a8: 00 00 84 e8 ld r4,0(r4)
100009ac: 00 00 83 f8 std r4,0(r3)
100009b0: 80 00 7f e8 ld r3,128(r31)
100009b4: 9d fd ff 4b bl 10000750 <split_by(sint8x8)>
100009b8: 00 00 00 60 nop
100009bc: 88 00 7f f8 std r3,136(r31)
return hmin(min(a[0], a[1]));
100009c0: 88 00 7f 38 addi r3,r31,136
100009c4: 00 00 80 38 li r4,0
100009c8: 69 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const>
100009cc: 00 00 00 60 nop
100009d0: 70 00 7f 90 stw r3,112(r31)
100009d4: 88 00 7f 38 addi r3,r31,136
100009d8: 01 00 80 38 li r4,1
100009dc: 55 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const>
100009e0: 00 00 00 60 nop
100009e4: 68 00 7f 90 stw r3,104(r31)
100009e8: 70 00 7f 38 addi r3,r31,112
100009ec: 68 00 9f 38 addi r4,r31,104
100009f0: 79 03 00 48 bl 10000d68 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x8>
100009f4: 00 00 00 60 nop
100009f8: 78 00 7f 90 stw r3,120(r31)
100009fc: 78 00 7f 80 lwz r3,120(r31)
10000a00: d9 fe ff 4b bl 100008d8 <hmin(sint8x4)+0x8>
10000a04: 00 00 00 60 nop
10000a08: 74 07 63 7c extsb r3,r3
10000a0c: a0 00 21 38 addi r1,r1,160
10000a10: 10 00 01 e8 ld r0,16(r1)
10000a14: f8 ff e1 eb ld r31,-8(r1)
10000a18: a6 03 08 7c mtlr r0
10000a1c: 20 00 80 4e blr
...
10000a2c: 00 00 00 60 nop
0000000010000a30 <hmin(sint8x16)>:
}
signed char hmin(sint8x16 v) {
10000a30: 02 10 40 3c lis r2,4098
10000a34: 00 7f 42 38 addi r2,r2,32512
10000a38: a6 02 08 7c mflr r0
10000a3c: f8 ff e1 fb std r31,-8(r1)
10000a40: 10 00 01 f8 std r0,16(r1)
10000a44: 41 ff 21 f8 stdu r1,-192(r1)
10000a48: 78 0b 3f 7c mr r31,r1
10000a4c: ad 00 5f f4 stxv vs34,160(r31)
auto a = split_by(v);
10000a50: a8 00 7f e8 ld r3,168(r31)
10000a54: 88 00 7f f8 std r3,136(r31)
10000a58: a0 00 7f e8 ld r3,160(r31)
10000a5c: 80 00 7f f8 std r3,128(r31)
10000a60: 81 00 1f f4 lxv vs0,128(r31)
10000a64: 91 04 40 f0 xxlor vs34,vs0,vs0
10000a68: 49 fd ff 4b bl 100007b0 <split_by(sint8x16)>
10000a6c: 90 00 7f f8 std r3,144(r31)
10000a70: 98 00 9f f8 std r4,152(r31)
return hmin(min(a[0], a[1]));
10000a74: 90 00 7f 38 addi r3,r31,144
10000a78: 00 00 80 38 li r4,0
10000a7c: c5 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const>
10000a80: 00 00 00 60 nop
10000a84: 70 00 7f f8 std r3,112(r31)
10000a88: 90 00 7f 38 addi r3,r31,144
10000a8c: 01 00 80 38 li r4,1
10000a90: b1 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const>
10000a94: 00 00 00 60 nop
10000a98: 68 00 7f f8 std r3,104(r31)
10000a9c: 70 00 7f 38 addi r3,r31,112
10000aa0: 68 00 9f 38 addi r4,r31,104
10000aa4: d5 03 00 48 bl 10000e78 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x8>
10000aa8: 00 00 00 60 nop
10000aac: 78 00 7f f8 std r3,120(r31)
10000ab0: 78 00 7f e8 ld r3,120(r31)
10000ab4: d5 fe ff 4b bl 10000988 <hmin(sint8x8)+0x8>
10000ab8: 00 00 00 60 nop
10000abc: 74 07 63 7c extsb r3,r3
10000ac0: c0 00 21 38 addi r1,r1,192
10000ac4: 10 00 01 e8 ld r0,16(r1)
10000ac8: f8 ff e1 eb ld r31,-8(r1)
10000acc: a6 03 08 7c mtlr r0
10000ad0: 20 00 80 4e blr
...
0000000010000ae0 <main>:
}
int main(void) {
10000ae0: 02 10 40 3c lis r2,4098
10000ae4: 00 7f 42 38 addi r2,r2,32512
10000ae8: a6 02 08 7c mflr r0
10000aec: f8 ff e1 fb std r31,-8(r1)
10000af0: 10 00 01 f8 std r0,16(r1)
10000af4: a1 ff 21 f8 stdu r1,-96(r1)
10000af8: 78 0b 3f 7c mr r31,r1
10000afc: 00 00 60 38 li r3,0
10000b00: 54 00 7f 90 stw r3,84(r31)
const long data[] = {0x00010102464c457f, 0};
10000b04: fe ff 62 3c addis r3,r2,-2
10000b08: f8 91 63 38 addi r3,r3,-28168
10000b0c: 08 00 83 e8 ld r4,8(r3)
10000b10: 48 00 9f f8 std r4,72(r31)
10000b14: 00 00 63 e8 ld r3,0(r3)
10000b18: 40 00 7f f8 std r3,64(r31)
sint8x16 v;
__builtin_memcpy(&v.s, data, 16);
10000b1c: 48 00 7f e8 ld r3,72(r31)
10000b20: 38 00 7f f8 std r3,56(r31)
10000b24: 40 00 7f e8 ld r3,64(r31)
10000b28: 30 00 7f f8 std r3,48(r31)
return hmin(v);
10000b2c: 38 00 7f e8 ld r3,56(r31)
10000b30: 28 00 7f f8 std r3,40(r31)
10000b34: 30 00 7f e8 ld r3,48(r31)
10000b38: 20 00 7f f8 std r3,32(r31)
10000b3c: 21 00 1f f4 lxv vs0,32(r31)
10000b40: 91 04 40 f0 xxlor vs34,vs0,vs0
10000b44: f5 fe ff 4b bl 10000a38 <hmin(sint8x16)+0x8>
10000b48: 74 07 63 7c extsb r3,r3
10000b4c: b4 07 63 7c extsw r3,r3
10000b50: 60 00 21 38 addi r1,r1,96
10000b54: 10 00 01 e8 ld r0,16(r1)
10000b58: f8 ff e1 eb ld r31,-8(r1)
10000b5c: a6 03 08 7c mtlr r0
10000b60: 20 00 80 4e blr
...
0000000010000b70 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)>:
T min(const T &a, const T &b) {
10000b70: 02 10 40 3c lis r2,4098
10000b74: 00 7f 42 38 addi r2,r2,32512
10000b78: a6 02 08 7c mflr r0
10000b7c: f8 ff e1 fb std r31,-8(r1)
10000b80: 10 00 01 f8 std r0,16(r1)
10000b84: b1 ff 21 f8 stdu r1,-80(r1)
10000b88: 78 0b 3f 7c mr r31,r1
10000b8c: 38 00 7f f8 std r3,56(r31)
10000b90: 30 00 9f f8 std r4,48(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000b94: 00 00 60 38 li r3,0
10000b98: 28 00 7f f8 std r3,40(r31)
10000b9c: 28 00 7f e8 ld r3,40(r31)
10000ba0: 01 00 23 28 cmpldi r3,1
10000ba4: 34 00 80 40 bge 10000bd8 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x68>
v.s[i] = min(a.s[i], b.s[i]);
10000ba8: 38 00 7f e8 ld r3,56(r31)
10000bac: 30 00 9f e8 ld r4,48(r31)
10000bb0: 00 00 84 88 lbz r4,0(r4)
10000bb4: 00 00 63 88 lbz r3,0(r3)
10000bb8: 74 07 63 7c extsb r3,r3
10000bbc: 74 07 84 7c extsb r4,r4
10000bc0: 81 fa ff 4b bl 10000640 <min(signed char, signed char)>
10000bc4: 40 00 7f 98 stb r3,64(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000bc8: 28 00 7f e8 ld r3,40(r31)
10000bcc: 01 00 63 38 addi r3,r3,1
10000bd0: 28 00 7f f8 std r3,40(r31)
10000bd4: c8 ff ff 4b b 10000b9c <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x2c>
10000bd8: 40 00 7f 88 lbz r3,64(r31)
return v;
10000bdc: 50 00 21 38 addi r1,r1,80
10000be0: 10 00 01 e8 ld r0,16(r1)
10000be4: f8 ff e1 eb ld r31,-8(r1)
10000be8: a6 03 08 7c mtlr r0
10000bec: 20 00 80 4e blr
...
10000bfc: 00 00 00 60 nop
0000000010000c00 <pair<sint8x1>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000c00: e8 ff 61 f8 std r3,-24(r1)
10000c04: e0 ff 81 f8 std r4,-32(r1)
10000c08: e8 ff 61 e8 ld r3,-24(r1)
10000c0c: e0 ff 81 e8 ld r4,-32(r1)
10000c10: 14 22 63 7c add r3,r3,r4
10000c14: f0 ff 81 38 addi r4,r1,-16
10000c18: 00 00 63 88 lbz r3,0(r3)
10000c1c: 00 00 64 98 stb r3,0(r4)
10000c20: f0 ff 61 88 lbz r3,-16(r1)
10000c24: 20 00 80 4e blr
...
10000c34: 00 00 00 60 nop
10000c38: 00 00 00 60 nop
10000c3c: 00 00 00 60 nop
0000000010000c40 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)>:
T min(const T &a, const T &b) {
10000c40: 02 10 40 3c lis r2,4098
10000c44: 00 7f 42 38 addi r2,r2,32512
10000c48: a6 02 08 7c mflr r0
10000c4c: f8 ff e1 fb std r31,-8(r1)
10000c50: 10 00 01 f8 std r0,16(r1)
10000c54: 91 ff 21 f8 stdu r1,-112(r1)
10000c58: 78 0b 3f 7c mr r31,r1
10000c5c: 58 00 7f f8 std r3,88(r31)
10000c60: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000c64: 00 00 60 38 li r3,0
10000c68: 48 00 7f f8 std r3,72(r31)
10000c6c: 48 00 7f e8 ld r3,72(r31)
10000c70: 02 00 23 28 cmpldi r3,2
10000c74: 7c 00 80 40 bge 10000cf0 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0xb0>
v.s[i] = min(a.s[i], b.s[i]);
10000c78: 58 00 7f e8 ld r3,88(r31)
10000c7c: 5b 1e 40 7c lxsihzx vs34,0,r3
10000c80: 4c 12 43 10 vsplth v2,v2,3
10000c84: 48 00 7f e8 ld r3,72(r31)
10000c88: 50 00 9f e8 ld r4,80(r31)
10000c8c: 5b 26 60 7c lxsihzx vs35,0,r4
10000c90: 4c 1a 63 10 vsplth v3,v3,3
10000c94: 0d 17 83 10 vextubrx r4,r3,v2
10000c98: 74 07 84 7c extsb r4,r4
10000c9c: 0d 1f 63 10 vextubrx r3,r3,v3
10000ca0: 74 07 63 7c extsb r3,r3
10000ca4: 28 00 7f f8 std r3,40(r31)
10000ca8: 78 23 83 7c mr r3,r4
10000cac: 28 00 9f e8 ld r4,40(r31)
10000cb0: 91 f9 ff 4b bl 10000640 <min(signed char, signed char)>
10000cb4: 48 00 9f e8 ld r4,72(r31)
10000cb8: 20 07 84 78 clrldi r4,r4,60
10000cbc: 60 00 bf 38 addi r5,r31,96
10000cc0: 5b 2e 40 7c lxsihzx vs34,0,r5
10000cc4: 4c 12 43 10 vsplth v2,v2,3
10000cc8: 3d 00 5f f4 stxv vs34,48(r31)
10000ccc: 30 00 df 38 addi r6,r31,48
10000cd0: ae 21 66 7c stbx r3,r6,r4
10000cd4: 39 00 5f f4 lxv vs34,48(r31)
10000cd8: 2c 12 42 10 vsldoi v2,v2,v2,8
10000cdc: 5b 2f 40 7c stxsihx vs34,0,r5
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000ce0: 48 00 7f e8 ld r3,72(r31)
10000ce4: 01 00 63 38 addi r3,r3,1
10000ce8: 48 00 7f f8 std r3,72(r31)
10000cec: 80 ff ff 4b b 10000c6c <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x2c>
10000cf0: 60 00 7f a0 lhz r3,96(r31)
return v;
10000cf4: 70 00 21 38 addi r1,r1,112
10000cf8: 10 00 01 e8 ld r0,16(r1)
10000cfc: f8 ff e1 eb ld r31,-8(r1)
10000d00: a6 03 08 7c mtlr r0
10000d04: 20 00 80 4e blr
...
10000d14: 00 00 00 60 nop
10000d18: 00 00 00 60 nop
10000d1c: 00 00 00 60 nop
0000000010000d20 <pair<sint8x2>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000d20: e8 ff 61 f8 std r3,-24(r1)
10000d24: e0 ff 81 f8 std r4,-32(r1)
10000d28: e8 ff 61 e8 ld r3,-24(r1)
10000d2c: e0 ff 81 e8 ld r4,-32(r1)
10000d30: a4 0f 84 78 rldicr r4,r4,1,62
10000d34: 14 22 63 7c add r3,r3,r4
10000d38: f0 ff 81 38 addi r4,r1,-16
10000d3c: 00 00 63 a0 lhz r3,0(r3)
10000d40: 00 00 64 b0 sth r3,0(r4)
10000d44: f0 ff 61 a0 lhz r3,-16(r1)
10000d48: 20 00 80 4e blr
...
10000d58: 00 00 00 60 nop
10000d5c: 00 00 00 60 nop
0000000010000d60 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)>:
T min(const T &a, const T &b) {
10000d60: 02 10 40 3c lis r2,4098
10000d64: 00 7f 42 38 addi r2,r2,32512
10000d68: a6 02 08 7c mflr r0
10000d6c: f8 ff e1 fb std r31,-8(r1)
10000d70: 10 00 01 f8 std r0,16(r1)
10000d74: 91 ff 21 f8 stdu r1,-112(r1)
10000d78: 78 0b 3f 7c mr r31,r1
10000d7c: 58 00 7f f8 std r3,88(r31)
10000d80: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000d84: 00 00 60 38 li r3,0
10000d88: 48 00 7f f8 std r3,72(r31)
10000d8c: 48 00 7f e8 ld r3,72(r31)
10000d90: 04 00 23 28 cmpldi r3,4
10000d94: 78 00 80 40 bge 10000e0c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0xac>
v.s[i] = min(a.s[i], b.s[i]);
10000d98: 58 00 7f e8 ld r3,88(r31)
10000d9c: ee 1e 00 7c lfiwzx f0,0,r3
10000da0: 51 02 40 f0 xxswapd vs34,vs0
10000da4: 48 00 7f e8 ld r3,72(r31)
10000da8: 50 00 9f e8 ld r4,80(r31)
10000dac: ee 26 00 7c lfiwzx f0,0,r4
10000db0: 51 02 60 f0 xxswapd vs35,vs0
10000db4: 0d 17 83 10 vextubrx r4,r3,v2
10000db8: 74 07 84 7c extsb r4,r4
10000dbc: 0d 1f 63 10 vextubrx r3,r3,v3
10000dc0: 74 07 63 7c extsb r3,r3
10000dc4: 28 00 7f f8 std r3,40(r31)
10000dc8: 78 23 83 7c mr r3,r4
10000dcc: 28 00 9f e8 ld r4,40(r31)
10000dd0: 71 f8 ff 4b bl 10000640 <min(signed char, signed char)>
10000dd4: 48 00 9f e8 ld r4,72(r31)
10000dd8: 20 07 84 78 clrldi r4,r4,60
10000ddc: 60 00 bf 38 addi r5,r31,96
10000de0: ee 2e 00 7c lfiwzx f0,0,r5
10000de4: 50 02 00 f0 xxswapd vs0,vs0
10000de8: 35 00 1f f4 stxv vs0,48(r31)
10000dec: 30 00 bf 38 addi r5,r31,48
10000df0: ae 21 65 7c stbx r3,r5,r4
10000df4: 30 00 7f 80 lwz r3,48(r31)
10000df8: 60 00 7f 90 stw r3,96(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000dfc: 48 00 7f e8 ld r3,72(r31)
10000e00: 01 00 63 38 addi r3,r3,1
10000e04: 48 00 7f f8 std r3,72(r31)
10000e08: 84 ff ff 4b b 10000d8c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x2c>
10000e0c: 60 00 7f 80 lwz r3,96(r31)
return v;
10000e10: 70 00 21 38 addi r1,r1,112
10000e14: 10 00 01 e8 ld r0,16(r1)
10000e18: f8 ff e1 eb ld r31,-8(r1)
10000e1c: a6 03 08 7c mtlr r0
10000e20: 20 00 80 4e blr
...
0000000010000e30 <pair<sint8x4>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000e30: e8 ff 61 f8 std r3,-24(r1)
10000e34: e0 ff 81 f8 std r4,-32(r1)
10000e38: e8 ff 61 e8 ld r3,-24(r1)
10000e3c: e0 ff 81 e8 ld r4,-32(r1)
10000e40: 64 17 84 78 rldicr r4,r4,2,61
10000e44: 14 22 63 7c add r3,r3,r4
10000e48: f0 ff 81 38 addi r4,r1,-16
10000e4c: 00 00 63 80 lwz r3,0(r3)
10000e50: 00 00 64 90 stw r3,0(r4)
10000e54: f0 ff 61 80 lwz r3,-16(r1)
10000e58: 20 00 80 4e blr
...
10000e68: 00 00 00 60 nop
10000e6c: 00 00 00 60 nop
0000000010000e70 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)>:
T min(const T &a, const T &b) {
10000e70: 02 10 40 3c lis r2,4098
10000e74: 00 7f 42 38 addi r2,r2,32512
10000e78: a6 02 08 7c mflr r0
10000e7c: f8 ff e1 fb std r31,-8(r1)
10000e80: 10 00 01 f8 std r0,16(r1)
10000e84: 91 ff 21 f8 stdu r1,-112(r1)
10000e88: 78 0b 3f 7c mr r31,r1
10000e8c: 58 00 7f f8 std r3,88(r31)
10000e90: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000e94: 00 00 60 38 li r3,0
10000e98: 48 00 7f f8 std r3,72(r31)
10000e9c: 48 00 7f e8 ld r3,72(r31)
10000ea0: 08 00 23 28 cmpldi r3,8
10000ea4: 78 00 80 40 bge 10000f1c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0xac>
v.s[i] = min(a.s[i], b.s[i]);
10000ea8: 58 00 7f e8 ld r3,88(r31)
10000eac: 00 00 03 c8 lfd f0,0(r3)
10000eb0: 51 02 40 f0 xxswapd vs34,vs0
10000eb4: 48 00 7f e8 ld r3,72(r31)
10000eb8: 50 00 9f e8 ld r4,80(r31)
10000ebc: 00 00 04 c8 lfd f0,0(r4)
10000ec0: 51 02 60 f0 xxswapd vs35,vs0
10000ec4: 0d 17 83 10 vextubrx r4,r3,v2
10000ec8: 74 07 84 7c extsb r4,r4
10000ecc: 0d 1f 63 10 vextubrx r3,r3,v3
10000ed0: 74 07 63 7c extsb r3,r3
10000ed4: 28 00 7f f8 std r3,40(r31)
10000ed8: 78 23 83 7c mr r3,r4
10000edc: 28 00 9f e8 ld r4,40(r31)
10000ee0: 61 f7 ff 4b bl 10000640 <min(signed char, signed char)>
10000ee4: 48 00 9f e8 ld r4,72(r31)
10000ee8: 20 07 84 78 clrldi r4,r4,60
10000eec: 60 00 1f c8 lfd f0,96(r31)
10000ef0: 51 02 40 f0 xxswapd vs34,vs0
10000ef4: 3d 00 5f f4 stxv vs34,48(r31)
10000ef8: 30 00 bf 38 addi r5,r31,48
10000efc: ae 21 65 7c stbx r3,r5,r4
10000f00: 31 00 1f f4 lxv vs0,48(r31)
10000f04: 50 02 00 f0 xxswapd vs0,vs0
10000f08: 60 00 1f d8 stfd f0,96(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000f0c: 48 00 7f e8 ld r3,72(r31)
10000f10: 01 00 63 38 addi r3,r3,1
10000f14: 48 00 7f f8 std r3,72(r31)
10000f18: 84 ff ff 4b b 10000e9c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x2c>
return v;
10000f1c: 60 00 7f e8 ld r3,96(r31)
10000f20: 70 00 21 38 addi r1,r1,112
10000f24: 10 00 01 e8 ld r0,16(r1)
10000f28: f8 ff e1 eb ld r31,-8(r1)
10000f2c: a6 03 08 7c mtlr r0
10000f30: 20 00 80 4e blr
...
0000000010000f40 <pair<sint8x8>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000f40: e8 ff 61 f8 std r3,-24(r1)
10000f44: e0 ff 81 f8 std r4,-32(r1)
10000f48: e8 ff 61 e8 ld r3,-24(r1)
10000f4c: e0 ff 81 e8 ld r4,-32(r1)
10000f50: 24 1f 84 78 rldicr r4,r4,3,60
10000f54: 14 22 63 7c add r3,r3,r4
10000f58: f0 ff 81 38 addi r4,r1,-16
10000f5c: 00 00 63 e8 ld r3,0(r3)
10000f60: 00 00 64 f8 std r3,0(r4)
10000f64: f0 ff 61 e8 ld r3,-16(r1)
10000f68: 20 00 80 4e blr
...
10000f78: 00 00 00 60 nop
10000f7c: 00 00 00 60 nop
0000000010000f80 <__libc_csu_init>:
10000f80: 02 10 40 3c lis r2,4098
10000f84: 00 7f 42 38 addi r2,r2,32512
10000f88: a6 02 08 7c mflr r0
10000f8c: d0 ff 41 fb std r26,-48(r1)
10000f90: d8 ff 61 fb std r27,-40(r1)
10000f94: 78 2b ba 7c mr r26,r5
10000f98: e0 ff 81 fb std r28,-32(r1)
10000f9c: e8 ff a1 fb std r29,-24(r1)
10000fa0: ff ff a2 3f addis r29,r2,-1
10000fa4: 78 1b 7c 7c mr r28,r3
10000fa8: f0 ff c1 fb std r30,-16(r1)
10000fac: ff ff c2 3f addis r30,r2,-1
10000fb0: c8 7d bd 3b addi r29,r29,32200
10000fb4: c0 7d de 3b addi r30,r30,32192
10000fb8: 78 23 9b 7c mr r27,r4
10000fbc: 50 e8 be 7f subf r29,r30,r29
10000fc0: 10 00 01 f8 std r0,16(r1)
10000fc4: b1 ff 21 f8 stdu r1,-80(r1)
10000fc8: a1 f4 ff 4b bl 10000468 <_init+0x8>
10000fcc: 00 00 00 60 nop
10000fd0: 75 1e bd 7f sradi. r29,r29,3
10000fd4: 4c 00 82 41 beq 10001020 <__libc_csu_init+0xa0>
10000fd8: 18 00 41 f8 std r2,24(r1)
10000fdc: 48 00 e1 fb std r31,72(r1)
10000fe0: f8 ff de 3b addi r30,r30,-8
10000fe4: 00 00 e0 3b li r31,0
10000fe8: 00 00 00 60 nop
10000fec: 00 00 42 60 ori r2,r2,0
10000ff0: 09 00 3e e9 ldu r9,8(r30)
10000ff4: 78 d3 45 7f mr r5,r26
10000ff8: 78 db 64 7f mr r4,r27
10000ffc: 78 e3 83 7f mr r3,r28
10001000: 01 00 ff 3b addi r31,r31,1
10001004: a6 03 29 7d mtctr r9
10001008: 78 4b 2c 7d mr r12,r9
1000100c: 21 04 80 4e bctrl
10001010: 18 00 41 e8 ld r2,24(r1)
10001014: 40 f8 bd 7f cmpld cr7,r29,r31
10001018: d8 ff 9e 40 bne cr7,10000ff0 <__libc_csu_init+0x70>
1000101c: 48 00 e1 eb ld r31,72(r1)
10001020: 50 00 21 38 addi r1,r1,80
10001024: 10 00 01 e8 ld r0,16(r1)
10001028: d0 ff 41 eb ld r26,-48(r1)
1000102c: d8 ff 61 eb ld r27,-40(r1)
10001030: e0 ff 81 eb ld r28,-32(r1)
10001034: e8 ff a1 eb ld r29,-24(r1)
10001038: f0 ff c1 eb ld r30,-16(r1)
1000103c: a6 03 08 7c mtlr r0
10001040: 20 00 80 4e blr
10001044: 00 00 00 00 .long 0x0
10001048: 00 00 00 01 .long 0x1000000
1000104c: 80 06 00 00 .long 0x680
0000000010001050 <__libc_csu_fini>:
10001050: 20 00 80 4e blr
...
10001060: 90 ef 01 00 .long 0x1ef90
10001064: 00 00 00 00 .long 0x0
0000000010001068 <__glink_PLTresolve>:
10001068: a6 02 08 7c mflr r0
1000106c: 05 00 9f 42 bcl 20,4*cr7+so,10001070 <__glink_PLTresolve+0x8>
10001070: a6 02 68 7d mflr r11
10001074: 18 00 41 f8 std r2,24(r1)
10001078: f0 ff 4b e8 ld r2,-16(r11)
1000107c: a6 03 08 7c mtlr r0
10001080: 50 60 8b 7d subf r12,r11,r12
10001084: 14 5a 62 7d add r11,r2,r11
10001088: d0 ff 0c 38 addi r0,r12,-48
1000108c: 00 00 8b e9 ld r12,0(r11)
10001090: 82 f0 00 78 rldicl r0,r0,62,2
10001094: a6 03 89 7d mtctr r12
10001098: 08 00 6b e9 ld r11,8(r11)
1000109c: 20 04 80 4e bctr
00000000100010a0 <__libc_start_main@plt>:
100010a0: c8 ff ff 4b b 10001068 <__glink_PLTresolve>
00000000100010a4 <__gmon_start__@plt>:
100010a4: c4 ff ff 4b b 10001068 <__glink_PLTresolve>
Disassembly of section .fini:
00000000100010a8 <_fini>:
100010a8: 02 10 40 3c lis r2,4098
100010ac: 00 7f 42 38 addi r2,r2,32512
100010b0: a6 02 08 7c mflr r0
100010b4: 10 00 01 f8 std r0,16(r1)
100010b8: a1 ff 21 f8 stdu r1,-96(r1)
100010bc: 60 00 21 38 addi r1,r1,96
100010c0: 10 00 01 e8 ld r0,16(r1)
100010c4: a6 03 08 7c mtlr r0
100010c8: 20 00 80 4e blr
good: file format elf64-powerpcle
Disassembly of section .init:
0000000010000440 <00000024.plt_call.__gmon_start__>:
10000440: 18 00 41 f8 std r2,24(r1)
10000444: 18 81 82 e9 ld r12,-32488(r2)
10000448: a6 03 89 7d mtctr r12
1000044c: 20 04 80 4e bctr
...
0000000010000460 <_init>:
10000460: 02 10 40 3c lis r2,4098
10000464: 00 7f 42 38 addi r2,r2,32512
10000468: a6 02 08 7c mflr r0
1000046c: 10 00 01 f8 std r0,16(r1)
10000470: a1 ff 21 f8 stdu r1,-96(r1)
10000474: 00 00 00 60 nop
10000478: 08 80 02 e8 ld r0,-32760(r2)
1000047c: 00 00 a0 2f cmpdi cr7,r0,0
10000480: 0c 00 fe 41 beq+ cr7,1000048c <_init+0x2c>
10000484: bd ff ff 4b bl 10000440 <00000024.plt_call.__gmon_start__>
10000488: 18 00 41 e8 ld r2,24(r1)
1000048c: 60 00 21 38 addi r1,r1,96
10000490: 10 00 01 e8 ld r0,16(r1)
10000494: a6 03 08 7c mtlr r0
10000498: 20 00 80 4e blr
Disassembly of section .text:
00000000100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>:
100004a0: 18 00 41 f8 std r2,24(r1)
100004a4: 10 81 82 e9 ld r12,-32496(r2)
100004a8: a6 03 89 7d mtctr r12
100004ac: 20 04 80 4e bctr
...
00000000100004c0 <_start>:
100004c0: 02 10 40 3c lis r2,4098
100004c4: 00 7f 42 38 addi r2,r2,32512
100004c8: 78 0b 29 7c mr r9,r1
100004cc: e4 06 21 78 rldicr r1,r1,0,59
100004d0: 00 00 00 38 li r0,0
100004d4: 81 ff 21 f8 stdu r1,-128(r1)
100004d8: a6 03 08 7c mtlr r0
100004dc: 00 00 01 f8 std r0,0(r1)
100004e0: 10 80 02 e9 ld r8,-32752(r2)
100004e4: bc ff ff 4b b 100004a0 <00000019.plt_call.__libc_start_main@@GLIBC_2.17>
100004e8: 00 00 00 60 nop
...
100004f4: 40 20 0c 00 .long 0xc2040
100004f8: 2c 00 00 00 .long 0x2c
100004fc: 06 00 5f 73 andi. r31,r26,6
10000500: 74 61 72 74 andis. r18,r3,24948
10000504: 00 00 00 60 nop
10000508: 00 00 00 60 nop
1000050c: 00 00 00 60 nop
0000000010000510 <deregister_tm_clones>:
10000510: 02 10 40 3c lis r2,4098
10000514: 00 7f 42 38 addi r2,r2,32512
10000518: 00 00 00 60 nop
1000051c: 00 00 00 60 nop
10000520: 30 81 62 38 addi r3,r2,-32464
10000524: 30 81 22 39 addi r9,r2,-32464
10000528: 00 18 a9 7f cmpd cr7,r9,r3
1000052c: 20 00 9e 4d beqlr cr7
10000530: 00 00 00 60 nop
10000534: 18 80 22 e9 ld r9,-32744(r2)
10000538: 00 00 a9 2f cmpdi cr7,r9,0
1000053c: 20 00 9e 4d beqlr cr7
10000540: a6 02 08 7c mflr r0
10000544: 78 4b 2c 7d mr r12,r9
10000548: a6 03 29 7d mtctr r9
1000054c: 10 00 01 f8 std r0,16(r1)
10000550: e1 ff 21 f8 stdu r1,-32(r1)
10000554: 18 00 41 f8 std r2,24(r1)
10000558: 21 04 80 4e bctrl
1000055c: 18 00 41 e8 ld r2,24(r1)
10000560: 20 00 21 38 addi r1,r1,32
10000564: 10 00 01 e8 ld r0,16(r1)
10000568: a6 03 08 7c mtlr r0
1000056c: 20 00 80 4e blr
0000000010000570 <register_tm_clones>:
10000570: 02 10 40 3c lis r2,4098
10000574: 00 7f 42 38 addi r2,r2,32512
10000578: 00 00 00 60 nop
1000057c: 00 00 00 60 nop
10000580: 30 81 62 38 addi r3,r2,-32464
10000584: 30 81 82 38 addi r4,r2,-32464
10000588: 50 20 83 7c subf r4,r3,r4
1000058c: 74 1e 84 7c sradi r4,r4,3
10000590: 74 0e 84 7c sradi r4,r4,1
10000594: 95 01 84 7c addze. r4,r4
10000598: 20 00 82 4d beqlr
1000059c: 00 00 00 60 nop
100005a0: 20 80 22 e9 ld r9,-32736(r2)
100005a4: 00 00 a9 2f cmpdi cr7,r9,0
100005a8: 20 00 9e 4d beqlr cr7
100005ac: a6 02 08 7c mflr r0
100005b0: 78 4b 2c 7d mr r12,r9
100005b4: a6 03 29 7d mtctr r9
100005b8: 10 00 01 f8 std r0,16(r1)
100005bc: e1 ff 21 f8 stdu r1,-32(r1)
100005c0: 18 00 41 f8 std r2,24(r1)
100005c4: 21 04 80 4e bctrl
100005c8: 18 00 41 e8 ld r2,24(r1)
100005cc: 20 00 21 38 addi r1,r1,32
100005d0: 10 00 01 e8 ld r0,16(r1)
100005d4: a6 03 08 7c mtlr r0
100005d8: 20 00 80 4e blr
100005dc: 00 00 42 60 ori r2,r2,0
00000000100005e0 <__do_global_dtors_aux>:
100005e0: 02 10 40 3c lis r2,4098
100005e4: 00 7f 42 38 addi r2,r2,32512
100005e8: f8 ff e1 fb std r31,-8(r1)
100005ec: 00 00 00 60 nop
100005f0: d1 ff 21 f8 stdu r1,-48(r1)
100005f4: 30 81 22 89 lbz r9,-32464(r2)
100005f8: 00 00 89 2f cmpwi cr7,r9,0
100005fc: 20 00 9e 40 bne cr7,1000061c <__do_global_dtors_aux+0x3c>
10000600: a6 02 08 7c mflr r0
10000604: 40 00 01 f8 std r0,64(r1)
10000608: 11 ff ff 4b bl 10000518 <deregister_tm_clones+0x8>
1000060c: 40 00 01 e8 ld r0,64(r1)
10000610: 01 00 20 39 li r9,1
10000614: 30 81 22 99 stb r9,-32464(r2)
10000618: a6 03 08 7c mtlr r0
1000061c: 30 00 21 38 addi r1,r1,48
10000620: f8 ff e1 eb ld r31,-8(r1)
10000624: 20 00 80 4e blr
10000628: 00 00 00 60 nop
1000062c: 00 00 42 60 ori r2,r2,0
0000000010000630 <frame_dummy>:
10000630: 02 10 40 3c lis r2,4098
10000634: 00 7f 42 38 addi r2,r2,32512
10000638: 40 ff ff 4b b 10000578 <register_tm_clones+0x8>
1000063c: 00 00 00 60 nop
0000000010000640 <min(signed char, signed char)>:
struct sint8x8 { int8x8 s; };
struct sint8x16 { int8x16 s; };
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
signed char min(signed char a, signed char b) {
10000640: f7 ff 61 98 stb r3,-9(r1)
10000644: f6 ff 81 98 stb r4,-10(r1)
return a < b ? a : b;
10000648: f7 ff 61 88 lbz r3,-9(r1)
1000064c: 74 07 63 7c extsb r3,r3
10000650: f6 ff 81 88 lbz r4,-10(r1)
10000654: 74 07 84 7c extsb r4,r4
10000658: 00 20 03 7c cmpw r3,r4
1000065c: 10 00 80 40 bge 1000066c <min(signed char, signed char)+0x2c>
10000660: f7 ff 61 88 lbz r3,-9(r1)
10000664: f0 ff 61 90 stw r3,-16(r1)
10000668: 0c 00 00 48 b 10000674 <min(signed char, signed char)+0x34>
1000066c: f6 ff 61 88 lbz r3,-10(r1)
10000670: f0 ff 61 90 stw r3,-16(r1)
10000674: f0 ff 61 80 lwz r3,-16(r1)
10000678: 74 07 63 7c extsb r3,r3
1000067c: 20 00 80 4e blr
...
1000068c: 00 00 00 60 nop
0000000010000690 <split_by(sint8x2)>:
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
v.s[i] = min(a.s[i], b.s[i]);
return v;
}
pair<sint8x1> split_by(sint8x2 v) {
10000690: f0 ff 81 38 addi r4,r1,-16
10000694: e8 ff 61 b0 sth r3,-24(r1)
sint8x1 a, b;
a.s[0] = v.s[0];
10000698: e8 ff 61 a0 lhz r3,-24(r1)
1000069c: e0 ff 61 98 stb r3,-32(r1)
100006a0: e8 ff 61 38 addi r3,r1,-24
b.s[0] = v.s[1];
100006a4: 5b 1e 40 7c lxsihzx vs34,0,r3
100006a8: 4c 12 43 10 vsplth v2,v2,3
100006ac: ec 11 42 10 vsldoi v2,v2,v2,7
100006b0: d8 ff 61 38 addi r3,r1,-40
100006b4: 1b 1f 40 7c stxsibx vs34,0,r3
return {{a, b}};
100006b8: 78 23 83 7c mr r3,r4
100006bc: e0 ff a1 38 addi r5,r1,-32
100006c0: 00 00 a5 88 lbz r5,0(r5)
100006c4: 00 00 a3 98 stb r5,0(r3)
100006c8: 01 00 64 38 addi r3,r4,1
100006cc: d8 ff 81 38 addi r4,r1,-40
100006d0: 00 00 84 88 lbz r4,0(r4)
100006d4: 00 00 83 98 stb r4,0(r3)
100006d8: f0 ff 61 a0 lhz r3,-16(r1)
100006dc: 20 00 80 4e blr
...
100006ec: 00 00 00 60 nop
00000000100006f0 <split_by(sint8x4)>:
}
pair<sint8x2> split_by(sint8x4 v) {
100006f0: f0 ff 81 38 addi r4,r1,-16
100006f4: e8 ff 61 90 stw r3,-24(r1)
sint8x2 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1);
100006f8: e8 ff 61 80 lwz r3,-24(r1)
100006fc: e0 ff 61 b0 sth r3,-32(r1)
10000700: e8 ff 61 38 addi r3,r1,-24
b.s = __builtin_shufflevector(v.s, v.s, 2, 3);
10000704: ee 1e 00 7c lfiwzx f0,0,r3
10000708: 51 02 40 f0 xxswapd vs34,vs0
1000070c: 4c 12 46 10 vsplth v2,v2,6
10000710: 2c 12 42 10 vsldoi v2,v2,v2,8
10000714: d8 ff 61 38 addi r3,r1,-40
10000718: 5b 1f 40 7c stxsihx vs34,0,r3
return {{a, b}};
1000071c: 78 23 83 7c mr r3,r4
10000720: e0 ff a1 38 addi r5,r1,-32
10000724: 00 00 a5 a0 lhz r5,0(r5)
10000728: 00 00 a3 b0 sth r5,0(r3)
1000072c: 02 00 64 38 addi r3,r4,2
10000730: d8 ff 81 38 addi r4,r1,-40
10000734: 00 00 84 a0 lhz r4,0(r4)
10000738: 00 00 83 b0 sth r4,0(r3)
1000073c: f0 ff 61 80 lwz r3,-16(r1)
10000740: 20 00 80 4e blr
...
0000000010000750 <split_by(sint8x8)>:
}
pair<sint8x4> split_by(sint8x8 v) {
10000750: f0 ff 81 38 addi r4,r1,-16
10000754: e8 ff 61 f8 std r3,-24(r1)
sint8x4 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3);
10000758: e8 ff 61 e8 ld r3,-24(r1)
1000075c: e0 ff 61 90 stw r3,-32(r1)
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7);
10000760: ec ff 61 38 addi r3,r1,-20
10000764: d8 1a 00 7c lxvwsx vs0,0,r3
10000768: 10 02 00 f0 xxsldwi vs0,vs0,vs0,2
1000076c: d8 ff 61 38 addi r3,r1,-40
10000770: ae 1f 00 7c stfiwx f0,0,r3
return {{a, b}};
10000774: 78 23 83 7c mr r3,r4
10000778: e0 ff a1 38 addi r5,r1,-32
1000077c: 00 00 a5 80 lwz r5,0(r5)
10000780: 00 00 a3 90 stw r5,0(r3)
10000784: 04 00 64 38 addi r3,r4,4
10000788: d8 ff 81 38 addi r4,r1,-40
1000078c: 00 00 84 80 lwz r4,0(r4)
10000790: 00 00 83 90 stw r4,0(r3)
10000794: f0 ff 61 e8 ld r3,-16(r1)
10000798: 20 00 80 4e blr
...
100007a8: 00 00 00 60 nop
100007ac: 00 00 00 60 nop
00000000100007b0 <split_by(sint8x16)>:
}
pair<sint8x8> split_by(sint8x16 v) {
100007b0: dd ff 41 f4 stxv vs34,-48(r1)
sint8x8 a, b;
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7);
100007b4: d0 ff 61 e8 ld r3,-48(r1)
100007b8: c8 ff 61 f8 std r3,-56(r1)
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15);
100007bc: d8 ff 61 38 addi r3,r1,-40
100007c0: 99 1a 40 7c lxvdsx vs34,0,r3
100007c4: 56 12 02 f0 xxswapd vs0,vs34
100007c8: c0 ff 01 d8 stfd f0,-64(r1)
return {{a, b}};
100007cc: c8 ff 61 e8 ld r3,-56(r1)
100007d0: e8 ff 61 f8 std r3,-24(r1)
100007d4: c0 ff 61 e8 ld r3,-64(r1)
100007d8: f0 ff 61 f8 std r3,-16(r1)
100007dc: e8 ff 61 e8 ld r3,-24(r1)
100007e0: f0 ff 81 e8 ld r4,-16(r1)
100007e4: 20 00 80 4e blr
...
100007f4: 00 00 00 60 nop
100007f8: 00 00 00 60 nop
100007fc: 00 00 00 60 nop
0000000010000800 <hmin(sint8x1)>:
}
signed char hmin(sint8x1 v) {
10000800: f0 ff 61 98 stb r3,-16(r1)
return v.s[0];
10000804: f0 ff 61 88 lbz r3,-16(r1)
10000808: 74 07 63 7c extsb r3,r3
1000080c: 20 00 80 4e blr
...
1000081c: 00 00 00 60 nop
0000000010000820 <hmin(sint8x2)>:
}
signed char hmin(sint8x2 v) {
10000820: 02 10 40 3c lis r2,4098
10000824: 00 7f 42 38 addi r2,r2,32512
10000828: a6 02 08 7c mflr r0
1000082c: f8 ff e1 fb std r31,-8(r1)
10000830: 10 00 01 f8 std r0,16(r1)
10000834: 61 ff 21 f8 stdu r1,-160(r1)
10000838: 78 0b 3f 7c mr r31,r1
1000083c: 90 00 7f b0 sth r3,144(r31)
auto a = split_by(v);
10000840: 80 00 7f 38 addi r3,r31,128
10000844: 90 00 9f 38 addi r4,r31,144
10000848: 00 00 84 a0 lhz r4,0(r4)
1000084c: 00 00 83 b0 sth r4,0(r3)
10000850: 80 00 7f a0 lhz r3,128(r31)
10000854: 3d fe ff 4b bl 10000690 <split_by(sint8x2)>
10000858: 00 00 00 60 nop
1000085c: 88 00 7f b0 sth r3,136(r31)
return hmin(min(a[0], a[1]));
10000860: 88 00 7f 38 addi r3,r31,136
10000864: 00 00 80 38 li r4,0
10000868: 99 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const>
1000086c: 00 00 00 60 nop
10000870: 70 00 7f 98 stb r3,112(r31)
10000874: 88 00 7f 38 addi r3,r31,136
10000878: 01 00 80 38 li r4,1
1000087c: 85 03 00 48 bl 10000c00 <pair<sint8x1>::operator[](unsigned long) const>
10000880: 00 00 00 60 nop
10000884: 68 00 7f 98 stb r3,104(r31)
10000888: 70 00 7f 38 addi r3,r31,112
1000088c: 68 00 9f 38 addi r4,r31,104
10000890: e9 02 00 48 bl 10000b78 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x8>
10000894: 00 00 00 60 nop
10000898: 78 00 7f 98 stb r3,120(r31)
1000089c: 78 00 7f 88 lbz r3,120(r31)
100008a0: 61 ff ff 4b bl 10000800 <hmin(sint8x1)>
100008a4: 00 00 00 60 nop
100008a8: 74 07 63 7c extsb r3,r3
100008ac: a0 00 21 38 addi r1,r1,160
100008b0: 10 00 01 e8 ld r0,16(r1)
100008b4: f8 ff e1 eb ld r31,-8(r1)
100008b8: a6 03 08 7c mtlr r0
100008bc: 20 00 80 4e blr
...
100008cc: 00 00 00 60 nop
00000000100008d0 <hmin(sint8x4)>:
}
signed char hmin(sint8x4 v) {
100008d0: 02 10 40 3c lis r2,4098
100008d4: 00 7f 42 38 addi r2,r2,32512
100008d8: a6 02 08 7c mflr r0
100008dc: f8 ff e1 fb std r31,-8(r1)
100008e0: 10 00 01 f8 std r0,16(r1)
100008e4: 61 ff 21 f8 stdu r1,-160(r1)
100008e8: 78 0b 3f 7c mr r31,r1
100008ec: 90 00 7f 90 stw r3,144(r31)
auto a = split_by(v);
100008f0: 80 00 7f 38 addi r3,r31,128
100008f4: 90 00 9f 38 addi r4,r31,144
100008f8: 00 00 84 80 lwz r4,0(r4)
100008fc: 00 00 83 90 stw r4,0(r3)
10000900: 80 00 7f 80 lwz r3,128(r31)
10000904: ed fd ff 4b bl 100006f0 <split_by(sint8x4)>
10000908: 00 00 00 60 nop
1000090c: 88 00 7f 90 stw r3,136(r31)
return hmin(min(a[0], a[1]));
10000910: 88 00 7f 38 addi r3,r31,136
10000914: 00 00 80 38 li r4,0
10000918: 09 04 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const>
1000091c: 00 00 00 60 nop
10000920: 70 00 7f b0 sth r3,112(r31)
10000924: 88 00 7f 38 addi r3,r31,136
10000928: 01 00 80 38 li r4,1
1000092c: f5 03 00 48 bl 10000d20 <pair<sint8x2>::operator[](unsigned long) const>
10000930: 00 00 00 60 nop
10000934: 68 00 7f b0 sth r3,104(r31)
10000938: 70 00 7f 38 addi r3,r31,112
1000093c: 68 00 9f 38 addi r4,r31,104
10000940: 09 03 00 48 bl 10000c48 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x8>
10000944: 00 00 00 60 nop
10000948: 78 00 7f b0 sth r3,120(r31)
1000094c: 78 00 7f a0 lhz r3,120(r31)
10000950: d9 fe ff 4b bl 10000828 <hmin(sint8x2)+0x8>
10000954: 00 00 00 60 nop
10000958: 74 07 63 7c extsb r3,r3
1000095c: a0 00 21 38 addi r1,r1,160
10000960: 10 00 01 e8 ld r0,16(r1)
10000964: f8 ff e1 eb ld r31,-8(r1)
10000968: a6 03 08 7c mtlr r0
1000096c: 20 00 80 4e blr
...
1000097c: 00 00 00 60 nop
0000000010000980 <hmin(sint8x8)>:
}
signed char hmin(sint8x8 v) {
10000980: 02 10 40 3c lis r2,4098
10000984: 00 7f 42 38 addi r2,r2,32512
10000988: a6 02 08 7c mflr r0
1000098c: f8 ff e1 fb std r31,-8(r1)
10000990: 10 00 01 f8 std r0,16(r1)
10000994: 61 ff 21 f8 stdu r1,-160(r1)
10000998: 78 0b 3f 7c mr r31,r1
1000099c: 90 00 7f f8 std r3,144(r31)
auto a = split_by(v);
100009a0: 80 00 7f 38 addi r3,r31,128
100009a4: 90 00 9f 38 addi r4,r31,144
100009a8: 00 00 84 e8 ld r4,0(r4)
100009ac: 00 00 83 f8 std r4,0(r3)
100009b0: 80 00 7f e8 ld r3,128(r31)
100009b4: 9d fd ff 4b bl 10000750 <split_by(sint8x8)>
100009b8: 00 00 00 60 nop
100009bc: 88 00 7f f8 std r3,136(r31)
return hmin(min(a[0], a[1]));
100009c0: 88 00 7f 38 addi r3,r31,136
100009c4: 00 00 80 38 li r4,0
100009c8: 69 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const>
100009cc: 00 00 00 60 nop
100009d0: 70 00 7f 90 stw r3,112(r31)
100009d4: 88 00 7f 38 addi r3,r31,136
100009d8: 01 00 80 38 li r4,1
100009dc: 55 04 00 48 bl 10000e30 <pair<sint8x4>::operator[](unsigned long) const>
100009e0: 00 00 00 60 nop
100009e4: 68 00 7f 90 stw r3,104(r31)
100009e8: 70 00 7f 38 addi r3,r31,112
100009ec: 68 00 9f 38 addi r4,r31,104
100009f0: 79 03 00 48 bl 10000d68 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x8>
100009f4: 00 00 00 60 nop
100009f8: 78 00 7f 90 stw r3,120(r31)
100009fc: 78 00 7f 80 lwz r3,120(r31)
10000a00: d9 fe ff 4b bl 100008d8 <hmin(sint8x4)+0x8>
10000a04: 00 00 00 60 nop
10000a08: 74 07 63 7c extsb r3,r3
10000a0c: a0 00 21 38 addi r1,r1,160
10000a10: 10 00 01 e8 ld r0,16(r1)
10000a14: f8 ff e1 eb ld r31,-8(r1)
10000a18: a6 03 08 7c mtlr r0
10000a1c: 20 00 80 4e blr
...
10000a2c: 00 00 00 60 nop
0000000010000a30 <hmin(sint8x16)>:
}
signed char hmin(sint8x16 v) {
10000a30: 02 10 40 3c lis r2,4098
10000a34: 00 7f 42 38 addi r2,r2,32512
10000a38: a6 02 08 7c mflr r0
10000a3c: f8 ff e1 fb std r31,-8(r1)
10000a40: 10 00 01 f8 std r0,16(r1)
10000a44: 41 ff 21 f8 stdu r1,-192(r1)
10000a48: 78 0b 3f 7c mr r31,r1
10000a4c: ad 00 5f f4 stxv vs34,160(r31)
auto a = split_by(v);
10000a50: a8 00 7f e8 ld r3,168(r31)
10000a54: 88 00 7f f8 std r3,136(r31)
10000a58: a0 00 7f e8 ld r3,160(r31)
10000a5c: 80 00 7f f8 std r3,128(r31)
10000a60: 81 00 1f f4 lxv vs0,128(r31)
10000a64: 91 04 40 f0 xxlor vs34,vs0,vs0
10000a68: 49 fd ff 4b bl 100007b0 <split_by(sint8x16)>
10000a6c: 90 00 7f f8 std r3,144(r31)
10000a70: 98 00 9f f8 std r4,152(r31)
return hmin(min(a[0], a[1]));
10000a74: 90 00 7f 38 addi r3,r31,144
10000a78: 00 00 80 38 li r4,0
10000a7c: c5 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const>
10000a80: 00 00 00 60 nop
10000a84: 70 00 7f f8 std r3,112(r31)
10000a88: 90 00 7f 38 addi r3,r31,144
10000a8c: 01 00 80 38 li r4,1
10000a90: b1 04 00 48 bl 10000f40 <pair<sint8x8>::operator[](unsigned long) const>
10000a94: 00 00 00 60 nop
10000a98: 68 00 7f f8 std r3,104(r31)
10000a9c: 70 00 7f 38 addi r3,r31,112
10000aa0: 68 00 9f 38 addi r4,r31,104
10000aa4: d5 03 00 48 bl 10000e78 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x8>
10000aa8: 00 00 00 60 nop
10000aac: 78 00 7f f8 std r3,120(r31)
10000ab0: 78 00 7f e8 ld r3,120(r31)
10000ab4: d5 fe ff 4b bl 10000988 <hmin(sint8x8)+0x8>
10000ab8: 00 00 00 60 nop
10000abc: 74 07 63 7c extsb r3,r3
10000ac0: c0 00 21 38 addi r1,r1,192
10000ac4: 10 00 01 e8 ld r0,16(r1)
10000ac8: f8 ff e1 eb ld r31,-8(r1)
10000acc: a6 03 08 7c mtlr r0
10000ad0: 20 00 80 4e blr
...
0000000010000ae0 <main>:
}
int main(void) {
10000ae0: 02 10 40 3c lis r2,4098
10000ae4: 00 7f 42 38 addi r2,r2,32512
10000ae8: a6 02 08 7c mflr r0
10000aec: f8 ff e1 fb std r31,-8(r1)
10000af0: 10 00 01 f8 std r0,16(r1)
10000af4: a1 ff 21 f8 stdu r1,-96(r1)
10000af8: 78 0b 3f 7c mr r31,r1
10000afc: 00 00 60 38 li r3,0
10000b00: 54 00 7f 90 stw r3,84(r31)
const long data[] = {0x00010102464c457f, 0};
10000b04: fe ff 62 3c addis r3,r2,-2
10000b08: f8 91 63 38 addi r3,r3,-28168
10000b0c: 08 00 83 e8 ld r4,8(r3)
10000b10: 48 00 9f f8 std r4,72(r31)
10000b14: 00 00 63 e8 ld r3,0(r3)
10000b18: 40 00 7f f8 std r3,64(r31)
sint8x16 v;
__builtin_memcpy(&v.s, data, 16);
10000b1c: 48 00 7f e8 ld r3,72(r31)
10000b20: 38 00 7f f8 std r3,56(r31)
10000b24: 40 00 7f e8 ld r3,64(r31)
10000b28: 30 00 7f f8 std r3,48(r31)
return hmin(v);
10000b2c: 38 00 7f e8 ld r3,56(r31)
10000b30: 28 00 7f f8 std r3,40(r31)
10000b34: 30 00 7f e8 ld r3,48(r31)
10000b38: 20 00 7f f8 std r3,32(r31)
10000b3c: 21 00 1f f4 lxv vs0,32(r31)
10000b40: 91 04 40 f0 xxlor vs34,vs0,vs0
10000b44: f5 fe ff 4b bl 10000a38 <hmin(sint8x16)+0x8>
10000b48: 74 07 63 7c extsb r3,r3
10000b4c: b4 07 63 7c extsw r3,r3
10000b50: 60 00 21 38 addi r1,r1,96
10000b54: 10 00 01 e8 ld r0,16(r1)
10000b58: f8 ff e1 eb ld r31,-8(r1)
10000b5c: a6 03 08 7c mtlr r0
10000b60: 20 00 80 4e blr
...
0000000010000b70 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)>:
T min(const T &a, const T &b) {
10000b70: 02 10 40 3c lis r2,4098
10000b74: 00 7f 42 38 addi r2,r2,32512
10000b78: a6 02 08 7c mflr r0
10000b7c: f8 ff e1 fb std r31,-8(r1)
10000b80: 10 00 01 f8 std r0,16(r1)
10000b84: b1 ff 21 f8 stdu r1,-80(r1)
10000b88: 78 0b 3f 7c mr r31,r1
10000b8c: 38 00 7f f8 std r3,56(r31)
10000b90: 30 00 9f f8 std r4,48(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000b94: 00 00 60 38 li r3,0
10000b98: 28 00 7f f8 std r3,40(r31)
10000b9c: 28 00 7f e8 ld r3,40(r31)
10000ba0: 01 00 23 28 cmpldi r3,1
10000ba4: 34 00 80 40 bge 10000bd8 <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x68>
v.s[i] = min(a.s[i], b.s[i]);
10000ba8: 38 00 7f e8 ld r3,56(r31)
10000bac: 30 00 9f e8 ld r4,48(r31)
10000bb0: 00 00 84 88 lbz r4,0(r4)
10000bb4: 00 00 63 88 lbz r3,0(r3)
10000bb8: 74 07 63 7c extsb r3,r3
10000bbc: 74 07 84 7c extsb r4,r4
10000bc0: 81 fa ff 4b bl 10000640 <min(signed char, signed char)>
10000bc4: 40 00 7f 98 stb r3,64(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000bc8: 28 00 7f e8 ld r3,40(r31)
10000bcc: 01 00 63 38 addi r3,r3,1
10000bd0: 28 00 7f f8 std r3,40(r31)
10000bd4: c8 ff ff 4b b 10000b9c <sint8x1 min<sint8x1>(sint8x1 const&, sint8x1 const&)+0x2c>
10000bd8: 40 00 7f 88 lbz r3,64(r31)
return v;
10000bdc: 50 00 21 38 addi r1,r1,80
10000be0: 10 00 01 e8 ld r0,16(r1)
10000be4: f8 ff e1 eb ld r31,-8(r1)
10000be8: a6 03 08 7c mtlr r0
10000bec: 20 00 80 4e blr
...
10000bfc: 00 00 00 60 nop
0000000010000c00 <pair<sint8x1>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000c00: e8 ff 61 f8 std r3,-24(r1)
10000c04: e0 ff 81 f8 std r4,-32(r1)
10000c08: e8 ff 61 e8 ld r3,-24(r1)
10000c0c: e0 ff 81 e8 ld r4,-32(r1)
10000c10: 14 22 63 7c add r3,r3,r4
10000c14: f0 ff 81 38 addi r4,r1,-16
10000c18: 00 00 63 88 lbz r3,0(r3)
10000c1c: 00 00 64 98 stb r3,0(r4)
10000c20: f0 ff 61 88 lbz r3,-16(r1)
10000c24: 20 00 80 4e blr
...
10000c34: 00 00 00 60 nop
10000c38: 00 00 00 60 nop
10000c3c: 00 00 00 60 nop
0000000010000c40 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)>:
T min(const T &a, const T &b) {
10000c40: 02 10 40 3c lis r2,4098
10000c44: 00 7f 42 38 addi r2,r2,32512
10000c48: a6 02 08 7c mflr r0
10000c4c: f8 ff e1 fb std r31,-8(r1)
10000c50: 10 00 01 f8 std r0,16(r1)
10000c54: 91 ff 21 f8 stdu r1,-112(r1)
10000c58: 78 0b 3f 7c mr r31,r1
10000c5c: 58 00 7f f8 std r3,88(r31)
10000c60: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000c64: 00 00 60 38 li r3,0
10000c68: 48 00 7f f8 std r3,72(r31)
10000c6c: 48 00 7f e8 ld r3,72(r31)
10000c70: 02 00 23 28 cmpldi r3,2
10000c74: 7c 00 80 40 bge 10000cf0 <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0xb0>
v.s[i] = min(a.s[i], b.s[i]);
10000c78: 58 00 7f e8 ld r3,88(r31)
10000c7c: 5b 1e 40 7c lxsihzx vs34,0,r3
10000c80: 4c 12 43 10 vsplth v2,v2,3
10000c84: 48 00 7f e8 ld r3,72(r31)
10000c88: 50 00 9f e8 ld r4,80(r31)
10000c8c: 5b 26 60 7c lxsihzx vs35,0,r4
10000c90: 4c 1a 63 10 vsplth v3,v3,3
10000c94: 0d 17 83 10 vextubrx r4,r3,v2
10000c98: 74 07 84 7c extsb r4,r4
10000c9c: 0d 1f 63 10 vextubrx r3,r3,v3
10000ca0: 74 07 63 7c extsb r3,r3
10000ca4: 28 00 7f f8 std r3,40(r31)
10000ca8: 78 23 83 7c mr r3,r4
10000cac: 28 00 9f e8 ld r4,40(r31)
10000cb0: 91 f9 ff 4b bl 10000640 <min(signed char, signed char)>
10000cb4: 48 00 9f e8 ld r4,72(r31)
10000cb8: 20 07 84 78 clrldi r4,r4,60
10000cbc: 60 00 bf 38 addi r5,r31,96
10000cc0: 5b 2e 40 7c lxsihzx vs34,0,r5
10000cc4: 4c 12 43 10 vsplth v2,v2,3
10000cc8: 3d 00 5f f4 stxv vs34,48(r31)
10000ccc: 30 00 df 38 addi r6,r31,48
10000cd0: ae 21 66 7c stbx r3,r6,r4
10000cd4: 39 00 5f f4 lxv vs34,48(r31)
10000cd8: 2c 12 42 10 vsldoi v2,v2,v2,8
10000cdc: 5b 2f 40 7c stxsihx vs34,0,r5
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000ce0: 48 00 7f e8 ld r3,72(r31)
10000ce4: 01 00 63 38 addi r3,r3,1
10000ce8: 48 00 7f f8 std r3,72(r31)
10000cec: 80 ff ff 4b b 10000c6c <sint8x2 min<sint8x2>(sint8x2 const&, sint8x2 const&)+0x2c>
10000cf0: 60 00 7f a0 lhz r3,96(r31)
return v;
10000cf4: 70 00 21 38 addi r1,r1,112
10000cf8: 10 00 01 e8 ld r0,16(r1)
10000cfc: f8 ff e1 eb ld r31,-8(r1)
10000d00: a6 03 08 7c mtlr r0
10000d04: 20 00 80 4e blr
...
10000d14: 00 00 00 60 nop
10000d18: 00 00 00 60 nop
10000d1c: 00 00 00 60 nop
0000000010000d20 <pair<sint8x2>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000d20: e8 ff 61 f8 std r3,-24(r1)
10000d24: e0 ff 81 f8 std r4,-32(r1)
10000d28: e8 ff 61 e8 ld r3,-24(r1)
10000d2c: e0 ff 81 e8 ld r4,-32(r1)
10000d30: a4 0f 84 78 rldicr r4,r4,1,62
10000d34: 14 22 63 7c add r3,r3,r4
10000d38: f0 ff 81 38 addi r4,r1,-16
10000d3c: 00 00 63 a0 lhz r3,0(r3)
10000d40: 00 00 64 b0 sth r3,0(r4)
10000d44: f0 ff 61 a0 lhz r3,-16(r1)
10000d48: 20 00 80 4e blr
...
10000d58: 00 00 00 60 nop
10000d5c: 00 00 00 60 nop
0000000010000d60 <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)>:
T min(const T &a, const T &b) {
10000d60: 02 10 40 3c lis r2,4098
10000d64: 00 7f 42 38 addi r2,r2,32512
10000d68: a6 02 08 7c mflr r0
10000d6c: f8 ff e1 fb std r31,-8(r1)
10000d70: 10 00 01 f8 std r0,16(r1)
10000d74: 91 ff 21 f8 stdu r1,-112(r1)
10000d78: 78 0b 3f 7c mr r31,r1
10000d7c: 58 00 7f f8 std r3,88(r31)
10000d80: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000d84: 00 00 60 38 li r3,0
10000d88: 48 00 7f f8 std r3,72(r31)
10000d8c: 48 00 7f e8 ld r3,72(r31)
10000d90: 04 00 23 28 cmpldi r3,4
10000d94: 78 00 80 40 bge 10000e0c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0xac>
v.s[i] = min(a.s[i], b.s[i]);
10000d98: 58 00 7f e8 ld r3,88(r31)
10000d9c: ee 1e 00 7c lfiwzx f0,0,r3
10000da0: 51 02 40 f0 xxswapd vs34,vs0
10000da4: 48 00 7f e8 ld r3,72(r31)
10000da8: 50 00 9f e8 ld r4,80(r31)
10000dac: ee 26 00 7c lfiwzx f0,0,r4
10000db0: 51 02 60 f0 xxswapd vs35,vs0
10000db4: 0d 17 83 10 vextubrx r4,r3,v2
10000db8: 74 07 84 7c extsb r4,r4
10000dbc: 0d 1f 63 10 vextubrx r3,r3,v3
10000dc0: 74 07 63 7c extsb r3,r3
10000dc4: 28 00 7f f8 std r3,40(r31)
10000dc8: 78 23 83 7c mr r3,r4
10000dcc: 28 00 9f e8 ld r4,40(r31)
10000dd0: 71 f8 ff 4b bl 10000640 <min(signed char, signed char)>
10000dd4: 48 00 9f e8 ld r4,72(r31)
10000dd8: 20 07 84 78 clrldi r4,r4,60
10000ddc: 60 00 bf 38 addi r5,r31,96
10000de0: ee 2e 00 7c lfiwzx f0,0,r5
10000de4: 50 02 00 f0 xxswapd vs0,vs0
10000de8: 35 00 1f f4 stxv vs0,48(r31)
10000dec: 30 00 bf 38 addi r5,r31,48
10000df0: ae 21 65 7c stbx r3,r5,r4
10000df4: 30 00 7f 80 lwz r3,48(r31)
10000df8: 60 00 7f 90 stw r3,96(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000dfc: 48 00 7f e8 ld r3,72(r31)
10000e00: 01 00 63 38 addi r3,r3,1
10000e04: 48 00 7f f8 std r3,72(r31)
10000e08: 84 ff ff 4b b 10000d8c <sint8x4 min<sint8x4>(sint8x4 const&, sint8x4 const&)+0x2c>
10000e0c: 60 00 7f 80 lwz r3,96(r31)
return v;
10000e10: 70 00 21 38 addi r1,r1,112
10000e14: 10 00 01 e8 ld r0,16(r1)
10000e18: f8 ff e1 eb ld r31,-8(r1)
10000e1c: a6 03 08 7c mtlr r0
10000e20: 20 00 80 4e blr
...
0000000010000e30 <pair<sint8x4>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000e30: e8 ff 61 f8 std r3,-24(r1)
10000e34: e0 ff 81 f8 std r4,-32(r1)
10000e38: e8 ff 61 e8 ld r3,-24(r1)
10000e3c: e0 ff 81 e8 ld r4,-32(r1)
10000e40: 64 17 84 78 rldicr r4,r4,2,61
10000e44: 14 22 63 7c add r3,r3,r4
10000e48: f0 ff 81 38 addi r4,r1,-16
10000e4c: 00 00 63 80 lwz r3,0(r3)
10000e50: 00 00 64 90 stw r3,0(r4)
10000e54: f0 ff 61 80 lwz r3,-16(r1)
10000e58: 20 00 80 4e blr
...
10000e68: 00 00 00 60 nop
10000e6c: 00 00 00 60 nop
0000000010000e70 <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)>:
T min(const T &a, const T &b) {
10000e70: 02 10 40 3c lis r2,4098
10000e74: 00 7f 42 38 addi r2,r2,32512
10000e78: a6 02 08 7c mflr r0
10000e7c: f8 ff e1 fb std r31,-8(r1)
10000e80: 10 00 01 f8 std r0,16(r1)
10000e84: 91 ff 21 f8 stdu r1,-112(r1)
10000e88: 78 0b 3f 7c mr r31,r1
10000e8c: 58 00 7f f8 std r3,88(r31)
10000e90: 50 00 9f f8 std r4,80(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000e94: 00 00 60 38 li r3,0
10000e98: 48 00 7f f8 std r3,72(r31)
10000e9c: 48 00 7f e8 ld r3,72(r31)
10000ea0: 08 00 23 28 cmpldi r3,8
10000ea4: 78 00 80 40 bge 10000f1c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0xac>
v.s[i] = min(a.s[i], b.s[i]);
10000ea8: 58 00 7f e8 ld r3,88(r31)
10000eac: 00 00 03 c8 lfd f0,0(r3)
10000eb0: 51 02 40 f0 xxswapd vs34,vs0
10000eb4: 48 00 7f e8 ld r3,72(r31)
10000eb8: 50 00 9f e8 ld r4,80(r31)
10000ebc: 00 00 04 c8 lfd f0,0(r4)
10000ec0: 51 02 60 f0 xxswapd vs35,vs0
10000ec4: 0d 17 83 10 vextubrx r4,r3,v2
10000ec8: 74 07 84 7c extsb r4,r4
10000ecc: 0d 1f 63 10 vextubrx r3,r3,v3
10000ed0: 74 07 63 7c extsb r3,r3
10000ed4: 28 00 7f f8 std r3,40(r31)
10000ed8: 78 23 83 7c mr r3,r4
10000edc: 28 00 9f e8 ld r4,40(r31)
10000ee0: 61 f7 ff 4b bl 10000640 <min(signed char, signed char)>
10000ee4: 48 00 9f e8 ld r4,72(r31)
10000ee8: 20 07 84 78 clrldi r4,r4,60
10000eec: 60 00 1f c8 lfd f0,96(r31)
10000ef0: 51 02 40 f0 xxswapd vs34,vs0
10000ef4: 3d 00 5f f4 stxv vs34,48(r31)
10000ef8: 30 00 bf 38 addi r5,r31,48
10000efc: ae 21 65 7c stbx r3,r5,r4
10000f00: 31 00 1f f4 lxv vs0,48(r31)
10000f04: 50 02 00 f0 xxswapd vs0,vs0
10000f08: 60 00 1f d8 stfd f0,96(r31)
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
10000f0c: 48 00 7f e8 ld r3,72(r31)
10000f10: 01 00 63 38 addi r3,r3,1
10000f14: 48 00 7f f8 std r3,72(r31)
10000f18: 84 ff ff 4b b 10000e9c <sint8x8 min<sint8x8>(sint8x8 const&, sint8x8 const&)+0x2c>
return v;
10000f1c: 60 00 7f e8 ld r3,96(r31)
10000f20: 70 00 21 38 addi r1,r1,112
10000f24: 10 00 01 e8 ld r0,16(r1)
10000f28: f8 ff e1 eb ld r31,-8(r1)
10000f2c: a6 03 08 7c mtlr r0
10000f30: 20 00 80 4e blr
...
0000000010000f40 <pair<sint8x8>::operator[](unsigned long) const>:
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
10000f40: e8 ff 61 f8 std r3,-24(r1)
10000f44: e0 ff 81 f8 std r4,-32(r1)
10000f48: e8 ff 61 e8 ld r3,-24(r1)
10000f4c: e0 ff 81 e8 ld r4,-32(r1)
10000f50: 24 1f 84 78 rldicr r4,r4,3,60
10000f54: 14 22 63 7c add r3,r3,r4
10000f58: f0 ff 81 38 addi r4,r1,-16
10000f5c: 00 00 63 e8 ld r3,0(r3)
10000f60: 00 00 64 f8 std r3,0(r4)
10000f64: f0 ff 61 e8 ld r3,-16(r1)
10000f68: 20 00 80 4e blr
...
10000f78: 00 00 00 60 nop
10000f7c: 00 00 00 60 nop
0000000010000f80 <__libc_csu_init>:
10000f80: 02 10 40 3c lis r2,4098
10000f84: 00 7f 42 38 addi r2,r2,32512
10000f88: a6 02 08 7c mflr r0
10000f8c: d0 ff 41 fb std r26,-48(r1)
10000f90: d8 ff 61 fb std r27,-40(r1)
10000f94: 78 2b ba 7c mr r26,r5
10000f98: e0 ff 81 fb std r28,-32(r1)
10000f9c: e8 ff a1 fb std r29,-24(r1)
10000fa0: ff ff a2 3f addis r29,r2,-1
10000fa4: 78 1b 7c 7c mr r28,r3
10000fa8: f0 ff c1 fb std r30,-16(r1)
10000fac: ff ff c2 3f addis r30,r2,-1
10000fb0: c8 7d bd 3b addi r29,r29,32200
10000fb4: c0 7d de 3b addi r30,r30,32192
10000fb8: 78 23 9b 7c mr r27,r4
10000fbc: 50 e8 be 7f subf r29,r30,r29
10000fc0: 10 00 01 f8 std r0,16(r1)
10000fc4: b1 ff 21 f8 stdu r1,-80(r1)
10000fc8: a1 f4 ff 4b bl 10000468 <_init+0x8>
10000fcc: 00 00 00 60 nop
10000fd0: 75 1e bd 7f sradi. r29,r29,3
10000fd4: 4c 00 82 41 beq 10001020 <__libc_csu_init+0xa0>
10000fd8: 18 00 41 f8 std r2,24(r1)
10000fdc: 48 00 e1 fb std r31,72(r1)
10000fe0: f8 ff de 3b addi r30,r30,-8
10000fe4: 00 00 e0 3b li r31,0
10000fe8: 00 00 00 60 nop
10000fec: 00 00 42 60 ori r2,r2,0
10000ff0: 09 00 3e e9 ldu r9,8(r30)
10000ff4: 78 d3 45 7f mr r5,r26
10000ff8: 78 db 64 7f mr r4,r27
10000ffc: 78 e3 83 7f mr r3,r28
10001000: 01 00 ff 3b addi r31,r31,1
10001004: a6 03 29 7d mtctr r9
10001008: 78 4b 2c 7d mr r12,r9
1000100c: 21 04 80 4e bctrl
10001010: 18 00 41 e8 ld r2,24(r1)
10001014: 40 f8 bd 7f cmpld cr7,r29,r31
10001018: d8 ff 9e 40 bne cr7,10000ff0 <__libc_csu_init+0x70>
1000101c: 48 00 e1 eb ld r31,72(r1)
10001020: 50 00 21 38 addi r1,r1,80
10001024: 10 00 01 e8 ld r0,16(r1)
10001028: d0 ff 41 eb ld r26,-48(r1)
1000102c: d8 ff 61 eb ld r27,-40(r1)
10001030: e0 ff 81 eb ld r28,-32(r1)
10001034: e8 ff a1 eb ld r29,-24(r1)
10001038: f0 ff c1 eb ld r30,-16(r1)
1000103c: a6 03 08 7c mtlr r0
10001040: 20 00 80 4e blr
10001044: 00 00 00 00 .long 0x0
10001048: 00 00 00 01 .long 0x1000000
1000104c: 80 06 00 00 .long 0x680
0000000010001050 <__libc_csu_fini>:
10001050: 20 00 80 4e blr
...
10001060: 90 ef 01 00 .long 0x1ef90
10001064: 00 00 00 00 .long 0x0
0000000010001068 <__glink_PLTresolve>:
10001068: a6 02 08 7c mflr r0
1000106c: 05 00 9f 42 bcl 20,4*cr7+so,10001070 <__glink_PLTresolve+0x8>
10001070: a6 02 68 7d mflr r11
10001074: 18 00 41 f8 std r2,24(r1)
10001078: f0 ff 4b e8 ld r2,-16(r11)
1000107c: a6 03 08 7c mtlr r0
10001080: 50 60 8b 7d subf r12,r11,r12
10001084: 14 5a 62 7d add r11,r2,r11
10001088: d0 ff 0c 38 addi r0,r12,-48
1000108c: 00 00 8b e9 ld r12,0(r11)
10001090: 82 f0 00 78 rldicl r0,r0,62,2
10001094: a6 03 89 7d mtctr r12
10001098: 08 00 6b e9 ld r11,8(r11)
1000109c: 20 04 80 4e bctr
00000000100010a0 <__libc_start_main@plt>:
100010a0: c8 ff ff 4b b 10001068 <__glink_PLTresolve>
00000000100010a4 <__gmon_start__@plt>:
100010a4: c4 ff ff 4b b 10001068 <__glink_PLTresolve>
Disassembly of section .fini:
00000000100010a8 <_fini>:
100010a8: 02 10 40 3c lis r2,4098
100010ac: 00 7f 42 38 addi r2,r2,32512
100010b0: a6 02 08 7c mflr r0
100010b4: 10 00 01 f8 std r0,16(r1)
100010b8: a1 ff 21 f8 stdu r1,-96(r1)
100010bc: 60 00 21 38 addi r1,r1,96
100010c0: 10 00 01 e8 ld r0,16(r1)
100010c4: a6 03 08 7c mtlr r0
100010c8: 20 00 80 4e blr
// Local size_t definition; no standard header is included in this reproducer.
typedef unsigned long size_t;
// GCC/Clang vector-extension types of signed char. vector_size(N) is the
// total size in bytes, so each int8xN type has N one-byte lanes.
typedef signed char int8x1 __attribute__((vector_size(1)));
typedef signed char int8x2 __attribute__((vector_size(2)));
typedef signed char int8x4 __attribute__((vector_size(4)));
typedef signed char int8x8 __attribute__((vector_size(8)));
typedef signed char int8x16 __attribute__((vector_size(16)));
// Struct wrappers around the vector types. Each exposes its vector as the
// single member `s`, which the generic min(), split_by() and hmin() access.
struct sint8x1 { int8x1 s; };
struct sint8x2 { int8x2 s; };
struct sint8x4 { int8x4 s; };
struct sint8x8 { int8x8 s; };
struct sint8x16 { int8x16 s; };
// Minimal aggregate holding two values of type T in the array `a`.
// operator[] returns a copy of a[i]; the index is not range-checked.
template <class T> struct pair {T a[2]; T operator[](size_t i) const { return a[i]; } };
// Scalar minimum of two signed chars: yields a when a < b, otherwise b.
signed char min(signed char a, signed char b) {
return a < b ? a : b;
}
// Lane-wise minimum of two wrapper values that expose a vector member `s`.
// The lane count is computed as sizeof(a.s) / sizeof(a.s[0]); lane i of the
// result is min(a.s[i], b.s[i]).
template <class T>
T min(const T &a, const T &b) {
// Not initialized here; the loop below writes every lane of v.s.
T v;
for (size_t i = 0; i < sizeof(a.s)/sizeof(a.s[0]); i++)
v.s[i] = min(a.s[i], b.s[i]);
return v;
}
// Splits a 2-lane vector into two 1-lane vectors: the first element of the
// returned pair holds lane 0 of v, the second holds lane 1.
pair<sint8x1> split_by(sint8x2 v) {
sint8x1 a, b;
// Lanes are copied individually here rather than via __builtin_shufflevector.
a.s[0] = v.s[0];
b.s[0] = v.s[1];
return {{a, b}};
}
// Splits a 4-lane vector into two 2-lane halves: the first element of the
// returned pair holds lanes 0-1 of v, the second holds lanes 2-3.
pair<sint8x2> split_by(sint8x4 v) {
sint8x2 a, b;
// Both shuffle operands are v.s, so the indices select lanes of v directly.
a.s = __builtin_shufflevector(v.s, v.s, 0, 1);
b.s = __builtin_shufflevector(v.s, v.s, 2, 3);
return {{a, b}};
}
// Splits an 8-lane vector into two 4-lane halves: the first element of the
// returned pair holds lanes 0-3 of v, the second holds lanes 4-7.
// NOTE: the gist author's comment reports that the code generated for this
// overload is the incorrect part of the bad build.
pair<sint8x4> split_by(sint8x8 v) {
sint8x4 a, b;
// Both shuffle operands are v.s, so the indices select lanes of v directly.
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3);
b.s = __builtin_shufflevector(v.s, v.s, 4, 5, 6, 7);
return {{a, b}};
}
// Splits a 16-lane vector into two 8-lane halves: the first element of the
// returned pair holds lanes 0-7 of v, the second holds lanes 8-15.
pair<sint8x8> split_by(sint8x16 v) {
sint8x8 a, b;
// Both shuffle operands are v.s, so the indices select lanes of v directly.
a.s = __builtin_shufflevector(v.s, v.s, 0, 1, 2, 3, 4, 5, 6, 7);
b.s = __builtin_shufflevector(v.s, v.s, 8, 9, 10, 11, 12, 13, 14, 15);
return {{a, b}};
}
// Horizontal minimum, base case: a 1-lane vector's minimum is its only lane.
signed char hmin(sint8x1 v) {
return v.s[0];
}
// Horizontal minimum of a 2-lane vector: split into two 1-lane halves, take
// their lane-wise min, then reduce that with the 1-lane hmin overload.
signed char hmin(sint8x2 v) {
auto a = split_by(v);
return hmin(min(a[0], a[1]));
}
// Horizontal minimum of a 4-lane vector: split into two 2-lane halves, take
// their lane-wise min, then reduce that with the 2-lane hmin overload.
signed char hmin(sint8x4 v) {
auto a = split_by(v);
return hmin(min(a[0], a[1]));
}
// Horizontal minimum of an 8-lane vector: split into two 4-lane halves, take
// their lane-wise min, then reduce that with the 4-lane hmin overload.
signed char hmin(sint8x8 v) {
auto a = split_by(v);
return hmin(min(a[0], a[1]));
}
// Horizontal minimum of a 16-lane vector: split into two 8-lane halves, take
// their lane-wise min, then reduce that with the 8-lane hmin overload.
signed char hmin(sint8x16 v) {
auto a = split_by(v);
return hmin(min(a[0], a[1]));
}
// Test driver: fills a 16-lane vector from a constant and returns its
// horizontal minimum as the process exit status.
int main(void) {
// Every byte of this data is in the range 0x00-0x7f and several are zero,
// so the signed minimum is 0 (the gist comment gives 0 as the expected result).
const long data[] = {0x00010102464c457f, 0};
sint8x16 v;
// Copies 16 bytes; assumes long is 8 bytes so that data[] spans exactly 16.
__builtin_memcpy(&v.s, data, 16);
return hmin(v);
}
@MaskRay
Copy link
Author

MaskRay commented Jul 22, 2020

Compiled and linked with clang++ -g a.cc -mcpu=pwr9 hmin (-O0). The program should return 0, but the miscompiled binary returns -1 (shell exit code 255).

diff -u good.s bad.s

split_by(sint8x8) in bad.s is incorrect.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment