Skip to content

Instantly share code, notes, and snippets.

@bjacob
Created March 8, 2023 04:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjacob/e69201fc4528ea516fc1788cabb597f0 to your computer and use it in GitHub Desktop.
Save bjacob/e69201fc4528ea516fc1788cabb597f0 to your computer and use it in GitHub Desktop.
/tmp/pack_pad_transpose_1x9xi8_into_2x4x8x4xi8_dispatch_0-53639f.so: file format elf64-littleaarch64
Disassembly of section .text:
// ---------------------------------------------------------------------------
// Anonymous function at 0x103cc (symbol stripped; shown as <.text>).
// NOTE(review): the object file name suggests this is the IREE dispatch
// "pack_pad_transpose_1x9xi8_into_2x4x8x4xi8_dispatch_0" - inferred from the
// file name only, confirm against the build.
//
// Inputs actually read by the code:
//   x1 : pointer; 32-bit values are read at +12 and +16, a pointer at +32.
//        The pointer at +32 addresses two consecutive pointers
//        (first = source base, second = destination base, judging by the
//        loads/stores below).
//   x2 : pointer; 32-bit values are read at +0 and +4.
//   x0 : never read as an input (it is overwritten before any use).
//   presumably (environment, dispatch_state, workgroup_state) of the IREE HAL
//   executable ABI - TODO confirm.
// Output: w0 = 0 on every return path.
// Clobbers: x0-x17, v0-v6, flags. x19/x20 are used as scratch and therefore
// saved/restored around the body.
//
// Work performed when both 32-bit values at x2+0 and x2+4 are zero:
//   2 outer steps (x16) x 4 inner steps (x5 = 16,12,8,4), each inner step
//   storing 32 bytes, i.e. 256 destination bytes in total. Source bytes are
//   read only in the first inner step of each outer step (8 bytes, then 1
//   byte, 9 in total); every other destination byte is written as zero.
//
// NOTE(review): degenerate inputs can spin forever, as read from the branches:
//   * [x2+0] != 0 with [x1+16] == 0 bounces between 0x10418 and 0x1041c;
//   * [x1+16] == 0 after the work is done re-enters the loop at 0x10420;
//   * [x1+12] == 0 never advances x15 past the `cmp x15, #4` exit test.
// ---------------------------------------------------------------------------
00000000000103cc <.text>:
// Prologue: 32-byte frame holding x29/x30 and the callee-saved x20/x19.
103cc: fd 7b be a9 stp x29, x30, [sp, #-32]!
103d0: f4 4f 01 a9 stp x20, x19, [sp, #16]
103d4: fd 03 00 91 mov x29, sp
// Only proceed to the body when the 32-bit value at x2+4 is zero;
// otherwise fall straight into the epilogue.
103d8: 48 04 40 b9 ldr w8, [x2, #4]
103dc: a8 00 00 34 cbz w8, 0x103f0 <.text+0x24>
// Epilogue (shared by all exits): restore registers, return 0.
103e0: f4 4f 41 a9 ldp x20, x19, [sp, #16]
103e4: e0 03 1f 2a mov w0, wzr
103e8: fd 7b c2 a8 ldp x29, x30, [sp], #32
103ec: c0 03 5f d6 ret
// Body setup.
//   x9  = pointer loaded from x1+32 (table of two pointers)
//   x14 = 8, the per-step element cap used by the csel at 0x10468
//   w8  = 32-bit value at x2+0
//   x13 = first table pointer + 3  (source cursor base, biased by 3 so the
//         loads below can address -3..+4 around it)
//   x12 = second table pointer + 16 (destination cursor base, biased by 16
//         to match the `stp ..., [x4, #-16]` store)
//   x10 = zero-extended w8 * 8  (start value of the x15 loop)
//   x11 = 32-bit value at x1+12, * 8 (step of the x15 loop)
//   w9  = 32-bit value at x1+16
103f0: 29 10 40 f9 ldr x9, [x1, #32]
103f4: 0e 01 80 52 mov w14, #8
103f8: 48 00 40 b9 ldr w8, [x2]
103fc: 2d 31 40 a9 ldp x13, x12, [x9]
10400: 0a 7d 7d d3 ubfiz x10, x8, #3, #32
10404: 2b a4 41 29 ldp w11, w9, [x1, #12]
10408: ad 0d 00 91 add x13, x13, #3
1040c: 8c 41 00 91 add x12, x12, #16
10410: 6b f1 7d d3 lsl x11, x11, #3
10414: 02 00 00 14 b 0x1041c <.text+0x50>
// Outermost loop control: leave via the epilogue when w9 != 0; the work
// below is entered only while w8 == 0.
10418: 49 fe ff 35 cbnz w9, 0x103e0 <.text+0x14>
1041c: e8 ff ff 35 cbnz w8, 0x10418 <.text+0x4c>
// x15 loop: x15 starts at x10 and advances by x11 until x15 >= 4 (signed).
// x15 is not used to form any address below, so every pass of this loop
// rewrites the same destination bytes from the same source bytes.
10420: ef 03 0a aa mov x15, x10
10424: 04 00 00 14 b 0x10434 <.text+0x68>
10428: ef 01 0b 8b add x15, x15, x11
1042c: ff 11 00 f1 cmp x15, #4
10430: 4a ff ff 54 b.ge 0x10418 <.text+0x4c>
// Per-pass initialisation of the x16 loop:
//   x16 = step counter (0..1), x17 = source cursor, x0 = destination cursor,
//   x3 = 9 = number of source elements still to be consumed.
10434: f0 03 1f aa mov x16, xzr
10438: f1 03 0d aa mov x17, x13
1043c: e0 03 0c aa mov x0, x12
10440: 23 01 80 52 mov w3, #9
10444: 07 00 00 14 b 0x10460 <.text+0x94>
// x16 loop step: advance destination by 128 bytes and source by 8 bytes,
// carry the remaining-element count (x1 = previous x3 - 8) into x3, and
// stop after two steps.
10448: 10 06 00 91 add x16, x16, #1
1044c: 00 00 02 91 add x0, x0, #128
10450: 31 22 00 91 add x17, x17, #8
10454: e3 03 01 aa mov x3, x1
10458: 1f 0a 00 f1 cmp x16, #2
1045c: 60 fe ff 54 b.eq 0x10428 <.text+0x5c>
// x16 loop body head. x1 is repurposed here as "remaining - 8" (the original
// pointer in x1 is no longer needed). x3 = min(x3, 8) (signed) = number of
// source bytes to load in this step: 8 on the first step, 1 on the second.
// x2/x4 are the working source/destination cursors, x5 = 16 is the inner
// countdown.
10460: 61 20 00 f1 subs x1, x3, #8
10464: e2 03 11 aa mov x2, x17
10468: 63 b0 8e 9a csel x3, x3, x14, lt
1046c: e4 03 00 aa mov x4, x0
10470: 05 02 80 52 mov w5, #16
10474: 2c 00 00 14 b 0x10524 <.text+0x158>
// Widen-and-store block (0x10478..0x10520).
// On entry v0.b[0..7] holds up to 8 source bytes (zero where not loaded).
// Each byte k is moved through a general register into halfword 0 of a
// zeroed vector, narrowed with xtn, and inserted as 32-bit lane (k mod 4) of
// v1 (k = 0..3) or v0 (k = 4..7). Result: source byte k ends up at byte
// offset 4*k of the 32-byte store, with the other three bytes of each
// 32-bit lane zero.
// Also in this block: x5 -= 4 with flags set (consumed by the b.eq at
// 0x10520; nothing in between writes the flags) and x2 += 36.
10478: 06 3c 01 0e umov w6, v0.b[0]
1047c: 07 3c 03 0e umov w7, v0.b[1]
10480: 01 e4 00 6f movi v1.2d, #0000000000000000
10484: a5 10 00 f1 subs x5, x5, #4
10488: 02 e4 00 6f movi v2.2d, #0000000000000000
1048c: 42 90 00 91 add x2, x2, #36
10490: 13 3c 0b 0e umov w19, v0.b[5]
10494: 04 e4 00 6f movi v4.2d, #0000000000000000
10498: 14 3c 0d 0e umov w20, v0.b[6]
1049c: c1 1c 02 4e mov v1.h[0], w6
104a0: e2 1c 02 4e mov v2.h[0], w7
104a4: 07 3c 09 0e umov w7, v0.b[4]
104a8: 03 e4 00 6f movi v3.2d, #0000000000000000
104ac: 06 3c 05 0e umov w6, v0.b[2]
104b0: 21 28 21 0e xtn v1.8b, v1.8h
104b4: 42 28 21 0e xtn v2.8b, v2.8h
104b8: e4 1c 02 4e mov v4.h[0], w7
104bc: 07 3c 0f 0e umov w7, v0.b[7]
104c0: 41 04 0c 6e mov v1.s[1], v2.s[0]
104c4: 02 e4 00 6f movi v2.2d, #0000000000000000
104c8: 06 e4 00 6f movi v6.2d, #0000000000000000
104cc: c3 1c 02 4e mov v3.h[0], w6
104d0: 06 3c 07 0e umov w6, v0.b[3]
104d4: 62 1e 02 4e mov v2.h[0], w19
// From here v0 is reused as the output vector for source bytes 4..7; all of
// its input bytes have already been extracted into w6/w7/w19/w20.
104d8: 80 28 21 0e xtn v0.8b, v4.8h
104dc: 05 e4 00 6f movi v5.2d, #0000000000000000
104e0: 86 1e 02 4e mov v6.h[0], w20
104e4: 63 28 21 0e xtn v3.8b, v3.8h
104e8: 42 28 21 0e xtn v2.8b, v2.8h
104ec: c5 1c 02 4e mov v5.h[0], w6
104f0: 40 04 0c 6e mov v0.s[1], v2.s[0]
104f4: 02 e4 00 6f movi v2.2d, #0000000000000000
104f8: c4 28 21 0e xtn v4.8b, v6.8h
104fc: 61 04 14 6e mov v1.s[2], v3.s[0]
10500: a3 28 21 0e xtn v3.8b, v5.8h
10504: 80 04 14 6e mov v0.s[2], v4.s[0]
10508: e2 1c 02 4e mov v2.h[0], w7
1050c: 61 04 1c 6e mov v1.s[3], v3.s[0]
10510: 42 28 21 0e xtn v2.8b, v2.8h
10514: 40 04 1c 6e mov v0.s[3], v2.s[0]
// Store the 32 result bytes at x4-16 (x4 carries a +16 bias), advance the
// destination cursor, and leave the inner loop once x5 has reached zero.
10518: 81 80 3f ad stp q1, q0, [x4, #-16]
1051c: 84 80 00 91 add x4, x4, #32
10520: 40 f9 ff 54 b.eq 0x10448 <.text+0x7c>
// Inner loop head: clear v0, then decide whether to load source bytes.
// Loads happen only while x5 - 15 >= 1, i.e. x5 >= 16, which holds only on
// the first inner iteration; the remaining three iterations store zeros.
10524: 00 e4 00 6f movi v0.2d, #0000000000000000
10528: a6 3c 00 d1 sub x6, x5, #15
1052c: df 04 00 f1 cmp x6, #1
10530: 4b fa ff 54 b.lt 0x10478 <.text+0xac>
// Guarded byte loads: lane k is loaded from x2 - 3 + k, and loading stops
// after lane k once x3 == k + 1. Lane 7 is loaded unconditionally when none
// of the x3 == 1..7 tests matched (x3 is 8 on that path for the counts used
// here).
10534: 46 0c 00 d1 sub x6, x2, #3
10538: 7f 04 00 f1 cmp x3, #1
1053c: c0 00 40 0d ld1 { v0.b }[0], [x6]
10540: c0 f9 ff 54 b.eq 0x10478 <.text+0xac>
10544: 46 08 00 d1 sub x6, x2, #2
10548: 7f 08 00 f1 cmp x3, #2
1054c: c0 04 40 0d ld1 { v0.b }[1], [x6]
10550: 40 f9 ff 54 b.eq 0x10478 <.text+0xac>
10554: 46 04 00 d1 sub x6, x2, #1
10558: 7f 0c 00 f1 cmp x3, #3
1055c: c0 08 40 0d ld1 { v0.b }[2], [x6]
10560: c0 f8 ff 54 b.eq 0x10478 <.text+0xac>
10564: 40 0c 40 0d ld1 { v0.b }[3], [x2]
10568: 7f 10 00 f1 cmp x3, #4
1056c: 60 f8 ff 54 b.eq 0x10478 <.text+0xac>
10570: 46 04 00 91 add x6, x2, #1
10574: 7f 14 00 f1 cmp x3, #5
10578: c0 10 40 0d ld1 { v0.b }[4], [x6]
1057c: e0 f7 ff 54 b.eq 0x10478 <.text+0xac>
10580: 46 08 00 91 add x6, x2, #2
10584: 7f 18 00 f1 cmp x3, #6
10588: c0 14 40 0d ld1 { v0.b }[5], [x6]
1058c: 60 f7 ff 54 b.eq 0x10478 <.text+0xac>
10590: 46 0c 00 91 add x6, x2, #3
10594: 7f 1c 00 f1 cmp x3, #7
10598: c0 18 40 0d ld1 { v0.b }[6], [x6]
1059c: e0 f6 ff 54 b.eq 0x10478 <.text+0xac>
105a0: 46 10 00 91 add x6, x2, #4
105a4: c0 1c 40 0d ld1 { v0.b }[7], [x6]
105a8: b4 ff ff 17 b 0x10478 <.text+0xac>
// ---------------------------------------------------------------------------
// iree_hal_executable_library_query
// In:  w0 = 32-bit selector (presumably the requested library/ABI version -
//           TODO confirm against the IREE executable library header).
// Out: x0 = 0x20600 when w0 == 3, otherwise 0.
//      0x20600 is the PC-relative target of the adr below
//      (0x105b0 + 65616); what lives at that address is not visible in this
//      listing.
// Leaf function: no stack use; writes only x0, x8 and the flags.
// ---------------------------------------------------------------------------
00000000000105ac <iree_hal_executable_library_query>:
105ac: 1f 20 03 d5 nop
105b0: 88 02 08 10 adr x8, #65616
105b4: 1f 0c 00 71 cmp w0, #3
// Select the address on an exact match, else return zero.
105b8: 00 01 9f 9a csel x0, x8, xzr, eq
105bc: c0 03 5f d6 ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment