Skip to content

Instantly share code, notes, and snippets.

@bjacob
Created March 8, 2023 04:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjacob/e364981e10e878c4b092728fe6e087aa to your computer and use it in GitHub Desktop.
Save bjacob/e364981e10e878c4b092728fe6e087aa to your computer and use it in GitHub Desktop.
/tmp/pack_pad_transpose_1x9xi8_into_2x4x8x4xi8_dispatch_0-8dd977.so: file format elf64-littleaarch64
Disassembly of section .text:
// ---------------------------------------------------------------------------
// Function at 0x103cc (no symbol in this listing; the object file name
// suggests it is the pack_pad_transpose_1x9xi8_into_2x4x8x4xi8 dispatch
// entry point -- TODO confirm).
//
// Inputs, as actually used by the code below:
//   x1 -> record; reads u32 at +12 and +16, and at +32 a pointer to a table
//         whose first two 64-bit entries are used as (src, dst) pointers.
//   x2 -> record; reads u32 at +0 and +4.
//   x0 is never read as an input (it is overwritten at 0x1043c).
//   NOTE(review): presumably x1/x2 are the IREE HAL dispatch state and
//   workgroup state, +12/+16 are workgroup counts x/y and +0/+4 are
//   workgroup ids x/y -- confirm against the IREE headers.
// Returns: w0 = 0 on every exit path.
// Preserves: x19/x20 (used as w19/w20 scratch) and x29/x30 via the 32-byte frame.
// Clobbers: x0-x17, v0-v6, flags. x1 and x2 are reused as scratch after setup.
//
// Effect when [x2+4] == 0 and [x2+0] == 0: writes 256 bytes at dst as
// 2 tiles x 4 rows x 32 bytes. Each 32-byte row is eight 32-bit lanes.
// Row 0 of tile t holds source bytes src[8*t + i] (i = 0..7, limited to the
// 9 - 8*t bytes remaining, at most 8) in the low byte of lane i; every other
// byte, and rows 1..3 entirely, are zero.
// ---------------------------------------------------------------------------
00000000000103cc <.text>:
// Prologue: 32-byte frame, save fp/lr and the callee-saved pair x20/x19.
103cc: fd 7b be a9 stp x29, x30, [sp, #-32]!
103d0: f4 4f 01 a9 stp x20, x19, [sp, #16]
103d4: fd 03 00 91 mov x29, sp
// Only proceed when the u32 at [x2+4] is zero; otherwise return immediately.
103d8: 48 04 40 b9 ldr w8, [x2, #4]
103dc: a8 00 00 34 cbz w8, 0x103f0 <.text+0x24>
// Common epilogue (also the target of the loop exit at 0x10418): return 0.
103e0: f4 4f 41 a9 ldp x20, x19, [sp, #16]
103e4: e0 03 1f 2a mov w0, wzr
103e8: fd 7b c2 a8 ldp x29, x30, [sp], #32
103ec: c0 03 5f d6 ret
// Setup. After this block:
//   x13 = src + 3   (source base, biased by +3; lane loads use offsets -3..+4)
//   x12 = dst + 16  (destination base, biased by +16; stores use offset -16)
//   w8  = [x2+0], x10 = 8 * w8 (zero-extended)
//   x11 = 8 * [x1+12], w9 = [x1+16]
//   w14 = 8         (maximum number of source bytes taken per tile)
103f0: 29 10 40 f9 ldr x9, [x1, #32]
103f4: 0e 01 80 52 mov w14, #8
103f8: 48 00 40 b9 ldr w8, [x2]
103fc: 2d 31 40 a9 ldp x13, x12, [x9]
10400: 0a 7d 7d d3 ubfiz x10, x8, #3, #32
10404: 2b a4 41 29 ldp w11, w9, [x1, #12]
10408: ad 0d 00 91 add x13, x13, #3
1040c: 8c 41 00 91 add x12, x12, #16
10410: 6b f1 7d d3 lsl x11, x11, #3
10414: 02 00 00 14 b 0x1041c <.text+0x50>
// Outer loop back-edge / exit: leave through the epilogue when w9 != 0.
// NOTE(review): with w8 != 0 and w9 == 0 the two branches below spin forever;
// presumably the caller never passes that combination -- confirm.
10418: 49 fe ff 35 cbnz w9, 0x103e0 <.text+0x14>
// Work is only done when w8 == 0; x15 is the loop counter, starting at x10.
1041c: e8 ff ff 35 cbnz w8, 0x10418 <.text+0x4c>
10420: ef 03 0a aa mov x15, x10
10424: 04 00 00 14 b 0x10434 <.text+0x68>
// Step x15 by x11; repeat the whole 2-tile body while x15 < 4 (signed).
10428: ef 01 0b 8b add x15, x15, x11
1042c: ff 11 00 f1 cmp x15, #4
10430: 4a ff ff 54 b.ge 0x10418 <.text+0x4c>
// Tile loop init: x16 = tile index, x17 = source cursor, x0 = destination
// cursor, x3 = source bytes remaining (9 in total).
10434: f0 03 1f aa mov x16, xzr
10438: f1 03 0d aa mov x17, x13
1043c: e0 03 0c aa mov x0, x12
10440: 23 01 80 52 mov w3, #9
10444: 07 00 00 14 b 0x10460 <.text+0x94>
// Next tile: +128 bytes of destination, +8 bytes of source, and
// remaining = x1 (the "remaining - 8" value computed at 0x10460).
// Stop after 2 tiles.
10448: 10 06 00 91 add x16, x16, #1
1044c: 00 00 02 91 add x0, x0, #128
10450: 31 22 00 91 add x17, x17, #8
10454: e3 03 01 aa mov x3, x1
10458: 1f 0a 00 f1 cmp x16, #2
1045c: 60 fe ff 54 b.eq 0x10428 <.text+0x5c>
// Tile body entry: x1 = remaining - 8 (this overwrites the incoming x1
// pointer, which is no longer needed); x3 = min(remaining, 8);
// x2 = source cursor (overwrites the incoming x2 pointer);
// x4 = row store cursor; x5 = 16 is the row countdown, decremented by 4 per row.
10460: 61 20 00 f1 subs x1, x3, #8
10464: e2 03 11 aa mov x2, x17
10468: 63 b0 8e 9a csel x3, x3, x14, lt
1046c: e4 03 00 aa mov x4, x0
10470: 05 02 80 52 mov w5, #16
10474: 2d 00 00 14 b 0x10528 <.text+0x15c>
// Padding rows enter here with v0 = 0; data rows jump straight to 0x1047c
// with the source bytes already in v0.b[0..7].
10478: 00 e4 00 6f movi v0.2d, #0000000000000000
// Row assembly: spread v0.b[0..7] into eight zero-extended 32-bit lanes,
// v1 = {b0, b1, b2, b3} and v0 = {b4, b5, b6, b7}. Each byte is moved to a
// GPR, inserted into h[0] of a zeroed vector, narrowed with xtn, and its
// s[0] is copied into place. The statement order matters: every umov from v0
// happens before v0 is overwritten at 0x104dc.
1047c: 06 3c 01 0e umov w6, v0.b[0]
10480: 07 3c 03 0e umov w7, v0.b[1]
10484: 01 e4 00 6f movi v1.2d, #0000000000000000
// Row countdown; the flags set here are consumed by the b.eq at 0x10524
// (nothing in between writes the flags).
10488: a5 10 00 f1 subs x5, x5, #4
1048c: 02 e4 00 6f movi v2.2d, #0000000000000000
// x2 advances by 36 per row, but it is only dereferenced on the first row
// of a tile (x5 == 16) and is reloaded from x17 for each tile.
10490: 42 90 00 91 add x2, x2, #36
10494: 13 3c 0b 0e umov w19, v0.b[5]
10498: 04 e4 00 6f movi v4.2d, #0000000000000000
1049c: 14 3c 0d 0e umov w20, v0.b[6]
104a0: c1 1c 02 4e mov v1.h[0], w6
104a4: e2 1c 02 4e mov v2.h[0], w7
104a8: 07 3c 09 0e umov w7, v0.b[4]
104ac: 03 e4 00 6f movi v3.2d, #0000000000000000
104b0: 06 3c 05 0e umov w6, v0.b[2]
104b4: 21 28 21 0e xtn v1.8b, v1.8h
104b8: 42 28 21 0e xtn v2.8b, v2.8h
104bc: e4 1c 02 4e mov v4.h[0], w7
104c0: 07 3c 0f 0e umov w7, v0.b[7]
104c4: 41 04 0c 6e mov v1.s[1], v2.s[0]
104c8: 02 e4 00 6f movi v2.2d, #0000000000000000
104cc: 06 e4 00 6f movi v6.2d, #0000000000000000
104d0: c3 1c 02 4e mov v3.h[0], w6
104d4: 06 3c 07 0e umov w6, v0.b[3]
104d8: 62 1e 02 4e mov v2.h[0], w19
104dc: 80 28 21 0e xtn v0.8b, v4.8h
104e0: 05 e4 00 6f movi v5.2d, #0000000000000000
104e4: 86 1e 02 4e mov v6.h[0], w20
104e8: 63 28 21 0e xtn v3.8b, v3.8h
104ec: 42 28 21 0e xtn v2.8b, v2.8h
104f0: c5 1c 02 4e mov v5.h[0], w6
104f4: 40 04 0c 6e mov v0.s[1], v2.s[0]
104f8: 02 e4 00 6f movi v2.2d, #0000000000000000
104fc: c4 28 21 0e xtn v4.8b, v6.8h
10500: 61 04 14 6e mov v1.s[2], v3.s[0]
10504: a3 28 21 0e xtn v3.8b, v5.8h
10508: 80 04 14 6e mov v0.s[2], v4.s[0]
1050c: e2 1c 02 4e mov v2.h[0], w7
10510: 61 04 1c 6e mov v1.s[3], v3.s[0]
10514: 42 28 21 0e xtn v2.8b, v2.8h
10518: 40 04 1c 6e mov v0.s[3], v2.s[0]
// Store the 32-byte row at x4 - 16 (x4 carries the +16 bias), advance to the
// next row, and finish the tile once the countdown from 0x10488 reached zero.
1051c: 81 80 3f ad stp q1, q0, [x4, #-16]
10520: 84 80 00 91 add x4, x4, #32
10524: 20 f9 ff 54 b.eq 0x10448 <.text+0x7c>
// Row dispatch: only the first row of a tile (x5 == 16) reads source data;
// for x5 < 16 the row is all padding and v0 is zeroed at 0x10478.
10528: a6 3c 00 d1 sub x6, x5, #15
1052c: df 04 00 f1 cmp x6, #1
10530: 4b fa ff 54 b.lt 0x10478 <.text+0xac>
// Lane 0 = first source byte of this tile, at x2 - 3 (x2 carries the +3 bias
// applied at 0x10408). The scalar load also zeroes the rest of v0, so lanes
// that are not loaded below stay zero.
// FIX: the original listing had `40 f4 7f 3d ldr b0, [x2, #4093]` here. The
// -3 displacement had wrapped into the unsigned 12-bit offset field
// (4093 = 4096 - 3), so lane 0 was read 4096 bytes beyond the intended byte.
// A negative byte displacement needs the unscaled signed-offset form (LDUR).
10534: 40 d0 5f 3c ldur b0, [x2, #-3]
// Load lanes 1..7 from x2-2 .. x2+4, stopping as soon as x3 (valid byte
// count) lanes are filled. Each cmp precedes its ld1, which does not alter
// the flags, so the following b.eq still tests that cmp.
10538: 7f 04 00 f1 cmp x3, #1
1053c: 00 fa ff 54 b.eq 0x1047c <.text+0xb0>
10540: 46 08 00 d1 sub x6, x2, #2
10544: 7f 08 00 f1 cmp x3, #2
10548: c0 04 40 0d ld1 { v0.b }[1], [x6]
1054c: 80 f9 ff 54 b.eq 0x1047c <.text+0xb0>
10550: 46 04 00 d1 sub x6, x2, #1
10554: 7f 0c 00 f1 cmp x3, #3
10558: c0 08 40 0d ld1 { v0.b }[2], [x6]
1055c: 00 f9 ff 54 b.eq 0x1047c <.text+0xb0>
10560: 40 0c 40 0d ld1 { v0.b }[3], [x2]
10564: 7f 10 00 f1 cmp x3, #4
10568: a0 f8 ff 54 b.eq 0x1047c <.text+0xb0>
1056c: 46 04 00 91 add x6, x2, #1
10570: 7f 14 00 f1 cmp x3, #5
10574: c0 10 40 0d ld1 { v0.b }[4], [x6]
10578: 20 f8 ff 54 b.eq 0x1047c <.text+0xb0>
1057c: 46 08 00 91 add x6, x2, #2
10580: 7f 18 00 f1 cmp x3, #6
10584: c0 14 40 0d ld1 { v0.b }[5], [x6]
10588: a0 f7 ff 54 b.eq 0x1047c <.text+0xb0>
1058c: 46 0c 00 91 add x6, x2, #3
10590: 7f 1c 00 f1 cmp x3, #7
10594: c0 18 40 0d ld1 { v0.b }[6], [x6]
10598: 20 f7 ff 54 b.eq 0x1047c <.text+0xb0>
// Eighth lane: reached only when x3 matched none of 1..7 (x3 is 8 here,
// given the clamp at 0x10468).
1059c: 46 10 00 91 add x6, x2, #4
105a0: c0 1c 40 0d ld1 { v0.b }[7], [x6]
105a4: b6 ff ff 17 b 0x1047c <.text+0xb0>
// iree_hal_executable_library_query
// In:  w0 = selector value; no other register is read.
// Out: x0 = PC-relative address formed by the adr below
//      (0x105ac + 65620 = 0x20600) when w0 == 3, otherwise 0.
// Leaf function: no stack use; clobbers x8 and the flags.
// NOTE(review): presumably w0 is the requested library interface version and
// 0x20600 is the library descriptor -- confirm against the IREE headers.
00000000000105a8 <iree_hal_executable_library_query>:
// The nop has no effect. NOTE(review): possibly left behind by linker
// relaxation of an adrp/add pair -- not verifiable from this listing.
105a8: 1f 20 03 d5 nop
105ac: a8 02 08 10 adr x8, #65620
// Select the address only for an exact match on 3; any other value yields 0.
105b0: 1f 0c 00 71 cmp w0, #3
105b4: 00 01 9f 9a csel x0, x8, xzr, eq
105b8: c0 03 5f d6 ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment