Skip to content

Instantly share code, notes, and snippets.

@luismarques
Created March 18, 2020 09:35
Show Gist options
  • Save luismarques/52b5b03b22b0fa6ac227067323742db4 to your computer and use it in GitHub Desktop.
Save luismarques/52b5b03b22b0fa6ac227067323742db4 to your computer and use it in GitHub Desktop.
embench/build-ibex-clang/src/aha-mont64/aha-mont64: file format elf32-littleriscv
Disassembly of section .vectors:
# Vector area. objdump prints "..." for the bytes between 0x100000 and
# 0x100080 that it left out of the listing; the only instruction shown is the
# entry at 0x100080, which jumps straight to _start.
# NOTE(review): presumably 0x100080 is the core's reset/boot address --
# confirm against the linker script.
00100000 <_stack_start-0x100000>:
...
100080: 0040006f j 100084 <_start>
Disassembly of section .text:
# _start: startup code reached from the jump at 0x100080.
# Sequence: clear gp and tp, point sp at _stack_start, run the zero loop over
# the range [t0, t1], call main with a0 = a1 = 0, then store 1 to 0x20004.
00100084 <_start>:
100084: 4181 li gp,0
100086: 4201 li tp,0
# sp = 0x100088 + 0x100000 - 136 = 0x200000 (_stack_start).
100088: 00100117 auipc sp,0x100
10008c: f7810113 addi sp,sp,-136 # 200000 <_stack_start>
# t0 = loop start, t1 = loop end. objdump resolves both to 0x10055c
# (_bss_end), so in this image the range is empty and the bge below skips
# the loop entirely.
100090: 00000297 auipc t0,0x0
100094: 4cc28293 addi t0,t0,1228 # 10055c <_bss_end>
100098: 00000317 auipc t1,0x0
10009c: 4c430313 addi t1,t1,1220 # 10055c <_bss_end>
1000a0: 0062d763 bge t0,t1,1000ae <zero_loop_end>
# Store a zero word at t0, advance t0 by 4, repeat while t0 <= t1 (signed).
# NOTE(review): the loop test is inclusive, so whenever the loop runs it also
# writes the word at address t1 itself -- confirm that is intended.
001000a4 <zero_loop>:
1000a4: 0002a023 sw zero,0(t0)
1000a8: 0291 addi t0,t0,4
1000aa: fe535de3 bge t1,t0,1000a4 <zero_loop>
# Call main(a0 = 0, a1 = 0).
001000ae <zero_loop_end>:
1000ae: 4501 li a0,0
1000b0: 4581 li a1,0
1000b2: 2161 jal 10053a <main>
# After main returns: t0 = 0x20000 + 4 = 0x20004, then store the value 1
# there. The objdump annotation on the sw shows 0x20000 because it is derived
# from the lui alone; the addi in between adds 4.
# NOTE(review): presumably a simulator/halt control register -- confirm.
# NOTE(review): nothing follows this store inside _start; the next address
# (0x1000c0) is mulul64, so execution falls through into it unless the store
# itself stops the core or simulation.
1000b4: 000202b7 lui t0,0x20
1000b8: 0291 addi t0,t0,4
1000ba: 4305 li t1,1
1000bc: 0062a023 sw t1,0(t0) # 20000 <_stack_len+0x18000>
# mulul64: unsigned 64 x 64 -> 128-bit multiply built from 32-bit mul/mulhu.
# In:   a1:a0 = first operand (hi:lo), a3:a2 = second operand (hi:lo)
#       a4    = address that receives the upper 64 bits of the product
#       a5    = address that receives the lower 64 bits of the product
# Out:  product words stored at 0(a5), 4(a5), 0(a4), 4(a4), least significant
#       first. No value is set up for return.
# Uses: overwrites a0-a3, a6, a7, t0, t1; no stack accesses, no calls.
001000c0 <mulul64>:
# a7:a6 = a2 * a0 (low x low); a6 is product word 0.
1000c0: 02a60833 mul a6,a2,a0
1000c4: 02a638b3 mulhu a7,a2,a0
# t0:a2 = a2 * a1; accumulate into t1 (partial word 1), carry goes into t0.
1000c8: 02b632b3 mulhu t0,a2,a1
1000cc: 02b60633 mul a2,a2,a1
1000d0: 00c88333 add t1,a7,a2
1000d4: 01133633 sltu a2,t1,a7
1000d8: 92b2 add t0,t0,a2
# a7:a0 = a3 * a0; a0 += t1 completes word 1, a2 = carry + high half.
1000da: 02a6b8b3 mulhu a7,a3,a0
1000de: 02a68533 mul a0,a3,a0
1000e2: 951a add a0,a0,t1
1000e4: 00653633 sltu a2,a0,t1
1000e8: 9646 add a2,a2,a7
# Store the lower 64 bits: word 0 at 0(a5), word 1 at 4(a5).
1000ea: 0107a023 sw a6,0(a5)
1000ee: c3c8 sw a0,4(a5)
# a0:a1 = a3 * a1 (high x high); fold in t0 and a2 with carry propagation.
1000f0: 02b6b533 mulhu a0,a3,a1
1000f4: 02b685b3 mul a1,a3,a1
1000f8: 9596 add a1,a1,t0
1000fa: 0055b6b3 sltu a3,a1,t0
1000fe: 9536 add a0,a0,a3
100100: 962e add a2,a2,a1
100102: 00b635b3 sltu a1,a2,a1
100106: 952e add a0,a0,a1
# Store the upper 64 bits: word 2 at 0(a4), word 3 at 4(a4).
100108: c310 sw a2,0(a4)
10010a: c348 sw a0,4(a4)
10010c: 8082 ret
# modul64: 64-iteration shift-and-conditional-subtract loop on a 128-bit value.
# In:   a1:a0 = x (hi:lo), the upper 64 bits of the 128-bit value
#       a3:a2 = y (hi:lo), the lower 64 bits
#       a5:a4 = z (hi:lo), the value that is conditionally subtracted
# Out:  a1:a0 = final x
# Per iteration: t = sign fill of x; x:y is shifted left by one bit; if
# (x | t) >= z as an unsigned 64-bit compare, then x -= z and bit 0 of y is
# set. a6 (high word) and a7 (low word) form a 64-bit counter that starts at
# 1; the loop repeats while the counter value seen by the iteration is < 64.
# Uses: overwrites a0-a3, a6, a7, t0-t6; no stack accesses, no calls.
# NOTE(review): this has the shape of a restoring division that yields
# (x * 2^64 + y) mod z -- confirm against the C source.
0010010e <modul64>:
# Counter a6:a7 = 1, then enter the loop body.
10010e: 4801 li a6,0
100110: 4885 li a7,1
100112: a831 j 10012e <modul64+0x20>
# Reached only when the counter's high word is non-zero: continue flag = 0.
100114: 4e01 li t3,0
# Commit the iteration: y gets its shifted-in bits, x gets the subtraction
# result (with borrow), the counter is advanced; exit when t3 == 0.
100116: 01d6e6b3 or a3,a3,t4
10011a: 00566633 or a2,a2,t0
10011e: 41e585b3 sub a1,a1,t5
100122: 40a30533 sub a0,t1,a0
100126: 987e add a6,a6,t6
100128: 889e mv a7,t2
10012a: 060e0263 beqz t3,10018e <modul64+0x80>
# Loop body. t0 = x.hi >> 31 (arithmetic) = t; t2:t1 = (x << 1) | (y >> 63);
# a0 = high word of (x | t).
10012e: 41f5d293 srai t0,a1,0x1f
100132: 01f55313 srli t1,a0,0x1f
100136: 0586 slli a1,a1,0x1
100138: 0065e3b3 or t2,a1,t1
10013c: 00151313 slli t1,a0,0x1
100140: 01f6d593 srli a1,a3,0x1f
100144: 0053e533 or a0,t2,t0
100148: 00b36333 or t1,t1,a1
# a1 = ((x | t) < z), unsigned: compare high words, and low words on a tie.
10014c: 00f50563 beq a0,a5,100156 <modul64+0x48>
100150: 00f535b3 sltu a1,a0,a5
100154: a029 j 10015e <modul64+0x50>
100156: 00536533 or a0,t1,t0
10015a: 00e535b3 sltu a1,a0,a4
# t0 = ((x | t) >= z), the bit OR-ed into y. Subtrahend t3:a0 = z or 0.
10015e: 0015c293 xori t0,a1,1
100162: 4e01 li t3,0
100164: 4501 li a0,0
100166: e199 bnez a1,10016c <modul64+0x5e>
100168: 8e3e mv t3,a5
10016a: 853a mv a0,a4
# y <<= 1 (t4 carries the bit from the low word into the high word);
# t5 = borrow of the low-word subtraction; a1 = x.hi - subtrahend.hi.
10016c: 01f65e93 srli t4,a2,0x1f
100170: 0686 slli a3,a3,0x1
100172: 0606 slli a2,a2,0x1
100174: 00a33f33 sltu t5,t1,a0
100178: 41c385b3 sub a1,t2,t3
# t2 = counter.lo + 1, t6 = carry into counter.hi.
10017c: 00188393 addi t2,a7,1
100180: 0113bfb3 sltu t6,t2,a7
# Continue flag t3 = (counter.hi == 0 && counter.lo < 64).
100184: f80818e3 bnez a6,100114 <modul64+0x6>
100188: 0408be13 sltiu t3,a7,64
10018c: b769 j 100116 <modul64+0x8>
10018e: 8082 ret
# montmul: multiply-and-reduce step built on two mulul64 calls.
# In:   a1:a0 = first operand (hi:lo), a3:a2 = second operand (hi:lo)
#       a5:a4 = m (hi:lo), a7:a6 = mprime (hi:lo)
# Out:  a1:a0 = result (hi:lo)
# Steps, as performed below:
#   t   = first * second                   (128 bits: thi at sp+24, tlo at sp+16)
#   tm  = low 64 bits of (tlo * mprime)
#   tmm = tm * m                           (128 bits: hi at sp+8, lo at sp+0)
#   u   = thi + tmmhi + carry_out(tlo + tmmlo)
#   ov  = u wrapped past 64 bits
#   result = (ov || u >= m) ? u - m : u
# Frame: 64 bytes; ra and s0-s5 are saved at sp+60 .. sp+36 and restored
# before returning.
00100190 <montmul>:
100190: 7139 addi sp,sp,-64
100192: de06 sw ra,60(sp)
100194: dc22 sw s0,56(sp)
100196: da26 sw s1,52(sp)
100198: d84a sw s2,48(sp)
10019a: d64e sw s3,44(sp)
10019c: d452 sw s4,40(sp)
10019e: d256 sw s5,36(sp)
# Keep mprime in s4:s1 and m in s2:s3 across the calls.
1001a0: 8a46 mv s4,a7
1001a2: 84c2 mv s1,a6
1001a4: 893e mv s2,a5
1001a6: 89ba mv s3,a4
# mulul64(first, second, &thi = sp+24, &tlo = sp+16); a0-a3 pass through.
1001a8: 0838 addi a4,sp,24
1001aa: 081c addi a5,sp,16
1001ac: 3f11 jal 1000c0 <mulul64>
# s5:s0 = tlo. a1:a0 = low 64 bits of tlo * mprime (tm).
1001ae: 4442 lw s0,16(sp)
1001b0: 4ad2 lw s5,20(sp)
1001b2: 03440533 mul a0,s0,s4
1001b6: 029435b3 mulhu a1,s0,s1
1001ba: 952e add a0,a0,a1
1001bc: 029a85b3 mul a1,s5,s1
1001c0: 95aa add a1,a1,a0
1001c2: 02940533 mul a0,s0,s1
# mulul64(tm, m, &tmmhi = sp+8, &tmmlo = sp+0).
1001c6: 0038 addi a4,sp,8
1001c8: 878a mv a5,sp
1001ca: 864e mv a2,s3
1001cc: 86ca mv a3,s2
1001ce: 3dcd jal 1000c0 <mulul64>
# a3:a2 = thi, a1:a4 = tmmhi, a0 = high word of tmmlo.
1001d0: 4662 lw a2,24(sp)
1001d2: 46f2 lw a3,28(sp)
1001d4: 45b2 lw a1,12(sp)
1001d6: 4722 lw a4,8(sp)
1001d8: 4512 lw a0,4(sp)
# a5:a1 = thi + tmmhi (s1 = carry between the words).
1001da: 00d587b3 add a5,a1,a3
1001de: 00c705b3 add a1,a4,a2
1001e2: 00e5b4b3 sltu s1,a1,a4
1001e6: fffac713 not a4,s5
1001ea: 97a6 add a5,a5,s1
# a4 = (~tlo < tmmlo) as an unsigned 64-bit compare, i.e. the carry out of
# tlo + tmmlo: compare high words, and low words only on a tie.
1001ec: 00e51563 bne a0,a4,1001f6 <montmul+0x66>
1001f0: 4502 lw a0,0(sp)
1001f2: fff44713 not a4,s0
1001f6: 00a73733 sltu a4,a4,a0
# a1:a0 = u = thi + tmmhi + carry.
1001fa: 00e58533 add a0,a1,a4
1001fe: 00b535b3 sltu a1,a0,a1
100202: 95be add a1,a1,a5
# a5 = (u < thi), unsigned 64-bit.
100204: 00d58563 beq a1,a3,10020e <montmul+0x7e>
100208: 00d5b7b3 sltu a5,a1,a3
10020c: a019 j 100212 <montmul+0x82>
10020e: 00c537b3 sltu a5,a0,a2
# a2 = ov = (u < thi) | ((u == thi) & carry).
100212: 8ead xor a3,a3,a1
100214: 8e29 xor a2,a2,a0
100216: 8e55 or a2,a2,a3
100218: 00163613 seqz a2,a2
10021c: 8e79 and a2,a2,a4
10021e: 8e5d or a2,a2,a5
# a3 = (u >= m), unsigned 64-bit; a2 |= a3.
100220: 01258563 beq a1,s2,10022a <montmul+0x9a>
100224: 0125b6b3 sltu a3,a1,s2
100228: a019 j 10022e <montmul+0x9e>
10022a: 013536b3 sltu a3,a0,s3
10022e: 0016c693 xori a3,a3,1
100232: 8e55 or a2,a2,a3
# If neither condition holds, zero the subtrahend so the subtraction is a no-op.
100234: e219 bnez a2,10023a <montmul+0xaa>
100236: 4901 li s2,0
100238: 4981 li s3,0
# a1:a0 = u - (s2:s3), with borrow from the low word.
10023a: 01353633 sltu a2,a0,s3
10023e: 412585b3 sub a1,a1,s2
100242: 8d91 sub a1,a1,a2
100244: 41350533 sub a0,a0,s3
# Restore saved registers and release the frame.
100248: 5a92 lw s5,36(sp)
10024a: 5a22 lw s4,40(sp)
10024c: 59b2 lw s3,44(sp)
10024e: 5942 lw s2,48(sp)
100250: 54d2 lw s1,52(sp)
100252: 5462 lw s0,56(sp)
100254: 50f2 lw ra,60(sp)
100256: 6121 addi sp,sp,64
100258: 8082 ret
# xbinGCD: shift-based loop that produces two 64-bit values u and v.
# In:   a1:a0 = a (hi:lo), a3:a2 = b (hi:lo)
#       a4    = address that receives u (low word at +0, high word at +4)
#       a5    = address that receives v (low word at +0, high word at +4)
# Out:  u and v stored through a4 / a5; no value is set up for return.
# Working registers: t0:t1 = u (starts at 1), a7:a6 = v (starts at 0),
# t2:t3 = shifting copy of a. a0/a1 and a2/a3 keep the original a and b.
# Loop, while the shifting copy was non-zero on entry to the iteration:
#   copy >>= 1
#   if u is even: u >>= 1; v >>= 1
#   else:         u = ((u ^ b) >> 1) + (u & b); v = (v >> 1) + a
# Uses: overwrites a6, a7, t0-t6; no stack accesses, no calls.
0010025a <xbinGCD>:
# a == 0: skip the loop and store u = 1, v = 0.
10025a: 00b56833 or a6,a0,a1
10025e: 0a080163 beqz a6,100300 <xbinGCD+0xa6>
100262: 4801 li a6,0
100264: 4881 li a7,0
100266: 4281 li t0,0
100268: 4305 li t1,1
10026a: 8e2a mv t3,a0
10026c: 83ae mv t2,a1
10026e: a881 j 1002be <xbinGCD+0x64>
# Odd-u path. t4:t5 = (u ^ b) >> 1.
100270: 00d2ceb3 xor t4,t0,a3
100274: 00c34f33 xor t5,t1,a2
100278: 001f5f13 srli t5,t5,0x1
10027c: 01fe9f93 slli t6,t4,0x1f
100280: 01ff6f33 or t5,t5,t6
100284: 001ede93 srli t4,t4,0x1
# u = (u & b) + ((u ^ b) >> 1), with carry from the low word.
100288: 00d2f2b3 and t0,t0,a3
10028c: 00c37333 and t1,t1,a2
100290: 937a add t1,t1,t5
100292: 01e33f33 sltu t5,t1,t5
100296: 92f6 add t0,t0,t4
100298: 92fa add t0,t0,t5
# v = (v >> 1) + a, with carry from the low word.
10029a: 00185813 srli a6,a6,0x1
10029e: 01f89e93 slli t4,a7,0x1f
1002a2: 01d86eb3 or t4,a6,t4
1002a6: 0018d893 srli a7,a7,0x1
1002aa: 00ae8833 add a6,t4,a0
1002ae: 01d83eb3 sltu t4,a6,t4
1002b2: 98ae add a7,a7,a1
1002b4: 98f6 add a7,a7,t4
# Done when the shifted copy of a has reached zero.
1002b6: 007e6eb3 or t4,t3,t2
1002ba: 040e8763 beqz t4,100308 <xbinGCD+0xae>
# Loop head: copy >>= 1, then branch on bit 0 of u.
1002be: 001e5e13 srli t3,t3,0x1
1002c2: 01f39e93 slli t4,t2,0x1f
1002c6: 01de6e33 or t3,t3,t4
1002ca: 00137e93 andi t4,t1,1
1002ce: 0013d393 srli t2,t2,0x1
1002d2: f80e9fe3 bnez t4,100270 <xbinGCD+0x16>
# Even-u path: u >>= 1, v >>= 1.
1002d6: 00135313 srli t1,t1,0x1
1002da: 01f29e93 slli t4,t0,0x1f
1002de: 01d36333 or t1,t1,t4
1002e2: 0012d293 srli t0,t0,0x1
1002e6: 00185813 srli a6,a6,0x1
1002ea: 01f89e93 slli t4,a7,0x1f
1002ee: 01d86833 or a6,a6,t4
1002f2: 0018d893 srli a7,a7,0x1
1002f6: 007e6eb3 or t4,t3,t2
1002fa: fc0e92e3 bnez t4,1002be <xbinGCD+0x64>
1002fe: a029 j 100308 <xbinGCD+0xae>
# a == 0 entry: u = 1, v = 0.
100300: 4281 li t0,0
100302: 4801 li a6,0
100304: 4881 li a7,0
100306: 4305 li t1,1
# Store u through a4 and v through a5.
100308: 00672023 sw t1,0(a4)
10030c: 00572223 sw t0,4(a4)
100310: 0117a223 sw a7,4(a5)
100314: 0107a023 sw a6,0(a5)
100318: 8082 ret
# warm_caches: jumps directly to benchmark_body (the very next address)
# without linking, so a0 is passed through unchanged and benchmark_body
# returns straight to warm_caches' caller.
0010031a <warm_caches>:
10031a: a009 j 10031c <benchmark_body>
# benchmark_body: runs the computation a0 times and returns a pass/fail flag.
# In:   a0 = iteration count (signed). If a0 < 1 the function skips to the
#       epilogue and a0 is returned unchanged.
# Out:  a0 = 0 when both final checks pass, 1 otherwise (see the end of the
#       function).
# Three 64-bit constants are kept on the stack and re-read every iteration;
# each is replaced by 0 when its flag byte (in_m / in_b / in_a) reads as 0:
#   M = 0xfae84927_3928f89f  (hi at sp+36, lo at sp+32), gated by in_m
#   B = 0x14736def_b9330573  (hi at sp+24, lo at sp+28), gated by in_b
#   A = 0x05493721_87237fef  (hi at sp+16, lo at sp+20), gated by in_a
# Other frame slots used below:
#   sp+4        copy of the high word of the xbinGCD v output
#   sp+8, sp+12 copies of the high words of the last plain-path product
#   sp+40       xbinGCD v output, passed to montmul as its a7:a6 argument
#   sp+48       xbinGCD u output
#   sp+56, sp+64  low / high halves written by the final mulul64
#   sp+72, sp+80  low / high halves written by the plain-path mulul64 calls
#   sp+92 .. sp+140  saved s11 .. s0 and ra
# Register roles inside the loop: s2 = remaining iterations, s4:s9 = M,
# s7:s6 = B, s3:s8 = A (hi:lo in each case).
0010031c <benchmark_body>:
10031c: 7175 addi sp,sp,-144
10031e: c706 sw ra,140(sp)
100320: c522 sw s0,136(sp)
100322: c326 sw s1,132(sp)
100324: c14a sw s2,128(sp)
100326: dece sw s3,124(sp)
100328: dcd2 sw s4,120(sp)
10032a: dad6 sw s5,116(sp)
10032c: d8da sw s6,112(sp)
10032e: d6de sw s7,108(sp)
100330: d4e2 sw s8,104(sp)
100332: d2e6 sw s9,100(sp)
100334: d0ea sw s10,96(sp)
100336: ceee sw s11,92(sp)
# Early exit when a0 < 1 (signed compare).
100338: 4585 li a1,1
10033a: 18b54e63 blt a0,a1,1004d6 <benchmark_body+0x1ba>
10033e: 892a mv s2,a0
# Materialise the six constant words into sp+16 .. sp+36. The objdump
# "<_stack_start+...>" annotations are just the constant values decoded as
# addresses; they are not memory references.
100340: fae85537 lui a0,0xfae85
100344: 92750513 addi a0,a0,-1753 # fae84927 <_stack_start+0xfac84927>
100348: d22a sw a0,36(sp)
10034a: 39290537 lui a0,0x39290
10034e: 89f50513 addi a0,a0,-1889 # 3928f89f <_stack_start+0x3908f89f>
100352: d02a sw a0,32(sp)
100354: b9330537 lui a0,0xb9330
100358: 57350513 addi a0,a0,1395 # b9330573 <_stack_start+0xb9130573>
10035c: ce2a sw a0,28(sp)
10035e: 14737537 lui a0,0x14737
100362: def50513 addi a0,a0,-529 # 14736def <_stack_start+0x14536def>
100366: cc2a sw a0,24(sp)
# 0x87238000 - 17 = 0x87237fef.
100368: 87238537 lui a0,0x87238
10036c: 153d addi a0,a0,-17
10036e: ca2a sw a0,20(sp)
100370: 05493537 lui a0,0x5493
100374: 72150513 addi a0,a0,1825 # 5493721 <_stack_start+0x5293721>
100378: c82a sw a0,16(sp)
# Enter the loop at its operand-selection head.
10037a: a0e1 j 100442 <benchmark_body+0x126>
# ---- Plain path: mulul64 + modul64 ----
# mulul64(A, B, hi -> sp+80, lo -> sp+72). s0 / s1 keep the two addresses.
10037c: 8562 mv a0,s8
10037e: 85ce mv a1,s3
100380: 865a mv a2,s6
100382: 86de mv a3,s7
100384: 0880 addi s0,sp,80
100386: 8722 mv a4,s0
100388: 00a4 addi s1,sp,72
10038a: 87a6 mv a5,s1
10038c: 3b15 jal 1000c0 <mulul64>
# p = modul64(product hi, product lo, M).
10038e: 45d6 lw a1,84(sp)
100390: 4546 lw a0,80(sp)
100392: 46b6 lw a3,76(sp)
100394: 4626 lw a2,72(sp)
100396: 8766 mv a4,s9
100398: 87d2 mv a5,s4
10039a: 3b95 jal 10010e <modul64>
# mulul64(p, p, ...) followed by p = modul64(hi, lo, M).
10039c: 862a mv a2,a0
10039e: 86ae mv a3,a1
1003a0: 8722 mv a4,s0
1003a2: 87a6 mv a5,s1
1003a4: 3b31 jal 1000c0 <mulul64>
1003a6: 45d6 lw a1,84(sp)
1003a8: 4546 lw a0,80(sp)
1003aa: 46b6 lw a3,76(sp)
1003ac: 4626 lw a2,72(sp)
1003ae: 8766 mv a4,s9
1003b0: 87d2 mv a5,s4
1003b2: 3bb1 jal 10010e <modul64>
# mulul64(p, p, ...) once more. The 128-bit product is parked in
# sp+12 : s11 (high half) and sp+8 : s5 (low half); its modul64 reduction is
# performed once, after the loop.
1003b4: 862a mv a2,a0
1003b6: 86ae mv a3,a1
1003b8: 8722 mv a4,s0
1003ba: 87a6 mv a5,s1
1003bc: 3311 jal 1000c0 <mulul64>
1003be: 4556 lw a0,84(sp)
1003c0: c62a sw a0,12(sp)
1003c2: 4dc6 lw s11,80(sp)
1003c4: 4536 lw a0,76(sp)
1003c6: c42a sw a0,8(sp)
1003c8: 4aa6 lw s5,72(sp)
# ---- xbinGCD(0x80000000_00000000, M, u -> sp+48, v -> sp+40) ----
1003ca: 800005b7 lui a1,0x80000
1003ce: 4501 li a0,0
1003d0: 8666 mv a2,s9
1003d2: 86d2 mv a3,s4
1003d4: 1818 addi a4,sp,48
1003d6: 103c addi a5,sp,40
1003d8: 3549 jal 10025a <xbinGCD>
# The first two loads are immediately overwritten (their values are unused).
# NOTE(review): presumably reads of volatile objects in the C source --
# confirm. The third load is copied to sp+4; s10 = low word of v.
1003da: 5552 lw a0,52(sp)
1003dc: 5542 lw a0,48(sp)
1003de: 5532 lw a0,44(sp)
1003e0: c22a sw a0,4(sp)
1003e2: 5d22 lw s10,40(sp)
# ---- montmul path ----
# s0:s1 = modul64(A, 0, M).
1003e4: 8562 mv a0,s8
1003e6: 85ce mv a1,s3
1003e8: 4601 li a2,0
1003ea: 4681 li a3,0
1003ec: 8766 mv a4,s9
1003ee: 87d2 mv a5,s4
1003f0: 3b39 jal 10010e <modul64>
1003f2: 84aa mv s1,a0
1003f4: 842e mv s0,a1
# a1:a0 = modul64(B, 0, M).
1003f6: 855a mv a0,s6
1003f8: 85de mv a1,s7
1003fa: 4601 li a2,0
1003fc: 4681 li a3,0
1003fe: 8766 mv a4,s9
100400: 87d2 mv a5,s4
100402: 3331 jal 10010e <modul64>
# q = montmul(reduced A, reduced B, M, v).
100404: 58b2 lw a7,44(sp)
100406: 5822 lw a6,40(sp)
100408: 862a mv a2,a0
10040a: 86ae mv a3,a1
10040c: 8526 mv a0,s1
10040e: 85a2 mv a1,s0
100410: 8766 mv a4,s9
100412: 87d2 mv a5,s4
100414: 3bb5 jal 100190 <montmul>
# q = montmul(q, q, M, v), twice. a0/a1 still hold q from the previous call.
100416: 58b2 lw a7,44(sp)
100418: 5822 lw a6,40(sp)
10041a: 862a mv a2,a0
10041c: 86ae mv a3,a1
10041e: 8766 mv a4,s9
100420: 87d2 mv a5,s4
100422: 33bd jal 100190 <montmul>
100424: 58b2 lw a7,44(sp)
100426: 5822 lw a6,40(sp)
100428: 862a mv a2,a0
10042a: 86ae mv a3,a1
10042c: 8766 mv a4,s9
10042e: 87d2 mv a5,s4
100430: 3385 jal 100190 <montmul>
# mulul64(q, u, hi -> sp+64, lo -> sp+56).
100432: 56d2 lw a3,52(sp)
100434: 5642 lw a2,48(sp)
100436: 0098 addi a4,sp,64
100438: 183c addi a5,sp,56
10043a: 3159 jal 1000c0 <mulul64>
# Next iteration, or leave the loop when the count reaches zero.
10043c: 197d addi s2,s2,-1
10043e: 02090f63 beqz s2,10047c <benchmark_body+0x160>
# ---- Loop head: select operands. Each constant is used only when its flag
# byte is non-zero; otherwise both of its words are replaced by 0. ----
100442: 00100537 lui a0,0x100
100446: 56054503 lbu a0,1376(a0) # 100560 <in_m>
10044a: 5a12 lw s4,36(sp)
10044c: 5c82 lw s9,32(sp)
10044e: e119 bnez a0,100454 <benchmark_body+0x138>
100450: 4a01 li s4,0
100452: 4c81 li s9,0
100454: 00100537 lui a0,0x100
100458: 56854503 lbu a0,1384(a0) # 100568 <in_b>
10045c: 4b72 lw s6,28(sp)
10045e: 4be2 lw s7,24(sp)
100460: e119 bnez a0,100466 <benchmark_body+0x14a>
100462: 4b01 li s6,0
100464: 4b81 li s7,0
100466: 00100537 lui a0,0x100
10046a: 57054503 lbu a0,1392(a0) # 100570 <in_a>
10046e: 4c52 lw s8,20(sp)
100470: 49c2 lw s3,16(sp)
100472: f00515e3 bnez a0,10037c <benchmark_body+0x60>
100476: 4c01 li s8,0
100478: 4981 li s3,0
10047a: b709 j 10037c <benchmark_body+0x60>
# ---- After the last iteration ----
# Pick up the final mulul64 output: s3:s7 = high half, s2:s6 = low half.
10047c: 4996 lw s3,68(sp)
10047e: 4b86 lw s7,64(sp)
100480: 5972 lw s2,60(sp)
100482: 5b62 lw s6,56(sp)
# s8:s5 = modul64(parked plain-path product hi, lo, M).
100484: 856e mv a0,s11
100486: 45b2 lw a1,12(sp)
100488: 8656 mv a2,s5
10048a: 46a2 lw a3,8(sp)
10048c: 8766 mv a4,s9
10048e: 87d2 mv a5,s4
100490: 39bd jal 10010e <modul64>
100492: 8aaa mv s5,a0
100494: 8c2e mv s8,a1
# s0:s1 = ~(low 64 bits of v * M); this is zero exactly when v * M is
# all-ones modulo 2^64.
100496: 039d3533 mulhu a0,s10,s9
10049a: 034d05b3 mul a1,s10,s4
10049e: 952e add a0,a0,a1
1004a0: 4592 lw a1,4(sp)
1004a2: 039585b3 mul a1,a1,s9
1004a6: 952e add a0,a0,a1
1004a8: 039d05b3 mul a1,s10,s9
1004ac: fff54413 not s0,a0
1004b0: fff5c493 not s1,a1
# a1:a0 = modul64(final product hi, lo, M).
1004b4: 855e mv a0,s7
1004b6: 85ce mv a1,s3
1004b8: 865a mv a2,s6
1004ba: 86ca mv a3,s2
1004bc: 8766 mv a4,s9
1004be: 87d2 mv a5,s4
1004c0: 31b9 jal 10010e <modul64>
# a0 = 1 if the two reduced results differ or the v * M check word is
# non-zero, else 0.
1004c2: 01554533 xor a0,a0,s5
1004c6: 0185c5b3 xor a1,a1,s8
1004ca: 8dc1 or a1,a1,s0
1004cc: 8d45 or a0,a0,s1
1004ce: 8d4d or a0,a0,a1
1004d0: 00a03533 snez a0,a0
# Jump to the immediately following instruction (has no effect on control flow).
1004d4: a009 j 1004d6 <benchmark_body+0x1ba>
# Epilogue, shared with the early-exit path.
1004d6: 4df6 lw s11,92(sp)
1004d8: 5d06 lw s10,96(sp)
1004da: 5c96 lw s9,100(sp)
1004dc: 5c26 lw s8,104(sp)
1004de: 5bb6 lw s7,108(sp)
1004e0: 5b46 lw s6,112(sp)
1004e2: 5ad6 lw s5,116(sp)
1004e4: 5a66 lw s4,120(sp)
1004e6: 59f6 lw s3,124(sp)
1004e8: 490a lw s2,128(sp)
1004ea: 449a lw s1,132(sp)
1004ec: 442a lw s0,136(sp)
1004ee: 40ba lw ra,140(sp)
1004f0: 6149 addi sp,sp,144
1004f2: 8082 ret
# benchmark: sets a0 = 316 (the iteration count) and jumps to benchmark_body
# without linking, so benchmark_body's a0 result goes straight back to
# benchmark's caller.
001004f4 <benchmark>:
1004f4: 13c00513 li a0,316
1004f8: b515 j 10031c <benchmark_body>
# initialise_benchmark: stores the byte value 1 to in_m (0x100560),
# in_b (0x100568) and in_a (0x100570). benchmark_body reads these three bytes
# to decide whether to use its constants or zero. Overwrites a0 and a1.
001004fa <initialise_benchmark>:
1004fa: 00100537 lui a0,0x100
1004fe: 4585 li a1,1
100500: 56b50023 sb a1,1376(a0) # 100560 <in_m>
100504: 00100537 lui a0,0x100
100508: 56b50423 sb a1,1384(a0) # 100568 <in_b>
10050c: 00100537 lui a0,0x100
100510: 56b50823 sb a1,1392(a0) # 100570 <in_a>
100514: 8082 ret
# verify_benchmark: returns a0 = 1 if the incoming a0 is 0, otherwise 0.
00100516 <verify_benchmark>:
100516: 00153513 seqz a0,a0
10051a: 8082 ret
# initialise_board: empty in this build; returns immediately.
0010051c <initialise_board>:
10051c: 8082 ret
# start_trigger: writes all-ones to CSR 0x320, clears mcycle and mcycleh,
# then writes zero to CSR 0x320. Overwrites t0.
# NOTE(review): this objdump names CSR 0x320 "mucounteren"; the current
# privileged specification assigns that address to mcountinhibit, which would
# make this "stop counters, zero the cycle count, restart counters" --
# confirm against the target core's CSR map.
0010051e <start_trigger>:
10051e: 52fd li t0,-1
100520: 32029073 csrw mucounteren,t0
100524: b0001073 csrw mcycle,zero
100528: b8001073 csrw mcycleh,zero
10052c: 32001073 csrw mucounteren,zero
100530: 8082 ret
# stop_trigger: writes all-ones to CSR 0x320 and returns. Overwrites t0.
# NOTE(review): if CSR 0x320 is mcountinhibit on the target (this objdump
# labels it "mucounteren"), this freezes the counters -- confirm.
00100532 <stop_trigger>:
100532: 52fd li t0,-1
100534: 32029073 csrw mucounteren,t0
100538: 8082 ret
# main: benchmark driver.
# Call order: initialise_board, initialise_benchmark, warm_caches(1),
# start_trigger, benchmark, stop_trigger, verify_benchmark(benchmark result).
# Out:  a0 = 0 when verify_benchmark returns non-zero, 1 when it returns 0.
# Frame: 16 bytes; ra at sp+12, benchmark result at sp+8.
0010053a <main>:
10053a: 1141 addi sp,sp,-16
10053c: c606 sw ra,12(sp)
10053e: 3ff9 jal 10051c <initialise_board>
100540: 3f6d jal 1004fa <initialise_benchmark>
# One untimed iteration of the benchmark body.
100542: 4505 li a0,1
100544: 3bd9 jal 10031a <warm_caches>
# Timed region: start_trigger .. stop_trigger around benchmark.
100546: 3fe1 jal 10051e <start_trigger>
100548: 3775 jal 1004f4 <benchmark>
# Keep the benchmark result on the stack across stop_trigger.
10054a: c42a sw a0,8(sp)
10054c: 37dd jal 100532 <stop_trigger>
10054e: 4522 lw a0,8(sp)
100550: 37d9 jal 100516 <verify_benchmark>
# Invert verify_benchmark's result to form main's return value.
100552: 00153513 seqz a0,a0
100556: 40b2 lw ra,12(sp)
100558: 0141 addi sp,sp,16
10055a: 8082 ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment