Created
November 20, 2023 20:58
-
-
Save terrelln/2e14ff1fb197102a08d7823d8044978d to your computer and use it in GitHub Desktop.
objdump of huf_decompress.o before the manual unrolling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Listing of HUF_decompress4X2_usingDTable_internal_fast_c_loop. Lines that begin with a single ';' are the interleaved source lines that came with the dump; lines that begin with ';;' are reviewer notes. | |
;; The args pointer is read through %rdi. Offsets used below: ip[4] at 0x00, op[4] at 0x20, bits[4] at 0x40, dt at 0x60, ilimit at 0x68, oend at 0x70. | |
;; Stack copies: bits[4] at 0x20(%rsp), ip[4] at 0x40(%rsp), op[4] at 0x60(%rsp), oend[4] at 0x80(%rsp); the value loaded from %gs:0x28 is kept at 0xa0(%rsp). | |
0000000000000450 <HUF_decompress4X2_usingDTable_internal_fast_c_loop>: | |
; { | |
450: f3 0f 1e fa endbr64 | |
454: 41 57 pushq %r15 | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
456: 4c 8d 4f 40 leaq 0x40(%rdi), %r9 | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
45a: 48 8d 4f 20 leaq 0x20(%rdi), %rcx | |
; size_t iters = (size_t)(ip[0] - ilimit) / 7; | |
;; %r15 holds the multiplier for the mulq-based divide-by-7 sequence at <L7> (mulq, sub, shr 1, add, shr 2). | |
45e: 49 bf 93 24 49 92 24 49 92 24 movabsq $0x2492492492492493, %r15 # imm = 0x2492492492492493 | |
; { | |
468: 41 56 pushq %r14 | |
46a: 41 55 pushq %r13 | |
;; After the next three moves: %r13 = &args->bits, %r9 = &args->op, %rcx = args. The write-back code at <L1> relies on these. | |
46c: 4d 89 cd movq %r9, %r13 | |
46f: 49 89 c9 movq %rcx, %r9 | |
472: 48 89 f9 movq %rdi, %rcx | |
475: 41 54 pushq %r12 | |
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; | |
;; %r12 holds the multiplier for the divide-by-10 at <L0> (mulq, then shr 3 of the high half). | |
477: 49 bc cd cc cc cc cc cc cc cc movabsq $-0x3333333333333333, %r12 # imm = 0xCCCCCCCCCCCCCCCD | |
; { | |
481: 55 pushq %rbp | |
482: 53 pushq %rbx | |
483: 48 81 ec a8 00 00 00 subq $0xa8, %rsp | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
48a: 48 8b 57 28 movq 0x28(%rdi), %rdx | |
; HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; | |
48e: 4c 8b 57 60 movq 0x60(%rdi), %r10 | |
; { | |
;; The %gs:0x28 value is saved at 0xa0(%rsp) and compared again before the epilogue (address 763..774); this looks like a stack-protector check - confirm against the build flags. | |
492: 65 48 8b 04 25 28 00 00 00 movq %gs:0x28, %rax | |
49b: 48 89 84 24 a0 00 00 00 movq %rax, 0xa0(%rsp) | |
4a3: 31 c0 xorl %eax, %eax | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
4a5: 48 8b 47 40 movq 0x40(%rdi), %rax | |
; BYTE const* const ilimit = args->ilimit; | |
4a9: 4c 8b 77 68 movq 0x68(%rdi), %r14 | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
;; %r11 = &bits[3]; it is the end marker for the 3-stream loop at <L2>. | |
4ad: 4c 8d 5c 24 38 leaq 0x38(%rsp), %r11 | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
4b2: 48 89 54 24 68 movq %rdx, 0x68(%rsp) | |
;; %rbx = &ip[0], which is also one past bits[3]; it is the end marker for the 4-stream loop at <L5>. | |
4b7: 48 8d 5c 24 40 leaq 0x40(%rsp), %rbx | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
4bc: 48 89 44 24 20 movq %rax, 0x20(%rsp) | |
4c1: 48 8b 47 48 movq 0x48(%rdi), %rax | |
4c5: 48 89 44 24 28 movq %rax, 0x28(%rsp) | |
4ca: 48 8b 47 50 movq 0x50(%rdi), %rax | |
4ce: 48 89 44 24 30 movq %rax, 0x30(%rsp) | |
4d3: 48 8b 47 58 movq 0x58(%rdi), %rax | |
4d7: 48 89 44 24 38 movq %rax, 0x38(%rsp) | |
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); | |
4dc: 48 8b 07 movq (%rdi), %rax | |
4df: 48 89 44 24 40 movq %rax, 0x40(%rsp) | |
4e4: 48 8b 47 08 movq 0x8(%rdi), %rax | |
4e8: 48 89 44 24 48 movq %rax, 0x48(%rsp) | |
4ed: 48 8b 47 10 movq 0x10(%rdi), %rax | |
4f1: 48 89 44 24 50 movq %rax, 0x50(%rsp) | |
4f6: 48 8b 47 18 movq 0x18(%rdi), %rax | |
4fa: 48 89 44 24 58 movq %rax, 0x58(%rsp) | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
4ff: 48 8b 47 20 movq 0x20(%rdi), %rax | |
503: 48 89 44 24 60 movq %rax, 0x60(%rsp) | |
508: 48 8b 47 30 movq 0x30(%rdi), %rax | |
50c: 48 89 44 24 70 movq %rax, 0x70(%rsp) | |
;; %rsi carries op[3] from here on; it is only stored back to 0x78(%rsp) when the decode loop finishes (address 6e4). | |
511: 48 8b 77 38 movq 0x38(%rdi), %rsi | |
; oend[1] = op[2]; | |
515: 48 89 84 24 88 00 00 00 movq %rax, 0x88(%rsp) | |
; oend[3] = args->oend; | |
51d: 48 8b 47 70 movq 0x70(%rdi), %rax | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
521: 48 89 74 24 78 movq %rsi, 0x78(%rsp) | |
; oend[0] = op[1]; | |
526: 48 89 94 24 80 00 00 00 movq %rdx, 0x80(%rsp) | |
; oend[2] = op[3]; | |
52e: 48 89 b4 24 90 00 00 00 movq %rsi, 0x90(%rsp) | |
; oend[3] = args->oend; | |
536: 48 89 84 24 98 00 00 00 movq %rax, 0x98(%rsp) | |
;; <L7> is the head of the outer loop: the iteration budget is recomputed here on entry and again after every pass of the decode loop (jmp at 6f3). | |
<L7>: | |
; size_t iters = (size_t)(ip[0] - ilimit) / 7; | |
53e: 4c 8b 44 24 40 movq 0x40(%rsp), %r8 | |
543: 4c 89 c7 movq %r8, %rdi | |
546: 4c 29 f7 subq %r14, %rdi | |
549: 48 89 f8 movq %rdi, %rax | |
54c: 49 f7 e7 mulq %r15 | |
54f: 48 29 d7 subq %rdx, %rdi | |
552: 48 d1 ef shrq %rdi | |
555: 48 8d 2c 3a leaq (%rdx,%rdi), %rbp | |
;; %rdi is reused as the byte offset of the current stream (0, 8, 0x10, 0x18) for the loop at <L0>; %rbp holds iters. | |
559: 31 ff xorl %edi, %edi | |
55b: 48 c1 ed 02 shrq $0x2, %rbp | |
<L0>: | |
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; | |
55f: 48 8b 94 3c 80 00 00 00 movq 0x80(%rsp,%rdi), %rdx | |
567: 48 2b 54 3c 60 subq 0x60(%rsp,%rdi), %rdx | |
56c: 48 89 d0 movq %rdx, %rax | |
56f: 49 f7 e4 mulq %r12 | |
572: 48 c1 ea 03 shrq $0x3, %rdx | |
; iters = MIN(iters, oiters); | |
576: 48 39 d5 cmpq %rdx, %rbp | |
579: 48 0f 47 ea cmovaq %rdx, %rbp | |
; for (stream = 0; stream < 4; ++stream) { | |
57d: 48 83 c7 08 addq $0x8, %rdi | |
581: 48 83 ff 20 cmpq $0x20, %rdi | |
585: 75 d8 jne <L0> | |
; olimit = op[3] + (iters * 5); | |
;; %rdi = olimit from here until the next pass through <L7>. | |
587: 48 8d 44 ad 00 leaq (%rbp,%rbp,4), %rax | |
58c: 48 8d 3c 06 leaq (%rsi,%rax), %rdi | |
; if (op[3] + 10 > olimit) | |
590: 48 8d 46 0a leaq 0xa(%rsi), %rax | |
594: 48 39 c7 cmpq %rax, %rdi | |
597: 0f 82 5b 01 00 00 jb <L1> | |
; if (ip[stream] < ip[stream - 1]) | |
;; The stream = 1..3 comparisons are emitted as three separate compare/branch pairs; %r8 still holds ip[0]. | |
59d: 48 8b 44 24 48 movq 0x48(%rsp), %rax | |
5a2: 4c 39 c0 cmpq %r8, %rax | |
5a5: 0f 82 4d 01 00 00 jb <L1> | |
5ab: 48 8b 54 24 50 movq 0x50(%rsp), %rdx | |
5b0: 48 39 c2 cmpq %rax, %rdx | |
5b3: 0f 82 3f 01 00 00 jb <L1> | |
5b9: 48 39 54 24 58 cmpq %rdx, 0x58(%rsp) | |
5be: 0f 82 34 01 00 00 jb <L1> | |
;; %r9, %r13 and %rcx are needed as scratch inside the decode loop: &args->op and &args->bits are spilled to the stack and the args pointer is parked in %r13. They are restored at 6e1..6ee. | |
5c4: 4c 89 4c 24 18 movq %r9, 0x18(%rsp) | |
5c9: 4c 89 6c 24 10 movq %r13, 0x10(%rsp) | |
5ce: 49 89 cd movq %rcx, %r13 | |
;; <L6> is the top of the do { ... } while (op[3] < olimit) body. | |
<L6>: | |
; for (symbol = 0; symbol < 5; ++symbol) { | |
;; op[3], olimit and ilimit are saved in (%rsp), %r8 and 0x8(%rsp) because %rsi, %rdi and %r14 are reused as scratch in <L3>/<L2>. | |
5d1: 48 89 34 24 movq %rsi, (%rsp) | |
; size_t iters = (size_t)(ip[0] - ilimit) / 7; | |
;; NOTE(review): the source line above does not match this instruction; %ebp is the 5-symbol countdown tested at 62f. The attribution looks like a line-table artifact. | |
5d5: bd 05 00 00 00 movl $0x5, %ebp | |
5da: 49 89 f8 movq %rdi, %r8 | |
5dd: 4c 89 74 24 08 movq %r14, 0x8(%rsp) | |
;; <L3>/<L2>: the 5-symbol by 3-stream decode is kept as two nested loops; bits[stream] and op[stream] are loaded from and stored back to the stack on every inner iteration. | |
<L3>: | |
; for (stream = 0; stream < 3; ++stream) { | |
5e2: 4c 8d 4c 24 20 leaq 0x20(%rsp), %r9 | |
5e7: 48 8d 74 24 60 leaq 0x60(%rsp), %rsi | |
; size_t iters = (size_t)(ip[0] - ilimit) / 7; | |
;; %rax walks &bits[stream] and %rsi walks &op[stream]; both advance by 8 per iteration of <L2>. | |
5ec: 4c 89 c8 movq %r9, %rax | |
<L2>: | |
; int const index = (int)(bits[stream] >> 53); | |
5ef: bf 35 00 00 00 movl $0x35, %edi | |
5f4: c4 e2 c3 f7 10 shrxq %rdi, (%rax), %rdx | |
; HUF_DEltX2 const entry = dtable[index]; | |
;; Table entries are addressed with scale 4. Per the source lines below: the 16-bit value at +0 is entry.sequence, the byte at +2 is entry.nbBits, the byte at +3 is entry.length. | |
5f9: 49 8d 14 92 leaq (%r10,%rdx,4), %rdx | |
; for (stream = 0; stream < 3; ++stream) { | |
5fd: 48 83 c0 08 addq $0x8, %rax | |
; HUF_DEltX2 const entry = dtable[index]; | |
601: 44 0f b7 32 movzwl (%rdx), %r14d | |
605: 0f b6 7a 02 movzbl 0x2(%rdx), %edi | |
; for (stream = 0; stream < 3; ++stream) { | |
609: 48 83 c6 08 addq $0x8, %rsi | |
; bits[stream] <<= (entry.nbBits); | |
60d: c4 e2 c1 f7 78 f8 shlxq %rdi, -0x8(%rax), %rdi | |
; HUF_DEltX2 const entry = dtable[index]; | |
613: 0f b6 4a 03 movzbl 0x3(%rdx), %ecx | |
; MEM_write16(op[stream], entry.sequence); | |
617: 48 8b 56 f8 movq -0x8(%rsi), %rdx | |
; bits[stream] <<= (entry.nbBits); | |
61b: 48 89 78 f8 movq %rdi, -0x8(%rax) | |
; put_unaligned(value, (U16 *)memPtr); | |
61f: 66 44 89 32 movw %r14w, (%rdx) | |
; op[stream] += (entry.length); | |
623: 48 01 ca addq %rcx, %rdx | |
626: 48 89 56 f8 movq %rdx, -0x8(%rsi) | |
; for (stream = 0; stream < 3; ++stream) { | |
62a: 4c 39 d8 cmpq %r11, %rax | |
62d: 75 c0 jne <L2> | |
; for (symbol = 0; symbol < 5; ++symbol) { | |
62f: 83 ed 01 subl $0x1, %ebp | |
632: 75 ae jne <L3> | |
; int const index = (int)(bits[3] >> 53); | |
;; Stream 3: the first symbol is decoded here with op[3], olimit and ilimit reloaded into %rsi, %rdi and %r14. The other four symbols are decoded inside <L5>, one per iteration. | |
634: 48 8b 44 24 38 movq 0x38(%rsp), %rax | |
639: 48 8b 34 24 movq (%rsp), %rsi | |
63d: 4c 89 c7 movq %r8, %rdi | |
640: 4c 8b 74 24 08 movq 0x8(%rsp), %r14 | |
645: 48 89 c2 movq %rax, %rdx | |
648: 48 c1 ea 35 shrq $0x35, %rdx | |
; HUF_DEltX2 const entry = dtable[index]; | |
64c: 49 8d 14 92 leaq (%r10,%rdx,4), %rdx | |
650: 0f b6 4a 02 movzbl 0x2(%rdx), %ecx | |
654: 44 0f b7 02 movzwl (%rdx), %r8d | |
658: 0f b6 52 03 movzbl 0x3(%rdx), %edx | |
; bits[3] <<= (entry.nbBits); | |
65c: c4 e2 f1 f7 c0 shlxq %rcx, %rax, %rax | |
; put_unaligned(value, (U16 *)memPtr); | |
661: 66 44 89 06 movw %r8w, (%rsi) | |
; op[3] += (entry.length); | |
;; %rcx = &ip[0] and %r9 = &bits[0] (set at 5e2); both advance by 8 per iteration of <L5>, which stops when %r9 reaches %rbx (&ip[0]), i.e. after 4 streams. | |
665: 48 8d 4c 24 40 leaq 0x40(%rsp), %rcx | |
66a: 48 01 d6 addq %rdx, %rsi | |
;; <L5>: each iteration decodes one more stream-3 symbol (bits[3] held in %rax, op[3] in %rsi) and then reloads bits[stream]/ip[stream] for one stream. | |
<L5>: | |
; int const index = (int)(bits[3] >> 53); | |
66d: 48 89 c2 movq %rax, %rdx | |
; for (stream = 0; stream < 4; ++stream) { | |
670: 49 83 c1 08 addq $0x8, %r9 | |
674: 48 83 c1 08 addq $0x8, %rcx | |
; int const index = (int)(bits[3] >> 53); | |
678: 48 c1 ea 35 shrq $0x35, %rdx | |
; HUF_DEltX2 const entry = dtable[index]; | |
67c: 49 8d 14 92 leaq (%r10,%rdx,4), %rdx | |
680: 44 0f b6 42 02 movzbl 0x2(%rdx), %r8d | |
685: 0f b7 2a movzwl (%rdx), %ebp | |
688: 0f b6 52 03 movzbl 0x3(%rdx), %edx | |
; bits[3] <<= (entry.nbBits); | |
;; bits[3] is written back to its stack slot so that the reload below sees the updated value when this iteration handles stream 3. | |
68c: c4 e2 b9 f7 c0 shlxq %r8, %rax, %rax | |
691: 48 89 44 24 38 movq %rax, 0x38(%rsp) | |
; op[3] += (entry.length); | |
696: 0f b6 c2 movzbl %dl, %eax | |
; return (unsigned)__builtin_ctzll(val); | |
699: 31 d2 xorl %edx, %edx | |
69b: f3 49 0f bc 51 f8 tzcntq -0x8(%r9), %rdx | |
; put_unaligned(value, (U16 *)memPtr); | |
6a1: 66 89 2e movw %bp, (%rsi) | |
; op[3] += (entry.length); | |
6a4: 48 01 c6 addq %rax, %rsi | |
; ip[stream] -= nbBytes; | |
;; %r8 = ctz >> 3 (nbBytes) and %edx = ctz & 7 (nbBits), both taken from the tzcnt result. | |
6a7: 48 8b 41 f8 movq -0x8(%rcx), %rax | |
6ab: 49 89 d0 movq %rdx, %r8 | |
; int const nbBits = ctz & 7; | |
6ae: 83 e2 07 andl $0x7, %edx | |
; ip[stream] -= nbBytes; | |
6b1: 49 c1 e8 03 shrq $0x3, %r8 | |
6b5: 4c 29 c0 subq %r8, %rax | |
6b8: 48 89 41 f8 movq %rax, -0x8(%rcx) | |
; bits[stream] = MEM_read64(ip[stream]) | 1; | |
6bc: 48 8b 00 movq (%rax), %rax | |
6bf: 48 83 c8 01 orq $0x1, %rax | |
; bits[stream] <<= nbBits; | |
6c3: c4 e2 e9 f7 c0 shlxq %rdx, %rax, %rax | |
6c8: 49 89 41 f8 movq %rax, -0x8(%r9) | |
; for (stream = 0; stream < 4; ++stream) { | |
6cc: 49 39 d9 cmpq %rbx, %r9 | |
6cf: 74 07 je <L4> | |
; int const index = (int)(bits[3] >> 53); | |
6d1: 48 8b 44 24 38 movq 0x38(%rsp), %rax | |
6d6: eb 95 jmp <L5> | |
<L4>: | |
; } while (op[3] < olimit); | |
6d8: 48 39 fe cmpq %rdi, %rsi | |
6db: 0f 82 f0 fe ff ff jb <L6> | |
;; Decode loop finished: restore %rcx = args, %r9 = &args->op, %r13 = &args->bits, store op[3] to its stack slot, and go back to <L7> to recompute the budget. | |
6e1: 4c 89 e9 movq %r13, %rcx | |
6e4: 48 89 74 24 78 movq %rsi, 0x78(%rsp) | |
6e9: 4c 8b 4c 24 18 movq 0x18(%rsp), %r9 | |
6ee: 4c 8b 6c 24 10 movq 0x10(%rsp), %r13 | |
6f3: e9 46 fe ff ff jmp <L7> | |
;; <L1> is the exit path: the stack copies of bits, ip and op are written back through %rcx (args), %r13 (&args->bits) and %r9 (&args->op). | |
<L1>: | |
; ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); | |
6f8: 48 8b 44 24 20 movq 0x20(%rsp), %rax | |
6fd: 48 89 41 40 movq %rax, 0x40(%rcx) | |
701: 48 8b 44 24 28 movq 0x28(%rsp), %rax | |
706: 49 89 45 08 movq %rax, 0x8(%r13) | |
70a: 48 8b 44 24 30 movq 0x30(%rsp), %rax | |
70f: 49 89 45 10 movq %rax, 0x10(%r13) | |
713: 48 8b 44 24 38 movq 0x38(%rsp), %rax | |
718: 49 89 45 18 movq %rax, 0x18(%r13) | |
; ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); | |
71c: 48 8b 44 24 40 movq 0x40(%rsp), %rax | |
721: 48 89 01 movq %rax, (%rcx) | |
724: 48 8b 44 24 48 movq 0x48(%rsp), %rax | |
729: 48 89 41 08 movq %rax, 0x8(%rcx) | |
72d: 48 8b 44 24 50 movq 0x50(%rsp), %rax | |
732: 48 89 41 10 movq %rax, 0x10(%rcx) | |
736: 48 8b 44 24 58 movq 0x58(%rsp), %rax | |
73b: 48 89 41 18 movq %rax, 0x18(%rcx) | |
; ZSTD_memcpy(&args->op, &op, sizeof(op)); | |
73f: 48 8b 44 24 60 movq 0x60(%rsp), %rax | |
744: 48 89 41 20 movq %rax, 0x20(%rcx) | |
748: 48 8b 44 24 68 movq 0x68(%rsp), %rax | |
74d: 49 89 41 08 movq %rax, 0x8(%r9) | |
751: 48 8b 44 24 70 movq 0x70(%rsp), %rax | |
756: 49 89 41 10 movq %rax, 0x10(%r9) | |
75a: 48 8b 44 24 78 movq 0x78(%rsp), %rax | |
75f: 49 89 41 18 movq %rax, 0x18(%r9) | |
; } | |
763: 48 8b 84 24 a0 00 00 00 movq 0xa0(%rsp), %rax | |
76b: 65 48 2b 04 25 28 00 00 00 subq %gs:0x28, %rax | |
774: 75 16 jne <L8> | |
776: 48 81 c4 a8 00 00 00 addq $0xa8, %rsp | |
77d: 5b popq %rbx | |
77e: 5d popq %rbp | |
77f: 41 5c popq %r12 | |
781: 41 5d popq %r13 | |
783: 41 5e popq %r14 | |
785: 41 5f popq %r15 | |
;; NOTE(review): the jmp below and the callq at 78c both carry a zero displacement, so the <L8>/<L9> labels are just the next address. In an unlinked object the real targets would come from relocations not shown in this listing - presumably a return thunk and the stack-check failure handler; confirm with objdump -r. | |
787: e9 00 00 00 00 jmp <L8> | |
<L8>: | |
78c: e8 00 00 00 00 callq <L9> | |
;; Everything from <L9> to 7af is nop filler up to the next function, which starts at 7b0. | |
<L9>: | |
791: 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) | |
79c: 0f 1f 40 00 nopl (%rax) | |
7a0: 90 nop | |
7a1: 90 nop | |
7a2: 90 nop | |
7a3: 90 nop | |
7a4: 90 nop | |
7a5: 90 nop | |
7a6: 90 nop | |
7a7: 90 nop | |
7a8: 90 nop | |
7a9: 90 nop | |
7aa: 90 nop | |
7ab: 90 nop | |
7ac: 90 nop | |
7ad: 90 nop | |
7ae: 90 nop | |
7af: 90 nop | |
;; Listing of HUF_decompress4X1_usingDTable_internal_fast_c_loop. Lines that begin with a single ';' are the interleaved source lines that came with the dump; lines that begin with ';;' are reviewer notes. | |
;; The args pointer stays in %rdi for the whole function. Offsets used below: ip[4] at 0x00, op[4] at 0x20, bits[4] at 0x40, dt at 0x60, ilimit at 0x68, oend at 0x70. | |
;; Stack copies: bits[4] at 0x08(%rsp), ip[4] at 0x28(%rsp), op[4] at 0x48(%rsp); the value loaded from %gs:0x28 is kept at 0x68(%rsp). | |
00000000000007b0 <HUF_decompress4X1_usingDTable_internal_fast_c_loop>: | |
; { | |
7b0: f3 0f 1e fa endbr64 | |
7b4: 41 56 pushq %r14 | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
;; %r9 = &args->bits and %r8 = &args->op; both are kept until the write-back at <L0>. | |
7b6: 4c 8d 4f 40 leaq 0x40(%rdi), %r9 | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
7ba: 4c 8d 47 20 leaq 0x20(%rdi), %r8 | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
;; %r10 holds the multiplier for the mulq-based divide-by-7 sequence (mulq, sub, shr 1, add, shr 2). | |
7be: 49 ba 93 24 49 92 24 49 92 24 movabsq $0x2492492492492493, %r10 # imm = 0x2492492492492493 | |
; { | |
7c8: 41 55 pushq %r13 | |
7ca: 41 54 pushq %r12 | |
7cc: 55 pushq %rbp | |
7cd: 53 pushq %rbx | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
;; %rbx holds the multiplier for the divide-by-5 (mulq, then shr 2 of the high half). | |
7ce: 48 bb cd cc cc cc cc cc cc cc movabsq $-0x3333333333333333, %rbx # imm = 0xCCCCCCCCCCCCCCCD | |
; { | |
7d8: 48 83 ec 70 subq $0x70, %rsp | |
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); | |
7dc: 4c 8b 37 movq (%rdi), %r14 | |
; BYTE* const oend = args->oend; | |
7df: 48 8b 6f 70 movq 0x70(%rdi), %rbp | |
; { | |
;; The %gs:0x28 value is saved at 0x68(%rsp) and compared again before the epilogue (address a22..a30); this looks like a stack-protector check - confirm against the build flags. | |
7e3: 65 48 8b 04 25 28 00 00 00 movq %gs:0x28, %rax | |
7ec: 48 89 44 24 68 movq %rax, 0x68(%rsp) | |
7f1: 31 c0 xorl %eax, %eax | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
7f3: 48 8b 47 40 movq 0x40(%rdi), %rax | |
; BYTE const* const ilimit = args->ilimit; | |
7f7: 4c 8b 5f 68 movq 0x68(%rdi), %r11 | |
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); | |
7fb: 4c 89 74 24 28 movq %r14, 0x28(%rsp) | |
; U16 const* const dtable = (U16 const*)args->dt; | |
800: 48 8b 77 60 movq 0x60(%rdi), %rsi | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
804: 48 89 ea movq %rbp, %rdx | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
807: 4d 89 f5 movq %r14, %r13 | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
80a: 48 89 44 24 08 movq %rax, 0x8(%rsp) | |
80f: 49 8b 41 08 movq 0x8(%r9), %rax | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
813: 4d 29 dd subq %r11, %r13 | |
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); | |
816: 48 89 44 24 10 movq %rax, 0x10(%rsp) | |
81b: 49 8b 41 10 movq 0x10(%r9), %rax | |
81f: 48 89 44 24 18 movq %rax, 0x18(%rsp) | |
824: 49 8b 41 18 movq 0x18(%r9), %rax | |
828: 48 89 44 24 20 movq %rax, 0x20(%rsp) | |
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); | |
82d: 48 8b 47 08 movq 0x8(%rdi), %rax | |
831: 48 89 44 24 30 movq %rax, 0x30(%rsp) | |
836: 48 8b 47 10 movq 0x10(%rdi), %rax | |
83a: 48 89 44 24 38 movq %rax, 0x38(%rsp) | |
83f: 48 8b 47 18 movq 0x18(%rdi), %rax | |
843: 48 89 44 24 40 movq %rax, 0x40(%rsp) | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
848: 48 8b 47 20 movq 0x20(%rdi), %rax | |
84c: 48 89 44 24 48 movq %rax, 0x48(%rsp) | |
851: 49 8b 40 08 movq 0x8(%r8), %rax | |
855: 48 89 44 24 50 movq %rax, 0x50(%rsp) | |
85a: 49 8b 40 10 movq 0x10(%r8), %rax | |
85e: 48 89 44 24 58 movq %rax, 0x58(%rsp) | |
863: 4d 8b 60 18 movq 0x18(%r8), %r12 | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
867: 4c 29 e2 subq %r12, %rdx | |
; ZSTD_memcpy(&op, &args->op, sizeof(op)); | |
86a: 4c 89 64 24 60 movq %r12, 0x60(%rsp) | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
86f: 48 89 d0 movq %rdx, %rax | |
872: 48 f7 e3 mulq %rbx | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
875: 4c 89 e8 movq %r13, %rax | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
878: 48 c1 ea 02 shrq $0x2, %rdx | |
87c: 48 89 d1 movq %rdx, %rcx | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
87f: 49 f7 e2 mulq %r10 | |
882: 4c 89 e8 movq %r13, %rax | |
885: 48 29 d0 subq %rdx, %rax | |
888: 48 d1 e8 shrq %rax | |
88b: 48 01 d0 addq %rdx, %rax | |
88e: 48 c1 e8 02 shrq $0x2, %rax | |
; size_t const iters = MIN(oiters, iiters); | |
892: 48 39 c1 cmpq %rax, %rcx | |
895: 48 0f 47 c8 cmovaq %rax, %rcx | |
; size_t const symbols = iters * 5; | |
899: 4c 8d 2c 89 leaq (%rcx,%rcx,4), %r13 | |
; olimit = op[3] + symbols; | |
;; %r13 = olimit for the decode loop that follows. | |
89d: 4d 01 e5 addq %r12, %r13 | |
; if (op[3] + 20 > olimit) | |
;; This budget computation and check appear twice: here for the first pass and again at 96b..9b1 for every later pass. | |
8a0: 49 83 c4 14 addq $0x14, %r12 | |
8a4: 4d 39 e5 cmpq %r12, %r13 | |
8a7: 0f 82 0a 01 00 00 jb <L0> | |
<L5>: | |
; if (ip[stream] < ip[stream - 1]) | |
;; %r14 holds ip[0]; the stream = 1..3 comparisons are emitted as three separate compare/branch pairs. | |
8ad: 48 8b 44 24 30 movq 0x30(%rsp), %rax | |
8b2: 4c 39 f0 cmpq %r14, %rax | |
8b5: 0f 82 fc 00 00 00 jb <L0> | |
8bb: 48 8b 54 24 38 movq 0x38(%rsp), %rdx | |
8c0: 48 39 c2 cmpq %rax, %rdx | |
8c3: 0f 82 ee 00 00 00 jb <L0> | |
8c9: 48 39 54 24 40 cmpq %rdx, 0x40(%rsp) | |
8ce: 0f 82 e3 00 00 00 jb <L0> | |
;; <L4> is the top of the do { ... } while (op[3] < olimit) body. %r12 is the symbol index (0..4) and %rax the byte offset of the current stream (0, 8, 0x10, 0x18). | |
<L4>: | |
; { | |
8d4: 45 31 e4 xorl %r12d, %r12d | |
<L2>: | |
8d7: 31 c0 xorl %eax, %eax | |
;; <L1>: the 5-symbol by 4-stream decode is kept as two nested loops; bits[stream] and op[stream] are accessed on the stack on every inner iteration. | |
<L1>: | |
; int const index = (int)(bits[stream] >> 53); | |
8d9: 48 8b 54 04 08 movq 0x8(%rsp,%rax), %rdx | |
8de: 48 89 d1 movq %rdx, %rcx | |
8e1: 48 c1 e9 35 shrq $0x35, %rcx | |
; int const entry = (int)dtable[index]; | |
;; Table entries are 16 bits wide (scale 2). The whole entry is used as the shlx count, and the second byte (%ch) is the value stored to the output. | |
8e5: 0f b7 0c 4e movzwl (%rsi,%rcx,2), %ecx | |
; bits[stream] <<= (entry & 63); | |
8e9: 41 89 ce movl %ecx, %r14d | |
; op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF); | |
8ec: 0f b6 cd movzbl %ch, %ecx | |
; bits[stream] <<= (entry & 63); | |
8ef: c4 e2 89 f7 d2 shlxq %r14, %rdx, %rdx | |
8f4: 48 89 54 04 08 movq %rdx, 0x8(%rsp,%rax) | |
; op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF); | |
8f9: 48 8b 54 04 48 movq 0x48(%rsp,%rax), %rdx | |
; for (stream = 0; stream < 4; ++stream) { | |
8fe: 48 83 c0 08 addq $0x8, %rax | |
; op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF); | |
902: 42 88 0c 22 movb %cl, (%rdx,%r12) | |
; for (stream = 0; stream < 4; ++stream) { | |
906: 48 83 f8 20 cmpq $0x20, %rax | |
90a: 75 cd jne <L1> | |
; for (symbol = 0; symbol < 5; ++symbol) { | |
90c: 49 83 c4 01 addq $0x1, %r12 | |
910: 49 83 fc 05 cmpq $0x5, %r12 | |
914: 75 c1 jne <L2> | |
916: 31 c0 xorl %eax, %eax | |
;; <L3>: per-stream reload loop; %rax is again the byte offset of the stream. | |
<L3>: | |
; return (unsigned)__builtin_ctzll(val); | |
918: 31 c9 xorl %ecx, %ecx | |
91a: f3 48 0f bc 4c 04 08 tzcntq 0x8(%rsp,%rax), %rcx | |
; ip[stream] -= nbBytes; | |
921: 48 8b 54 04 28 movq 0x28(%rsp,%rax), %rdx | |
; op[stream] += 5; | |
926: 48 83 44 04 48 05 addq $0x5, 0x48(%rsp,%rax) | |
; ip[stream] -= nbBytes; | |
;; NOTE(review): the store to (%rsp) below is never read back anywhere in this listing; it looks like a leftover spill. | |
92c: 48 89 14 24 movq %rdx, (%rsp) | |
;; %r12 = ctz >> 3 (nbBytes) and %ecx = ctz & 7 (nbBits), both taken from the tzcnt result. | |
930: 49 89 cc movq %rcx, %r12 | |
; int const nbBits = ctz & 7; | |
933: 83 e1 07 andl $0x7, %ecx | |
; ip[stream] -= nbBytes; | |
936: 49 c1 ec 03 shrq $0x3, %r12 | |
93a: 4c 29 e2 subq %r12, %rdx | |
93d: 48 89 54 04 28 movq %rdx, 0x28(%rsp,%rax) | |
; bits[stream] = MEM_read64(ip[stream]) | 1; | |
942: 48 8b 12 movq (%rdx), %rdx | |
945: 48 83 ca 01 orq $0x1, %rdx | |
; bits[stream] <<= nbBits; | |
949: c4 e2 f1 f7 d2 shlxq %rcx, %rdx, %rdx | |
94e: 48 89 54 04 08 movq %rdx, 0x8(%rsp,%rax) | |
; for (stream = 0; stream < 4; ++stream) { | |
953: 48 83 c0 08 addq $0x8, %rax | |
957: 48 83 f8 20 cmpq $0x20, %rax | |
95b: 75 bb jne <L3> | |
; } while (op[3] < olimit); | |
95d: 48 8b 4c 24 60 movq 0x60(%rsp), %rcx | |
962: 4c 39 e9 cmpq %r13, %rcx | |
965: 0f 82 69 ff ff ff jb <L4> | |
;; Decode loop finished: recompute oiters, iiters and olimit from the updated op[3] (%rcx) and ip[0] (%r14), then either start another pass at <L5> or fall through to the exit path. | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
96b: 48 89 ea movq %rbp, %rdx | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
96e: 4c 8b 74 24 28 movq 0x28(%rsp), %r14 | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
973: 48 29 ca subq %rcx, %rdx | |
976: 48 89 d0 movq %rdx, %rax | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
979: 4d 89 f5 movq %r14, %r13 | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
97c: 48 f7 e3 mulq %rbx | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
97f: 4d 29 dd subq %r11, %r13 | |
982: 4c 89 e8 movq %r13, %rax | |
; size_t const oiters = (size_t)(oend - op[3]) / 5; | |
985: 48 c1 ea 02 shrq $0x2, %rdx | |
989: 49 89 d4 movq %rdx, %r12 | |
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7; | |
98c: 49 f7 e2 mulq %r10 | |
98f: 49 29 d5 subq %rdx, %r13 | |
992: 49 d1 ed shrq %r13 | |
995: 4c 01 ea addq %r13, %rdx | |
998: 48 c1 ea 02 shrq $0x2, %rdx | |
; size_t const iters = MIN(oiters, iiters); | |
99c: 49 39 d4 cmpq %rdx, %r12 | |
99f: 4c 0f 47 e2 cmovaq %rdx, %r12 | |
; size_t const symbols = iters * 5; | |
9a3: 4f 8d 2c a4 leaq (%r12,%r12,4), %r13 | |
; olimit = op[3] + symbols; | |
9a7: 49 01 cd addq %rcx, %r13 | |
; if (op[3] + 20 > olimit) | |
9aa: 48 83 c1 14 addq $0x14, %rcx | |
9ae: 49 39 cd cmpq %rcx, %r13 | |
9b1: 0f 83 f6 fe ff ff jae <L5> | |
;; <L0> is the exit path: the stack copies of bits, ip and op are written back through %rdi (args), %r9 (&args->bits) and %r8 (&args->op). | |
<L0>: | |
; ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); | |
9b7: 48 8b 44 24 08 movq 0x8(%rsp), %rax | |
9bc: 48 89 47 40 movq %rax, 0x40(%rdi) | |
9c0: 48 8b 44 24 10 movq 0x10(%rsp), %rax | |
9c5: 49 89 41 08 movq %rax, 0x8(%r9) | |
9c9: 48 8b 44 24 18 movq 0x18(%rsp), %rax | |
9ce: 49 89 41 10 movq %rax, 0x10(%r9) | |
9d2: 48 8b 44 24 20 movq 0x20(%rsp), %rax | |
9d7: 49 89 41 18 movq %rax, 0x18(%r9) | |
; ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); | |
9db: 48 8b 44 24 28 movq 0x28(%rsp), %rax | |
9e0: 48 89 07 movq %rax, (%rdi) | |
9e3: 48 8b 44 24 30 movq 0x30(%rsp), %rax | |
9e8: 48 89 47 08 movq %rax, 0x8(%rdi) | |
9ec: 48 8b 44 24 38 movq 0x38(%rsp), %rax | |
9f1: 48 89 47 10 movq %rax, 0x10(%rdi) | |
9f5: 48 8b 44 24 40 movq 0x40(%rsp), %rax | |
9fa: 48 89 47 18 movq %rax, 0x18(%rdi) | |
; ZSTD_memcpy(&args->op, &op, sizeof(op)); | |
9fe: 48 8b 44 24 48 movq 0x48(%rsp), %rax | |
a03: 48 89 47 20 movq %rax, 0x20(%rdi) | |
a07: 48 8b 44 24 50 movq 0x50(%rsp), %rax | |
a0c: 49 89 40 08 movq %rax, 0x8(%r8) | |
a10: 48 8b 44 24 58 movq 0x58(%rsp), %rax | |
a15: 49 89 40 10 movq %rax, 0x10(%r8) | |
a19: 48 8b 44 24 60 movq 0x60(%rsp), %rax | |
a1e: 49 89 40 18 movq %rax, 0x18(%r8) | |
; } | |
a22: 48 8b 44 24 68 movq 0x68(%rsp), %rax | |
a27: 65 48 2b 04 25 28 00 00 00 subq %gs:0x28, %rax | |
a30: 75 11 jne <L6> | |
a32: 48 83 c4 70 addq $0x70, %rsp | |
a36: 5b popq %rbx | |
a37: 5d popq %rbp | |
a38: 41 5c popq %r12 | |
a3a: 41 5d popq %r13 | |
a3c: 41 5e popq %r14 | |
;; NOTE(review): the jmp below and the callq at a43 both carry a zero displacement, so the <L6>/<L7> labels are just the next address. In an unlinked object the real targets would come from relocations not shown in this listing - presumably a return thunk and the stack-check failure handler; confirm with objdump -r. | |
a3e: e9 00 00 00 00 jmp <L6> | |
<L6>: | |
a43: e8 00 00 00 00 callq <L7> | |
;; Everything from <L7> to a5f is nop filler. | |
<L7>: | |
a48: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax) | |
a50: 90 nop | |
a51: 90 nop | |
a52: 90 nop | |
a53: 90 nop | |
a54: 90 nop | |
a55: 90 nop | |
a56: 90 nop | |
a57: 90 nop | |
a58: 90 nop | |
a59: 90 nop | |
a5a: 90 nop | |
a5b: 90 nop | |
a5c: 90 nop | |
a5d: 90 nop | |
a5e: 90 nop | |
a5f: 90 nop |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment