Skip to content

Instantly share code, notes, and snippets.

@terrelln
Created November 20, 2023 21:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save terrelln/a70bde22a2abc800691fb65c21eabc2a to your computer and use it in GitHub Desktop.
Save terrelln/a70bde22a2abc800691fb65c21eabc2a to your computer and use it in GitHub Desktop.
objdump of huf_decompress.o after the manual unrolling
0000000000000450 <HUF_decompress4X2_usingDTable_internal_fast_c_loop>:
;; Annotated disassembly listing (AT&T operand order: source first, destination last).
;; Each instruction line is "offset: encoding bytes, mnemonic, operands". Lines that
;; start with "; " are the C source lines the debug info attributes to the
;; instructions that follow. The compiler interleaved the setup work, so several of
;; those attributions do not match the instruction printed directly under them.
;;
;; On entry %rdi holds the args pointer. Field offsets, as used by the loads below
;; together with the interleaved source lines: ip[4] at 0x00-0x18, op[4] at 0x20-0x38,
;; bits[4] at 0x40-0x58, dt at 0x60, ilimit at 0x68, oend at 0x70.
;;
;; Stack frame after "subq $0x110, %rsp": local arrays bits[4] at 0x88-0xa0(%rsp),
;; ip[4] at 0xa8-0xc0(%rsp), op[4] at 0xc8-0xe0(%rsp), oend[4] at 0xe8-0x100(%rsp),
;; and the stack-protector canary at 0x108(%rsp). The slots below 0x88 are used as
;; spill space by the code that follows.
; {
450: f3 0f 1e fa endbr64
454: 41 57 pushq %r15
;; %r8 = args; all field loads after %rdi is overwritten (offset 4bf) go through %r8.
456: 49 89 f8 movq %rdi, %r8
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
;; %rcx = &args->bits (address only); spilled to 0x70(%rsp) for the final write-back.
459: 48 8d 4f 40 leaq 0x40(%rdi), %rcx
; {
45d: 41 56 pushq %r14
; ZSTD_memcpy(&op, &args->op, sizeof(op));
;; %r15 = &args->op (address only); spilled to 0x78(%rsp) for the final write-back.
45f: 4d 8d 78 20 leaq 0x20(%r8), %r15
; {
463: 41 55 pushq %r13
465: 41 54 pushq %r12
467: 55 pushq %rbp
468: 53 pushq %rbx
469: 48 81 ec 10 01 00 00 subq $0x110, %rsp # imm = 0x110
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
;; %rbp = bits[0], %r10 = bits[3].
470: 48 8b 6f 40 movq 0x40(%rdi), %rbp
474: 4c 8b 57 58 movq 0x58(%rdi), %r10
; {
;; Load the canary from %gs:0x28 and keep a copy at 0x108(%rsp); it is compared
;; against %gs:0x28 again just before the function returns.
478: 65 48 8b 04 25 28 00 00 00 movq %gs:0x28, %rax
481: 48 89 84 24 08 01 00 00 movq %rax, 0x108(%rsp)
489: 31 c0 xorl %eax, %eax
; BYTE const* const ilimit = args->ilimit;
48b: 48 8b 47 68 movq 0x68(%rdi), %rax
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
;; %r11 = bits[2].
48f: 4c 8b 5f 50 movq 0x50(%rdi), %r11
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
;; %r9 = ip[0], %rdx = ip[3].
493: 4c 8b 0f movq (%rdi), %r9
496: 49 8b 50 18 movq 0x18(%r8), %rdx
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
;; %rbx = bits[1]; it stays in %rbx all the way into the decode loop.
49a: 48 89 ac 24 88 00 00 00 movq %rbp, 0x88(%rsp)
4a2: 48 8b 5f 48 movq 0x48(%rdi), %rbx
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
;; %rsi = ip[2].
4a6: 49 8b 70 10 movq 0x10(%r8), %rsi
; BYTE const* const ilimit = args->ilimit;
;; ilimit lives at 0x68(%rsp) for the rest of the function.
4aa: 48 89 44 24 68 movq %rax, 0x68(%rsp)
; HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
;; %r14 = dtable; it is never written again in this function.
4af: 4c 8b 77 60 movq 0x60(%rdi), %r14
; ZSTD_memcpy(&op, &args->op, sizeof(op));
;; %rax = op[0].
4b3: 49 8b 40 20 movq 0x20(%r8), %rax
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
4b7: 4c 89 94 24 a0 00 00 00 movq %r10, 0xa0(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
;; %rdi = ip[1]; this overwrites the args pointer, which survives in %r8.
4bf: 48 8b 7f 08 movq 0x8(%rdi), %rdi
; ZSTD_memcpy(&op, &args->op, sizeof(op));
;; %r12 = op[1] (temporarily; %r12 is reloaded with op[3] at offset 510).
4c3: 4d 8b 60 28 movq 0x28(%r8), %r12
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
4c7: 48 89 94 24 c0 00 00 00 movq %rdx, 0xc0(%rsp)
; ZSTD_memcpy(&op, &args->op, sizeof(op));
4cf: 48 89 44 24 18 movq %rax, 0x18(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
4d4: 48 89 bc 24 b0 00 00 00 movq %rdi, 0xb0(%rsp)
; ZSTD_memcpy(&op, &args->op, sizeof(op));
4dc: 48 89 84 24 c8 00 00 00 movq %rax, 0xc8(%rsp)
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
4e4: 48 89 9c 24 90 00 00 00 movq %rbx, 0x90(%rsp)
4ec: 4c 89 9c 24 98 00 00 00 movq %r11, 0x98(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
4f4: 4c 89 8c 24 a8 00 00 00 movq %r9, 0xa8(%rsp)
4fc: 48 89 b4 24 b8 00 00 00 movq %rsi, 0xb8(%rsp)
; ZSTD_memcpy(&op, &args->op, sizeof(op));
;; %r13 = op[2], %r12 = op[3].
504: 4c 89 a4 24 d0 00 00 00 movq %r12, 0xd0(%rsp)
50c: 4d 8b 68 30 movq 0x30(%r8), %r13
510: 4d 8b 60 38 movq 0x38(%r8), %r12
; oend[0] = op[1];
;; %rax = op[1] (stored as oend[0] at offset 56b). The next four moves are
;; register shuffles: ip[3] -> 0x10(%rsp), op[0] -> %rdx, bits[0] -> 0x8(%rsp),
;; ip[1] -> %rbp.
514: 49 8b 40 28 movq 0x28(%r8), %rax
518: 48 89 54 24 10 movq %rdx, 0x10(%rsp)
51d: 48 8b 54 24 18 movq 0x18(%rsp), %rdx
522: 48 89 6c 24 08 movq %rbp, 0x8(%rsp)
527: 48 89 fd movq %rdi, %rbp
; oend[3] = args->oend;
;; NOTE(review): the source line above is a misattribution. %edi = 0 is a flag that
;; stays 0 until the decode loop has completed at least once (it is set to 1 at
;; offset a91 and tested at offset aba).
52a: 31 ff xorl %edi, %edi
; ZSTD_memcpy(&op, &args->op, sizeof(op));
52c: 4c 89 ac 24 d8 00 00 00 movq %r13, 0xd8(%rsp)
; oend[1] = op[2];
534: 4c 89 ac 24 f0 00 00 00 movq %r13, 0xf0(%rsp)
; oend[3] = args->oend;
;; %r13 = args->oend (stored as oend[3] at offset 586).
53c: 4d 8b 68 70 movq 0x70(%r8), %r13
; ZSTD_memcpy(&op, &args->op, sizeof(op));
540: 4c 89 a4 24 e0 00 00 00 movq %r12, 0xe0(%rsp)
; oend[2] = op[3];
;; The three stores after the oend[2] store park bits[2] at 0x30(%rsp), ip[2] at
;; (%rsp) and op[0] at 0x28(%rsp).
548: 4c 89 a4 24 f8 00 00 00 movq %r12, 0xf8(%rsp)
550: 4c 89 5c 24 30 movq %r11, 0x30(%rsp)
555: 48 89 34 24 movq %rsi, (%rsp)
559: 48 89 54 24 28 movq %rdx, 0x28(%rsp)
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
;; NOTE(review): despite the source line above, these two stores spill &args->bits
;; to 0x70(%rsp) and the args pointer to 0x80(%rsp).
55e: 48 89 4c 24 70 movq %rcx, 0x70(%rsp)
563: 4c 89 84 24 80 00 00 00 movq %r8, 0x80(%rsp)
; oend[0] = op[1];
56b: 48 89 84 24 e8 00 00 00 movq %rax, 0xe8(%rsp)
; oend[3] = args->oend;
;; %rax = bits[3] (moved on to %r13 at offset 58e).
573: 4c 89 d0 movq %r10, %rax
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
;; Final shuffles: %r10 = ip[0], &args->op -> 0x78(%rsp), %r15 = ip[1],
;; %rbp = bits[0] (reloaded from 0x8(%rsp)).
576: 4d 89 ca movq %r9, %r10
579: 4c 89 7c 24 78 movq %r15, 0x78(%rsp)
57e: 49 89 ef movq %rbp, %r15
581: 48 8b 6c 24 08 movq 0x8(%rsp), %rbp
; oend[3] = args->oend;
586: 4c 89 ac 24 00 01 00 00 movq %r13, 0x100(%rsp)
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
;; %r13 = bits[3]. State when falling through to the next label:
;;   bits: %rbp = bits[0], %rbx = bits[1], 0x30(%rsp) = bits[2], %r13 = bits[3]
;;   ip:   %r10 = ip[0],   %r15 = ip[1],   (%rsp) = ip[2],      0x10(%rsp) = ip[3]
;;   op:   0x28(%rsp) = op[0], %r12 = op[3]; op[1] and op[2] only in the stack array
;;   %r14 = dtable, %edi = 0
58e: 49 89 c5 movq %rax, %r13
<L3>:
;; Top of the outer loop; reached by fall-through from the setup code and by the
;; jump at offset a9a after each run of the decode loop. It computes an upper
;; bound on inner-loop iterations from the remaining input of stream 0.
;;
;; %rcx = ip[0] - ilimit, then an unsigned divide by 7 done with a multiply:
;;   hi   = high 64 bits of (n * 0x2492492492492493)   (mulq leaves it in %rdx)
;;   %rsi = (((n - hi) >> 1) + hi) >> 2
; size_t iters = (size_t)(ip[0] - ilimit) / 7;
591: 48 8b 44 24 68 movq 0x68(%rsp), %rax
596: 4c 89 d1 movq %r10, %rcx
599: 48 29 c1 subq %rax, %rcx
59c: 48 b8 93 24 49 92 24 49 92 24 movabsq $0x2492492492492493, %rax # imm = 0x2492492492492493
5a6: 48 f7 e1 mulq %rcx
5a9: 48 29 d1 subq %rdx, %rcx
5ac: 48 d1 e9 shrq %rcx
5af: 48 8d 34 0a leaq (%rdx,%rcx), %rsi
;; %rcx is reused as the byte offset of the current stream (0, 8, 16, 24) for the
;; per-stream loop that follows; the xorl sits between the leaq and the final
;; shift of the division only because of instruction scheduling.
5b3: 31 c9 xorl %ecx, %ecx
5b5: 48 c1 ee 02 shrq $0x2, %rsi
<L0>:
;; Per-stream loop, four passes with %rcx = 0, 8, 16, 24. Each pass reads
;; oend[stream] at 0xe8(%rsp,%rcx) and op[stream] at 0xc8(%rsp,%rcx), divides the
;; difference by 10 (high half of the product with 0xCCCCCCCCCCCCCCCD, shifted
;; right by 3) and keeps the unsigned minimum in %rsi.
; size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
5b9: 48 8b 94 0c e8 00 00 00 movq 0xe8(%rsp,%rcx), %rdx
5c1: 48 b8 cd cc cc cc cc cc cc cc movabsq $-0x3333333333333333, %rax # imm = 0xCCCCCCCCCCCCCCCD
5cb: 48 2b 94 0c c8 00 00 00 subq 0xc8(%rsp,%rcx), %rdx
5d3: 48 f7 e2 mulq %rdx
5d6: 48 c1 ea 03 shrq $0x3, %rdx
; iters = MIN(iters, oiters);
;; cmova is the unsigned "above" condition: %rsi = %rdx when %rsi > %rdx.
5da: 48 39 d6 cmpq %rdx, %rsi
5dd: 48 0f 47 f2 cmovaq %rdx, %rsi
; for (stream = 0; stream < 4; ++stream) {
5e1: 48 83 c1 08 addq $0x8, %rcx
5e5: 48 83 f9 20 cmpq $0x20, %rcx
5e9: 75 ce jne <L0>
; olimit = op[3] + (iters * 5);
;; %rax = %rsi * 5 + %r12 (op[3]); olimit is kept at 0x38(%rsp) for the loop test.
5eb: 48 8d 04 b6 leaq (%rsi,%rsi,4), %rax
5ef: 4c 01 e0 addq %r12, %rax
5f2: 48 89 44 24 38 movq %rax, 0x38(%rsp)
5f7: 48 89 c2 movq %rax, %rdx
; if (op[3] + 10 > olimit)
;; Unsigned compare: leave through L1 when olimit < op[3] + 10.
5fa: 49 8d 44 24 0a leaq 0xa(%r12), %rax
5ff: 48 39 c2 cmpq %rax, %rdx
602: 0f 82 97 04 00 00 jb <L1>
; if (ip[stream] < ip[stream - 1])
;; Three unsigned ordering checks; any failure leaves through L1:
;; ip[1] < ip[0], ip[2] < ip[1], ip[3] < ip[2].
608: 4d 39 d7 cmpq %r10, %r15
60b: 0f 82 8e 04 00 00 jb <L1>
611: 4c 39 3c 24 cmpq %r15, (%rsp)
615: 0f 82 84 04 00 00 jb <L1>
61b: 48 8b 14 24 movq (%rsp), %rdx
61f: 48 39 54 24 10 cmpq %rdx, 0x10(%rsp)
624: 0f 82 75 04 00 00 jb <L1>
;; Rearrange state into the layout the decode loop uses:
;;   0x18(%rsp) = op[2], 0x20(%rsp) = op[1], 0x8(%rsp) = ip[1],
;;   %r15 = op[3], %r12 = ip[0], %rdx = bits[2].
;; 0x28(%rsp) already holds op[0]; (%rsp) and 0x10(%rsp) already hold ip[2], ip[3].
62a: 48 8b 84 24 d8 00 00 00 movq 0xd8(%rsp), %rax
632: 48 8b 94 24 d0 00 00 00 movq 0xd0(%rsp), %rdx
63a: 4c 89 7c 24 08 movq %r15, 0x8(%rsp)
63f: 4d 89 e7 movq %r12, %r15
642: 4d 89 d4 movq %r10, %r12
645: 48 89 44 24 18 movq %rax, 0x18(%rsp)
64a: 48 89 54 24 20 movq %rdx, 0x20(%rsp)
64f: 48 8b 54 24 30 movq 0x30(%rsp), %rdx
<L2>:
;; Decode loop body. State carried across iterations:
;;   bits: %rbp = bits[0], %rbx = bits[1], %rdx = bits[2], %r13 = bits[3]
;;   ip:   %r12 = ip[0], 0x8(%rsp) = ip[1], (%rsp) = ip[2], 0x10(%rsp) = ip[3]
;;   op:   0x28(%rsp) = op[0], 0x20(%rsp) = op[1], 0x18(%rsp) = op[2], %r15 = op[3]
;;   %r14 = dtable, 0x38(%rsp) = olimit
;;
;; Every symbol decode below follows the same pattern:
;;   index = bits >> 0x35                  (the top 11 bits)
;;   entry = dtable + index * 4
;;   store the 16-bit value at entry+0 to the stream's output pointer
;;   bits  = bits << (byte at entry+2)     (shlx)
;;   op   += (byte at entry+3)
;; NOTE(review): entry+2 / entry+3 presumably correspond to the nbBits / length
;; fields of HUF_DEltX2 - confirm against the struct definition.
;;
;; The first part of the body performs five such decodes for each of streams 0, 1
;; and 2, interleaved round-robin, with the live bits/op values rotating through
;; scratch registers.
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
654: 48 89 e8 movq %rbp, %rax
; put_unaligned(value, (U16 *)memPtr);
657: 48 8b 7c 24 28 movq 0x28(%rsp), %rdi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
65c: 48 c1 e8 35 shrq $0x35, %rax
660: 49 8d 0c 86 leaq (%r14,%rax,4), %rcx
664: 0f b7 01 movzwl (%rcx), %eax
667: 44 0f b6 59 02 movzbl 0x2(%rcx), %r11d
66c: 0f b6 49 03 movzbl 0x3(%rcx), %ecx
; put_unaligned(value, (U16 *)memPtr);
670: 66 89 07 movw %ax, (%rdi)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
673: 48 89 d8 movq %rbx, %rax
676: c4 62 a1 f7 dd shlxq %r11, %rbp, %r11
67b: 48 c1 e8 35 shrq $0x35, %rax
67f: 48 01 f9 addq %rdi, %rcx
; put_unaligned(value, (U16 *)memPtr);
682: 48 8b 7c 24 20 movq 0x20(%rsp), %rdi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
687: 49 8d 04 86 leaq (%r14,%rax,4), %rax
68b: 0f b7 30 movzwl (%rax), %esi
68e: 44 0f b6 50 02 movzbl 0x2(%rax), %r10d
693: 0f b6 40 03 movzbl 0x3(%rax), %eax
; put_unaligned(value, (U16 *)memPtr);
697: 66 89 37 movw %si, (%rdi)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
69a: 48 89 d6 movq %rdx, %rsi
69d: c4 62 a9 f7 d3 shlxq %r10, %rbx, %r10
; put_unaligned(value, (U16 *)memPtr);
6a2: 48 8b 5c 24 18 movq 0x18(%rsp), %rbx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6a7: 48 c1 ee 35 shrq $0x35, %rsi
6ab: 48 01 f8 addq %rdi, %rax
6ae: 4d 8d 04 b6 leaq (%r14,%rsi,4), %r8
6b2: 41 0f b6 78 02 movzbl 0x2(%r8), %edi
6b7: 41 0f b7 30 movzwl (%r8), %esi
6bb: 45 0f b6 48 03 movzbl 0x3(%r8), %r9d
6c0: c4 e2 c1 f7 fa shlxq %rdi, %rdx, %rdi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6c5: 4c 89 da movq %r11, %rdx
; put_unaligned(value, (U16 *)memPtr);
6c8: 66 89 33 movw %si, (%rbx)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6cb: 48 c1 ea 35 shrq $0x35, %rdx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6cf: 49 01 d9 addq %rbx, %r9
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6d2: 4d 8d 04 96 leaq (%r14,%rdx,4), %r8
6d6: 41 0f b7 10 movzwl (%r8), %edx
6da: 41 0f b6 70 02 movzbl 0x2(%r8), %esi
6df: 45 0f b6 40 03 movzbl 0x3(%r8), %r8d
; put_unaligned(value, (U16 *)memPtr);
6e4: 66 89 11 movw %dx, (%rcx)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
6e7: 4c 89 d2 movq %r10, %rdx
6ea: c4 c2 c9 f7 f3 shlxq %rsi, %r11, %rsi
6ef: 48 c1 ea 35 shrq $0x35, %rdx
6f3: 49 01 c8 addq %rcx, %r8
6f6: 49 8d 0c 96 leaq (%r14,%rdx,4), %rcx
6fa: 44 0f b7 19 movzwl (%rcx), %r11d
6fe: 0f b6 51 02 movzbl 0x2(%rcx), %edx
702: 0f b6 49 03 movzbl 0x3(%rcx), %ecx
; put_unaligned(value, (U16 *)memPtr);
706: 66 44 89 18 movw %r11w, (%rax)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
70a: c4 c2 e9 f7 d2 shlxq %rdx, %r10, %rdx
70f: 48 01 c1 addq %rax, %rcx
712: 48 89 f8 movq %rdi, %rax
715: 48 c1 e8 35 shrq $0x35, %rax
719: 49 8d 04 86 leaq (%r14,%rax,4), %rax
71d: 44 0f b6 50 02 movzbl 0x2(%rax), %r10d
722: 44 0f b7 18 movzwl (%rax), %r11d
726: 0f b6 40 03 movzbl 0x3(%rax), %eax
72a: c4 62 a9 f7 d7 shlxq %r10, %rdi, %r10
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
72f: 48 89 f7 movq %rsi, %rdi
; put_unaligned(value, (U16 *)memPtr);
732: 66 45 89 19 movw %r11w, (%r9)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
736: 48 c1 ef 35 shrq $0x35, %rdi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
73a: 4c 01 c8 addq %r9, %rax
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
73d: 4d 8d 0c be leaq (%r14,%rdi,4), %r9
741: 41 0f b6 79 02 movzbl 0x2(%r9), %edi
746: 45 0f b7 19 movzwl (%r9), %r11d
74a: 45 0f b6 49 03 movzbl 0x3(%r9), %r9d
74f: c4 e2 c1 f7 fe shlxq %rdi, %rsi, %rdi
754: 48 89 d6 movq %rdx, %rsi
; put_unaligned(value, (U16 *)memPtr);
757: 66 45 89 18 movw %r11w, (%r8)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
75b: 48 c1 ee 35 shrq $0x35, %rsi
75f: 4d 01 c1 addq %r8, %r9
762: 4d 8d 04 b6 leaq (%r14,%rsi,4), %r8
766: 45 0f b7 18 movzwl (%r8), %r11d
76a: 41 0f b6 70 02 movzbl 0x2(%r8), %esi
76f: 45 0f b6 40 03 movzbl 0x3(%r8), %r8d
774: c4 e2 c9 f7 f2 shlxq %rsi, %rdx, %rsi
779: 4c 89 d2 movq %r10, %rdx
; put_unaligned(value, (U16 *)memPtr);
77c: 66 44 89 19 movw %r11w, (%rcx)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
780: 48 c1 ea 35 shrq $0x35, %rdx
784: 49 01 c8 addq %rcx, %r8
787: 49 8d 0c 96 leaq (%r14,%rdx,4), %rcx
78b: 44 0f b7 19 movzwl (%rcx), %r11d
78f: 0f b6 51 02 movzbl 0x2(%rcx), %edx
793: 0f b6 49 03 movzbl 0x3(%rcx), %ecx
; put_unaligned(value, (U16 *)memPtr);
797: 66 44 89 18 movw %r11w, (%rax)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
79b: c4 c2 e9 f7 d2 shlxq %rdx, %r10, %rdx
7a0: 48 01 c1 addq %rax, %rcx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
7a3: 48 89 f8 movq %rdi, %rax
7a6: 48 c1 e8 35 shrq $0x35, %rax
7aa: 49 8d 04 86 leaq (%r14,%rax,4), %rax
7ae: 44 0f b7 18 movzwl (%rax), %r11d
7b2: 44 0f b6 50 02 movzbl 0x2(%rax), %r10d
7b7: 0f b6 40 03 movzbl 0x3(%rax), %eax
; put_unaligned(value, (U16 *)memPtr);
7bb: 66 45 89 19 movw %r11w, (%r9)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
7bf: c4 e2 a9 f7 ff shlxq %r10, %rdi, %rdi
7c4: 4c 01 c8 addq %r9, %rax
7c7: 49 89 f1 movq %rsi, %r9
7ca: 49 c1 e9 35 shrq $0x35, %r9
7ce: 4f 8d 0c 8e leaq (%r14,%r9,4), %r9
7d2: 45 0f b6 51 02 movzbl 0x2(%r9), %r10d
7d7: 45 0f b7 19 movzwl (%r9), %r11d
7db: 45 0f b6 49 03 movzbl 0x3(%r9), %r9d
; put_unaligned(value, (U16 *)memPtr);
7e0: 66 45 89 18 movw %r11w, (%r8)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
7e4: c4 e2 a9 f7 f6 shlxq %r10, %rsi, %rsi
7e9: 4b 8d 1c 08 leaq (%r8,%r9), %rbx
7ed: 49 89 d0 movq %rdx, %r8
7f0: 49 c1 e8 35 shrq $0x35, %r8
;; 0x40(%rsp) remembers the address of stream 1's fifth store; it is read again
;; after the loop exits (offset a33).
7f4: 48 89 5c 24 40 movq %rbx, 0x40(%rsp)
7f9: 4f 8d 04 86 leaq (%r14,%r8,4), %r8
7fd: 45 0f b6 48 02 movzbl 0x2(%r8), %r9d
802: 45 0f b7 10 movzwl (%r8), %r10d
806: 45 0f b6 40 03 movzbl 0x3(%r8), %r8d
80b: c4 e2 b1 f7 d2 shlxq %r9, %rdx, %rdx
; put_unaligned(value, (U16 *)memPtr);
810: 66 44 89 11 movw %r10w, (%rcx)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
814: 4e 8d 0c 01 leaq (%rcx,%r8), %r9
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
818: 48 89 f9 movq %rdi, %rcx
81b: 48 c1 e9 35 shrq $0x35, %rcx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
;; 0x60(%rsp) remembers the address of stream 2's fifth store (read at offset a40).
81f: 4c 89 4c 24 60 movq %r9, 0x60(%rsp)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
824: 49 8d 0c 8e leaq (%r14,%rcx,4), %rcx
828: 44 0f b6 51 02 movzbl 0x2(%rcx), %r10d
82d: 44 0f b7 01 movzwl (%rcx), %r8d
831: 0f b6 49 03 movzbl 0x3(%rcx), %ecx
; put_unaligned(value, (U16 *)memPtr);
835: 66 44 89 00 movw %r8w, (%rax)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
;; Stream 0 has finished its five decodes: bits[0] is in %rdi and the advanced
;; op[0] goes back to 0x28(%rsp).
839: c4 e2 a9 f7 ff shlxq %r10, %rdi, %rdi
83e: 48 01 c8 addq %rcx, %rax
841: 48 89 44 24 28 movq %rax, 0x28(%rsp)
846: 48 89 f0 movq %rsi, %rax
849: 48 c1 e8 35 shrq $0x35, %rax
84d: 49 8d 04 86 leaq (%r14,%rax,4), %rax
851: 0f b7 08 movzwl (%rax), %ecx
854: 44 0f b6 58 02 movzbl 0x2(%rax), %r11d
859: 0f b6 40 03 movzbl 0x3(%rax), %eax
;; The 16-bit value is also spilled to 0x4e(%rsp), in addition to the store to
;; the output at offset 86a. The spill is only read after the loop exits (offset
;; a10). The same store-plus-spill pattern repeats below with the stack slots
;; 0x50, 0x52, 0x54 and 0x56(%rsp).
85d: 66 89 4c 24 4e movw %cx, 0x4e(%rsp)
862: c4 e2 a1 f7 f6 shlxq %r11, %rsi, %rsi
867: 48 01 d8 addq %rbx, %rax
; put_unaligned(value, (U16 *)memPtr);
86a: 66 89 0b movw %cx, (%rbx)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
;; Stream 1 done: bits[1] is in %rsi, the advanced op[1] goes to 0x20(%rsp).
86d: 48 89 44 24 20 movq %rax, 0x20(%rsp)
872: 48 89 d0 movq %rdx, %rax
875: 48 c1 e8 35 shrq $0x35, %rax
879: 49 8d 04 86 leaq (%r14,%rax,4), %rax
87d: 0f b7 18 movzwl (%rax), %ebx
880: 0f b6 48 02 movzbl 0x2(%rax), %ecx
884: 0f b6 40 03 movzbl 0x3(%rax), %eax
; put_unaligned(value, (U16 *)memPtr);
888: 66 41 89 19 movw %bx, (%r9)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0)
;; Stream 2 done: bits[2] is in %rdx, the advanced op[2] goes to 0x18(%rsp).
88c: c4 e2 f1 f7 d2 shlxq %rcx, %rdx, %rdx
891: 4c 01 c8 addq %r9, %rax
894: 66 89 5c 24 50 movw %bx, 0x50(%rsp)
899: 48 89 44 24 18 movq %rax, 0x18(%rsp)
;; Stream 3 (bits in %r13, output pointer in %r15) now gets its five decodes,
;; interleaved with the reload of streams 0-2. The running op[3] moves through
;; %r9, %r8, %rdi and %rsi and lands in %r15 at offset 9d3.
; HUF_4X2_DECODE_SYMBOL(3, 1)
89e: 4c 89 e8 movq %r13, %rax
8a1: 48 c1 e8 35 shrq $0x35, %rax
8a5: 4d 8d 04 86 leaq (%r14,%rax,4), %r8
8a9: 41 0f b6 40 02 movzbl 0x2(%r8), %eax
8ae: 41 0f b7 18 movzwl (%r8), %ebx
8b2: 45 0f b6 48 03 movzbl 0x3(%r8), %r9d
8b7: c4 c2 f9 f7 c5 shlxq %rax, %r13, %rax
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
8bc: 49 89 c0 movq %rax, %r8
; put_unaligned(value, (U16 *)memPtr);
8bf: 66 41 89 1f movw %bx, (%r15)
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
8c3: 49 c1 e8 35 shrq $0x35, %r8
; HUF_4X2_DECODE_SYMBOL(3, 1)
8c7: 66 89 5c 24 52 movw %bx, 0x52(%rsp)
8cc: 4d 01 f9 addq %r15, %r9
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
8cf: 4f 8d 04 86 leaq (%r14,%r8,4), %r8
8d3: 41 0f b7 28 movzwl (%r8), %ebp
8d7: 41 0f b6 58 02 movzbl 0x2(%r8), %ebx
8dc: 45 0f b6 40 03 movzbl 0x3(%r8), %r8d
8e1: 66 89 6c 24 54 movw %bp, 0x54(%rsp)
8e6: c4 e2 e1 f7 c0 shlxq %rbx, %rax, %rax
8eb: 4d 01 c8 addq %r9, %r8
;; Reload pattern, applied to each stream in turn:
;;   ctz   = tzcnt(bits)
;;   ip   -= ctz >> 3                      (whole bytes)
;;   bits  = (64-bit load from ip) | 1
;;   bits <<= ctz & 7                      (leftover bits)
;; Here it starts for stream 0 (bits in %rdi, ip[0] in %r12, result in %rbp).
; return (unsigned)__builtin_ctzll(val);
8ee: f3 48 0f bc ff tzcntq %rdi, %rdi
; put_unaligned(value, (U16 *)memPtr);
8f3: 66 41 89 29 movw %bp, (%r9)
; return (unsigned)__builtin_ctzll(val);
;; NOTE(review): %ecx is zeroed right before being used as the tzcnt destination at
;; offset 904; presumably this is to break the output dependency of tzcnt - confirm.
8f7: 31 c9 xorl %ecx, %ecx
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
8f9: 49 89 fa movq %rdi, %r10
8fc: 83 e7 07 andl $0x7, %edi
; return (unsigned)__builtin_ctzll(val);
;; ctz for stream 1 (in place in %rsi) and stream 2 (%rdx -> %rcx).
8ff: f3 48 0f bc f6 tzcntq %rsi, %rsi
904: f3 48 0f bc ca tzcntq %rdx, %rcx
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
909: 49 c1 ea 03 shrq $0x3, %r10
90d: 49 89 f3 movq %rsi, %r11
910: 83 e6 07 andl $0x7, %esi
913: 48 89 ca movq %rcx, %rdx
916: 49 c1 eb 03 shrq $0x3, %r11
;; ip[0] (%r12), ip[1] (0x8(%rsp)) and ip[2] ((%rsp)) each move back by their
;; whole-byte count.
91a: 4d 29 d4 subq %r10, %r12
91d: 4c 29 5c 24 08 subq %r11, 0x8(%rsp)
922: 48 c1 ea 03 shrq $0x3, %rdx
926: 49 8b 2c 24 movq (%r12), %rbp
92a: 48 29 14 24 subq %rdx, (%rsp)
92e: 83 e1 07 andl $0x7, %ecx
;; New bits[0] in %rbp.
931: 48 83 cd 01 orq $0x1, %rbp
935: c4 e2 c1 f7 ed shlxq %rdi, %rbp, %rbp
;; Third decode of stream 3.
93a: 48 89 c7 movq %rax, %rdi
93d: 48 c1 ef 35 shrq $0x35, %rdi
941: 49 8d 3c be leaq (%r14,%rdi,4), %rdi
945: 0f b6 5f 02 movzbl 0x2(%rdi), %ebx
949: 44 0f b7 17 movzwl (%rdi), %r10d
94d: 0f b6 7f 03 movzbl 0x3(%rdi), %edi
951: c4 e2 e1 f7 c0 shlxq %rbx, %rax, %rax
956: 48 8b 5c 24 08 movq 0x8(%rsp), %rbx
; put_unaligned(value, (U16 *)memPtr);
95b: 66 45 89 10 movw %r10w, (%r8)
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
;; New bits[1] in %rbx, loaded through the updated ip[1].
95f: 4c 01 c7 addq %r8, %rdi
962: 48 8b 1b movq (%rbx), %rbx
965: 48 83 cb 01 orq $0x1, %rbx
969: c4 e2 c9 f7 db shlxq %rsi, %rbx, %rbx
;; Fourth decode of stream 3.
96e: 48 89 c6 movq %rax, %rsi
971: 48 c1 ee 35 shrq $0x35, %rsi
975: 49 8d 34 b6 leaq (%r14,%rsi,4), %rsi
979: 44 0f b7 2e movzwl (%rsi), %r13d
97d: 44 0f b6 5e 02 movzbl 0x2(%rsi), %r11d
982: 0f b6 76 03 movzbl 0x3(%rsi), %esi
986: c4 e2 a1 f7 c0 shlxq %r11, %rax, %rax
98b: 66 44 89 6c 24 56 movw %r13w, 0x56(%rsp)
; put_unaligned(value, (U16 *)memPtr);
991: 66 44 89 2f movw %r13w, (%rdi)
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
;; 0x58(%rsp) remembers the op[3] value from the start of this iteration (read at
;; offset a67). New bits[2] ends up in %rdx, loaded through the updated ip[2].
995: 4c 8b 1c 24 movq (%rsp), %r11
999: 48 01 fe addq %rdi, %rsi
99c: 4c 89 7c 24 58 movq %r15, 0x58(%rsp)
9a1: 4d 8b 1b movq (%r11), %r11
9a4: 49 83 cb 01 orq $0x1, %r11
9a8: c4 c2 f1 f7 d3 shlxq %rcx, %r11, %rdx
;; Fifth decode of stream 3.
9ad: 48 89 c1 movq %rax, %rcx
9b0: 48 c1 e9 35 shrq $0x35, %rcx
9b4: 49 8d 0c 8e leaq (%r14,%rcx,4), %rcx
9b8: 44 0f b6 69 02 movzbl 0x2(%rcx), %r13d
9bd: 44 0f b7 19 movzwl (%rcx), %r11d
9c1: 0f b6 49 03 movzbl 0x3(%rcx), %ecx
9c5: c4 e2 91 f7 c0 shlxq %r13, %rax, %rax
; return (unsigned)__builtin_ctzll(val);
9ca: f3 48 0f bc c0 tzcntq %rax, %rax
; put_unaligned(value, (U16 *)memPtr);
9cf: 66 44 89 1e movw %r11w, (%rsi)
; HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM)
;; %r15 = op[3] for the next iteration. Then stream 3 is reloaded: ip[3] at
;; 0x10(%rsp) moves back by ctz >> 3, and the new bits[3] is built in %r13.
9d3: 4c 8d 3c 0e leaq (%rsi,%rcx), %r15
9d7: 48 89 c1 movq %rax, %rcx
9da: 83 e0 07 andl $0x7, %eax
9dd: 48 c1 e9 03 shrq $0x3, %rcx
9e1: 48 29 4c 24 10 subq %rcx, 0x10(%rsp)
9e6: 4c 8b 6c 24 10 movq 0x10(%rsp), %r13
9eb: 49 8b 4d 00 movq (%r13), %rcx
;; NOTE(review): this stores the freshly loaded word to 0x30(%rsp), but nothing in
;; the loop reads that slot and it is overwritten with bits[2] at offset a15 on exit.
9ef: 48 89 4c 24 30 movq %rcx, 0x30(%rsp)
9f4: 48 83 c9 01 orq $0x1, %rcx
9f8: c4 62 f9 f7 e9 shlxq %rax, %rcx, %r13
; } while (op[3] < olimit);
;; Unsigned compare of op[3] (%r15) against olimit (0x38(%rsp)).
9fd: 48 8b 44 24 38 movq 0x38(%rsp), %rax
a02: 49 39 c7 cmpq %rax, %r15
a05: 0f 82 49 fc ff ff jb <L2>
;; Loop exit. Three things happen before jumping back to L3:
;;  1. op[0..3] are copied from their loop locations back into the stack array at
;;     0xc8-0xe0(%rsp), and bits[2] (%rdx) goes back to 0x30(%rsp).
;;  2. Registers return to the outer-loop layout: %r10 = ip[0], %r12 = op[3],
;;     %r15 = ip[1].
;;  3. The 16-bit values spilled during the last iteration (0x4e-0x56(%rsp), plus
;;     %r10w and %r11w) are stored again to the output addresses that iteration
;;     already wrote, i.e. the same values go to the same addresses a second time.
a0b: 48 8b 44 24 28 movq 0x28(%rsp), %rax
a10: 0f b7 4c 24 4e movzwl 0x4e(%rsp), %ecx
a15: 48 89 54 24 30 movq %rdx, 0x30(%rsp)
a1a: 48 8b 54 24 20 movq 0x20(%rsp), %rdx
a1f: 66 44 89 54 24 20 movw %r10w, 0x20(%rsp)
a25: 4d 89 e2 movq %r12, %r10
a28: 4d 89 fc movq %r15, %r12
a2b: 48 89 84 24 c8 00 00 00 movq %rax, 0xc8(%rsp)
a33: 48 8b 44 24 40 movq 0x40(%rsp), %rax
a38: 48 89 94 24 d0 00 00 00 movq %rdx, 0xd0(%rsp)
a40: 48 8b 54 24 60 movq 0x60(%rsp), %rdx
a45: 66 89 08 movw %cx, (%rax)
a48: 0f b7 44 24 50 movzwl 0x50(%rsp), %eax
a4d: 4c 8b 7c 24 08 movq 0x8(%rsp), %r15
a52: 66 89 02 movw %ax, (%rdx)
a55: 48 8b 44 24 18 movq 0x18(%rsp), %rax
a5a: 0f b7 54 24 52 movzwl 0x52(%rsp), %edx
a5f: 48 89 84 24 d8 00 00 00 movq %rax, 0xd8(%rsp)
a67: 48 8b 44 24 58 movq 0x58(%rsp), %rax
a6c: 66 89 10 movw %dx, (%rax)
a6f: 0f b7 44 24 54 movzwl 0x54(%rsp), %eax
a74: 4c 89 a4 24 e0 00 00 00 movq %r12, 0xe0(%rsp)
a7c: 66 41 89 01 movw %ax, (%r9)
a80: 0f b7 44 24 20 movzwl 0x20(%rsp), %eax
a85: 66 41 89 00 movw %ax, (%r8)
a89: 0f b7 44 24 56 movzwl 0x56(%rsp), %eax
a8e: 66 89 07 movw %ax, (%rdi)
;; %edi = 1 records that the loop has run, so the exit path at L1 knows the
;; register copies of bits/ip are newer than the stack arrays.
a91: bf 01 00 00 00 movl $0x1, %edi
a96: 66 44 89 1e movw %r11w, (%rsi)
a9a: e9 f2 fa ff ff jmp <L3>
<L1>:
;; Exit path, reached from the guard checks that follow the per-stream loop.
;; It moves bits[0] to %rax, ip[1] to %rbp and bits[3] to %rdx, then reloads the
;; pointers saved during setup: %rcx = &args->bits, %r15 = &args->op, %r8 = args.
a9f: 48 89 e8 movq %rbp, %rax
aa2: 48 8b 4c 24 70 movq 0x70(%rsp), %rcx
aa7: 4c 89 fd movq %r15, %rbp
aaa: 4c 89 ea movq %r13, %rdx
aad: 4c 8b 7c 24 78 movq 0x78(%rsp), %r15
ab2: 4c 8b 84 24 80 00 00 00 movq 0x80(%rsp), %r8
;; %dil is the "decode loop ran" flag (0 from setup, 1 after the loop). When it is
;; 0 the stack arrays still hold the values copied from args, so the spill is skipped.
aba: 40 84 ff testb %dil, %dil
abd: 74 4e je <L4>
;; Write the live bits[] and ip[] values into the stack arrays:
;;   bits[0..3] -> 0x88, 0x90, 0x98, 0xa0(%rsp)
;;   ip[0..3]   -> 0xa8, 0xb0, 0xb8, 0xc0(%rsp)
;; op[] is not written here; the loop-exit code already stored it to 0xc8-0xe0(%rsp).
abf: 48 89 84 24 88 00 00 00 movq %rax, 0x88(%rsp)
ac7: 48 8b 04 24 movq (%rsp), %rax
acb: 4c 89 94 24 a8 00 00 00 movq %r10, 0xa8(%rsp)
ad3: 48 89 84 24 b8 00 00 00 movq %rax, 0xb8(%rsp)
adb: 48 8b 44 24 30 movq 0x30(%rsp), %rax
ae0: 48 89 ac 24 b0 00 00 00 movq %rbp, 0xb0(%rsp)
ae8: 48 89 84 24 98 00 00 00 movq %rax, 0x98(%rsp)
af0: 48 8b 44 24 10 movq 0x10(%rsp), %rax
af5: 48 89 9c 24 90 00 00 00 movq %rbx, 0x90(%rsp)
afd: 48 89 84 24 c0 00 00 00 movq %rax, 0xc0(%rsp)
b05: 48 89 94 24 a0 00 00 00 movq %rdx, 0xa0(%rsp)
<L4>:
;; Write-back and epilogue. The stack arrays are copied back into args:
;; bits[] through %r8+0x40 and %rcx (= &args->bits), ip[] through %r8, and op[]
;; through %r8+0x20 and %r15 (= &args->op).
; ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
b0d: 48 8b 84 24 88 00 00 00 movq 0x88(%rsp), %rax
b15: 49 89 40 40 movq %rax, 0x40(%r8)
b19: 48 8b 84 24 90 00 00 00 movq 0x90(%rsp), %rax
b21: 48 89 41 08 movq %rax, 0x8(%rcx)
b25: 48 8b 84 24 98 00 00 00 movq 0x98(%rsp), %rax
b2d: 48 89 41 10 movq %rax, 0x10(%rcx)
b31: 48 8b 84 24 a0 00 00 00 movq 0xa0(%rsp), %rax
b39: 48 89 41 18 movq %rax, 0x18(%rcx)
; ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
b3d: 48 8b 84 24 a8 00 00 00 movq 0xa8(%rsp), %rax
b45: 49 89 00 movq %rax, (%r8)
b48: 48 8b 84 24 b0 00 00 00 movq 0xb0(%rsp), %rax
b50: 49 89 40 08 movq %rax, 0x8(%r8)
b54: 48 8b 84 24 b8 00 00 00 movq 0xb8(%rsp), %rax
b5c: 49 89 40 10 movq %rax, 0x10(%r8)
b60: 48 8b 84 24 c0 00 00 00 movq 0xc0(%rsp), %rax
b68: 49 89 40 18 movq %rax, 0x18(%r8)
; ZSTD_memcpy(&args->op, &op, sizeof(op));
b6c: 48 8b 84 24 c8 00 00 00 movq 0xc8(%rsp), %rax
b74: 49 89 40 20 movq %rax, 0x20(%r8)
b78: 48 8b 84 24 d0 00 00 00 movq 0xd0(%rsp), %rax
b80: 49 89 47 08 movq %rax, 0x8(%r15)
b84: 48 8b 84 24 d8 00 00 00 movq 0xd8(%rsp), %rax
b8c: 49 89 47 10 movq %rax, 0x10(%r15)
b90: 48 8b 84 24 e0 00 00 00 movq 0xe0(%rsp), %rax
b98: 49 89 47 18 movq %rax, 0x18(%r15)
; }
;; Stack-protector check: the saved canary minus %gs:0x28 must be zero, otherwise
;; control goes to the call at offset bc5.
b9c: 48 8b 84 24 08 01 00 00 movq 0x108(%rsp), %rax
ba4: 65 48 2b 04 25 28 00 00 00 subq %gs:0x28, %rax
bad: 75 16 jne <L5>
;; Release the frame and restore the callee-saved registers in reverse push order.
baf: 48 81 c4 10 01 00 00 addq $0x110, %rsp # imm = 0x110
bb6: 5b popq %rbx
bb7: 5d popq %rbp
bb8: 41 5c popq %r12
bba: 41 5d popq %r13
bbc: 41 5e popq %r14
bbe: 41 5f popq %r15
;; NOTE(review): the encoding e9 00 00 00 00 has a zero displacement, so the printed
;; target is just the next instruction. In an unlinked object file this is presumably
;; an unresolved relocation (for example a jump to a return thunk) rather than a real
;; jump into the failure call - confirm with "objdump -r".
bc0: e9 00 00 00 00 jmp <L5>
<L5>:
;; Target of the canary-mismatch branch at offset bad.
;; NOTE(review): e8 00 00 00 00 is a call with a zero displacement, so the printed
;; target is just the next instruction; presumably this is an unresolved relocation
;; to the stack-protector failure handler - confirm with "objdump -r".
bc5: e8 00 00 00 00 callq <L6>
<L6>:
;; Padding only: one 6-byte nop followed by single-byte nops, filling the gap up to
;; the next function, which starts at offset be0.
bca: 66 0f 1f 44 00 00 nopw (%rax,%rax)
bd0: 90 nop
bd1: 90 nop
bd2: 90 nop
bd3: 90 nop
bd4: 90 nop
bd5: 90 nop
bd6: 90 nop
bd7: 90 nop
bd8: 90 nop
bd9: 90 nop
bda: 90 nop
bdb: 90 nop
bdc: 90 nop
bdd: 90 nop
bde: 90 nop
bdf: 90 nop
0000000000000be0 <HUF_decompress4X1_usingDTable_internal_fast_c_loop>:
; {
be0: f3 0f 1e fa endbr64
be4: 41 57 pushq %r15
be6: 41 56 pushq %r14
be8: 49 89 fe movq %rdi, %r14
beb: 41 55 pushq %r13
bed: 41 54 pushq %r12
bef: 55 pushq %rbp
bf0: 53 pushq %rbx
bf1: 48 81 ec b8 00 00 00 subq $0xb8, %rsp
; BYTE* const oend = args->oend;
bf8: 4c 8b 6f 70 movq 0x70(%rdi), %r13
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
bfc: 49 8b 5e 08 movq 0x8(%r14), %rbx
; {
c00: 65 48 8b 04 25 28 00 00 00 movq %gs:0x28, %rax
c09: 48 89 84 24 b0 00 00 00 movq %rax, 0xb0(%rsp)
c11: 31 c0 xorl %eax, %eax
; BYTE const* const ilimit = args->ilimit;
c13: 48 8b 47 68 movq 0x68(%rdi), %rax
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
c17: 49 8b 56 10 movq 0x10(%r14), %rdx
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
c1b: 4c 8b 4f 40 movq 0x40(%rdi), %r9
c1f: 4c 8b 47 48 movq 0x48(%rdi), %r8
; BYTE* const oend = args->oend;
c23: 4c 89 6c 24 30 movq %r13, 0x30(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
c28: 4d 8b 7e 18 movq 0x18(%r14), %r15
; U16 const* const dtable = (U16 const*)args->dt;
c2c: 48 8b 4f 60 movq 0x60(%rdi), %rcx
; BYTE const* const ilimit = args->ilimit;
c30: 48 89 44 24 28 movq %rax, 0x28(%rsp)
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
c35: 48 8d 47 40 leaq 0x40(%rdi), %rax
c39: 49 8b 76 58 movq 0x58(%r14), %rsi
c3d: 48 8b 7f 50 movq 0x50(%rdi), %rdi
c41: 48 89 44 24 38 movq %rax, 0x38(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
c46: 49 8b 06 movq (%r14), %rax
c49: 48 89 1c 24 movq %rbx, (%rsp)
c4d: 48 89 5c 24 78 movq %rbx, 0x78(%rsp)
c52: 48 89 44 24 70 movq %rax, 0x70(%rsp)
c57: 48 89 54 24 08 movq %rdx, 0x8(%rsp)
c5c: 48 89 94 24 80 00 00 00 movq %rdx, 0x80(%rsp)
; ZSTD_memcpy(&op, &args->op, sizeof(op));
c64: 49 8d 56 20 leaq 0x20(%r14), %rdx
; ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
c68: 4c 89 4c 24 50 movq %r9, 0x50(%rsp)
c6d: 4c 89 44 24 58 movq %r8, 0x58(%rsp)
c72: 48 89 7c 24 60 movq %rdi, 0x60(%rsp)
c77: 48 89 74 24 68 movq %rsi, 0x68(%rsp)
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
c7c: 4c 89 7c 24 10 movq %r15, 0x10(%rsp)
; ZSTD_memcpy(&op, &args->op, sizeof(op));
c81: 4d 8b 56 38 movq 0x38(%r14), %r10
c85: 49 8b 6e 20 movq 0x20(%r14), %rbp
c89: 49 8b 5e 28 movq 0x28(%r14), %rbx
; ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
c8d: 4c 89 bc 24 88 00 00 00 movq %r15, 0x88(%rsp)
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
c95: 49 89 c7 movq %rax, %r15
; size_t const oiters = (size_t)(oend - op[3]) / 5;
c98: 4d 29 d5 subq %r10, %r13
; ZSTD_memcpy(&op, &args->op, sizeof(op));
c9b: 48 89 54 24 40 movq %rdx, 0x40(%rsp)
ca0: 4d 8b 5e 30 movq 0x30(%r14), %r11
; size_t const oiters = (size_t)(oend - op[3]) / 5;
ca4: 48 b8 cd cc cc cc cc cc cc cc movabsq $-0x3333333333333333, %rax # imm = 0xCCCCCCCCCCCCCCCD
cae: 49 f7 e5 mulq %r13
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
cb1: 48 8b 44 24 28 movq 0x28(%rsp), %rax
cb6: 4d 89 fd movq %r15, %r13
; ZSTD_memcpy(&op, &args->op, sizeof(op));
cb9: 48 89 ac 24 90 00 00 00 movq %rbp, 0x90(%rsp)
cc1: 48 89 9c 24 98 00 00 00 movq %rbx, 0x98(%rsp)
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
cc9: 49 29 c5 subq %rax, %r13
; ZSTD_memcpy(&op, &args->op, sizeof(op));
ccc: 4c 89 9c 24 a0 00 00 00 movq %r11, 0xa0(%rsp)
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
cd4: 48 b8 93 24 49 92 24 49 92 24 movabsq $0x2492492492492493, %rax # imm = 0x2492492492492493
; size_t const oiters = (size_t)(oend - op[3]) / 5;
cde: 49 89 d4 movq %rdx, %r12
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
ce1: 49 f7 e5 mulq %r13
; ZSTD_memcpy(&op, &args->op, sizeof(op));
ce4: 4c 89 94 24 a8 00 00 00 movq %r10, 0xa8(%rsp)
; size_t const oiters = (size_t)(oend - op[3]) / 5;
cec: 49 c1 ec 02 shrq $0x2, %r12
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
cf0: 49 29 d5 subq %rdx, %r13
cf3: 49 d1 ed shrq %r13
cf6: 4c 01 ea addq %r13, %rdx
cf9: 48 c1 ea 02 shrq $0x2, %rdx
; size_t const iters = MIN(oiters, iiters);
cfd: 49 39 d4 cmpq %rdx, %r12
d00: 4c 0f 47 e2 cmovaq %rdx, %r12
; size_t const symbols = iters * 5;
d04: 4b 8d 04 a4 leaq (%r12,%r12,4), %rax
; olimit = op[3] + symbols;
d08: 4c 01 d0 addq %r10, %rax
d0b: 48 89 44 24 20 movq %rax, 0x20(%rsp)
d10: 48 89 c2 movq %rax, %rdx
; if (op[3] + 20 > olimit)
d13: 49 8d 42 14 leaq 0x14(%r10), %rax
d17: 48 39 c2 cmpq %rax, %rdx
d1a: 0f 82 ef 03 00 00 jb <L0>
d20: 48 8b 54 24 08 movq 0x8(%rsp), %rdx
d25: 4c 8b 24 24 movq (%rsp), %r12
d29: 4c 89 74 24 48 movq %r14, 0x48(%rsp)
d2e: 31 c0 xorl %eax, %eax
d30: 48 89 14 24 movq %rdx, (%rsp)
<L3>:
; if (ip[stream] < ip[stream - 1])
d34: 4d 39 fc cmpq %r15, %r12
d37: 0f 82 72 03 00 00 jb <L1>
d3d: 4c 39 24 24 cmpq %r12, (%rsp)
d41: 0f 82 68 03 00 00 jb <L1>
d47: 4c 8b 34 24 movq (%rsp), %r14
d4b: 4c 39 74 24 10 cmpq %r14, 0x10(%rsp)
d50: 0f 82 59 03 00 00 jb <L1>
d56: 48 89 5c 24 08 movq %rbx, 0x8(%rsp)
d5b: 4c 89 c8 movq %r9, %rax
<L2>:
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0)
d5e: 48 89 c2 movq %rax, %rdx
d61: 4c 8b 74 24 08 movq 0x8(%rsp), %r14
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
d66: 48 83 c5 05 addq $0x5, %rbp
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0)
d6a: 48 c1 ea 35 shrq $0x35, %rdx
d6e: 0f b7 14 51 movzwl (%rcx,%rdx,2), %edx
d72: 41 89 d1 movl %edx, %r9d
d75: 88 75 fb movb %dh, -0x5(%rbp)
d78: c4 62 b1 f7 c8 shlxq %r9, %rax, %r9
d7d: 4c 89 c0 movq %r8, %rax
d80: 48 c1 e8 35 shrq $0x35, %rax
d84: 0f b7 04 41 movzwl (%rcx,%rax,2), %eax
d88: 89 c2 movl %eax, %edx
d8a: 0f b6 c4 movzbl %ah, %eax
d8d: 41 88 06 movb %al, (%r14)
d90: 48 89 f8 movq %rdi, %rax
d93: c4 42 e9 f7 c0 shlxq %rdx, %r8, %r8
d98: 48 c1 e8 35 shrq $0x35, %rax
d9c: 0f b7 14 41 movzwl (%rcx,%rax,2), %edx
da0: 89 d0 movl %edx, %eax
da2: 0f b6 d6 movzbl %dh, %edx
da5: 41 88 13 movb %dl, (%r11)
da8: 48 89 f2 movq %rsi, %rdx
dab: c4 e2 f9 f7 c7 shlxq %rax, %rdi, %rax
db0: 48 c1 ea 35 shrq $0x35, %rdx
db4: 0f b7 14 51 movzwl (%rcx,%rdx,2), %edx
db8: 89 d7 movl %edx, %edi
dba: 0f b6 d6 movzbl %dh, %edx
dbd: 41 88 12 movb %dl, (%r10)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1)
dc0: 4c 89 ca movq %r9, %rdx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0)
dc3: c4 e2 c1 f7 f6 shlxq %rdi, %rsi, %rsi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1)
dc8: 48 c1 ea 35 shrq $0x35, %rdx
dcc: 0f b7 3c 51 movzwl (%rcx,%rdx,2), %edi
dd0: 89 fb movl %edi, %ebx
dd2: 89 fa movl %edi, %edx
dd4: 4c 89 c7 movq %r8, %rdi
dd7: 48 c1 ef 35 shrq $0x35, %rdi
ddb: 88 7d fc movb %bh, -0x4(%rbp)
dde: c4 c2 e9 f7 d1 shlxq %rdx, %r9, %rdx
de3: 44 0f b7 0c 79 movzwl (%rcx,%rdi,2), %r9d
de8: 44 89 cf movl %r9d, %edi
deb: 44 89 cb movl %r9d, %ebx
dee: c4 c2 c1 f7 f8 shlxq %rdi, %r8, %rdi
df3: 49 89 c0 movq %rax, %r8
df6: 0f b6 df movzbl %bh, %ebx
df9: 49 c1 e8 35 shrq $0x35, %r8
dfd: 41 88 5e 01 movb %bl, 0x1(%r14)
e01: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
e06: 45 89 c1 movl %r8d, %r9d
e09: c4 62 b1 f7 c8 shlxq %r9, %rax, %r9
e0e: 44 89 c0 movl %r8d, %eax
e11: 0f b6 c4 movzbl %ah, %eax
e14: 41 88 43 01 movb %al, 0x1(%r11)
e18: 48 89 f0 movq %rsi, %rax
e1b: 48 c1 e8 35 shrq $0x35, %rax
e1f: 0f b7 04 41 movzwl (%rcx,%rax,2), %eax
e23: 41 89 c0 movl %eax, %r8d
e26: 0f b6 c4 movzbl %ah, %eax
e29: 41 88 42 01 movb %al, 0x1(%r10)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
e2d: 48 89 d0 movq %rdx, %rax
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1)
e30: c4 62 b9 f7 c6 shlxq %r8, %rsi, %r8
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
e35: 48 c1 e8 35 shrq $0x35, %rax
e39: 0f b7 34 41 movzwl (%rcx,%rax,2), %esi
e3d: 89 f0 movl %esi, %eax
e3f: 89 f3 movl %esi, %ebx
e41: c4 e2 f9 f7 c2 shlxq %rax, %rdx, %rax
e46: 48 89 fa movq %rdi, %rdx
e49: 88 7d fd movb %bh, -0x3(%rbp)
e4c: 48 c1 ea 35 shrq $0x35, %rdx
e50: 0f b7 34 51 movzwl (%rcx,%rdx,2), %esi
e54: 89 f2 movl %esi, %edx
e56: 89 f3 movl %esi, %ebx
e58: 4c 89 ce movq %r9, %rsi
e5b: 0f b6 df movzbl %bh, %ebx
e5e: 48 c1 ee 35 shrq $0x35, %rsi
e62: c4 e2 e9 f7 d7 shlxq %rdx, %rdi, %rdx
e67: 41 88 5e 02 movb %bl, 0x2(%r14)
e6b: 0f b7 34 71 movzwl (%rcx,%rsi,2), %esi
e6f: 89 f7 movl %esi, %edi
e71: 89 f3 movl %esi, %ebx
e73: 4c 89 c6 movq %r8, %rsi
e76: 48 c1 ee 35 shrq $0x35, %rsi
e7a: 0f b6 df movzbl %bh, %ebx
e7d: c4 c2 c1 f7 f9 shlxq %rdi, %r9, %rdi
e82: 41 88 5b 02 movb %bl, 0x2(%r11)
e86: 44 0f b7 0c 71 movzwl (%rcx,%rsi,2), %r9d
e8b: 44 89 ce movl %r9d, %esi
e8e: 44 89 cb movl %r9d, %ebx
e91: c4 c2 c9 f7 f0 shlxq %rsi, %r8, %rsi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
e96: 49 89 c0 movq %rax, %r8
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
e99: 0f b6 df movzbl %bh, %ebx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
e9c: 49 c1 e8 35 shrq $0x35, %r8
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2)
ea0: 41 88 5a 02 movb %bl, 0x2(%r10)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
ea4: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
ea9: 45 89 c1 movl %r8d, %r9d
eac: 44 89 c3 movl %r8d, %ebx
eaf: 49 89 d0 movq %rdx, %r8
eb2: 49 c1 e8 35 shrq $0x35, %r8
eb6: 88 7d fe movb %bh, -0x2(%rbp)
eb9: c4 e2 b1 f7 c0 shlxq %r9, %rax, %rax
ebe: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
ec3: 45 89 c1 movl %r8d, %r9d
ec6: 44 89 c3 movl %r8d, %ebx
ec9: 49 89 f8 movq %rdi, %r8
ecc: 0f b6 df movzbl %bh, %ebx
ecf: 49 c1 e8 35 shrq $0x35, %r8
ed3: c4 e2 b1 f7 d2 shlxq %r9, %rdx, %rdx
ed8: 41 88 5e 03 movb %bl, 0x3(%r14)
edc: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
ee1: 45 89 c1 movl %r8d, %r9d
ee4: 44 89 c3 movl %r8d, %ebx
ee7: 49 89 f0 movq %rsi, %r8
eea: 0f b6 df movzbl %bh, %ebx
eed: 49 c1 e8 35 shrq $0x35, %r8
ef1: c4 e2 b1 f7 ff shlxq %r9, %rdi, %rdi
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
ef6: 49 89 fd movq %rdi, %r13
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
ef9: 41 88 5b 03 movb %bl, 0x3(%r11)
efd: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f02: 49 c1 ed 35 shrq $0x35, %r13
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
f06: 45 89 c1 movl %r8d, %r9d
f09: 44 89 c3 movl %r8d, %ebx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f0c: 49 89 c0 movq %rax, %r8
f0f: 49 c1 e8 35 shrq $0x35, %r8
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3)
f13: 0f b6 df movzbl %bh, %ebx
f16: c4 e2 b1 f7 f6 shlxq %r9, %rsi, %rsi
f1b: 41 88 5a 03 movb %bl, 0x3(%r10)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f1f: 46 0f b7 0c 41 movzwl (%rcx,%r8,2), %r9d
f24: 49 89 d0 movq %rdx, %r8
f27: 49 c1 e8 35 shrq $0x35, %r8
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f2b: 48 83 44 24 08 05 addq $0x5, 0x8(%rsp)
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f31: 44 89 cb movl %r9d, %ebx
f34: c4 e2 b1 f7 c0 shlxq %r9, %rax, %rax
; return (unsigned)__builtin_ctzll(val);
f39: f3 48 0f bc c0 tzcntq %rax, %rax
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f3e: 88 7d ff movb %bh, -0x1(%rbp)
f41: 46 0f b7 04 41 movzwl (%rcx,%r8,2), %r8d
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f46: 49 89 c1 movq %rax, %r9
f49: 83 e0 07 andl $0x7, %eax
f4c: 49 c1 e9 03 shrq $0x3, %r9
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f50: 44 89 c3 movl %r8d, %ebx
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f53: 4d 29 cf subq %r9, %r15
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f56: c4 62 b9 f7 c2 shlxq %r8, %rdx, %r8
; return (unsigned)__builtin_ctzll(val);
f5b: f3 4d 0f bc c0 tzcntq %r8, %r8
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f60: 0f b6 df movzbl %bh, %ebx
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f63: 4c 89 c2 movq %r8, %rdx
f66: 41 83 e0 07 andl $0x7, %r8d
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f6a: 41 88 5e 04 movb %bl, 0x4(%r14)
f6e: 46 0f b7 34 69 movzwl (%rcx,%r13,2), %r14d
f73: 49 89 f5 movq %rsi, %r13
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f76: 48 c1 ea 03 shrq $0x3, %rdx
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f7a: 49 c1 ed 35 shrq $0x35, %r13
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
f7e: 49 29 d4 subq %rdx, %r12
; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4)
f81: 44 89 f3 movl %r14d, %ebx
f84: c4 e2 89 f7 ff shlxq %r14, %rdi, %rdi
f89: 0f b6 df movzbl %bh, %ebx
f8c: 41 88 5b 04 movb %bl, 0x4(%r11)
f90: 46 0f b7 2c 69 movzwl (%rcx,%r13,2), %r13d
f95: 44 89 eb movl %r13d, %ebx
f98: c4 e2 91 f7 f6 shlxq %r13, %rsi, %rsi
f9d: 4d 89 d5 movq %r10, %r13
fa0: 0f b6 df movzbl %bh, %ebx
fa3: 41 88 5a 04 movb %bl, 0x4(%r10)
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
fa7: 4d 8b 0f movq (%r15), %r9
faa: 49 83 c9 01 orq $0x1, %r9
fae: c4 c2 f9 f7 c1 shlxq %rax, %r9, %rax
fb3: 4d 8b 0c 24 movq (%r12), %r9
fb7: 49 83 c9 01 orq $0x1, %r9
; return (unsigned)__builtin_ctzll(val);
fbb: 45 31 f6 xorl %r14d, %r14d
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
fbe: 49 83 c2 05 addq $0x5, %r10
fc2: 49 83 c3 05 addq $0x5, %r11
; return (unsigned)__builtin_ctzll(val);
fc6: f3 4c 0f bc f7 tzcntq %rdi, %r14
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
fcb: c4 42 b9 f7 c1 shlxq %r8, %r9, %r8
fd0: 4c 89 f2 movq %r14, %rdx
fd3: 41 83 e6 07 andl $0x7, %r14d
fd7: 48 c1 ea 03 shrq $0x3, %rdx
fdb: 48 29 14 24 subq %rdx, (%rsp)
fdf: 48 8b 3c 24 movq (%rsp), %rdi
fe3: 48 8b 17 movq (%rdi), %rdx
fe6: 48 83 ca 01 orq $0x1, %rdx
fea: c4 e2 89 f7 fa shlxq %r14, %rdx, %rdi
; return (unsigned)__builtin_ctzll(val);
fef: 31 d2 xorl %edx, %edx
ff1: f3 48 0f bc d6 tzcntq %rsi, %rdx
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
ff6: 48 89 d6 movq %rdx, %rsi
ff9: 83 e2 07 andl $0x7, %edx
ffc: 48 c1 ee 03 shrq $0x3, %rsi
1000: 48 29 74 24 10 subq %rsi, 0x10(%rsp)
1005: 48 8b 5c 24 10 movq 0x10(%rsp), %rbx
100a: 48 8b 33 movq (%rbx), %rsi
; } while (op[3] < olimit);
100d: 48 8b 5c 24 20 movq 0x20(%rsp), %rbx
; HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM)
1012: 49 89 f1 movq %rsi, %r9
1015: 48 89 74 24 18 movq %rsi, 0x18(%rsp)
101a: 49 83 c9 01 orq $0x1, %r9
101e: c4 c2 e9 f7 f1 shlxq %rdx, %r9, %rsi
; } while (op[3] < olimit);
1023: 49 39 da cmpq %rbx, %r10
1026: 0f 82 32 fd ff ff jb <L2>
; size_t const oiters = (size_t)(oend - op[3]) / 5;
102c: 4c 8b 74 24 30 movq 0x30(%rsp), %r14
1031: 49 89 c1 movq %rax, %r9
1034: 48 8b 5c 24 08 movq 0x8(%rsp), %rbx
1039: 48 b8 cd cc cc cc cc cc cc cc movabsq $-0x3333333333333333, %rax # imm = 0xCCCCCCCCCCCCCCCD
1043: 4d 29 d6 subq %r10, %r14
1046: 49 f7 e6 mulq %r14
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
1049: 4c 89 f8 movq %r15, %rax
; size_t const oiters = (size_t)(oend - op[3]) / 5;
104c: 49 89 d6 movq %rdx, %r14
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
104f: 48 8b 54 24 28 movq 0x28(%rsp), %rdx
; size_t const oiters = (size_t)(oend - op[3]) / 5;
1054: 49 c1 ee 02 shrq $0x2, %r14
; size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
1058: 48 29 d0 subq %rdx, %rax
105b: 48 89 c2 movq %rax, %rdx
105e: 48 b8 93 24 49 92 24 49 92 24 movabsq $0x2492492492492493, %rax # imm = 0x2492492492492493
1068: 48 89 54 24 08 movq %rdx, 0x8(%rsp)
106d: 48 f7 e2 mulq %rdx
1070: 48 8b 44 24 08 movq 0x8(%rsp), %rax
1075: 48 29 d0 subq %rdx, %rax
1078: 48 d1 e8 shrq %rax
107b: 48 01 c2 addq %rax, %rdx
107e: 48 c1 ea 02 shrq $0x2, %rdx
; size_t const iters = MIN(oiters, iiters);
1082: 49 39 d6 cmpq %rdx, %r14
1085: 4c 0f 47 f2 cmovaq %rdx, %r14
; if (op[3] + 20 > olimit)
1089: 49 83 c5 19 addq $0x19, %r13
; size_t const symbols = iters * 5;
108d: 4b 8d 04 b6 leaq (%r14,%r14,4), %rax
; olimit = op[3] + symbols;
1091: 49 8d 14 02 leaq (%r10,%rax), %rdx
1095: b8 01 00 00 00 movl $0x1, %eax
109a: 48 89 54 24 20 movq %rdx, 0x20(%rsp)
; if (op[3] + 20 > olimit)
109f: 4c 39 ea cmpq %r13, %rdx
10a2: 0f 83 8c fc ff ff jae <L3>
10a8: 4c 8b 74 24 48 movq 0x48(%rsp), %r14
10ad: eb 09 jmp <L4>
<L1>:
10af: 4c 8b 74 24 48 movq 0x48(%rsp), %r14
10b4: 84 c0 testb %al, %al
10b6: 74 57 je <L0>
<L4>:
10b8: 48 8b 04 24 movq (%rsp), %rax
10bc: 48 89 ac 24 90 00 00 00 movq %rbp, 0x90(%rsp)
10c4: 4c 89 7c 24 70 movq %r15, 0x70(%rsp)
10c9: 48 89 84 24 80 00 00 00 movq %rax, 0x80(%rsp)
10d1: 48 8b 44 24 10 movq 0x10(%rsp), %rax
10d6: 4c 89 4c 24 50 movq %r9, 0x50(%rsp)
10db: 48 89 9c 24 98 00 00 00 movq %rbx, 0x98(%rsp)
10e3: 4c 89 64 24 78 movq %r12, 0x78(%rsp)
10e8: 4c 89 44 24 58 movq %r8, 0x58(%rsp)
10ed: 4c 89 9c 24 a0 00 00 00 movq %r11, 0xa0(%rsp)
10f5: 48 89 7c 24 60 movq %rdi, 0x60(%rsp)
10fa: 4c 89 94 24 a8 00 00 00 movq %r10, 0xa8(%rsp)
1102: 48 89 84 24 88 00 00 00 movq %rax, 0x88(%rsp)
110a: 48 89 74 24 68 movq %rsi, 0x68(%rsp)
<L0>:
; ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
110f: 48 8b 44 24 50 movq 0x50(%rsp), %rax
1114: 48 8b 4c 24 38 movq 0x38(%rsp), %rcx
1119: 49 89 46 40 movq %rax, 0x40(%r14)
111d: 48 8b 44 24 58 movq 0x58(%rsp), %rax
1122: 48 89 41 08 movq %rax, 0x8(%rcx)
1126: 48 8b 44 24 60 movq 0x60(%rsp), %rax
112b: 48 89 41 10 movq %rax, 0x10(%rcx)
112f: 48 8b 44 24 68 movq 0x68(%rsp), %rax
1134: 48 89 41 18 movq %rax, 0x18(%rcx)
; ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
1138: 48 8b 44 24 70 movq 0x70(%rsp), %rax
; ZSTD_memcpy(&args->op, &op, sizeof(op));
113d: 48 8b 4c 24 40 movq 0x40(%rsp), %rcx
; ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
1142: 49 89 06 movq %rax, (%r14)
1145: 48 8b 44 24 78 movq 0x78(%rsp), %rax
114a: 49 89 46 08 movq %rax, 0x8(%r14)
114e: 48 8b 84 24 80 00 00 00 movq 0x80(%rsp), %rax
1156: 49 89 46 10 movq %rax, 0x10(%r14)
115a: 48 8b 84 24 88 00 00 00 movq 0x88(%rsp), %rax
1162: 49 89 46 18 movq %rax, 0x18(%r14)
; ZSTD_memcpy(&args->op, &op, sizeof(op));
1166: 48 8b 84 24 90 00 00 00 movq 0x90(%rsp), %rax
116e: 49 89 46 20 movq %rax, 0x20(%r14)
1172: 48 8b 84 24 98 00 00 00 movq 0x98(%rsp), %rax
117a: 48 89 41 08 movq %rax, 0x8(%rcx)
117e: 48 8b 84 24 a0 00 00 00 movq 0xa0(%rsp), %rax
1186: 48 89 41 10 movq %rax, 0x10(%rcx)
118a: 48 8b 84 24 a8 00 00 00 movq 0xa8(%rsp), %rax
1192: 48 89 41 18 movq %rax, 0x18(%rcx)
; }
1196: 48 8b 84 24 b0 00 00 00 movq 0xb0(%rsp), %rax
119e: 65 48 2b 04 25 28 00 00 00 subq %gs:0x28, %rax
11a7: 75 16 jne <L5>
11a9: 48 81 c4 b8 00 00 00 addq $0xb8, %rsp
11b0: 5b popq %rbx
11b1: 5d popq %rbp
11b2: 41 5c popq %r12
11b4: 41 5d popq %r13
11b6: 41 5e popq %r14
11b8: 41 5f popq %r15
11ba: e9 00 00 00 00 jmp <L5>
<L5>:
11bf: e8 00 00 00 00 callq <L6>
<L6>:
; Filler only: everything from here to 0x11df is NOP encodings with no architectural effect.
; The <L6> label lands on this address because the callq just above has an all-zero
; displacement (e8 00 00 00 00), so its printed target is simply the next address.
; NOTE(review): presumably an unresolved relocation in the object file (likely the
; stack-protector failure handler, given the %gs:0x28 check before it) -- confirm with objdump -r.
; Layout: one 11-byte nopw (0x11c4-0x11ce) followed by 17 single-byte nops (0x11cf-0x11df).
; NOTE(review): presumably alignment padding so the next symbol starts at 0x11e0;
; the code that follows is not visible in this listing -- confirm.
11c4: 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
11cf: 90 nop
11d0: 90 nop
11d1: 90 nop
11d2: 90 nop
11d3: 90 nop
11d4: 90 nop
11d5: 90 nop
11d6: 90 nop
11d7: 90 nop
11d8: 90 nop
11d9: 90 nop
11da: 90 nop
11db: 90 nop
11dc: 90 nop
11dd: 90 nop
11de: 90 nop
11df: 90 nop
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment