Created
December 27, 2020 12:48
-
-
Save lu-zero/61b856697f5d8209e2908783c4c125b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
./src/libdav1d.5.dylib.p/mc16_avx2.obj: file format Mach-O 64-bit x86-64 | |
Disassembly of section __TEXT,__text: | |
0000000000000000 _dav1d_put_8tap_regular_16bpc_avx2: | |
0: 41 ba 2d 00 00 00 movl $45, %r10d | |
6: 41 bb 2d 00 00 00 movl $45, %r11d | |
c: e9 8f 00 00 00 jmp 143 <_dav1d_put_8tap_16bpc_avx2> | |
11: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax) | |
19: 0f 1f 80 00 00 00 00 nopl (%rax) | |
0000000000000020 _dav1d_put_8tap_regular_smooth_16bpc_avx2: | |
20: 41 ba 2d 00 00 00 movl $45, %r10d | |
26: 41 bb bc 07 00 00 movl $1980, %r11d | |
2c: eb 72 jmp 114 <_dav1d_put_8tap_16bpc_avx2> | |
2e: 66 90 nop | |
0000000000000030 _dav1d_put_8tap_regular_sharp_16bpc_avx2: | |
30: 41 ba 2d 00 00 00 movl $45, %r10d | |
36: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
3c: eb 62 jmp 98 <_dav1d_put_8tap_16bpc_avx2> | |
3e: 66 90 nop | |
0000000000000040 _dav1d_put_8tap_smooth_16bpc_avx2: | |
40: 41 ba bc 07 00 00 movl $1980, %r10d | |
46: 41 bb bc 07 00 00 movl $1980, %r11d | |
4c: eb 52 jmp 82 <_dav1d_put_8tap_16bpc_avx2> | |
4e: 66 90 nop | |
0000000000000050 _dav1d_put_8tap_smooth_regular_16bpc_avx2: | |
50: 41 ba bc 07 00 00 movl $1980, %r10d | |
56: 41 bb 2d 00 00 00 movl $45, %r11d | |
5c: eb 42 jmp 66 <_dav1d_put_8tap_16bpc_avx2> | |
5e: 66 90 nop | |
0000000000000060 _dav1d_put_8tap_smooth_sharp_16bpc_avx2: | |
60: 41 ba bc 07 00 00 movl $1980, %r10d | |
66: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
6c: eb 32 jmp 50 <_dav1d_put_8tap_16bpc_avx2> | |
6e: 66 90 nop | |
0000000000000070 _dav1d_put_8tap_sharp_16bpc_avx2: | |
70: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
76: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
7c: eb 22 jmp 34 <_dav1d_put_8tap_16bpc_avx2> | |
7e: 66 90 nop | |
0000000000000080 _dav1d_put_8tap_sharp_regular_16bpc_avx2: | |
80: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
86: 41 bb 2d 00 00 00 movl $45, %r11d | |
8c: eb 12 jmp 18 <_dav1d_put_8tap_16bpc_avx2> | |
8e: 66 90 nop | |
0000000000000090 _dav1d_put_8tap_sharp_smooth_16bpc_avx2: | |
90: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
96: 41 bb bc 07 00 00 movl $1980, %r11d | |
9c: eb 02 jmp 2 <_dav1d_put_8tap_16bpc_avx2> | |
9e: 66 90 nop | |
00000000000000a0 _dav1d_put_8tap_16bpc_avx2: | |
a0: 53 pushq %rbx | |
a1: 69 44 24 10 81 40 00 00 imull $16513, 16(%rsp), %eax | |
a9: 44 01 d0 addl %r10d, %eax | |
ac: 44 69 54 24 18 81 40 00 00 imull $16513, 24(%rsp), %r10d | |
b5: 45 01 da addl %r11d, %r10d | |
b8: 4d 63 c0 movslq %r8d, %r8 | |
bb: c4 e2 7d 79 7c 24 20 vpbroadcastw 32(%rsp), %ymm7 | |
c2: a9 00 c0 1f 00 testl $2080768, %eax | |
c7: 0f 85 e6 01 00 00 jne 486 <_dav1d_put_8tap_16bpc_avx2.put_8tap_h_16bpc> | |
cd: 41 f7 c2 00 c0 1f 00 testl $2080768, %r10d | |
d4: 0f 85 45 04 00 00 jne 1093 <_dav1d_put_8tap_16bpc_avx2.put_8tap_v_16bpc> | |
00000000000000da _dav1d_put_8tap_16bpc_avx2.put_16bpc: | |
da: 4c 8d 1d b6 01 00 00 leaq 438(%rip), %r11 | |
e1: f3 45 0f bc c0 tzcntl %r8d, %r8d | |
e6: 41 83 e8 01 subl $1, %r8d | |
ea: 4f 63 04 83 movslq (%r11,%r8,4), %r8 | |
ee: 4d 01 d8 addq %r11, %r8 | |
f1: 41 ff e0 jmpq *%r8 | |
00000000000000f4 _dav1d_put_8tap_16bpc_avx2.w2: | |
f4: c5 f9 6e 02 vmovd (%rdx), %xmm0 | |
f8: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1 | |
fd: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
101: c5 f9 7e 07 vmovd %xmm0, (%rdi) | |
105: c5 f9 7e 0c 37 vmovd %xmm1, (%rdi,%rsi) | |
10a: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
10e: 41 83 e9 02 subl $2, %r9d | |
112: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w2> | |
114: 5b popq %rbx | |
115: c3 retq | |
0000000000000116 _dav1d_put_8tap_16bpc_avx2.w4: | |
116: c5 fa 7e 02 vmovq (%rdx), %xmm0 | |
11a: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1 | |
11f: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
123: c5 f9 d6 07 vmovq %xmm0, (%rdi) | |
127: c5 f9 d6 0c 37 vmovq %xmm1, (%rdi,%rsi) | |
12c: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
130: 41 83 e9 02 subl $2, %r9d | |
134: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w4> | |
136: 5b popq %rbx | |
137: c3 retq | |
0000000000000138 _dav1d_put_8tap_16bpc_avx2.w8: | |
138: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
13c: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
141: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
145: c5 f9 7f 07 vmovdqa %xmm0, (%rdi) | |
149: c5 f9 7f 0c 37 vmovdqa %xmm1, (%rdi,%rsi) | |
14e: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
152: 41 83 e9 02 subl $2, %r9d | |
156: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w8> | |
158: 5b popq %rbx | |
159: c3 retq | |
000000000000015a _dav1d_put_8tap_16bpc_avx2.w16: | |
15a: c5 fe 6f 02 vmovdqu (%rdx), %ymm0 | |
15e: c5 fe 6f 0c 0a vmovdqu (%rdx,%rcx), %ymm1 | |
163: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
167: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
16b: c5 fd 7f 0c 37 vmovdqa %ymm1, (%rdi,%rsi) | |
170: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
174: 41 83 e9 02 subl $2, %r9d | |
178: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w16> | |
17a: 5b popq %rbx | |
17b: c5 f8 77 vzeroupper | |
17e: c3 retq | |
000000000000017f _dav1d_put_8tap_16bpc_avx2.w32: | |
17f: c5 fe 6f 02 vmovdqu (%rdx), %ymm0 | |
183: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1 | |
188: c5 fe 6f 14 0a vmovdqu (%rdx,%rcx), %ymm2 | |
18d: c5 fe 6f 5c 0a 20 vmovdqu 32(%rdx,%rcx), %ymm3 | |
193: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
197: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
19b: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
1a0: c5 fd 7f 14 37 vmovdqa %ymm2, (%rdi,%rsi) | |
1a5: c5 fd 7f 5c 37 20 vmovdqa %ymm3, 32(%rdi,%rsi) | |
1ab: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
1af: 41 83 e9 02 subl $2, %r9d | |
1b3: 7f ca jg -54 <_dav1d_put_8tap_16bpc_avx2.w32> | |
1b5: 5b popq %rbx | |
1b6: c5 f8 77 vzeroupper | |
1b9: c3 retq | |
00000000000001ba _dav1d_put_8tap_16bpc_avx2.w64: | |
1ba: c5 fe 6f 02 vmovdqu (%rdx), %ymm0 | |
1be: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1 | |
1c3: c5 fe 6f 52 40 vmovdqu 64(%rdx), %ymm2 | |
1c8: c5 fe 6f 5a 60 vmovdqu 96(%rdx), %ymm3 | |
1cd: c5 fe 6f 24 0a vmovdqu (%rdx,%rcx), %ymm4 | |
1d2: c5 fe 6f 6c 0a 20 vmovdqu 32(%rdx,%rcx), %ymm5 | |
1d8: c5 fe 6f 74 0a 40 vmovdqu 64(%rdx,%rcx), %ymm6 | |
1de: c5 fe 6f 7c 0a 60 vmovdqu 96(%rdx,%rcx), %ymm7 | |
1e4: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1e8: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
1ec: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
1f1: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi) | |
1f6: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi) | |
1fb: c5 fd 7f 24 37 vmovdqa %ymm4, (%rdi,%rsi) | |
200: c5 fd 7f 6c 37 20 vmovdqa %ymm5, 32(%rdi,%rsi) | |
206: c5 fd 7f 74 37 40 vmovdqa %ymm6, 64(%rdi,%rsi) | |
20c: c5 fd 7f 7c 37 60 vmovdqa %ymm7, 96(%rdi,%rsi) | |
212: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
216: 41 83 e9 02 subl $2, %r9d | |
21a: 7f 9e jg -98 <_dav1d_put_8tap_16bpc_avx2.w64> | |
21c: 5b popq %rbx | |
21d: c5 f8 77 vzeroupper | |
220: c3 retq | |
0000000000000221 _dav1d_put_8tap_16bpc_avx2.w128: | |
221: c5 fe 6f 02 vmovdqu (%rdx), %ymm0 | |
225: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1 | |
22a: c5 fe 6f 52 40 vmovdqu 64(%rdx), %ymm2 | |
22f: c5 fe 6f 5a 60 vmovdqu 96(%rdx), %ymm3 | |
234: c5 fe 6f a2 80 00 00 00 vmovdqu 128(%rdx), %ymm4 | |
23c: c5 fe 6f aa a0 00 00 00 vmovdqu 160(%rdx), %ymm5 | |
244: c5 fe 6f b2 c0 00 00 00 vmovdqu 192(%rdx), %ymm6 | |
24c: c5 fe 6f ba e0 00 00 00 vmovdqu 224(%rdx), %ymm7 | |
254: 48 01 ca addq %rcx, %rdx | |
257: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
25b: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
260: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi) | |
265: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi) | |
26a: c5 fd 7f a7 80 00 00 00 vmovdqa %ymm4, 128(%rdi) | |
272: c5 fd 7f af a0 00 00 00 vmovdqa %ymm5, 160(%rdi) | |
27a: c5 fd 7f b7 c0 00 00 00 vmovdqa %ymm6, 192(%rdi) | |
282: c5 fd 7f bf e0 00 00 00 vmovdqa %ymm7, 224(%rdi) | |
28a: 48 01 f7 addq %rsi, %rdi | |
28d: 41 ff c9 decl %r9d | |
290: 7f 8f jg -113 <_dav1d_put_8tap_16bpc_avx2.w128> | |
292: 5b popq %rbx | |
293: c5 f8 77 vzeroupper | |
296: c3 retq | |
0000000000000297 _dav1d_put_8tap_16bpc_avx2.jmp_tbl: | |
297: 5d popq %rbp | |
298: fe ff <unknown> | |
29a: ff 7f fe <unknown> | |
29d: ff ff <unknown> | |
29f: a1 fe ff ff c3 fe ff ff e8 movabsl -1657324668173942786, %eax | |
2a8: fe ff <unknown> | |
2aa: ff 23 jmpq *(%rbx) | |
2ac: ff ff <unknown> | |
2ae: ff 8a ff ff ff 41 decl 1107296255(%rdx) | |
00000000000002b3 _dav1d_put_8tap_16bpc_avx2.put_8tap_h_16bpc: | |
2b3: 41 83 f8 04 cmpl $4, %r8d | |
2b7: 7e 03 jle 3 <_dav1d_put_8tap_16bpc_avx2.h_use4tap> | |
2b9: c1 e8 07 shrl $7, %eax | |
00000000000002bc _dav1d_put_8tap_16bpc_avx2.h_use4tap: | |
2bc: 83 e0 7f andl $127, %eax | |
2bf: 41 f7 c2 00 c0 1f 00 testl $2080768, %r10d | |
2c6: 0f 85 5f 08 00 00 jne 2143 <_dav1d_put_8tap_16bpc_avx2.put_8tap_hv_16bpc> | |
2cc: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d | |
2d3: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
2dc: 41 83 fb 0c cmpl $12, %r11d | |
2e0: 75 09 jne 9 <_dav1d_put_8tap_16bpc_avx2.h_bits10> | |
2e2: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
00000000000002eb _dav1d_put_8tap_16bpc_avx2.h_bits10: | |
2eb: 4f 8d 14 00 leaq (%r8,%r8), %r10 | |
2ef: 4c 8d 1d 0d 02 00 00 leaq 525(%rip), %r11 | |
2f6: f3 45 0f bc c0 tzcntl %r8d, %r8d | |
2fb: 41 83 e8 01 subl $1, %r8d | |
2ff: 4f 63 04 83 movslq (%r11,%r8,4), %r8 | |
303: 4d 01 d8 addq %r11, %r8 | |
306: 41 ff e0 jmpq *%r8 | |
0000000000000309 _dav1d_put_8tap_16bpc_avx2.h_w2: | |
309: 48 83 ea 02 subq $2, %rdx | |
30d: c5 f9 6f 25 00 00 00 00 vmovdqa (%rip), %xmm4 | |
315: c4 c2 79 58 ac c3 16 fe ff ff vpbroadcastd -490(%r11,%rax,8), %xmm5 | |
31f: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5 | |
0000000000000324 _dav1d_put_8tap_16bpc_avx2.h_w2l: | |
324: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
328: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
32d: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
331: c4 e2 79 00 c4 vpshufb %xmm4, %xmm0, %xmm0 | |
336: c4 e2 71 00 cc vpshufb %xmm4, %xmm1, %xmm1 | |
33b: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0 | |
33f: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1 | |
343: c4 e2 79 02 c1 vphaddd %xmm1, %xmm0, %xmm0 | |
348: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0 | |
34c: c5 f9 72 e0 06 vpsrad $6, %xmm0, %xmm0 | |
351: c4 e2 79 2b c0 vpackusdw %xmm0, %xmm0, %xmm0 | |
356: c4 e2 79 3a c7 vpminuw %xmm7, %xmm0, %xmm0 | |
35b: c5 f9 7e 07 vmovd %xmm0, (%rdi) | |
35f: c4 e3 79 16 04 37 01 vpextrd $1, %xmm0, (%rdi,%rsi) | |
366: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
36a: 41 83 e9 02 subl $2, %r9d | |
36e: 7f b4 jg -76 <_dav1d_put_8tap_16bpc_avx2.h_w2l> | |
370: 5b popq %rbx | |
371: c3 retq | |
0000000000000372 _dav1d_put_8tap_16bpc_avx2.h_w4: | |
372: 48 83 ea 02 subq $2, %rdx | |
376: c5 fd 6f 25 00 00 00 00 vmovdqa (%rip), %ymm4 | |
37e: c4 c2 79 58 ac c3 7f fe ff ff vpbroadcastd -385(%r11,%rax,8), %xmm5 | |
388: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
000000000000038d _dav1d_put_8tap_16bpc_avx2.h_w4l: | |
38d: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0 | |
392: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1 | |
398: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
39c: c4 e2 7d 00 c4 vpshufb %ymm4, %ymm0, %ymm0 | |
3a1: c4 e2 75 00 cc vpshufb %ymm4, %ymm1, %ymm1 | |
3a6: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
3aa: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
3ae: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
3b3: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
3b7: c5 fd 72 e0 06 vpsrad $6, %ymm0, %ymm0 | |
3bc: c4 e2 7d 2b c0 vpackusdw %ymm0, %ymm0, %ymm0 | |
3c1: c4 e2 7d 3a c7 vpminuw %ymm7, %ymm0, %ymm0 | |
3c6: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1 | |
3cc: c5 f9 7e 07 vmovd %xmm0, (%rdi) | |
3d0: c5 f9 7e 4f 04 vmovd %xmm1, 4(%rdi) | |
3d5: c4 e3 79 16 04 37 01 vpextrd $1, %xmm0, (%rdi,%rsi) | |
3dc: c4 e3 79 16 4c 37 04 01 vpextrd $1, %xmm1, 4(%rdi,%rsi) | |
3e4: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
3e8: 41 83 e9 02 subl $2, %r9d | |
3ec: 7f 9f jg -97 <_dav1d_put_8tap_16bpc_avx2.h_w4l> | |
3ee: 5b popq %rbx | |
3ef: c5 f8 77 vzeroupper | |
3f2: c3 retq | |
00000000000003f3 _dav1d_put_8tap_16bpc_avx2.h_w8: | |
3f3: 48 83 ea 06 subq $6, %rdx | |
3f7: c4 c2 79 59 ac c3 f6 fe ff ff vpbroadcastq -266(%r11,%rax,8), %xmm5 | |
401: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
0000000000000406 _dav1d_put_8tap_16bpc_avx2.h_w8l: | |
406: 45 89 d0 movl %r10d, %r8d | |
0000000000000409 _dav1d_put_8tap_16bpc_avx2.h_w8c: | |
409: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
40d: c5 fa 6f 52 02 vmovdqu 2(%rdx), %xmm2 | |
412: c4 e3 7d 38 04 0a 01 vinserti128 $1, (%rdx,%rcx), %ymm0, %ymm0 | |
419: c4 e3 6d 38 54 11 02 01 vinserti128 $1, 2(%rcx,%rdx), %ymm2, %ymm2 | |
421: c5 fa 6f 4a 04 vmovdqu 4(%rdx), %xmm1 | |
426: c5 fa 6f 5a 06 vmovdqu 6(%rdx), %xmm3 | |
42b: c4 e3 75 38 4c 11 04 01 vinserti128 $1, 4(%rcx,%rdx), %ymm1, %ymm1 | |
433: c4 e3 65 38 5c 11 06 01 vinserti128 $1, 6(%rcx,%rdx), %ymm3, %ymm3 | |
43b: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
43f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
443: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
447: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
44b: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0 | |
450: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
455: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
45a: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
45e: c5 fd 72 e0 06 vpsrad $6, %ymm0, %ymm0 | |
463: c5 fa 6f 4a 08 vmovdqu 8(%rdx), %xmm1 | |
468: c5 fa 6f 5a 0a vmovdqu 10(%rdx), %xmm3 | |
46d: c4 e3 75 38 4c 11 08 01 vinserti128 $1, 8(%rcx,%rdx), %ymm1, %ymm1 | |
475: c4 e3 65 38 5c 11 0a 01 vinserti128 $1, 10(%rcx,%rdx), %ymm3, %ymm3 | |
47d: c5 fa 6f 52 0c vmovdqu 12(%rdx), %xmm2 | |
482: c5 fa 6f 62 0e vmovdqu 14(%rdx), %xmm4 | |
487: c4 e3 6d 38 54 11 0c 01 vinserti128 $1, 12(%rcx,%rdx), %ymm2, %ymm2 | |
48f: c4 e3 5d 38 64 11 0e 01 vinserti128 $1, 14(%rcx,%rdx), %ymm4, %ymm4 | |
497: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
49b: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
49f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
4a3: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
4a7: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
4ac: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2 | |
4b1: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
4b6: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
4ba: c5 f5 72 e1 06 vpsrad $6, %ymm1, %ymm1 | |
4bf: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0 | |
4c4: c4 e2 7d 3a c7 vpminuw %ymm7, %ymm0, %ymm0 | |
4c9: 48 83 c2 10 addq $16, %rdx | |
4cd: c5 f9 7f 07 vmovdqa %xmm0, (%rdi) | |
4d1: c4 e3 7d 39 04 37 01 vextracti128 $1, %ymm0, (%rdi,%rsi) | |
4d8: 48 83 c7 10 addq $16, %rdi | |
4dc: 41 83 e8 10 subl $16, %r8d | |
4e0: 0f 8f 23 ff ff ff jg -221 <_dav1d_put_8tap_16bpc_avx2.h_w8c> | |
4e6: 4c 29 d2 subq %r10, %rdx | |
4e9: 4c 29 d7 subq %r10, %rdi | |
4ec: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
4f0: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
4f4: 41 83 e9 02 subl $2, %r9d | |
4f8: 0f 8f 08 ff ff ff jg -248 <_dav1d_put_8tap_16bpc_avx2.h_w8l> | |
4fe: 5b popq %rbx | |
4ff: c5 f8 77 vzeroupper | |
502: c3 retq | |
0000000000000503 _dav1d_put_8tap_16bpc_avx2.h_jmp_tbl: | |
503: 06 <unknown> | |
504: fe ff <unknown> | |
506: ff 6f fe ljmpl *-2(%rdi) | |
509: ff ff <unknown> | |
50b: f0 lock | |
50c: fe ff <unknown> | |
50e: ff f0 pushq %rax | |
510: fe ff <unknown> | |
512: ff f0 pushq %rax | |
514: fe ff <unknown> | |
516: ff f0 pushq %rax | |
518: fe ff <unknown> | |
51a: ff f0 pushq %rax | |
51c: fe ff <unknown> | |
51e: ff 41 83 incl -125(%rcx) | |
000000000000051f _dav1d_put_8tap_16bpc_avx2.put_8tap_v_16bpc: | |
51f: 41 83 f9 04 cmpl $4, %r9d | |
523: 7e 04 jle 4 <_dav1d_put_8tap_16bpc_avx2.v_use4tap> | |
525: 41 c1 ea 07 shrl $7, %r10d | |
0000000000000529 _dav1d_put_8tap_16bpc_avx2.v_use4tap: | |
529: 41 83 e2 7f andl $127, %r10d | |
52d: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
536: 4c 8d 1d d2 05 00 00 leaq 1490(%rip), %r11 | |
53d: 4b 8d 04 00 leaq (%r8,%r8), %rax | |
541: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
545: 4f 8d 94 d3 36 fa ff ff leaq -1482(%r11,%r10,8), %r10 | |
54d: c4 42 7d 79 02 vpbroadcastw (%r10), %ymm8 | |
552: c4 42 7d 79 4a 02 vpbroadcastw 2(%r10), %ymm9 | |
558: c4 42 7d 79 52 04 vpbroadcastw 4(%r10), %ymm10 | |
55e: c4 42 7d 79 5a 06 vpbroadcastw 6(%r10), %ymm11 | |
564: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8 | |
569: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
56e: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
573: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11 | |
578: f3 45 0f bc c0 tzcntl %r8d, %r8d | |
57d: 41 83 e8 01 subl $1, %r8d | |
581: 4f 63 04 83 movslq (%r11,%r8,4), %r8 | |
585: 4d 01 d8 addq %r11, %r8 | |
588: 41 ff e0 jmpq *%r8 | |
000000000000058b _dav1d_put_8tap_16bpc_avx2.v_w2: | |
58b: 41 83 f9 04 cmpl $4, %r9d | |
58f: 0f 8f 91 00 00 00 jg 145 <_dav1d_put_8tap_16bpc_avx2.v_w28> | |
595: 48 29 ca subq %rcx, %rdx | |
598: c5 f9 6e 02 vmovd (%rdx), %xmm0 | |
59c: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1 | |
5a1: c5 f9 6e 14 4a vmovd (%rdx,%rcx,2), %xmm2 | |
5a6: 48 01 da addq %rbx, %rdx | |
5a9: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
5ad: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
00000000000005b1 _dav1d_put_8tap_16bpc_avx2.v_w2l: | |
5b1: c5 f9 6e 1a vmovd (%rdx), %xmm3 | |
5b5: c5 f9 6e 24 0a vmovd (%rdx,%rcx), %xmm4 | |
5ba: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
5be: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
5c2: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
5c6: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0 | |
5ca: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5 | |
5ce: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
5d2: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
5d6: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1 | |
5da: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8 | |
5de: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8 | |
5e2: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
5e6: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
5ea: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
5ee: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8 | |
5f2: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5 | |
5f7: c4 c1 39 72 e0 06 vpsrad $6, %xmm8, %xmm8 | |
5fd: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5 | |
602: c4 42 39 2b c0 vpackusdw %xmm8, %xmm8, %xmm8 | |
607: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5 | |
60c: c4 62 39 3a c7 vpminuw %xmm7, %xmm8, %xmm8 | |
611: c5 f9 7e 2f vmovd %xmm5, (%rdi) | |
615: c5 79 7e 04 37 vmovd %xmm8, (%rdi,%rsi) | |
61a: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
61e: 41 83 e9 02 subl $2, %r9d | |
622: 7f 8d jg -115 <_dav1d_put_8tap_16bpc_avx2.v_w2l> | |
624: 5b popq %rbx | |
625: c3 retq | |
0000000000000626 _dav1d_put_8tap_16bpc_avx2.v_w28: | |
626: 48 29 da subq %rbx, %rdx | |
629: c5 f9 6e 02 vmovd (%rdx), %xmm0 | |
62d: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1 | |
632: c5 f9 6e 14 4a vmovd (%rdx,%rcx,2), %xmm2 | |
637: 48 01 da addq %rbx, %rdx | |
63a: c5 f9 6e 1a vmovd (%rdx), %xmm3 | |
63e: c5 f9 6e 24 0a vmovd (%rdx,%rcx), %xmm4 | |
643: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
647: c5 79 6e 22 vmovd (%rdx), %xmm12 | |
64b: c5 79 6e 2c 0a vmovd (%rdx,%rcx), %xmm13 | |
650: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
654: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
658: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
65c: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
660: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
664: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4 | |
669: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12 | |
000000000000066e _dav1d_put_8tap_16bpc_avx2.v_w28l: | |
66e: 48 29 ca subq %rcx, %rdx | |
671: c5 79 6e 2a vmovd (%rdx), %xmm13 | |
675: c5 79 6e 34 0a vmovd (%rdx,%rcx), %xmm14 | |
67a: c5 79 6e 3c 4a vmovd (%rdx,%rcx,2), %xmm15 | |
67f: 48 01 da addq %rbx, %rdx | |
682: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13 | |
687: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14 | |
68c: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0 | |
690: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5 | |
694: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
698: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
69c: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1 | |
6a0: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15 | |
6a4: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15 | |
6a8: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
6ac: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2 | |
6b0: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3 | |
6b5: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5 | |
6b9: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15 | |
6bd: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
6c1: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3 | |
6c6: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4 | |
6cb: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12 | |
6d0: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5 | |
6d4: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15 | |
6d9: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4 | |
6de: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12 | |
6e3: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
6e7: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15 | |
6eb: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5 | |
6f0: c4 c1 01 72 e7 06 vpsrad $6, %xmm15, %xmm15 | |
6f6: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5 | |
6fb: c4 42 01 2b ff vpackusdw %xmm15, %xmm15, %xmm15 | |
700: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5 | |
705: c4 62 01 3a ff vpminuw %xmm7, %xmm15, %xmm15 | |
70a: c5 f9 7e 2f vmovd %xmm5, (%rdi) | |
70e: c5 79 7e 3c 37 vmovd %xmm15, (%rdi,%rsi) | |
713: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
717: 41 83 e9 02 subl $2, %r9d | |
71b: 0f 8f 4d ff ff ff jg -179 <_dav1d_put_8tap_16bpc_avx2.v_w28l> | |
721: 5b popq %rbx | |
722: c3 retq | |
0000000000000723 _dav1d_put_8tap_16bpc_avx2.v_w4: | |
723: 41 83 f9 04 cmpl $4, %r9d | |
727: 0f 8f 91 00 00 00 jg 145 <_dav1d_put_8tap_16bpc_avx2.v_w48> | |
72d: 48 29 ca subq %rcx, %rdx | |
730: c5 fa 7e 02 vmovq (%rdx), %xmm0 | |
734: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1 | |
739: c5 fa 7e 14 4a vmovq (%rdx,%rcx,2), %xmm2 | |
73e: 48 01 da addq %rbx, %rdx | |
741: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
745: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
0000000000000749 _dav1d_put_8tap_16bpc_avx2.v_w4l: | |
749: c5 fa 7e 1a vmovq (%rdx), %xmm3 | |
74d: c5 fa 7e 24 0a vmovq (%rdx,%rcx), %xmm4 | |
752: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
756: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
75a: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
75e: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0 | |
762: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5 | |
766: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
76a: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
76e: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1 | |
772: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8 | |
776: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8 | |
77a: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
77e: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
782: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
786: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8 | |
78a: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5 | |
78f: c4 c1 39 72 e0 06 vpsrad $6, %xmm8, %xmm8 | |
795: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5 | |
79a: c4 42 39 2b c0 vpackusdw %xmm8, %xmm8, %xmm8 | |
79f: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5 | |
7a4: c4 62 39 3a c7 vpminuw %xmm7, %xmm8, %xmm8 | |
7a9: c5 f9 d6 2f vmovq %xmm5, (%rdi) | |
7ad: c5 79 d6 04 37 vmovq %xmm8, (%rdi,%rsi) | |
7b2: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
7b6: 41 83 e9 02 subl $2, %r9d | |
7ba: 7f 8d jg -115 <_dav1d_put_8tap_16bpc_avx2.v_w4l> | |
7bc: 5b popq %rbx | |
7bd: c3 retq | |
00000000000007be _dav1d_put_8tap_16bpc_avx2.v_w48: | |
7be: 48 29 da subq %rbx, %rdx | |
7c1: c5 fa 7e 02 vmovq (%rdx), %xmm0 | |
7c5: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1 | |
7ca: c5 fa 7e 14 4a vmovq (%rdx,%rcx,2), %xmm2 | |
7cf: 48 01 da addq %rbx, %rdx | |
7d2: c5 fa 7e 1a vmovq (%rdx), %xmm3 | |
7d6: c5 fa 7e 24 0a vmovq (%rdx,%rcx), %xmm4 | |
7db: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
7df: c5 7a 7e 22 vmovq (%rdx), %xmm12 | |
7e3: c5 7a 7e 2c 0a vmovq (%rdx,%rcx), %xmm13 | |
7e8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
7ec: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
7f0: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
7f4: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
7f8: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
7fc: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4 | |
801: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12 | |
0000000000000806 _dav1d_put_8tap_16bpc_avx2.v_w48l: | |
806: 48 29 ca subq %rcx, %rdx | |
809: c5 7a 7e 2a vmovq (%rdx), %xmm13 | |
80d: c5 7a 7e 34 0a vmovq (%rdx,%rcx), %xmm14 | |
812: c5 7a 7e 3c 4a vmovq (%rdx,%rcx,2), %xmm15 | |
817: 48 01 da addq %rbx, %rdx | |
81a: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13 | |
81f: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14 | |
824: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0 | |
828: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5 | |
82c: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
830: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
834: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1 | |
838: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15 | |
83c: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15 | |
840: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
844: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2 | |
848: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3 | |
84d: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5 | |
851: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15 | |
855: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
859: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3 | |
85e: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4 | |
863: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12 | |
868: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5 | |
86c: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15 | |
871: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4 | |
876: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12 | |
87b: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
87f: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15 | |
883: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5 | |
888: c4 c1 01 72 e7 06 vpsrad $6, %xmm15, %xmm15 | |
88e: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5 | |
893: c4 42 01 2b ff vpackusdw %xmm15, %xmm15, %xmm15 | |
898: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5 | |
89d: c4 62 01 3a ff vpminuw %xmm7, %xmm15, %xmm15 | |
8a2: c5 f9 d6 2f vmovq %xmm5, (%rdi) | |
8a6: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi) | |
8ab: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
8af: 41 83 e9 02 subl $2, %r9d | |
8b3: 0f 8f 4d ff ff ff jg -179 <_dav1d_put_8tap_16bpc_avx2.v_w48l> | |
8b9: 5b popq %rbx | |
8ba: c3 retq | |
00000000000008bb _dav1d_put_8tap_16bpc_avx2.v_w8: | |
8bb: 45 89 c8 movl %r9d, %r8d | |
8be: 49 89 fa movq %rdi, %r10 | |
8c1: 41 83 f9 04 cmpl $4, %r9d | |
8c5: 0f 8f df 00 00 00 jg 223 <_dav1d_put_8tap_16bpc_avx2.v_w88> | |
8cb: 48 29 ca subq %rcx, %rdx | |
8ce: 49 89 d3 movq %rdx, %r11 | |
00000000000008d1 _dav1d_put_8tap_16bpc_avx2.v_w8l: | |
8d1: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
8d5: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
8da: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
8df: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0 | |
8e5: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1 | |
8eb: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2 | |
8f1: 48 01 da addq %rbx, %rdx | |
8f4: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
8f8: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
00000000000008fc _dav1d_put_8tap_16bpc_avx2.v_w8c: | |
8fc: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
900: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
905: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3 | |
90b: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4 | |
911: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
915: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
919: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
91d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
921: c5 ad f5 ea vpmaddwd %ymm2, %ymm10, %ymm5 | |
925: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5 | |
929: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
92d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
931: c5 2d f5 c3 vpmaddwd %ymm3, %ymm10, %ymm8 | |
935: c5 3d fe c1 vpaddd %ymm1, %ymm8, %ymm8 | |
939: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
93d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
941: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5 | |
945: c5 3d fe c6 vpaddd %ymm6, %ymm8, %ymm8 | |
949: c5 d5 72 e5 06 vpsrad $6, %ymm5, %ymm5 | |
94e: c4 c1 3d 72 e0 06 vpsrad $6, %ymm8, %ymm8 | |
954: c4 e2 55 2b ed vpackusdw %ymm5, %ymm5, %ymm5 | |
959: c4 42 3d 2b c0 vpackusdw %ymm8, %ymm8, %ymm8 | |
95e: c4 e2 55 3a ef vpminuw %ymm7, %ymm5, %ymm5 | |
963: c4 62 3d 3a c7 vpminuw %ymm7, %ymm8, %ymm8 | |
968: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5 | |
96e: c4 43 fd 00 c0 d8 vpermq $216, %ymm8, %ymm8 | |
974: c5 fa 7f 2f vmovdqu %xmm5, (%rdi) | |
978: c5 7a 7f 04 37 vmovdqu %xmm8, (%rdi,%rsi) | |
97d: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
981: 41 83 e9 02 subl $2, %r9d | |
985: 0f 8f 71 ff ff ff jg -143 <_dav1d_put_8tap_16bpc_avx2.v_w8c> | |
98b: 49 83 c2 10 addq $16, %r10 | |
98f: 49 83 c3 10 addq $16, %r11 | |
993: 45 89 c1 movl %r8d, %r9d | |
996: 4c 89 d7 movq %r10, %rdi | |
999: 4c 89 da movq %r11, %rdx | |
99c: 83 e8 10 subl $16, %eax | |
99f: 0f 8f 2c ff ff ff jg -212 <_dav1d_put_8tap_16bpc_avx2.v_w8l> | |
9a5: 5b popq %rbx | |
9a6: c5 f8 77 vzeroupper | |
9a9: c3 retq | |
00000000000009aa _dav1d_put_8tap_16bpc_avx2.v_w88: | |
9aa: 48 29 da subq %rbx, %rdx | |
9ad: 49 89 d3 movq %rdx, %r11 | |
00000000000009b0 _dav1d_put_8tap_16bpc_avx2.v_w88l: | |
9b0: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
9b4: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
9b9: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
9be: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0 | |
9c4: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1 | |
9ca: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2 | |
9d0: 48 01 da addq %rbx, %rdx | |
9d3: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
9d7: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
9dc: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3 | |
9e2: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4 | |
9e8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
9ec: c5 7a 6f 22 vmovdqu (%rdx), %xmm12 | |
9f0: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13 | |
9f5: c4 43 fd 00 e4 d8 vpermq $216, %ymm12, %ymm12 | |
9fb: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13 | |
a01: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
a05: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
a09: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
a0d: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
a11: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
a15: c4 c1 5d 61 e4 vpunpcklwd %ymm12, %ymm4, %ymm4 | |
a1a: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12 | |
0000000000000a1f _dav1d_put_8tap_16bpc_avx2.v_w88c: | |
a1f: 48 29 ca subq %rcx, %rdx | |
a22: c5 7a 6f 2a vmovdqu (%rdx), %xmm13 | |
a26: c5 7a 6f 34 0a vmovdqu (%rdx,%rcx), %xmm14 | |
a2b: c5 7a 6f 3c 4a vmovdqu (%rdx,%rcx,2), %xmm15 | |
a30: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13 | |
a36: c4 43 fd 00 f6 d8 vpermq $216, %ymm14, %ymm14 | |
a3c: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15 | |
a42: 48 01 da addq %rbx, %rdx | |
a45: c4 41 15 61 ee vpunpcklwd %ymm14, %ymm13, %ymm13 | |
a4a: c4 41 0d 61 f7 vpunpcklwd %ymm15, %ymm14, %ymm14 | |
a4f: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0 | |
a53: c5 b5 f5 ea vpmaddwd %ymm2, %ymm9, %ymm5 | |
a57: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5 | |
a5b: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
a5f: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1 | |
a63: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15 | |
a67: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15 | |
a6b: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
a6f: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2 | |
a73: c4 c1 1d f5 da vpmaddwd %ymm10, %ymm12, %ymm3 | |
a78: c5 d5 fe ea vpaddd %ymm2, %ymm5, %ymm5 | |
a7c: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15 | |
a80: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
a84: c4 c1 7d 6f dc vmovdqa %ymm12, %ymm3 | |
a89: c4 c1 15 f5 e3 vpmaddwd %ymm11, %ymm13, %ymm4 | |
a8e: c4 41 0d f5 e3 vpmaddwd %ymm11, %ymm14, %ymm12 | |
a93: c5 d5 fe ec vpaddd %ymm4, %ymm5, %ymm5 | |
a97: c4 41 05 fe fc vpaddd %ymm12, %ymm15, %ymm15 | |
a9c: c4 c1 7d 6f e5 vmovdqa %ymm13, %ymm4 | |
aa1: c4 41 7d 6f e6 vmovdqa %ymm14, %ymm12 | |
aa6: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5 | |
aaa: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15 | |
aae: c5 d5 72 e5 06 vpsrad $6, %ymm5, %ymm5 | |
ab3: c4 c1 05 72 e7 06 vpsrad $6, %ymm15, %ymm15 | |
ab9: c4 e2 55 2b ed vpackusdw %ymm5, %ymm5, %ymm5 | |
abe: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15 | |
ac3: c4 e2 55 3a ef vpminuw %ymm7, %ymm5, %ymm5 | |
ac8: c4 62 05 3a ff vpminuw %ymm7, %ymm15, %ymm15 | |
acd: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5 | |
ad3: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15 | |
ad9: c5 fa 7f 2f vmovdqu %xmm5, (%rdi) | |
add: c5 7a 7f 3c 37 vmovdqu %xmm15, (%rdi,%rsi) | |
ae2: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
ae6: 41 83 e9 02 subl $2, %r9d | |
aea: 0f 8f 2f ff ff ff jg -209 <_dav1d_put_8tap_16bpc_avx2.v_w88c> | |
af0: 49 83 c2 10 addq $16, %r10 | |
af4: 49 83 c3 10 addq $16, %r11 | |
af8: 45 89 c1 movl %r8d, %r9d | |
afb: 4c 89 d7 movq %r10, %rdi | |
afe: 4c 89 da movq %r11, %rdx | |
b01: 83 e8 10 subl $16, %eax | |
b04: 0f 8f a6 fe ff ff jg -346 <_dav1d_put_8tap_16bpc_avx2.v_w88l> | |
b0a: 5b popq %rbx | |
b0b: c5 f8 77 vzeroupper | |
b0e: c3 retq | |
0000000000000b0f _dav1d_put_8tap_16bpc_avx2.v_jmp_tbl: | |
b0f: 7c fa jl -6 <_dav1d_put_8tap_16bpc_avx2.v_w88c+0xec> | |
b11: ff ff <unknown> | |
b13: 14 fc adcb $-4, %al | |
b15: ff ff <unknown> | |
b17: ac lodsb (%rsi), %al | |
b18: fd std | |
b19: ff ff <unknown> | |
b1b: ac lodsb (%rsi), %al | |
b1c: fd std | |
b1d: ff ff <unknown> | |
b1f: ac lodsb (%rsi), %al | |
b20: fd std | |
b21: ff ff <unknown> | |
b23: ac lodsb (%rsi), %al | |
b24: fd std | |
b25: ff ff <unknown> | |
b27: ac lodsb (%rsi), %al | |
b28: fd std | |
b29: ff ff <unknown> | |
0000000000000b2b _dav1d_put_8tap_16bpc_avx2.put_8tap_hv_16bpc: | |
b2b: 41 83 f9 04 cmpl $4, %r9d | |
b2f: 7e 04 jle 4 <_dav1d_put_8tap_16bpc_avx2.hv_use4tap> | |
b31: 41 c1 ea 07 shrl $7, %r10d | |
0000000000000b35 _dav1d_put_8tap_16bpc_avx2.hv_use4tap: | |
b35: 41 83 e2 7f andl $127, %r10d | |
b39: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d | |
b40: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
b49: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13 | |
b51: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14 | |
b5a: c5 7a 7e 3d 00 00 00 00 vmovq (%rip), %xmm15 | |
b62: 41 83 fb 0c cmpl $12, %r11d | |
b66: 75 22 jne 34 <_dav1d_put_8tap_16bpc_avx2.hv_bits10> | |
b68: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
b71: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13 | |
b79: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14 | |
b82: c5 7a 7e 3d 00 00 00 00 vmovq (%rip), %xmm15 | |
0000000000000b8a _dav1d_put_8tap_16bpc_avx2.hv_bits10: | |
b8a: 4c 8d 1d 90 13 00 00 leaq 5008(%rip), %r11 | |
b91: 4b 8d 9c d3 70 ec ff ff leaq -5008(%r11,%r10,8), %rbx | |
b99: c4 62 79 79 03 vpbroadcastw (%rbx), %xmm8 | |
b9e: c4 62 79 79 4b 02 vpbroadcastw 2(%rbx), %xmm9 | |
ba4: c4 62 79 79 53 04 vpbroadcastw 4(%rbx), %xmm10 | |
baa: c4 62 79 79 5b 06 vpbroadcastw 6(%rbx), %xmm11 | |
bb0: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8 | |
bb5: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
bba: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
bbf: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11 | |
bc4: 4c 89 c3 movq %r8, %rbx | |
bc7: f3 45 0f bc c0 tzcntl %r8d, %r8d | |
bcc: 41 83 e8 01 subl $1, %r8d | |
bd0: 4f 63 04 83 movslq (%r11,%r8,4), %r8 | |
bd4: 4d 01 d8 addq %r11, %r8 | |
bd7: 41 ff e0 jmpq *%r8 | |
0000000000000bda _dav1d_put_8tap_16bpc_avx2.hv_w2: | |
bda: 41 83 f9 04 cmpl $4, %r9d | |
bde: 0f 8f 3c 01 00 00 jg 316 <_dav1d_put_8tap_16bpc_avx2.hv_w28> | |
be4: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
be8: c5 79 6f 05 00 00 00 00 vmovdqa (%rip), %xmm8 | |
bf0: c4 c2 79 58 ac c3 d3 ec ff ff vpbroadcastd -4909(%r11,%rax,8), %xmm5 | |
bfa: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5 | |
bff: 48 83 ea 02 subq $2, %rdx | |
c03: 48 29 ca subq %rcx, %rdx | |
c06: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
c0a: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
c0f: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
c14: 48 01 da addq %rbx, %rdx | |
c17: c4 c2 79 00 c0 vpshufb %xmm8, %xmm0, %xmm0 | |
c1c: c4 c2 71 00 c8 vpshufb %xmm8, %xmm1, %xmm1 | |
c21: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0 | |
c25: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1 | |
c29: c4 e2 79 02 c0 vphaddd %xmm0, %xmm0, %xmm0 | |
c2e: c4 e2 71 02 c9 vphaddd %xmm1, %xmm1, %xmm1 | |
c33: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0 | |
c37: c5 f1 fe ce vpaddd %xmm6, %xmm1, %xmm1 | |
c3b: c4 c1 79 e2 c5 vpsrad %xmm13, %xmm0, %xmm0 | |
c40: c4 c1 71 e2 cd vpsrad %xmm13, %xmm1, %xmm1 | |
c45: c5 f9 6b c0 vpackssdw %xmm0, %xmm0, %xmm0 | |
c49: c5 f1 6b c9 vpackssdw %xmm1, %xmm1, %xmm1 | |
c4d: c4 c2 69 00 d0 vpshufb %xmm8, %xmm2, %xmm2 | |
c52: c5 e9 f5 d5 vpmaddwd %xmm5, %xmm2, %xmm2 | |
c56: c4 e2 69 02 d2 vphaddd %xmm2, %xmm2, %xmm2 | |
c5b: c5 e9 fe d6 vpaddd %xmm6, %xmm2, %xmm2 | |
c5f: c4 c1 69 e2 d5 vpsrad %xmm13, %xmm2, %xmm2 | |
c64: c5 e9 6b d2 vpackssdw %xmm2, %xmm2, %xmm2 | |
c68: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
c6c: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
0000000000000c70 _dav1d_put_8tap_16bpc_avx2.hv_w2l: | |
c70: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
c74: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
c79: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
c7d: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3 | |
c82: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4 | |
c87: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3 | |
c8b: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4 | |
c8f: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3 | |
c94: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4 | |
c99: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3 | |
c9d: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4 | |
ca1: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3 | |
ca6: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4 | |
cab: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3 | |
caf: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4 | |
cb3: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
cb7: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
cbb: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0 | |
cbf: c5 29 f5 da vpmaddwd %xmm2, %xmm10, %xmm11 | |
cc3: c5 21 fe d8 vpaddd %xmm0, %xmm11, %xmm11 | |
cc7: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
ccb: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1 | |
ccf: c5 29 f5 e3 vpmaddwd %xmm3, %xmm10, %xmm12 | |
cd3: c5 19 fe e1 vpaddd %xmm1, %xmm12, %xmm12 | |
cd7: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
cdb: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
cdf: c4 41 21 fe de vpaddd %xmm14, %xmm11, %xmm11 | |
ce4: c4 41 19 fe e6 vpaddd %xmm14, %xmm12, %xmm12 | |
ce9: c4 41 21 e2 df vpsrad %xmm15, %xmm11, %xmm11 | |
cee: c4 41 19 e2 e7 vpsrad %xmm15, %xmm12, %xmm12 | |
cf3: c4 42 21 2b db vpackusdw %xmm11, %xmm11, %xmm11 | |
cf8: c4 42 19 2b e4 vpackusdw %xmm12, %xmm12, %xmm12 | |
cfd: c4 62 21 3a df vpminuw %xmm7, %xmm11, %xmm11 | |
d02: c4 62 19 3a e7 vpminuw %xmm7, %xmm12, %xmm12 | |
d07: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
d0b: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi) | |
d10: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
d14: 41 83 e9 02 subl $2, %r9d | |
d18: 0f 8f 52 ff ff ff jg -174 <_dav1d_put_8tap_16bpc_avx2.hv_w2l> | |
d1e: 5b popq %rbx | |
d1f: c3 retq | |
0000000000000d20 _dav1d_put_8tap_16bpc_avx2.hv_w28: | |
d20: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
d24: c5 79 6f 05 00 00 00 00 vmovdqa (%rip), %xmm8 | |
d2c: c4 c2 79 58 ac c3 0f ee ff ff vpbroadcastd -4593(%r11,%rax,8), %xmm5 | |
d36: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5 | |
d3b: 4f 8d 94 d3 1a ee ff ff leaq -4582(%r11,%r10,8), %r10 | |
d43: c4 42 79 58 0a vpbroadcastd (%r10), %xmm9 | |
d48: c4 42 79 58 52 04 vpbroadcastd 4(%r10), %xmm10 | |
d4e: c4 42 79 20 c9 vpmovsxbw %xmm9, %xmm9 | |
d53: c4 42 79 20 d2 vpmovsxbw %xmm10, %xmm10 | |
d58: 48 83 ea 02 subq $2, %rdx | |
d5c: 48 29 da subq %rbx, %rdx | |
d5f: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
d63: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
d68: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
d6c: c4 c2 79 00 c0 vpshufb %xmm8, %xmm0, %xmm0 | |
d71: c4 c2 71 00 c8 vpshufb %xmm8, %xmm1, %xmm1 | |
d76: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0 | |
d7a: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1 | |
d7e: c4 e2 79 02 c0 vphaddd %xmm0, %xmm0, %xmm0 | |
d83: c4 e2 71 02 c9 vphaddd %xmm1, %xmm1, %xmm1 | |
d88: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0 | |
d8c: c5 f1 fe ce vpaddd %xmm6, %xmm1, %xmm1 | |
d90: c4 c1 79 e2 c5 vpsrad %xmm13, %xmm0, %xmm0 | |
d95: c4 c1 71 e2 cd vpsrad %xmm13, %xmm1, %xmm1 | |
d9a: c5 f9 6b c0 vpackssdw %xmm0, %xmm0, %xmm0 | |
d9e: c5 f1 6b c9 vpackssdw %xmm1, %xmm1, %xmm1 | |
da2: c5 fa 6f 22 vmovdqu (%rdx), %xmm4 | |
da6: c5 fa 6f 1c 0a vmovdqu (%rdx,%rcx), %xmm3 | |
dab: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
db0: 48 01 da addq %rbx, %rdx | |
db3: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4 | |
db8: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3 | |
dbd: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4 | |
dc1: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3 | |
dc5: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4 | |
dca: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3 | |
dcf: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4 | |
dd3: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3 | |
dd7: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4 | |
ddc: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3 | |
de1: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4 | |
de5: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3 | |
de9: c4 c2 69 00 d0 vpshufb %xmm8, %xmm2, %xmm2 | |
dee: c5 e9 f5 d5 vpmaddwd %xmm5, %xmm2, %xmm2 | |
df2: c4 e2 69 02 d2 vphaddd %xmm2, %xmm2, %xmm2 | |
df7: c5 e9 fe d6 vpaddd %xmm6, %xmm2, %xmm2 | |
dfb: c4 c1 69 e2 d5 vpsrad %xmm13, %xmm2, %xmm2 | |
e00: c5 e9 6b d2 vpackssdw %xmm2, %xmm2, %xmm2 | |
e04: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
e08: c5 f1 61 cc vpunpcklwd %xmm4, %xmm1, %xmm1 | |
e0c: c5 d9 61 e3 vpunpcklwd %xmm3, %xmm4, %xmm4 | |
e10: c5 e1 61 da vpunpcklwd %xmm2, %xmm3, %xmm3 | |
e14: c5 f9 62 c4 vpunpckldq %xmm4, %xmm0, %xmm0 | |
e18: c5 f1 62 cb vpunpckldq %xmm3, %xmm1, %xmm1 | |
e1c: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
e20: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
e25: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
e29: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3 | |
e2e: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4 | |
e33: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3 | |
e37: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4 | |
e3b: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3 | |
e40: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4 | |
e45: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3 | |
e49: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4 | |
e4d: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3 | |
e52: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4 | |
e57: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3 | |
e5b: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4 | |
e5f: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
e63: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
0000000000000e67 _dav1d_put_8tap_16bpc_avx2.hv_w28l: | |
e67: c5 7a 6f 1a vmovdqu (%rdx), %xmm11 | |
e6b: c5 7a 6f 24 0a vmovdqu (%rdx,%rcx), %xmm12 | |
e70: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
e74: c4 42 21 00 d8 vpshufb %xmm8, %xmm11, %xmm11 | |
e79: c4 42 19 00 e0 vpshufb %xmm8, %xmm12, %xmm12 | |
e7e: c5 21 f5 dd vpmaddwd %xmm5, %xmm11, %xmm11 | |
e82: c5 19 f5 e5 vpmaddwd %xmm5, %xmm12, %xmm12 | |
e86: c4 42 21 02 db vphaddd %xmm11, %xmm11, %xmm11 | |
e8b: c4 42 19 02 e4 vphaddd %xmm12, %xmm12, %xmm12 | |
e90: c5 21 fe de vpaddd %xmm6, %xmm11, %xmm11 | |
e94: c5 19 fe e6 vpaddd %xmm6, %xmm12, %xmm12 | |
e98: c4 41 21 e2 dd vpsrad %xmm13, %xmm11, %xmm11 | |
e9d: c4 41 19 e2 e5 vpsrad %xmm13, %xmm12, %xmm12 | |
ea2: c4 41 21 6b db vpackssdw %xmm11, %xmm11, %xmm11 | |
ea7: c4 41 19 6b e4 vpackssdw %xmm12, %xmm12, %xmm12 | |
eac: c4 c1 59 61 e3 vpunpcklwd %xmm11, %xmm4, %xmm4 | |
eb1: c4 41 21 61 dc vpunpcklwd %xmm12, %xmm11, %xmm11 | |
eb6: c5 e9 62 d4 vpunpckldq %xmm4, %xmm2, %xmm2 | |
eba: c4 c1 61 62 db vpunpckldq %xmm11, %xmm3, %xmm3 | |
ebf: c5 31 f5 d8 vpmaddwd %xmm0, %xmm9, %xmm11 | |
ec3: c5 a9 f5 e2 vpmaddwd %xmm2, %xmm10, %xmm4 | |
ec7: c5 31 f5 e1 vpmaddwd %xmm1, %xmm9, %xmm12 | |
ecb: c5 21 fe dc vpaddd %xmm4, %xmm11, %xmm11 | |
ecf: c5 a9 f5 e3 vpmaddwd %xmm3, %xmm10, %xmm4 | |
ed3: c5 19 fe e4 vpaddd %xmm4, %xmm12, %xmm12 | |
ed7: c4 42 21 02 db vphaddd %xmm11, %xmm11, %xmm11 | |
edc: c4 42 19 02 e4 vphaddd %xmm12, %xmm12, %xmm12 | |
ee1: c4 41 21 fe de vpaddd %xmm14, %xmm11, %xmm11 | |
ee6: c4 41 19 fe e6 vpaddd %xmm14, %xmm12, %xmm12 | |
eeb: c4 41 21 e2 df vpsrad %xmm15, %xmm11, %xmm11 | |
ef0: c4 41 19 e2 e7 vpsrad %xmm15, %xmm12, %xmm12 | |
ef5: c4 42 21 2b db vpackusdw %xmm11, %xmm11, %xmm11 | |
efa: c4 42 19 2b e4 vpackusdw %xmm12, %xmm12, %xmm12 | |
eff: c4 62 21 3a df vpminuw %xmm7, %xmm11, %xmm11 | |
f04: c4 62 19 3a e7 vpminuw %xmm7, %xmm12, %xmm12 | |
f09: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
f0d: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi) | |
f12: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
f16: c5 f9 70 c0 8d vpshufd $141, %xmm0, %xmm0 | |
f1b: c5 f9 70 c9 8d vpshufd $141, %xmm1, %xmm1 | |
f20: c5 79 70 da d8 vpshufd $216, %xmm2, %xmm11 | |
f25: c5 79 70 e3 d8 vpshufd $216, %xmm3, %xmm12 | |
f2a: c5 f9 70 d2 8d vpshufd $141, %xmm2, %xmm2 | |
f2f: c5 f9 70 db 8d vpshufd $141, %xmm3, %xmm3 | |
f34: c5 f9 6f e3 vmovdqa %xmm3, %xmm4 | |
f38: c5 d9 72 e4 10 vpsrad $16, %xmm4, %xmm4 | |
f3d: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4 | |
f41: c4 c1 79 62 c3 vpunpckldq %xmm11, %xmm0, %xmm0 | |
f46: c4 c1 71 62 cc vpunpckldq %xmm12, %xmm1, %xmm1 | |
f4b: 41 83 e9 02 subl $2, %r9d | |
f4f: 0f 8f 12 ff ff ff jg -238 <_dav1d_put_8tap_16bpc_avx2.hv_w28l> | |
f55: 5b popq %rbx | |
f56: c3 retq | |
0000000000000f57 _dav1d_put_8tap_16bpc_avx2.hv_w4: | |
f57: 41 83 f9 04 cmpl $4, %r9d | |
f5b: 0f 8f 5b 01 00 00 jg 347 <_dav1d_put_8tap_16bpc_avx2.hv_w48> | |
f61: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
f65: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8 | |
f6d: c4 c2 79 58 ac c3 50 f0 ff ff vpbroadcastd -4016(%r11,%rax,8), %xmm5 | |
f77: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
f7c: 48 83 ea 02 subq $2, %rdx | |
f80: 48 29 ca subq %rcx, %rdx | |
f83: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0 | |
f88: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1 | |
f8e: c4 e2 7d 5a 14 4a vbroadcasti128 (%rdx,%rcx,2), %ymm2 | |
f94: 48 01 da addq %rbx, %rdx | |
f97: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0 | |
f9c: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1 | |
fa1: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
fa5: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
fa9: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0 | |
fae: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1 | |
fb3: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
fb7: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
fbb: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
fc0: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
fc5: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
fc9: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
fcd: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2 | |
fd2: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
fd6: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2 | |
fdb: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
fdf: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
fe4: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
fe8: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
fec: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
0000000000000ff0 _dav1d_put_8tap_16bpc_avx2.hv_w4l: | |
ff0: c4 e2 7d 5a 1a vbroadcasti128 (%rdx), %ymm3 | |
ff5: c4 e2 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm4 | |
ffb: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
fff: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
1004: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
1009: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
100d: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
1011: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
1016: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
101b: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
101f: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
1023: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
1028: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
102d: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
1031: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
1035: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
1039: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
103d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
1041: c5 2d f5 da vpmaddwd %ymm2, %ymm10, %ymm11 | |
1045: c5 25 fe d8 vpaddd %ymm0, %ymm11, %ymm11 | |
1049: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
104d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
1051: c5 2d f5 e3 vpmaddwd %ymm3, %ymm10, %ymm12 | |
1055: c5 1d fe e1 vpaddd %ymm1, %ymm12, %ymm12 | |
1059: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
105d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
1061: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
1066: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12 | |
106b: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11 | |
1070: c4 41 1d e2 e7 vpsrad %xmm15, %ymm12, %ymm12 | |
1075: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11 | |
107a: c4 42 1d 2b e4 vpackusdw %ymm12, %ymm12, %ymm12 | |
107f: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11 | |
1084: c4 62 1d 3a e7 vpminuw %ymm7, %ymm12, %ymm12 | |
1089: c4 63 7d 39 db 01 vextracti128 $1, %ymm11, %xmm3 | |
108f: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4 | |
1095: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
1099: c5 f9 7e 5f 04 vmovd %xmm3, 4(%rdi) | |
109e: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi) | |
10a3: c5 f9 7e 64 37 04 vmovd %xmm4, 4(%rdi,%rsi) | |
10a9: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
10ad: 41 83 e9 02 subl $2, %r9d | |
10b1: 0f 8f 39 ff ff ff jg -199 <_dav1d_put_8tap_16bpc_avx2.hv_w4l> | |
10b7: 5b popq %rbx | |
10b8: c5 f8 77 vzeroupper | |
10bb: c3 retq | |
00000000000010bc _dav1d_put_8tap_16bpc_avx2.hv_w48: | |
10bc: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
10c0: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8 | |
10c8: c4 c2 79 58 ac c3 ab f1 ff ff vpbroadcastd -3669(%r11,%rax,8), %xmm5 | |
10d2: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
10d7: 4f 8d 94 d3 b6 f1 ff ff leaq -3658(%r11,%r10,8), %r10 | |
10df: c4 42 79 58 0a vpbroadcastd (%r10), %xmm9 | |
10e4: c4 42 79 58 52 04 vpbroadcastd 4(%r10), %xmm10 | |
10ea: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
10ef: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
10f4: 48 83 ea 02 subq $2, %rdx | |
10f8: 48 29 da subq %rbx, %rdx | |
10fb: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0 | |
1100: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1 | |
1106: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
110a: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0 | |
110f: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1 | |
1114: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
1118: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
111c: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0 | |
1121: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1 | |
1126: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
112a: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
112e: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
1133: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
1138: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
113c: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
1140: c4 e2 7d 5a 22 vbroadcasti128 (%rdx), %ymm4 | |
1145: c4 e2 7d 5a 1c 0a vbroadcasti128 (%rdx,%rcx), %ymm3 | |
114b: c4 e2 7d 5a 14 4a vbroadcasti128 (%rdx,%rcx,2), %ymm2 | |
1151: 48 01 da addq %rbx, %rdx | |
1154: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
1159: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
115e: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
1162: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
1166: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
116b: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
1170: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
1174: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
1178: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
117d: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
1182: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
1186: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
118a: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2 | |
118f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
1193: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2 | |
1198: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
119c: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
11a1: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
11a5: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
11a9: c5 f5 61 cc vpunpcklwd %ymm4, %ymm1, %ymm1 | |
11ad: c5 dd 61 e3 vpunpcklwd %ymm3, %ymm4, %ymm4 | |
11b1: c5 e5 61 da vpunpcklwd %ymm2, %ymm3, %ymm3 | |
11b5: c5 fd 62 c4 vpunpckldq %ymm4, %ymm0, %ymm0 | |
11b9: c5 f5 62 cb vpunpckldq %ymm3, %ymm1, %ymm1 | |
11bd: c4 e2 7d 5a 1a vbroadcasti128 (%rdx), %ymm3 | |
11c2: c4 e2 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm4 | |
11c8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
11cc: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
11d1: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
11d6: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
11da: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
11de: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
11e3: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
11e8: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
11ec: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
11f0: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
11f5: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
11fa: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
11fe: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
1202: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
1206: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
000000000000120a _dav1d_put_8tap_16bpc_avx2.hv_w48l: | |
120a: c4 62 7d 5a 1a vbroadcasti128 (%rdx), %ymm11 | |
120f: c4 62 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm12 | |
1215: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1219: c4 42 25 00 d8 vpshufb %ymm8, %ymm11, %ymm11 | |
121e: c4 42 1d 00 e0 vpshufb %ymm8, %ymm12, %ymm12 | |
1223: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
1227: c5 1d f5 e5 vpmaddwd %ymm5, %ymm12, %ymm12 | |
122b: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11 | |
1230: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12 | |
1235: c5 25 fe de vpaddd %ymm6, %ymm11, %ymm11 | |
1239: c5 1d fe e6 vpaddd %ymm6, %ymm12, %ymm12 | |
123d: c4 41 25 e2 dd vpsrad %xmm13, %ymm11, %ymm11 | |
1242: c4 41 1d e2 e5 vpsrad %xmm13, %ymm12, %ymm12 | |
1247: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11 | |
124c: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
1251: c4 c1 5d 61 e3 vpunpcklwd %ymm11, %ymm4, %ymm4 | |
1256: c4 41 25 61 dc vpunpcklwd %ymm12, %ymm11, %ymm11 | |
125b: c5 ed 62 d4 vpunpckldq %ymm4, %ymm2, %ymm2 | |
125f: c4 c1 65 62 db vpunpckldq %ymm11, %ymm3, %ymm3 | |
1264: c5 35 f5 d8 vpmaddwd %ymm0, %ymm9, %ymm11 | |
1268: c5 ad f5 e2 vpmaddwd %ymm2, %ymm10, %ymm4 | |
126c: c5 35 f5 e1 vpmaddwd %ymm1, %ymm9, %ymm12 | |
1270: c5 25 fe dc vpaddd %ymm4, %ymm11, %ymm11 | |
1274: c5 ad f5 e3 vpmaddwd %ymm3, %ymm10, %ymm4 | |
1278: c5 1d fe e4 vpaddd %ymm4, %ymm12, %ymm12 | |
127c: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11 | |
1281: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12 | |
1286: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
128b: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12 | |
1290: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11 | |
1295: c4 41 1d e2 e7 vpsrad %xmm15, %ymm12, %ymm12 | |
129a: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11 | |
129f: c4 42 1d 2b e4 vpackusdw %ymm12, %ymm12, %ymm12 | |
12a4: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11 | |
12a9: c4 62 1d 3a e7 vpminuw %ymm7, %ymm12, %ymm12 | |
12ae: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4 | |
12b4: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
12b8: c5 f9 7e 67 04 vmovd %xmm4, 4(%rdi) | |
12bd: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4 | |
12c3: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi) | |
12c8: c5 f9 7e 64 37 04 vmovd %xmm4, 4(%rdi,%rsi) | |
12ce: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
12d2: c5 fd 70 c0 8d vpshufd $141, %ymm0, %ymm0 | |
12d7: c5 fd 70 c9 8d vpshufd $141, %ymm1, %ymm1 | |
12dc: c5 7d 70 da d8 vpshufd $216, %ymm2, %ymm11 | |
12e1: c5 7d 70 e3 d8 vpshufd $216, %ymm3, %ymm12 | |
12e6: c5 fd 70 d2 8d vpshufd $141, %ymm2, %ymm2 | |
12eb: c5 fd 70 db 8d vpshufd $141, %ymm3, %ymm3 | |
12f0: c5 fd 6f e3 vmovdqa %ymm3, %ymm4 | |
12f4: c5 dd 72 e4 10 vpsrad $16, %ymm4, %ymm4 | |
12f9: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
12fd: c4 c1 7d 62 c3 vpunpckldq %ymm11, %ymm0, %ymm0 | |
1302: c4 c1 75 62 cc vpunpckldq %ymm12, %ymm1, %ymm1 | |
1307: 41 83 e9 02 subl $2, %r9d | |
130b: 0f 8f f9 fe ff ff jg -263 <_dav1d_put_8tap_16bpc_avx2.hv_w48l> | |
1311: 5b popq %rbx | |
1312: c5 f8 77 vzeroupper | |
1315: c3 retq | |
0000000000001316 _dav1d_put_8tap_16bpc_avx2.hv_w8: | |
1316: 49 89 d8 movq %rbx, %r8 | |
1319: 41 83 f9 04 cmpl $4, %r9d | |
131d: 0f 8f b3 02 00 00 jg 691 <_dav1d_put_8tap_16bpc_avx2.hv_w88> | |
1323: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
1327: c4 c2 79 59 ac c3 08 f4 ff ff vpbroadcastq -3064(%r11,%rax,8), %xmm5 | |
1331: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
1336: 48 83 ea 06 subq $6, %rdx | |
133a: 48 29 ca subq %rcx, %rdx | |
133d: 44 89 c8 movl %r9d, %eax | |
1340: 49 89 fa movq %rdi, %r10 | |
1343: 49 89 d3 movq %rdx, %r11 | |
0000000000001346 _dav1d_put_8tap_16bpc_avx2.hv_w8l: | |
1346: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
134a: c5 fa 6f 52 02 vmovdqu 2(%rdx), %xmm2 | |
134f: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0 | |
1356: c4 e3 6d 38 52 0a 01 vinserti128 $1, 10(%rdx), %ymm2, %ymm2 | |
135d: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
1361: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
1365: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0 | |
136a: c5 fa 6f 4a 04 vmovdqu 4(%rdx), %xmm1 | |
136f: c5 fa 6f 52 06 vmovdqu 6(%rdx), %xmm2 | |
1374: c4 e3 75 38 4a 0c 01 vinserti128 $1, 12(%rdx), %ymm1, %ymm1 | |
137b: c4 e3 6d 38 52 0e 01 vinserti128 $1, 14(%rdx), %ymm2, %ymm2 | |
1382: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
1386: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
138a: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
138f: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
1394: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
1398: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
139d: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
13a1: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
13a6: c5 fa 6f 5c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm3 | |
13ac: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1 | |
13b4: c4 e3 65 38 5c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm3, %ymm3 | |
13bc: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
13c0: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
13c4: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
13c9: c5 fa 6f 54 0a 04 vmovdqu 4(%rdx,%rcx), %xmm2 | |
13cf: c5 fa 6f 5c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm3 | |
13d5: c4 e3 6d 38 54 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm2, %ymm2 | |
13dd: c4 e3 65 38 5c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm3, %ymm3 | |
13e5: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
13e9: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
13ed: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2 | |
13f2: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
13f7: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
13fb: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
1400: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
1404: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
1409: c5 fa 6f 64 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm4 | |
140f: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2 | |
1417: c4 e3 5d 38 64 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm4, %ymm4 | |
141f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
1423: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
1427: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2 | |
142c: c5 fa 6f 5c 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm3 | |
1432: c5 fa 6f 64 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm4 | |
1438: c4 e3 65 38 5c 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm3, %ymm3 | |
1440: c4 e3 5d 38 64 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm4, %ymm4 | |
1448: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
144c: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
1450: c4 e2 65 02 dc vphaddd %ymm4, %ymm3, %ymm3 | |
1455: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2 | |
145a: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
145e: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
1463: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
1467: 48 01 da addq %rbx, %rdx | |
146a: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
146e: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
0000000000001472 _dav1d_put_8tap_16bpc_avx2.hv_w8c: | |
1472: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
1476: c5 7a 6f 5a 02 vmovdqu 2(%rdx), %xmm11 | |
147b: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3 | |
1482: c4 63 25 38 5a 0a 01 vinserti128 $1, 10(%rdx), %ymm11, %ymm11 | |
1489: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
148d: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
1491: c4 c2 65 02 db vphaddd %ymm11, %ymm3, %ymm3 | |
1496: c5 7a 6f 42 04 vmovdqu 4(%rdx), %xmm8 | |
149b: c5 7a 6f 5a 06 vmovdqu 6(%rdx), %xmm11 | |
14a0: c4 63 3d 38 42 0c 01 vinserti128 $1, 12(%rdx), %ymm8, %ymm8 | |
14a7: c4 63 25 38 5a 0e 01 vinserti128 $1, 14(%rdx), %ymm11, %ymm11 | |
14ae: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8 | |
14b2: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
14b6: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8 | |
14bb: c4 c2 65 02 d8 vphaddd %ymm8, %ymm3, %ymm3 | |
14c0: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
14c4: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
14c9: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
14cd: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
14d2: c5 7a 6f 5c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm11 | |
14d8: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4 | |
14e0: c4 63 25 38 5c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm11, %ymm11 | |
14e8: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
14ec: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
14f0: c4 c2 5d 02 e3 vphaddd %ymm11, %ymm4, %ymm4 | |
14f5: c5 7a 6f 44 0a 04 vmovdqu 4(%rdx,%rcx), %xmm8 | |
14fb: c5 7a 6f 5c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm11 | |
1501: c4 63 3d 38 44 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm8, %ymm8 | |
1509: c4 63 25 38 5c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm11, %ymm11 | |
1511: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8 | |
1515: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
1519: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8 | |
151e: c4 c2 5d 02 e0 vphaddd %ymm8, %ymm4, %ymm4 | |
1523: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
1527: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
152c: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
1530: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1534: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
1538: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
153c: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
1540: c5 2d f5 c2 vpmaddwd %ymm2, %ymm10, %ymm8 | |
1544: c5 3d fe c0 vpaddd %ymm0, %ymm8, %ymm8 | |
1548: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
154c: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
1550: c5 2d f5 db vpmaddwd %ymm3, %ymm10, %ymm11 | |
1554: c5 25 fe d9 vpaddd %ymm1, %ymm11, %ymm11 | |
1558: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
155c: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
1560: c4 41 3d fe c6 vpaddd %ymm14, %ymm8, %ymm8 | |
1565: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
156a: c4 41 3d e2 c7 vpsrad %xmm15, %ymm8, %ymm8 | |
156f: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11 | |
1574: c4 42 3d 2b c0 vpackusdw %ymm8, %ymm8, %ymm8 | |
1579: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11 | |
157e: c4 62 3d 3a c7 vpminuw %ymm7, %ymm8, %ymm8 | |
1583: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11 | |
1588: c4 63 7d 39 c3 01 vextracti128 $1, %ymm8, %xmm3 | |
158e: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4 | |
1594: c5 79 d6 07 vmovq %xmm8, (%rdi) | |
1598: c5 f9 d6 5f 08 vmovq %xmm3, 8(%rdi) | |
159d: c5 79 d6 1c 37 vmovq %xmm11, (%rdi,%rsi) | |
15a2: c5 f9 d6 64 37 08 vmovq %xmm4, 8(%rdi,%rsi) | |
15a8: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
15ac: 41 83 e9 02 subl $2, %r9d | |
15b0: 0f 8f bc fe ff ff jg -324 <_dav1d_put_8tap_16bpc_avx2.hv_w8c> | |
15b6: 49 83 c2 10 addq $16, %r10 | |
15ba: 49 83 c3 10 addq $16, %r11 | |
15be: 41 89 c1 movl %eax, %r9d | |
15c1: 4c 89 d7 movq %r10, %rdi | |
15c4: 4c 89 da movq %r11, %rdx | |
15c7: 41 83 e8 08 subl $8, %r8d | |
15cb: 0f 8f 75 fd ff ff jg -651 <_dav1d_put_8tap_16bpc_avx2.hv_w8l> | |
15d1: 5b popq %rbx | |
15d2: c5 f8 77 vzeroupper | |
15d5: c3 retq | |
00000000000015d6 _dav1d_put_8tap_16bpc_avx2.hv_w88: | |
15d6: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx | |
15da: c4 c2 79 59 bc c3 bb f6 ff ff vpbroadcastq -2373(%r11,%rax,8), %xmm7 | |
15e4: c4 e2 7d 20 ff vpmovsxbw %xmm7, %ymm7 | |
15e9: 48 83 ea 06 subq $6, %rdx | |
15ed: 48 29 da subq %rbx, %rdx | |
15f0: 44 89 c8 movl %r9d, %eax | |
15f3: 49 89 fa movq %rdi, %r10 | |
15f6: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d | |
15fd: 41 83 fb 0c cmpl $12, %r11d | |
1601: 0f 84 8d 04 00 00 je 1165 <_dav1d_put_8tap_16bpc_avx2.hv_w88_12bit> | |
1607: 49 89 d3 movq %rdx, %r11 | |
000000000000160a _dav1d_put_8tap_16bpc_avx2.hv_w88l_10bit: | |
160a: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1613: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
1617: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
161c: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0 | |
1623: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
162a: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0 | |
162e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1632: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0 | |
1637: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
163c: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1641: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
1648: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
164f: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1653: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1657: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
165c: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0 | |
1661: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0 | |
1665: c5 fd 72 e0 02 vpsrad $2, %ymm0, %ymm0 | |
166a: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
166e: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
1673: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
1679: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1 | |
1681: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
1689: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1 | |
168d: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1691: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1 | |
1696: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
169c: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
16a2: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
16aa: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
16b2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
16b6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
16ba: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
16bf: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1 | |
16c4: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1 | |
16c8: c5 f5 72 e1 02 vpsrad $2, %ymm1, %ymm1 | |
16cd: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
16d1: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
16d6: c5 7a 6f 6c 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm13 | |
16dc: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2 | |
16e4: c4 63 15 38 6c 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm13, %ymm13 | |
16ec: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2 | |
16f0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
16f4: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2 | |
16f9: c5 7a 6f 64 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm12 | |
16ff: c5 7a 6f 6c 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm13 | |
1705: c4 63 1d 38 64 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm12, %ymm12 | |
170d: c4 63 15 38 6c 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm13, %ymm13 | |
1715: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1719: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
171d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1722: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2 | |
1727: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2 | |
172b: c5 ed 72 e2 02 vpsrad $2, %ymm2, %ymm2 | |
1730: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
1734: 48 01 da addq %rbx, %rdx | |
1737: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
173b: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
1740: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3 | |
1747: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
174e: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3 | |
1752: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1756: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3 | |
175b: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
1760: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1765: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
176c: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
1773: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1777: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
177b: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1780: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3 | |
1785: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3 | |
1789: c5 e5 72 e3 02 vpsrad $2, %ymm3, %ymm3 | |
178e: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
1792: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
1797: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
179d: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4 | |
17a5: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
17ad: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4 | |
17b1: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
17b5: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4 | |
17ba: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
17c0: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
17c6: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
17ce: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
17d6: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
17da: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
17de: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
17e3: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4 | |
17e8: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4 | |
17ec: c5 dd 72 e4 02 vpsrad $2, %ymm4, %ymm4 | |
17f1: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
17f5: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
17f9: c5 fa 6f 2a vmovdqu (%rdx), %xmm5 | |
17fd: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
1802: c4 e3 55 38 6a 08 01 vinserti128 $1, 8(%rdx), %ymm5, %ymm5 | |
1809: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
1810: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5 | |
1814: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1818: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5 | |
181d: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
1822: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1827: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
182e: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
1835: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1839: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
183d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1842: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5 | |
1847: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5 | |
184b: c5 d5 72 e5 02 vpsrad $2, %ymm5, %ymm5 | |
1850: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5 | |
1854: c5 fa 6f 34 0a vmovdqu (%rdx,%rcx), %xmm6 | |
1859: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
185f: c4 e3 4d 38 74 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm6, %ymm6 | |
1867: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
186f: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6 | |
1873: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1877: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6 | |
187c: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
1882: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
1888: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
1890: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
1898: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
189c: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
18a0: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
18a5: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6 | |
18aa: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6 | |
18ae: c5 cd 72 e6 02 vpsrad $2, %ymm6, %ymm6 | |
18b3: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6 | |
18b7: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
18bb: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
18bf: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
18c3: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
18c7: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
18cb: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4 | |
18cf: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5 | |
00000000000018d3 _dav1d_put_8tap_16bpc_avx2.hv_w88c_10bit: | |
18d3: c5 7a 6f 22 vmovdqu (%rdx), %xmm12 | |
18d7: c5 7a 6f 7a 02 vmovdqu 2(%rdx), %xmm15 | |
18dc: c4 63 1d 38 62 08 01 vinserti128 $1, 8(%rdx), %ymm12, %ymm12 | |
18e3: c4 63 05 38 7a 0a 01 vinserti128 $1, 10(%rdx), %ymm15, %ymm15 | |
18ea: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
18ee: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
18f2: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12 | |
18f7: c5 7a 6f 72 04 vmovdqu 4(%rdx), %xmm14 | |
18fc: c5 7a 6f 7a 06 vmovdqu 6(%rdx), %xmm15 | |
1901: c4 63 0d 38 72 0c 01 vinserti128 $1, 12(%rdx), %ymm14, %ymm14 | |
1908: c4 63 05 38 7a 0e 01 vinserti128 $1, 14(%rdx), %ymm15, %ymm15 | |
190f: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
1913: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1917: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
191c: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1925: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12 | |
192a: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12 | |
192f: c4 c1 1d 72 e4 02 vpsrad $2, %ymm12, %ymm12 | |
1935: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
193a: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13 | |
193f: c5 7a 6f 7c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm15 | |
1945: c4 63 15 38 6c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm13, %ymm13 | |
194d: c4 63 05 38 7c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm15, %ymm15 | |
1955: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1959: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
195d: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13 | |
1962: c5 7a 6f 74 0a 04 vmovdqu 4(%rdx,%rcx), %xmm14 | |
1968: c5 7a 6f 7c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm15 | |
196e: c4 63 0d 38 74 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm14, %ymm14 | |
1976: c4 63 05 38 7c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm15, %ymm15 | |
197e: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
1982: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1986: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
198b: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1994: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13 | |
1999: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13 | |
199e: c4 c1 15 72 e5 02 vpsrad $2, %ymm13, %ymm13 | |
19a4: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13 | |
19a9: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
19ad: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6 | |
19b2: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12 | |
19b7: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0 | |
19bb: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14 | |
19bf: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14 | |
19c3: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
19c7: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1 | |
19cb: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15 | |
19cf: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15 | |
19d3: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
19d7: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2 | |
19db: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3 | |
19df: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14 | |
19e3: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15 | |
19e7: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
19eb: c5 fd 6f dd vmovdqa %ymm5, %ymm3 | |
19ef: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4 | |
19f3: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5 | |
19f8: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14 | |
19fc: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15 | |
1a00: c5 fd 6f e6 vmovdqa %ymm6, %ymm4 | |
1a04: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5 | |
1a09: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
1a12: c4 62 7d 79 64 24 20 vpbroadcastw 32(%rsp), %ymm12 | |
1a19: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14 | |
1a1d: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15 | |
1a21: c4 c1 0d 72 e6 0a vpsrad $10, %ymm14, %ymm14 | |
1a27: c4 c1 05 72 e7 0a vpsrad $10, %ymm15, %ymm15 | |
1a2d: c4 42 0d 2b f6 vpackusdw %ymm14, %ymm14, %ymm14 | |
1a32: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15 | |
1a37: c4 42 0d 3a f4 vpminuw %ymm12, %ymm14, %ymm14 | |
1a3c: c4 42 05 3a fc vpminuw %ymm12, %ymm15, %ymm15 | |
1a41: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6 | |
1a46: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12 | |
1a4c: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13 | |
1a52: c5 79 d6 37 vmovq %xmm14, (%rdi) | |
1a56: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi) | |
1a5b: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi) | |
1a60: c5 79 d6 6c 37 08 vmovq %xmm13, 8(%rdi,%rsi) | |
1a66: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
1a6a: 41 83 e9 02 subl $2, %r9d | |
1a6e: 0f 8f 5f fe ff ff jg -417 <_dav1d_put_8tap_16bpc_avx2.hv_w88c_10bit> | |
1a74: 49 83 c2 10 addq $16, %r10 | |
1a78: 49 83 c3 10 addq $16, %r11 | |
1a7c: 41 89 c1 movl %eax, %r9d | |
1a7f: 4c 89 d7 movq %r10, %rdi | |
1a82: 4c 89 da movq %r11, %rdx | |
1a85: 41 83 e8 08 subl $8, %r8d | |
1a89: 0f 8f 7b fb ff ff jg -1157 <_dav1d_put_8tap_16bpc_avx2.hv_w88l_10bit> | |
1a8f: 5b popq %rbx | |
1a90: c5 f8 77 vzeroupper | |
1a93: c3 retq | |
0000000000001a94 _dav1d_put_8tap_16bpc_avx2.hv_w88_12bit: | |
1a94: 49 89 d3 movq %rdx, %r11 | |
0000000000001a97 _dav1d_put_8tap_16bpc_avx2.hv_w88l_12bit: | |
1a97: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1aa0: c5 fa 6f 02 vmovdqu (%rdx), %xmm0 | |
1aa4: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
1aa9: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0 | |
1ab0: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
1ab7: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0 | |
1abb: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1abf: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0 | |
1ac4: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
1ac9: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1ace: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
1ad5: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
1adc: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1ae0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1ae4: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1ae9: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0 | |
1aee: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0 | |
1af2: c5 fd 72 e0 04 vpsrad $4, %ymm0, %ymm0 | |
1af7: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
1afb: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1 | |
1b00: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
1b06: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1 | |
1b0e: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
1b16: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1 | |
1b1a: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1b1e: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1 | |
1b23: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
1b29: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
1b2f: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
1b37: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
1b3f: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1b43: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1b47: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1b4c: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1 | |
1b51: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1 | |
1b55: c5 f5 72 e1 04 vpsrad $4, %ymm1, %ymm1 | |
1b5a: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
1b5e: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2 | |
1b63: c5 7a 6f 6c 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm13 | |
1b69: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2 | |
1b71: c4 63 15 38 6c 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm13, %ymm13 | |
1b79: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2 | |
1b7d: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1b81: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2 | |
1b86: c5 7a 6f 64 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm12 | |
1b8c: c5 7a 6f 6c 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm13 | |
1b92: c4 63 1d 38 64 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm12, %ymm12 | |
1b9a: c4 63 15 38 6c 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm13, %ymm13 | |
1ba2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1ba6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1baa: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1baf: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2 | |
1bb4: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2 | |
1bb8: c5 ed 72 e2 04 vpsrad $4, %ymm2, %ymm2 | |
1bbd: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
1bc1: 48 01 da addq %rbx, %rdx | |
1bc4: c5 fa 6f 1a vmovdqu (%rdx), %xmm3 | |
1bc8: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
1bcd: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3 | |
1bd4: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
1bdb: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3 | |
1bdf: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1be3: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3 | |
1be8: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
1bed: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1bf2: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
1bf9: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
1c00: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1c04: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1c08: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1c0d: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3 | |
1c12: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3 | |
1c16: c5 e5 72 e3 04 vpsrad $4, %ymm3, %ymm3 | |
1c1b: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
1c1f: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4 | |
1c24: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
1c2a: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4 | |
1c32: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
1c3a: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4 | |
1c3e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1c42: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4 | |
1c47: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
1c4d: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
1c53: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
1c5b: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
1c63: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1c67: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1c6b: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1c70: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4 | |
1c75: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4 | |
1c79: c5 dd 72 e4 04 vpsrad $4, %ymm4, %ymm4 | |
1c7e: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
1c82: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1c86: c5 fa 6f 2a vmovdqu (%rdx), %xmm5 | |
1c8a: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13 | |
1c8f: c4 e3 55 38 6a 08 01 vinserti128 $1, 8(%rdx), %ymm5, %ymm5 | |
1c96: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13 | |
1c9d: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5 | |
1ca1: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1ca5: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5 | |
1caa: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12 | |
1caf: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13 | |
1cb4: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12 | |
1cbb: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13 | |
1cc2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1cc6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1cca: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1ccf: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5 | |
1cd4: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5 | |
1cd8: c5 d5 72 e5 04 vpsrad $4, %ymm5, %ymm5 | |
1cdd: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5 | |
1ce1: c5 fa 6f 34 0a vmovdqu (%rdx,%rcx), %xmm6 | |
1ce6: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13 | |
1cec: c4 e3 4d 38 74 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm6, %ymm6 | |
1cf4: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13 | |
1cfc: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6 | |
1d00: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1d04: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6 | |
1d09: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12 | |
1d0f: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13 | |
1d15: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12 | |
1d1d: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13 | |
1d25: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1d29: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1d2d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
1d32: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6 | |
1d37: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6 | |
1d3b: c5 cd 72 e6 04 vpsrad $4, %ymm6, %ymm6 | |
1d40: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6 | |
1d44: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1d48: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
1d4c: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
1d50: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
1d54: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
1d58: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4 | |
1d5c: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5 | |
0000000000001d60 _dav1d_put_8tap_16bpc_avx2.hv_w88c_12bit: | |
1d60: c5 7a 6f 22 vmovdqu (%rdx), %xmm12 | |
1d64: c5 7a 6f 7a 02 vmovdqu 2(%rdx), %xmm15 | |
1d69: c4 63 1d 38 62 08 01 vinserti128 $1, 8(%rdx), %ymm12, %ymm12 | |
1d70: c4 63 05 38 7a 0a 01 vinserti128 $1, 10(%rdx), %ymm15, %ymm15 | |
1d77: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
1d7b: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1d7f: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12 | |
1d84: c5 7a 6f 72 04 vmovdqu 4(%rdx), %xmm14 | |
1d89: c5 7a 6f 7a 06 vmovdqu 6(%rdx), %xmm15 | |
1d8e: c4 63 0d 38 72 0c 01 vinserti128 $1, 12(%rdx), %ymm14, %ymm14 | |
1d95: c4 63 05 38 7a 0e 01 vinserti128 $1, 14(%rdx), %ymm15, %ymm15 | |
1d9c: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
1da0: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1da4: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
1da9: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1db2: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12 | |
1db7: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12 | |
1dbc: c4 c1 1d 72 e4 04 vpsrad $4, %ymm12, %ymm12 | |
1dc2: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
1dc7: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13 | |
1dcc: c5 7a 6f 7c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm15 | |
1dd2: c4 63 15 38 6c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm13, %ymm13 | |
1dda: c4 63 05 38 7c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm15, %ymm15 | |
1de2: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
1de6: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1dea: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13 | |
1def: c5 7a 6f 74 0a 04 vmovdqu 4(%rdx,%rcx), %xmm14 | |
1df5: c5 7a 6f 7c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm15 | |
1dfb: c4 63 0d 38 74 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm14, %ymm14 | |
1e03: c4 63 05 38 7c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm15, %ymm15 | |
1e0b: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
1e0f: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
1e13: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
1e18: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
1e21: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13 | |
1e26: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13 | |
1e2b: c4 c1 15 72 e5 04 vpsrad $4, %ymm13, %ymm13 | |
1e31: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13 | |
1e36: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx | |
1e3a: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6 | |
1e3f: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12 | |
1e44: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0 | |
1e48: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14 | |
1e4c: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14 | |
1e50: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
1e54: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1 | |
1e58: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15 | |
1e5c: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15 | |
1e60: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
1e64: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2 | |
1e68: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3 | |
1e6c: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14 | |
1e70: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15 | |
1e74: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
1e78: c5 fd 6f dd vmovdqa %ymm5, %ymm3 | |
1e7c: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4 | |
1e80: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5 | |
1e85: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14 | |
1e89: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15 | |
1e8d: c5 fd 6f e6 vmovdqa %ymm6, %ymm4 | |
1e91: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5 | |
1e96: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
1e9f: c4 62 7d 79 64 24 20 vpbroadcastw 32(%rsp), %ymm12 | |
1ea6: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14 | |
1eaa: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15 | |
1eae: c4 c1 0d 72 e6 08 vpsrad $8, %ymm14, %ymm14 | |
1eb4: c4 c1 05 72 e7 08 vpsrad $8, %ymm15, %ymm15 | |
1eba: c4 42 0d 2b f6 vpackusdw %ymm14, %ymm14, %ymm14 | |
1ebf: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15 | |
1ec4: c4 42 0d 3a f4 vpminuw %ymm12, %ymm14, %ymm14 | |
1ec9: c4 42 05 3a fc vpminuw %ymm12, %ymm15, %ymm15 | |
1ece: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6 | |
1ed3: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12 | |
1ed9: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13 | |
1edf: c5 79 d6 37 vmovq %xmm14, (%rdi) | |
1ee3: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi) | |
1ee8: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi) | |
1eed: c5 79 d6 6c 37 08 vmovq %xmm13, 8(%rdi,%rsi) | |
1ef3: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi | |
1ef7: 41 83 e9 02 subl $2, %r9d | |
1efb: 0f 8f 5f fe ff ff jg -417 <_dav1d_put_8tap_16bpc_avx2.hv_w88c_12bit> | |
1f01: 49 83 c2 10 addq $16, %r10 | |
1f05: 49 83 c3 10 addq $16, %r11 | |
1f09: 41 89 c1 movl %eax, %r9d | |
1f0c: 4c 89 d7 movq %r10, %rdi | |
1f0f: 4c 89 da movq %r11, %rdx | |
1f12: 41 83 e8 08 subl $8, %r8d | |
1f16: 0f 8f 7b fb ff ff jg -1157 <_dav1d_put_8tap_16bpc_avx2.hv_w88l_12bit> | |
1f1c: 5b popq %rbx | |
1f1d: c5 f8 77 vzeroupper | |
1f20: c3 retq | |
0000000000001f21 _dav1d_put_8tap_16bpc_avx2.hv_jmp_tbl: | |
1f21: b9 ec ff ff 36 movl $922746860, %ecx | |
1f26: f0 lock | |
1f27: ff ff <unknown> | |
1f29: f5 cmc | |
1f2a: f3 ff ff <unknown> | |
1f2d: f5 cmc | |
1f2e: f3 ff ff <unknown> | |
1f31: f5 cmc | |
1f32: f3 ff ff <unknown> | |
1f35: f5 cmc | |
1f36: f3 ff ff <unknown> | |
1f39: f5 cmc | |
1f3a: f3 ff ff <unknown> | |
1f3d: 0f 1f 00 nopl (%rax) | |
0000000000001f40 _dav1d_prep_8tap_regular_16bpc_avx2: | |
1f40: 41 ba 2d 00 00 00 movl $45, %r10d | |
1f46: 41 bb 2d 00 00 00 movl $45, %r11d | |
1f4c: e9 8f 00 00 00 jmp 143 <_dav1d_prep_8tap_16bpc_avx2> | |
1f51: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax) | |
1f59: 0f 1f 80 00 00 00 00 nopl (%rax) | |
0000000000001f60 _dav1d_prep_8tap_regular_smooth_16bpc_avx2: | |
1f60: 41 ba 2d 00 00 00 movl $45, %r10d | |
1f66: 41 bb bc 07 00 00 movl $1980, %r11d | |
1f6c: eb 72 jmp 114 <_dav1d_prep_8tap_16bpc_avx2> | |
1f6e: 66 90 nop | |
0000000000001f70 _dav1d_prep_8tap_regular_sharp_16bpc_avx2: | |
1f70: 41 ba 2d 00 00 00 movl $45, %r10d | |
1f76: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
1f7c: eb 62 jmp 98 <_dav1d_prep_8tap_16bpc_avx2> | |
1f7e: 66 90 nop | |
0000000000001f80 _dav1d_prep_8tap_smooth_16bpc_avx2: | |
1f80: 41 ba bc 07 00 00 movl $1980, %r10d | |
1f86: 41 bb bc 07 00 00 movl $1980, %r11d | |
1f8c: eb 52 jmp 82 <_dav1d_prep_8tap_16bpc_avx2> | |
1f8e: 66 90 nop | |
0000000000001f90 _dav1d_prep_8tap_smooth_regular_16bpc_avx2: | |
1f90: 41 ba bc 07 00 00 movl $1980, %r10d | |
1f96: 41 bb 2d 00 00 00 movl $45, %r11d | |
1f9c: eb 42 jmp 66 <_dav1d_prep_8tap_16bpc_avx2> | |
1f9e: 66 90 nop | |
0000000000001fa0 _dav1d_prep_8tap_smooth_sharp_16bpc_avx2: | |
1fa0: 41 ba bc 07 00 00 movl $1980, %r10d | |
1fa6: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
1fac: eb 32 jmp 50 <_dav1d_prep_8tap_16bpc_avx2> | |
1fae: 66 90 nop | |
0000000000001fb0 _dav1d_prep_8tap_sharp_16bpc_avx2: | |
1fb0: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
1fb6: 41 bb 2d 0f 00 00 movl $3885, %r11d | |
1fbc: eb 22 jmp 34 <_dav1d_prep_8tap_16bpc_avx2> | |
1fbe: 66 90 nop | |
0000000000001fc0 _dav1d_prep_8tap_sharp_regular_16bpc_avx2: | |
1fc0: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
1fc6: 41 bb 2d 00 00 00 movl $45, %r11d | |
1fcc: eb 12 jmp 18 <_dav1d_prep_8tap_16bpc_avx2> | |
1fce: 66 90 nop | |
0000000000001fd0 _dav1d_prep_8tap_sharp_smooth_16bpc_avx2: | |
1fd0: 41 ba 2d 0f 00 00 movl $3885, %r10d | |
1fd6: 41 bb bc 07 00 00 movl $1980, %r11d | |
1fdc: eb 02 jmp 2 <_dav1d_prep_8tap_16bpc_avx2> | |
1fde: 66 90 nop | |
0000000000001fe0 _dav1d_prep_8tap_16bpc_avx2: | |
1fe0: 53 pushq %rbx | |
1fe1: 69 44 24 10 81 40 00 00 imull $16513, 16(%rsp), %eax | |
1fe9: 44 01 d8 addl %r11d, %eax | |
1fec: 45 69 c9 81 40 00 00 imull $16513, %r9d, %r9d | |
1ff3: 45 01 d1 addl %r10d, %r9d | |
1ff6: 48 63 c9 movslq %ecx, %rcx | |
1ff9: 4c 8d 1c 09 leaq (%rcx,%rcx), %r11 | |
1ffd: 41 f7 c1 00 c0 1f 00 testl $2080768, %r9d | |
2004: 0f 85 f8 02 00 00 jne 760 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_h_16bpc> | |
200a: a9 00 c0 1f 00 testl $2080768, %eax | |
200f: 0f 85 e9 04 00 00 jne 1257 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_v_16bpc> | |
0000000000002015 _dav1d_prep_8tap_16bpc_avx2.prep_16bpc: | |
2015: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d | |
201c: c4 62 7d 59 05 00 00 00 00 vpbroadcastq (%rip), %ymm8 | |
2025: c4 62 7d 79 0d 00 00 00 00 vpbroadcastw (%rip), %ymm9 | |
202e: 41 83 fa 0c cmpl $12, %r10d | |
2032: 75 09 jne 9 <_dav1d_prep_8tap_16bpc_avx2.prep_bits10> | |
2034: c4 62 7d 59 05 00 00 00 00 vpbroadcastq (%rip), %ymm8 | |
000000000000203d _dav1d_prep_8tap_16bpc_avx2.prep_bits10: | |
203d: 4c 8d 15 a6 02 00 00 leaq 678(%rip), %r10 | |
2044: f3 0f bc c9 tzcntl %ecx, %ecx | |
2048: 83 e9 02 subl $2, %ecx | |
204b: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx | |
204f: 4c 01 d1 addq %r10, %rcx | |
2052: ff e1 jmpq *%rcx | |
0000000000002054 _dav1d_prep_8tap_16bpc_avx2.w4: | |
2054: c5 fa 7e 06 vmovq (%rsi), %xmm0 | |
2058: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1 | |
205d: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2061: c4 c1 79 f1 c0 vpsllw %xmm8, %xmm0, %xmm0 | |
2066: c4 c1 71 f1 c8 vpsllw %xmm8, %xmm1, %xmm1 | |
206b: c4 c1 79 f9 c1 vpsubw %xmm9, %xmm0, %xmm0 | |
2070: c4 c1 71 f9 c9 vpsubw %xmm9, %xmm1, %xmm1 | |
2075: c5 f9 d6 07 vmovq %xmm0, (%rdi) | |
2079: c4 a1 79 d6 0c 1f vmovq %xmm1, (%rdi,%r11) | |
207f: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2083: 41 83 e8 02 subl $2, %r8d | |
2087: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w4> | |
2089: 5b popq %rbx | |
208a: c3 retq | |
000000000000208b _dav1d_prep_8tap_16bpc_avx2.w8: | |
208b: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
208f: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1 | |
2094: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2098: c4 c1 79 f1 c0 vpsllw %xmm8, %xmm0, %xmm0 | |
209d: c4 c1 71 f1 c8 vpsllw %xmm8, %xmm1, %xmm1 | |
20a2: c4 c1 79 f9 c1 vpsubw %xmm9, %xmm0, %xmm0 | |
20a7: c4 c1 71 f9 c9 vpsubw %xmm9, %xmm1, %xmm1 | |
20ac: c5 f9 7f 07 vmovdqa %xmm0, (%rdi) | |
20b0: c4 a1 79 7f 0c 1f vmovdqa %xmm1, (%rdi,%r11) | |
20b6: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
20ba: 41 83 e8 02 subl $2, %r8d | |
20be: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w8> | |
20c0: 5b popq %rbx | |
20c1: c3 retq | |
00000000000020c2 _dav1d_prep_8tap_16bpc_avx2.w16: | |
20c2: c5 fe 6f 06 vmovdqu (%rsi), %ymm0 | |
20c6: c5 fe 6f 0c 16 vmovdqu (%rsi,%rdx), %ymm1 | |
20cb: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
20cf: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0 | |
20d4: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1 | |
20d9: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0 | |
20de: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1 | |
20e3: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
20e7: c4 a1 7d 7f 0c 1f vmovdqa %ymm1, (%rdi,%r11) | |
20ed: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
20f1: 41 83 e8 02 subl $2, %r8d | |
20f5: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w16> | |
20f7: 5b popq %rbx | |
20f8: c5 f8 77 vzeroupper | |
20fb: c3 retq | |
00000000000020fc _dav1d_prep_8tap_16bpc_avx2.w32: | |
20fc: c5 fe 6f 06 vmovdqu (%rsi), %ymm0 | |
2100: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1 | |
2105: c5 fe 6f 14 16 vmovdqu (%rsi,%rdx), %ymm2 | |
210a: c5 fe 6f 5c 16 20 vmovdqu 32(%rsi,%rdx), %ymm3 | |
2110: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2114: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0 | |
2119: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1 | |
211e: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2 | |
2123: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3 | |
2128: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0 | |
212d: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1 | |
2132: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2 | |
2137: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3 | |
213c: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
2140: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
2145: c4 a1 7d 7f 14 1f vmovdqa %ymm2, (%rdi,%r11) | |
214b: c4 a1 7d 7f 5c 1f 20 vmovdqa %ymm3, 32(%rdi,%r11) | |
2152: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2156: 41 83 e8 02 subl $2, %r8d | |
215a: 7f a0 jg -96 <_dav1d_prep_8tap_16bpc_avx2.w32> | |
215c: 5b popq %rbx | |
215d: c5 f8 77 vzeroupper | |
2160: c3 retq | |
0000000000002161 _dav1d_prep_8tap_16bpc_avx2.w64: | |
2161: c5 fe 6f 06 vmovdqu (%rsi), %ymm0 | |
2165: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1 | |
216a: c5 fe 6f 56 40 vmovdqu 64(%rsi), %ymm2 | |
216f: c5 fe 6f 5e 60 vmovdqu 96(%rsi), %ymm3 | |
2174: c5 fe 6f 24 16 vmovdqu (%rsi,%rdx), %ymm4 | |
2179: c5 fe 6f 6c 16 20 vmovdqu 32(%rsi,%rdx), %ymm5 | |
217f: c5 fe 6f 74 16 40 vmovdqu 64(%rsi,%rdx), %ymm6 | |
2185: c5 fe 6f 7c 16 60 vmovdqu 96(%rsi,%rdx), %ymm7 | |
218b: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
218f: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0 | |
2194: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1 | |
2199: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2 | |
219e: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3 | |
21a3: c4 c1 5d f1 e0 vpsllw %xmm8, %ymm4, %ymm4 | |
21a8: c4 c1 55 f1 e8 vpsllw %xmm8, %ymm5, %ymm5 | |
21ad: c4 c1 4d f1 f0 vpsllw %xmm8, %ymm6, %ymm6 | |
21b2: c4 c1 45 f1 f8 vpsllw %xmm8, %ymm7, %ymm7 | |
21b7: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0 | |
21bc: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1 | |
21c1: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2 | |
21c6: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3 | |
21cb: c4 c1 5d f9 e1 vpsubw %ymm9, %ymm4, %ymm4 | |
21d0: c4 c1 55 f9 e9 vpsubw %ymm9, %ymm5, %ymm5 | |
21d5: c4 c1 4d f9 f1 vpsubw %ymm9, %ymm6, %ymm6 | |
21da: c4 c1 45 f9 f9 vpsubw %ymm9, %ymm7, %ymm7 | |
21df: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
21e3: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
21e8: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi) | |
21ed: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi) | |
21f2: c4 a1 7d 7f 24 1f vmovdqa %ymm4, (%rdi,%r11) | |
21f8: c4 a1 7d 7f 6c 1f 20 vmovdqa %ymm5, 32(%rdi,%r11) | |
21ff: c4 a1 7d 7f 74 1f 40 vmovdqa %ymm6, 64(%rdi,%r11) | |
2206: c4 a1 7d 7f 7c 1f 60 vmovdqa %ymm7, 96(%rdi,%r11) | |
220d: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2211: 41 83 e8 02 subl $2, %r8d | |
2215: 0f 8f 46 ff ff ff jg -186 <_dav1d_prep_8tap_16bpc_avx2.w64> | |
221b: 5b popq %rbx | |
221c: c5 f8 77 vzeroupper | |
221f: c3 retq | |
0000000000002220 _dav1d_prep_8tap_16bpc_avx2.w128: | |
2220: c5 fe 6f 06 vmovdqu (%rsi), %ymm0 | |
2224: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1 | |
2229: c5 fe 6f 56 40 vmovdqu 64(%rsi), %ymm2 | |
222e: c5 fe 6f 5e 60 vmovdqu 96(%rsi), %ymm3 | |
2233: c5 fe 6f a6 80 00 00 00 vmovdqu 128(%rsi), %ymm4 | |
223b: c5 fe 6f ae a0 00 00 00 vmovdqu 160(%rsi), %ymm5 | |
2243: c5 fe 6f b6 c0 00 00 00 vmovdqu 192(%rsi), %ymm6 | |
224b: c5 fe 6f be e0 00 00 00 vmovdqu 224(%rsi), %ymm7 | |
2253: 48 01 d6 addq %rdx, %rsi | |
2256: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0 | |
225b: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1 | |
2260: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2 | |
2265: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3 | |
226a: c4 c1 5d f1 e0 vpsllw %xmm8, %ymm4, %ymm4 | |
226f: c4 c1 55 f1 e8 vpsllw %xmm8, %ymm5, %ymm5 | |
2274: c4 c1 4d f1 f0 vpsllw %xmm8, %ymm6, %ymm6 | |
2279: c4 c1 45 f1 f8 vpsllw %xmm8, %ymm7, %ymm7 | |
227e: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0 | |
2283: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1 | |
2288: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2 | |
228d: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3 | |
2292: c4 c1 5d f9 e1 vpsubw %ymm9, %ymm4, %ymm4 | |
2297: c4 c1 55 f9 e9 vpsubw %ymm9, %ymm5, %ymm5 | |
229c: c4 c1 4d f9 f1 vpsubw %ymm9, %ymm6, %ymm6 | |
22a1: c4 c1 45 f9 f9 vpsubw %ymm9, %ymm7, %ymm7 | |
22a6: c5 fd 7f 07 vmovdqa %ymm0, (%rdi) | |
22aa: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi) | |
22af: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi) | |
22b4: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi) | |
22b9: c5 fd 7f a7 80 00 00 00 vmovdqa %ymm4, 128(%rdi) | |
22c1: c5 fd 7f af a0 00 00 00 vmovdqa %ymm5, 160(%rdi) | |
22c9: c5 fd 7f b7 c0 00 00 00 vmovdqa %ymm6, 192(%rdi) | |
22d1: c5 fd 7f bf e0 00 00 00 vmovdqa %ymm7, 224(%rdi) | |
22d9: 4c 01 df addq %r11, %rdi | |
22dc: 41 ff c8 decl %r8d | |
22df: 0f 8f 3b ff ff ff jg -197 <_dav1d_prep_8tap_16bpc_avx2.w128> | |
22e5: 5b popq %rbx | |
22e6: c5 f8 77 vzeroupper | |
22e9: c3 retq | |
00000000000022ea _dav1d_prep_8tap_16bpc_avx2.jmp_tbl: | |
22ea: 6a fd pushq $-3 | |
22ec: ff ff <unknown> | |
22ee: a1 fd ff ff d8 fd ff ff 12 movabsl 1369094277476384765, %eax | |
22f7: fe ff <unknown> | |
22f9: ff 77 fe pushq -2(%rdi) | |
22fc: ff ff <unknown> | |
22fe: 36 ff ff <unknown> | |
2301: ff 83 f9 04 7e 04 incl 75367673(%rbx) | |
0000000000002302 _dav1d_prep_8tap_16bpc_avx2.prep_8tap_h_16bpc: | |
2302: 83 f9 04 cmpl $4, %ecx | |
2305: 7e 04 jle 4 <_dav1d_prep_8tap_16bpc_avx2.h_use4tap> | |
2307: 41 c1 e9 07 shrl $7, %r9d | |
000000000000230b _dav1d_prep_8tap_16bpc_avx2.h_use4tap: | |
230b: 41 83 e1 7f andl $127, %r9d | |
230f: a9 00 c0 1f 00 testl $2080768, %eax | |
2314: 0f 85 41 06 00 00 jne 1601 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_hv_16bpc> | |
231a: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d | |
2321: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
232a: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7 | |
2333: 41 83 fa 0c cmpl $12, %r10d | |
2337: 75 12 jne 18 <_dav1d_prep_8tap_16bpc_avx2.h_bits10> | |
2339: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
2342: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7 | |
000000000000234b _dav1d_prep_8tap_16bpc_avx2.h_bits10: | |
234b: 48 8d 04 09 leaq (%rcx,%rcx), %rax | |
234f: 4c 8d 15 90 01 00 00 leaq 400(%rip), %r10 | |
2356: f3 0f bc c9 tzcntl %ecx, %ecx | |
235a: 83 e9 02 subl $2, %ecx | |
235d: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx | |
2361: 4c 01 d1 addq %r10, %rcx | |
2364: ff e1 jmpq *%rcx | |
0000000000002366 _dav1d_prep_8tap_16bpc_avx2.h_w4: | |
2366: 48 83 ee 02 subq $2, %rsi | |
236a: c5 fd 6f 25 00 00 00 00 vmovdqa (%rip), %ymm4 | |
2372: c4 82 79 58 ac ca 90 fe ff ff vpbroadcastd -368(%r10,%r9,8), %xmm5 | |
237c: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
0000000000002381 _dav1d_prep_8tap_16bpc_avx2.h_w4l: | |
2381: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0 | |
2386: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1 | |
238c: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2390: c4 e2 7d 00 c4 vpshufb %ymm4, %ymm0, %ymm0 | |
2395: c4 e2 75 00 cc vpshufb %ymm4, %ymm1, %ymm1 | |
239a: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
239e: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
23a2: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
23a7: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
23ab: c5 fd e2 c7 vpsrad %xmm7, %ymm0, %ymm0 | |
23af: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
23b3: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1 | |
23b9: c5 f9 7e 07 vmovd %xmm0, (%rdi) | |
23bd: c5 f9 7e 4f 04 vmovd %xmm1, 4(%rdi) | |
23c2: c4 a3 79 16 04 1f 01 vpextrd $1, %xmm0, (%rdi,%r11) | |
23c9: c4 a3 79 16 4c 1f 04 01 vpextrd $1, %xmm1, 4(%rdi,%r11) | |
23d1: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
23d5: 41 83 e8 02 subl $2, %r8d | |
23d9: 7f a6 jg -90 <_dav1d_prep_8tap_16bpc_avx2.h_w4l> | |
23db: 5b popq %rbx | |
23dc: c5 f8 77 vzeroupper | |
23df: c3 retq | |
00000000000023e0 _dav1d_prep_8tap_16bpc_avx2.h_w8: | |
23e0: 48 83 ee 06 subq $6, %rsi | |
23e4: c4 82 79 59 ac ca 00 ff ff ff vpbroadcastq -256(%r10,%r9,8), %xmm5 | |
23ee: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
00000000000023f3 _dav1d_prep_8tap_16bpc_avx2.h_w8l: | |
23f3: 89 c1 movl %eax, %ecx | |
00000000000023f5 _dav1d_prep_8tap_16bpc_avx2.h_w8c: | |
23f5: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
23f9: c5 fa 6f 56 02 vmovdqu 2(%rsi), %xmm2 | |
23fe: c4 e3 7d 38 04 16 01 vinserti128 $1, (%rsi,%rdx), %ymm0, %ymm0 | |
2405: c4 e3 6d 38 54 32 02 01 vinserti128 $1, 2(%rdx,%rsi), %ymm2, %ymm2 | |
240d: c5 fa 6f 4e 04 vmovdqu 4(%rsi), %xmm1 | |
2412: c5 fa 6f 5e 06 vmovdqu 6(%rsi), %xmm3 | |
2417: c4 e3 75 38 4c 32 04 01 vinserti128 $1, 4(%rdx,%rsi), %ymm1, %ymm1 | |
241f: c4 e3 65 38 5c 32 06 01 vinserti128 $1, 6(%rdx,%rsi), %ymm3, %ymm3 | |
2427: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
242b: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
242f: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2433: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2437: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0 | |
243c: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
2441: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
2446: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
244a: c5 fd e2 c7 vpsrad %xmm7, %ymm0, %ymm0 | |
244e: c5 fa 6f 4e 08 vmovdqu 8(%rsi), %xmm1 | |
2453: c5 fa 6f 5e 0a vmovdqu 10(%rsi), %xmm3 | |
2458: c4 e3 75 38 4c 32 08 01 vinserti128 $1, 8(%rdx,%rsi), %ymm1, %ymm1 | |
2460: c4 e3 65 38 5c 32 0a 01 vinserti128 $1, 10(%rdx,%rsi), %ymm3, %ymm3 | |
2468: c5 fa 6f 56 0c vmovdqu 12(%rsi), %xmm2 | |
246d: c5 fa 6f 66 0e vmovdqu 14(%rsi), %xmm4 | |
2472: c4 e3 6d 38 54 32 0c 01 vinserti128 $1, 12(%rdx,%rsi), %ymm2, %ymm2 | |
247a: c4 e3 5d 38 64 32 0e 01 vinserti128 $1, 14(%rdx,%rsi), %ymm4, %ymm4 | |
2482: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2486: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
248a: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
248e: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2492: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
2497: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2 | |
249c: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
24a1: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
24a5: c5 f5 e2 cf vpsrad %xmm7, %ymm1, %ymm1 | |
24a9: c5 fd 6b c1 vpackssdw %ymm1, %ymm0, %ymm0 | |
24ad: 48 83 c6 10 addq $16, %rsi | |
24b1: c5 f9 7f 07 vmovdqa %xmm0, (%rdi) | |
24b5: c4 a3 7d 39 04 1f 01 vextracti128 $1, %ymm0, (%rdi,%r11) | |
24bc: 48 83 c7 10 addq $16, %rdi | |
24c0: 83 e9 10 subl $16, %ecx | |
24c3: 0f 8f 2c ff ff ff jg -212 <_dav1d_prep_8tap_16bpc_avx2.h_w8c> | |
24c9: 48 29 c6 subq %rax, %rsi | |
24cc: 48 29 c7 subq %rax, %rdi | |
24cf: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
24d3: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
24d7: 41 83 e8 02 subl $2, %r8d | |
24db: 0f 8f 12 ff ff ff jg -238 <_dav1d_prep_8tap_16bpc_avx2.h_w8l> | |
24e1: 5b popq %rbx | |
24e2: c5 f8 77 vzeroupper | |
24e5: c3 retq | |
00000000000024e6 _dav1d_prep_8tap_16bpc_avx2.h_jmp_tbl: | |
24e6: 80 fe ff cmpb $-1, %dh | |
24e9: ff fa <unknown> | |
24eb: fe ff <unknown> | |
24ed: ff fa <unknown> | |
24ef: fe ff <unknown> | |
24f1: ff fa <unknown> | |
24f3: fe ff <unknown> | |
24f5: ff fa <unknown> | |
24f7: fe ff <unknown> | |
24f9: ff fa <unknown> | |
24fb: fe ff <unknown> | |
24fd: ff 41 83 incl -125(%rcx) | |
00000000000024fe _dav1d_prep_8tap_16bpc_avx2.prep_8tap_v_16bpc: | |
24fe: 41 83 f8 04 cmpl $4, %r8d | |
2502: 7e 03 jle 3 <_dav1d_prep_8tap_16bpc_avx2.v_use4tap> | |
2504: c1 e8 07 shrl $7, %eax | |
0000000000002507 _dav1d_prep_8tap_16bpc_avx2.v_use4tap: | |
2507: 83 e0 7f andl $127, %eax | |
250a: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d | |
2511: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
251a: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7 | |
2523: 41 83 fa 0c cmpl $12, %r10d | |
2527: 75 12 jne 18 <_dav1d_prep_8tap_16bpc_avx2.v_bits10> | |
2529: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
2532: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7 | |
000000000000253b _dav1d_prep_8tap_16bpc_avx2.v_bits10: | |
253b: 4c 8d 15 01 04 00 00 leaq 1025(%rip), %r10 | |
2542: 4c 8d 0c 09 leaq (%rcx,%rcx), %r9 | |
2546: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx | |
254a: 49 8d 84 c2 07 fc ff ff leaq -1017(%r10,%rax,8), %rax | |
2552: c4 62 7d 79 00 vpbroadcastw (%rax), %ymm8 | |
2557: c4 62 7d 79 48 02 vpbroadcastw 2(%rax), %ymm9 | |
255d: c4 62 7d 79 50 04 vpbroadcastw 4(%rax), %ymm10 | |
2563: c4 62 7d 79 58 06 vpbroadcastw 6(%rax), %ymm11 | |
2569: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8 | |
256e: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
2573: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
2578: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11 | |
257d: f3 0f bc c9 tzcntl %ecx, %ecx | |
2581: 83 e9 02 subl $2, %ecx | |
2584: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx | |
2588: 4c 01 d1 addq %r10, %rcx | |
258b: ff e1 jmpq *%rcx | |
000000000000258d _dav1d_prep_8tap_16bpc_avx2.v_w4: | |
258d: 41 83 f8 04 cmpl $4, %r8d | |
2591: 0f 8f 84 00 00 00 jg 132 <_dav1d_prep_8tap_16bpc_avx2.v_w48> | |
2597: 48 29 d6 subq %rdx, %rsi | |
259a: c5 fa 7e 06 vmovq (%rsi), %xmm0 | |
259e: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1 | |
25a3: c5 fa 7e 14 56 vmovq (%rsi,%rdx,2), %xmm2 | |
25a8: 48 01 de addq %rbx, %rsi | |
25ab: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
25af: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
00000000000025b3 _dav1d_prep_8tap_16bpc_avx2.v_w4l: | |
25b3: c5 fa 7e 1e vmovq (%rsi), %xmm3 | |
25b7: c5 fa 7e 24 16 vmovq (%rsi,%rdx), %xmm4 | |
25bc: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
25c0: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
25c4: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
25c8: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0 | |
25cc: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5 | |
25d0: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
25d4: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
25d8: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1 | |
25dc: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8 | |
25e0: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8 | |
25e4: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
25e8: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
25ec: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
25f0: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8 | |
25f4: c5 d1 e2 ef vpsrad %xmm7, %xmm5, %xmm5 | |
25f8: c5 39 e2 c7 vpsrad %xmm7, %xmm8, %xmm8 | |
25fc: c5 d1 6b ed vpackssdw %xmm5, %xmm5, %xmm5 | |
2600: c4 41 39 6b c0 vpackssdw %xmm8, %xmm8, %xmm8 | |
2605: c5 f9 d6 2f vmovq %xmm5, (%rdi) | |
2609: c4 21 79 d6 04 1f vmovq %xmm8, (%rdi,%r11) | |
260f: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2613: 41 83 e8 02 subl $2, %r8d | |
2617: 7f 9a jg -102 <_dav1d_prep_8tap_16bpc_avx2.v_w4l> | |
2619: 5b popq %rbx | |
261a: c3 retq | |
000000000000261b _dav1d_prep_8tap_16bpc_avx2.v_w48: | |
261b: 48 29 de subq %rbx, %rsi | |
261e: c5 fa 7e 06 vmovq (%rsi), %xmm0 | |
2622: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1 | |
2627: c5 fa 7e 14 56 vmovq (%rsi,%rdx,2), %xmm2 | |
262c: 48 01 de addq %rbx, %rsi | |
262f: c5 fa 7e 1e vmovq (%rsi), %xmm3 | |
2633: c5 fa 7e 24 16 vmovq (%rsi,%rdx), %xmm4 | |
2638: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
263c: c5 7a 7e 26 vmovq (%rsi), %xmm12 | |
2640: c5 7a 7e 2c 16 vmovq (%rsi,%rdx), %xmm13 | |
2645: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2649: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0 | |
264d: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1 | |
2651: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2 | |
2655: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3 | |
2659: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4 | |
265e: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12 | |
0000000000002663 _dav1d_prep_8tap_16bpc_avx2.v_w48l: | |
2663: 48 29 d6 subq %rdx, %rsi | |
2666: c5 7a 7e 2e vmovq (%rsi), %xmm13 | |
266a: c5 7a 7e 34 16 vmovq (%rsi,%rdx), %xmm14 | |
266f: c5 7a 7e 3c 56 vmovq (%rsi,%rdx,2), %xmm15 | |
2674: 48 01 de addq %rbx, %rsi | |
2677: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13 | |
267c: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14 | |
2681: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0 | |
2685: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5 | |
2689: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5 | |
268d: c5 f9 6f c2 vmovdqa %xmm2, %xmm0 | |
2691: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1 | |
2695: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15 | |
2699: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15 | |
269d: c5 f9 6f cb vmovdqa %xmm3, %xmm1 | |
26a1: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2 | |
26a5: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3 | |
26aa: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5 | |
26ae: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15 | |
26b2: c5 f9 6f d4 vmovdqa %xmm4, %xmm2 | |
26b6: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3 | |
26bb: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4 | |
26c0: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12 | |
26c5: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5 | |
26c9: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15 | |
26ce: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4 | |
26d3: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12 | |
26d8: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5 | |
26dc: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15 | |
26e0: c5 d1 e2 ef vpsrad %xmm7, %xmm5, %xmm5 | |
26e4: c5 01 e2 ff vpsrad %xmm7, %xmm15, %xmm15 | |
26e8: c5 d1 6b ed vpackssdw %xmm5, %xmm5, %xmm5 | |
26ec: c4 41 01 6b ff vpackssdw %xmm15, %xmm15, %xmm15 | |
26f1: c5 f9 d6 2f vmovq %xmm5, (%rdi) | |
26f5: c4 21 79 d6 3c 1f vmovq %xmm15, (%rdi,%r11) | |
26fb: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
26ff: 41 83 e8 02 subl $2, %r8d | |
2703: 0f 8f 5a ff ff ff jg -166 <_dav1d_prep_8tap_16bpc_avx2.v_w48l> | |
2709: 5b popq %rbx | |
270a: c3 retq | |
000000000000270b _dav1d_prep_8tap_16bpc_avx2.v_w8: | |
270b: 44 89 c1 movl %r8d, %ecx | |
270e: 48 89 f8 movq %rdi, %rax | |
2711: 41 83 f8 04 cmpl $4, %r8d | |
2715: 0f 8f cf 00 00 00 jg 207 <_dav1d_prep_8tap_16bpc_avx2.v_w88> | |
271b: 48 29 d6 subq %rdx, %rsi | |
271e: 49 89 f2 movq %rsi, %r10 | |
0000000000002721 _dav1d_prep_8tap_16bpc_avx2.v_w8l: | |
2721: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
2725: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1 | |
272a: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2 | |
272f: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0 | |
2735: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1 | |
273b: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2 | |
2741: 48 01 de addq %rbx, %rsi | |
2744: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
2748: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
000000000000274c _dav1d_prep_8tap_16bpc_avx2.v_w8c: | |
274c: c5 fa 6f 1e vmovdqu (%rsi), %xmm3 | |
2750: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4 | |
2755: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3 | |
275b: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4 | |
2761: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2765: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
2769: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
276d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
2771: c5 ad f5 ea vpmaddwd %ymm2, %ymm10, %ymm5 | |
2775: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5 | |
2779: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
277d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
2781: c5 2d f5 c3 vpmaddwd %ymm3, %ymm10, %ymm8 | |
2785: c5 3d fe c1 vpaddd %ymm1, %ymm8, %ymm8 | |
2789: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
278d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
2791: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5 | |
2795: c5 3d fe c6 vpaddd %ymm6, %ymm8, %ymm8 | |
2799: c5 d5 e2 ef vpsrad %xmm7, %ymm5, %ymm5 | |
279d: c5 3d e2 c7 vpsrad %xmm7, %ymm8, %ymm8 | |
27a1: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5 | |
27a5: c4 41 3d 6b c0 vpackssdw %ymm8, %ymm8, %ymm8 | |
27aa: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5 | |
27b0: c4 43 fd 00 c0 d8 vpermq $216, %ymm8, %ymm8 | |
27b6: c5 fa 7f 2f vmovdqu %xmm5, (%rdi) | |
27ba: c4 21 7a 7f 04 1f vmovdqu %xmm8, (%rdi,%r11) | |
27c0: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
27c4: 41 83 e8 02 subl $2, %r8d | |
27c8: 7f 82 jg -126 <_dav1d_prep_8tap_16bpc_avx2.v_w8c> | |
27ca: 48 83 c0 10 addq $16, %rax | |
27ce: 49 83 c2 10 addq $16, %r10 | |
27d2: 41 89 c8 movl %ecx, %r8d | |
27d5: 48 89 c7 movq %rax, %rdi | |
27d8: 4c 89 d6 movq %r10, %rsi | |
27db: 41 83 e9 10 subl $16, %r9d | |
27df: 0f 8f 3c ff ff ff jg -196 <_dav1d_prep_8tap_16bpc_avx2.v_w8l> | |
27e5: 5b popq %rbx | |
27e6: c5 f8 77 vzeroupper | |
27e9: c3 retq | |
00000000000027ea _dav1d_prep_8tap_16bpc_avx2.v_w88: | |
27ea: 48 29 de subq %rbx, %rsi | |
27ed: 49 89 f2 movq %rsi, %r10 | |
00000000000027f0 _dav1d_prep_8tap_16bpc_avx2.v_w88l: | |
27f0: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
27f4: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1 | |
27f9: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2 | |
27fe: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0 | |
2804: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1 | |
280a: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2 | |
2810: 48 01 de addq %rbx, %rsi | |
2813: c5 fa 6f 1e vmovdqu (%rsi), %xmm3 | |
2817: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4 | |
281c: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3 | |
2822: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4 | |
2828: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
282c: c5 7a 6f 26 vmovdqu (%rsi), %xmm12 | |
2830: c5 7a 6f 2c 16 vmovdqu (%rsi,%rdx), %xmm13 | |
2835: c4 43 fd 00 e4 d8 vpermq $216, %ymm12, %ymm12 | |
283b: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13 | |
2841: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2845: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
2849: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
284d: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
2851: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
2855: c4 c1 5d 61 e4 vpunpcklwd %ymm12, %ymm4, %ymm4 | |
285a: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12 | |
000000000000285f _dav1d_prep_8tap_16bpc_avx2.v_w88c: | |
285f: 48 29 d6 subq %rdx, %rsi | |
2862: c5 7a 6f 2e vmovdqu (%rsi), %xmm13 | |
2866: c5 7a 6f 34 16 vmovdqu (%rsi,%rdx), %xmm14 | |
286b: c5 7a 6f 3c 56 vmovdqu (%rsi,%rdx,2), %xmm15 | |
2870: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13 | |
2876: c4 43 fd 00 f6 d8 vpermq $216, %ymm14, %ymm14 | |
287c: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15 | |
2882: 48 01 de addq %rbx, %rsi | |
2885: c4 41 15 61 ee vpunpcklwd %ymm14, %ymm13, %ymm13 | |
288a: c4 41 0d 61 f7 vpunpcklwd %ymm15, %ymm14, %ymm14 | |
288f: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0 | |
2893: c5 b5 f5 ea vpmaddwd %ymm2, %ymm9, %ymm5 | |
2897: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5 | |
289b: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
289f: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1 | |
28a3: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15 | |
28a7: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15 | |
28ab: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
28af: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2 | |
28b3: c4 c1 1d f5 da vpmaddwd %ymm10, %ymm12, %ymm3 | |
28b8: c5 d5 fe ea vpaddd %ymm2, %ymm5, %ymm5 | |
28bc: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15 | |
28c0: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
28c4: c4 c1 7d 6f dc vmovdqa %ymm12, %ymm3 | |
28c9: c4 c1 15 f5 e3 vpmaddwd %ymm11, %ymm13, %ymm4 | |
28ce: c4 41 0d f5 e3 vpmaddwd %ymm11, %ymm14, %ymm12 | |
28d3: c5 d5 fe ec vpaddd %ymm4, %ymm5, %ymm5 | |
28d7: c4 41 05 fe fc vpaddd %ymm12, %ymm15, %ymm15 | |
28dc: c4 c1 7d 6f e5 vmovdqa %ymm13, %ymm4 | |
28e1: c4 41 7d 6f e6 vmovdqa %ymm14, %ymm12 | |
28e6: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5 | |
28ea: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15 | |
28ee: c5 d5 e2 ef vpsrad %xmm7, %ymm5, %ymm5 | |
28f2: c5 05 e2 ff vpsrad %xmm7, %ymm15, %ymm15 | |
28f6: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5 | |
28fa: c4 41 05 6b ff vpackssdw %ymm15, %ymm15, %ymm15 | |
28ff: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5 | |
2905: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15 | |
290b: c5 fa 7f 2f vmovdqu %xmm5, (%rdi) | |
290f: c4 21 7a 7f 3c 1f vmovdqu %xmm15, (%rdi,%r11) | |
2915: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2919: 41 83 e8 02 subl $2, %r8d | |
291d: 0f 8f 3c ff ff ff jg -196 <_dav1d_prep_8tap_16bpc_avx2.v_w88c> | |
2923: 48 83 c0 10 addq $16, %rax | |
2927: 49 83 c2 10 addq $16, %r10 | |
292b: 41 89 c8 movl %ecx, %r8d | |
292e: 48 89 c7 movq %rax, %rdi | |
2931: 4c 89 d6 movq %r10, %rsi | |
2934: 41 83 e9 10 subl $16, %r9d | |
2938: 0f 8f b2 fe ff ff jg -334 <_dav1d_prep_8tap_16bpc_avx2.v_w88l> | |
293e: 5b popq %rbx | |
293f: c5 f8 77 vzeroupper | |
2942: c3 retq | |
0000000000002943 _dav1d_prep_8tap_16bpc_avx2.v_jmp_tbl: | |
2943: 4a fc cld | |
2945: ff ff <unknown> | |
2947: c8 fd ff ff enter $-3, $-1 | |
294b: c8 fd ff ff enter $-3, $-1 | |
294f: c8 fd ff ff enter $-3, $-1 | |
2953: c8 fd ff ff enter $-3, $-1 | |
2957: c8 fd ff ff enter $-3, $-1 | |
000000000000295b _dav1d_prep_8tap_16bpc_avx2.prep_8tap_hv_16bpc: | |
295b: 41 83 f8 04 cmpl $4, %r8d | |
295f: 7e 03 jle 3 <_dav1d_prep_8tap_16bpc_avx2.hv_use4tap> | |
2961: c1 e8 07 shrl $7, %eax | |
0000000000002964 _dav1d_prep_8tap_16bpc_avx2.hv_use4tap: | |
2964: 83 e0 7f andl $127, %eax | |
2967: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d | |
296e: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
2977: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13 | |
297f: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14 | |
2988: 41 83 fa 0c cmpl $12, %r10d | |
298c: 75 11 jne 17 <_dav1d_prep_8tap_16bpc_avx2.hv_bits10> | |
298e: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
2997: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13 | |
000000000000299f _dav1d_prep_8tap_16bpc_avx2.hv_bits10: | |
299f: 4c 8d 15 dd 0f 00 00 leaq 4061(%rip), %r10 | |
29a6: 49 8d 9c c2 23 f0 ff ff leaq -4061(%r10,%rax,8), %rbx | |
29ae: c4 62 79 79 03 vpbroadcastw (%rbx), %xmm8 | |
29b3: c4 62 79 79 4b 02 vpbroadcastw 2(%rbx), %xmm9 | |
29b9: c4 62 79 79 53 04 vpbroadcastw 4(%rbx), %xmm10 | |
29bf: c4 62 79 79 5b 06 vpbroadcastw 6(%rbx), %xmm11 | |
29c5: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8 | |
29ca: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
29cf: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
29d4: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11 | |
29d9: 48 89 cb movq %rcx, %rbx | |
29dc: f3 0f bc c9 tzcntl %ecx, %ecx | |
29e0: 83 e9 02 subl $2, %ecx | |
29e3: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx | |
29e7: 4c 01 d1 addq %r10, %rcx | |
29ea: ff e1 jmpq *%rcx | |
00000000000029ec _dav1d_prep_8tap_16bpc_avx2.hv_w4: | |
29ec: 41 83 f8 04 cmpl $4, %r8d | |
29f0: 0f 8f 55 01 00 00 jg 341 <_dav1d_prep_8tap_16bpc_avx2.hv_w48> | |
29f6: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx | |
29fa: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8 | |
2a02: c4 82 79 58 ac ca 83 f0 ff ff vpbroadcastd -3965(%r10,%r9,8), %xmm5 | |
2a0c: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
2a11: 48 83 ee 02 subq $2, %rsi | |
2a15: 48 29 d6 subq %rdx, %rsi | |
2a18: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0 | |
2a1d: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1 | |
2a23: c4 e2 7d 5a 14 56 vbroadcasti128 (%rsi,%rdx,2), %ymm2 | |
2a29: 48 01 de addq %rbx, %rsi | |
2a2c: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0 | |
2a31: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1 | |
2a36: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
2a3a: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2a3e: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0 | |
2a43: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1 | |
2a48: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
2a4c: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
2a50: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
2a55: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
2a5a: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
2a5e: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
2a62: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2 | |
2a67: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2a6b: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2 | |
2a70: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
2a74: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
2a79: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
2a7d: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
2a81: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
0000000000002a85 _dav1d_prep_8tap_16bpc_avx2.hv_w4l: | |
2a85: c4 e2 7d 5a 1e vbroadcasti128 (%rsi), %ymm3 | |
2a8a: c4 e2 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm4 | |
2a90: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2a94: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
2a99: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
2a9e: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2aa2: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2aa6: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
2aab: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
2ab0: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
2ab4: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
2ab8: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
2abd: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
2ac2: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
2ac6: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
2aca: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
2ace: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
2ad2: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
2ad6: c5 2d f5 da vpmaddwd %ymm2, %ymm10, %ymm11 | |
2ada: c5 25 fe d8 vpaddd %ymm0, %ymm11, %ymm11 | |
2ade: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
2ae2: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
2ae6: c5 2d f5 e3 vpmaddwd %ymm3, %ymm10, %ymm12 | |
2aea: c5 1d fe e1 vpaddd %ymm1, %ymm12, %ymm12 | |
2aee: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
2af2: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
2af6: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
2afb: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12 | |
2b00: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11 | |
2b06: c4 c1 1d 72 e4 06 vpsrad $6, %ymm12, %ymm12 | |
2b0c: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11 | |
2b11: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
2b16: c4 63 7d 39 db 01 vextracti128 $1, %ymm11, %xmm3 | |
2b1c: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4 | |
2b22: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
2b26: c5 f9 7e 5f 04 vmovd %xmm3, 4(%rdi) | |
2b2b: c4 21 79 7e 24 1f vmovd %xmm12, (%rdi,%r11) | |
2b31: c4 a1 79 7e 64 1f 04 vmovd %xmm4, 4(%rdi,%r11) | |
2b38: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2b3c: 41 83 e8 02 subl $2, %r8d | |
2b40: 0f 8f 3f ff ff ff jg -193 <_dav1d_prep_8tap_16bpc_avx2.hv_w4l> | |
2b46: 5b popq %rbx | |
2b47: c5 f8 77 vzeroupper | |
2b4a: c3 retq | |
0000000000002b4b _dav1d_prep_8tap_16bpc_avx2.hv_w48: | |
2b4b: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx | |
2b4f: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8 | |
2b57: c4 82 79 58 ac ca d8 f1 ff ff vpbroadcastd -3624(%r10,%r9,8), %xmm5 | |
2b61: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
2b66: 49 8d 84 c2 e3 f1 ff ff leaq -3613(%r10,%rax,8), %rax | |
2b6e: c4 62 79 58 08 vpbroadcastd (%rax), %xmm9 | |
2b73: c4 62 79 58 50 04 vpbroadcastd 4(%rax), %xmm10 | |
2b79: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9 | |
2b7e: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10 | |
2b83: 48 83 ee 02 subq $2, %rsi | |
2b87: 48 29 de subq %rbx, %rsi | |
2b8a: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0 | |
2b8f: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1 | |
2b95: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2b99: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0 | |
2b9e: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1 | |
2ba3: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
2ba7: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2bab: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0 | |
2bb0: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1 | |
2bb5: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
2bb9: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
2bbd: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
2bc2: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
2bc7: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
2bcb: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
2bcf: c4 e2 7d 5a 26 vbroadcasti128 (%rsi), %ymm4 | |
2bd4: c4 e2 7d 5a 1c 16 vbroadcasti128 (%rsi,%rdx), %ymm3 | |
2bda: c4 e2 7d 5a 14 56 vbroadcasti128 (%rsi,%rdx,2), %ymm2 | |
2be0: 48 01 de addq %rbx, %rsi | |
2be3: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
2be8: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
2bed: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2bf1: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2bf5: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
2bfa: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
2bff: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
2c03: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
2c07: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
2c0c: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
2c11: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
2c15: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
2c19: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2 | |
2c1e: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2c22: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2 | |
2c27: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
2c2b: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
2c30: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
2c34: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
2c38: c5 f5 61 cc vpunpcklwd %ymm4, %ymm1, %ymm1 | |
2c3c: c5 dd 61 e3 vpunpcklwd %ymm3, %ymm4, %ymm4 | |
2c40: c5 e5 61 da vpunpcklwd %ymm2, %ymm3, %ymm3 | |
2c44: c5 fd 62 c4 vpunpckldq %ymm4, %ymm0, %ymm0 | |
2c48: c5 f5 62 cb vpunpckldq %ymm3, %ymm1, %ymm1 | |
2c4c: c4 e2 7d 5a 1e vbroadcasti128 (%rsi), %ymm3 | |
2c51: c4 e2 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm4 | |
2c57: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2c5b: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3 | |
2c60: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4 | |
2c65: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2c69: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2c6d: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3 | |
2c72: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4 | |
2c77: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
2c7b: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
2c7f: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
2c84: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
2c89: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
2c8d: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
2c91: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
2c95: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
0000000000002c99 _dav1d_prep_8tap_16bpc_avx2.hv_w48l: | |
2c99: c4 62 7d 5a 1e vbroadcasti128 (%rsi), %ymm11 | |
2c9e: c4 62 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm12 | |
2ca4: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2ca8: c4 42 25 00 d8 vpshufb %ymm8, %ymm11, %ymm11 | |
2cad: c4 42 1d 00 e0 vpshufb %ymm8, %ymm12, %ymm12 | |
2cb2: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
2cb6: c5 1d f5 e5 vpmaddwd %ymm5, %ymm12, %ymm12 | |
2cba: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11 | |
2cbf: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12 | |
2cc4: c5 25 fe de vpaddd %ymm6, %ymm11, %ymm11 | |
2cc8: c5 1d fe e6 vpaddd %ymm6, %ymm12, %ymm12 | |
2ccc: c4 41 25 e2 dd vpsrad %xmm13, %ymm11, %ymm11 | |
2cd1: c4 41 1d e2 e5 vpsrad %xmm13, %ymm12, %ymm12 | |
2cd6: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11 | |
2cdb: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
2ce0: c4 c1 5d 61 e3 vpunpcklwd %ymm11, %ymm4, %ymm4 | |
2ce5: c4 41 25 61 dc vpunpcklwd %ymm12, %ymm11, %ymm11 | |
2cea: c5 ed 62 d4 vpunpckldq %ymm4, %ymm2, %ymm2 | |
2cee: c4 c1 65 62 db vpunpckldq %ymm11, %ymm3, %ymm3 | |
2cf3: c5 35 f5 d8 vpmaddwd %ymm0, %ymm9, %ymm11 | |
2cf7: c5 ad f5 e2 vpmaddwd %ymm2, %ymm10, %ymm4 | |
2cfb: c5 35 f5 e1 vpmaddwd %ymm1, %ymm9, %ymm12 | |
2cff: c5 25 fe dc vpaddd %ymm4, %ymm11, %ymm11 | |
2d03: c5 ad f5 e3 vpmaddwd %ymm3, %ymm10, %ymm4 | |
2d07: c5 1d fe e4 vpaddd %ymm4, %ymm12, %ymm12 | |
2d0b: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11 | |
2d10: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12 | |
2d15: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
2d1a: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12 | |
2d1f: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11 | |
2d25: c4 c1 1d 72 e4 06 vpsrad $6, %ymm12, %ymm12 | |
2d2b: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11 | |
2d30: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
2d35: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4 | |
2d3b: c5 79 7e 1f vmovd %xmm11, (%rdi) | |
2d3f: c5 f9 7e 67 04 vmovd %xmm4, 4(%rdi) | |
2d44: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4 | |
2d4a: c4 21 79 7e 24 1f vmovd %xmm12, (%rdi,%r11) | |
2d50: c4 a1 79 7e 64 1f 04 vmovd %xmm4, 4(%rdi,%r11) | |
2d57: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
2d5b: c5 fd 70 c0 8d vpshufd $141, %ymm0, %ymm0 | |
2d60: c5 fd 70 c9 8d vpshufd $141, %ymm1, %ymm1 | |
2d65: c5 7d 70 da d8 vpshufd $216, %ymm2, %ymm11 | |
2d6a: c5 7d 70 e3 d8 vpshufd $216, %ymm3, %ymm12 | |
2d6f: c5 fd 70 d2 8d vpshufd $141, %ymm2, %ymm2 | |
2d74: c5 fd 70 db 8d vpshufd $141, %ymm3, %ymm3 | |
2d79: c5 fd 6f e3 vmovdqa %ymm3, %ymm4 | |
2d7d: c5 dd 72 e4 10 vpsrad $16, %ymm4, %ymm4 | |
2d82: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
2d86: c4 c1 7d 62 c3 vpunpckldq %ymm11, %ymm0, %ymm0 | |
2d8b: c4 c1 75 62 cc vpunpckldq %ymm12, %ymm1, %ymm1 | |
2d90: 41 83 e8 02 subl $2, %r8d | |
2d94: 0f 8f ff fe ff ff jg -257 <_dav1d_prep_8tap_16bpc_avx2.hv_w48l> | |
2d9a: 5b popq %rbx | |
2d9b: c5 f8 77 vzeroupper | |
2d9e: c3 retq | |
0000000000002d9f _dav1d_prep_8tap_16bpc_avx2.hv_w8: | |
2d9f: 48 89 d9 movq %rbx, %rcx | |
2da2: 41 83 f8 04 cmpl $4, %r8d | |
2da6: 0f 8f ac 02 00 00 jg 684 <_dav1d_prep_8tap_16bpc_avx2.hv_w88> | |
2dac: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx | |
2db0: c4 82 79 59 ac ca 2f f4 ff ff vpbroadcastq -3025(%r10,%r9,8), %xmm5 | |
2dba: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5 | |
2dbf: 48 83 ee 06 subq $6, %rsi | |
2dc3: 48 29 d6 subq %rdx, %rsi | |
2dc6: 45 89 c1 movl %r8d, %r9d | |
2dc9: 48 89 f8 movq %rdi, %rax | |
2dcc: 49 89 f2 movq %rsi, %r10 | |
0000000000002dcf _dav1d_prep_8tap_16bpc_avx2.hv_w8l: | |
2dcf: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
2dd3: c5 fa 6f 56 02 vmovdqu 2(%rsi), %xmm2 | |
2dd8: c4 e3 7d 38 46 08 01 vinserti128 $1, 8(%rsi), %ymm0, %ymm0 | |
2ddf: c4 e3 6d 38 56 0a 01 vinserti128 $1, 10(%rsi), %ymm2, %ymm2 | |
2de6: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0 | |
2dea: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2dee: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0 | |
2df3: c5 fa 6f 4e 04 vmovdqu 4(%rsi), %xmm1 | |
2df8: c5 fa 6f 56 06 vmovdqu 6(%rsi), %xmm2 | |
2dfd: c4 e3 75 38 4e 0c 01 vinserti128 $1, 12(%rsi), %ymm1, %ymm1 | |
2e04: c4 e3 6d 38 56 0e 01 vinserti128 $1, 14(%rsi), %ymm2, %ymm2 | |
2e0b: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2e0f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2e13: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
2e18: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0 | |
2e1d: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0 | |
2e21: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0 | |
2e26: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
2e2a: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1 | |
2e2f: c5 fa 6f 5c 16 02 vmovdqu 2(%rsi,%rdx), %xmm3 | |
2e35: c4 e3 75 38 4c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm1, %ymm1 | |
2e3d: c4 e3 65 38 5c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm3, %ymm3 | |
2e45: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1 | |
2e49: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2e4d: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1 | |
2e52: c5 fa 6f 54 16 04 vmovdqu 4(%rsi,%rdx), %xmm2 | |
2e58: c5 fa 6f 5c 16 06 vmovdqu 6(%rsi,%rdx), %xmm3 | |
2e5e: c4 e3 6d 38 54 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm2, %ymm2 | |
2e66: c4 e3 65 38 5c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm3, %ymm3 | |
2e6e: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2e72: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2e76: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2 | |
2e7b: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1 | |
2e80: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1 | |
2e84: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1 | |
2e89: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
2e8d: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2 | |
2e92: c5 fa 6f 64 56 02 vmovdqu 2(%rsi,%rdx,2), %xmm4 | |
2e98: c4 e3 6d 38 54 56 08 01 vinserti128 $1, 8(%rsi,%rdx,2), %ymm2, %ymm2 | |
2ea0: c4 e3 5d 38 64 56 0a 01 vinserti128 $1, 10(%rsi,%rdx,2), %ymm4, %ymm4 | |
2ea8: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2 | |
2eac: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2eb0: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2 | |
2eb5: c5 fa 6f 5c 56 04 vmovdqu 4(%rsi,%rdx,2), %xmm3 | |
2ebb: c5 fa 6f 64 56 06 vmovdqu 6(%rsi,%rdx,2), %xmm4 | |
2ec1: c4 e3 65 38 5c 56 0c 01 vinserti128 $1, 12(%rsi,%rdx,2), %ymm3, %ymm3 | |
2ec9: c4 e3 5d 38 64 56 0e 01 vinserti128 $1, 14(%rsi,%rdx,2), %ymm4, %ymm4 | |
2ed1: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2ed5: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2ed9: c4 e2 65 02 dc vphaddd %ymm4, %ymm3, %ymm3 | |
2ede: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2 | |
2ee3: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2 | |
2ee7: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2 | |
2eec: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
2ef0: 48 01 de addq %rbx, %rsi | |
2ef3: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
2ef7: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
0000000000002efb _dav1d_prep_8tap_16bpc_avx2.hv_w8c: | |
2efb: c5 fa 6f 1e vmovdqu (%rsi), %xmm3 | |
2eff: c5 7a 6f 5e 02 vmovdqu 2(%rsi), %xmm11 | |
2f04: c4 e3 65 38 5e 08 01 vinserti128 $1, 8(%rsi), %ymm3, %ymm3 | |
2f0b: c4 63 25 38 5e 0a 01 vinserti128 $1, 10(%rsi), %ymm11, %ymm11 | |
2f12: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3 | |
2f16: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
2f1a: c4 c2 65 02 db vphaddd %ymm11, %ymm3, %ymm3 | |
2f1f: c5 7a 6f 46 04 vmovdqu 4(%rsi), %xmm8 | |
2f24: c5 7a 6f 5e 06 vmovdqu 6(%rsi), %xmm11 | |
2f29: c4 63 3d 38 46 0c 01 vinserti128 $1, 12(%rsi), %ymm8, %ymm8 | |
2f30: c4 63 25 38 5e 0e 01 vinserti128 $1, 14(%rsi), %ymm11, %ymm11 | |
2f37: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8 | |
2f3b: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
2f3f: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8 | |
2f44: c4 c2 65 02 d8 vphaddd %ymm8, %ymm3, %ymm3 | |
2f49: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3 | |
2f4d: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3 | |
2f52: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
2f56: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4 | |
2f5b: c5 7a 6f 5c 16 02 vmovdqu 2(%rsi,%rdx), %xmm11 | |
2f61: c4 e3 5d 38 64 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm4, %ymm4 | |
2f69: c4 63 25 38 5c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm11, %ymm11 | |
2f71: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4 | |
2f75: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
2f79: c4 c2 5d 02 e3 vphaddd %ymm11, %ymm4, %ymm4 | |
2f7e: c5 7a 6f 44 16 04 vmovdqu 4(%rsi,%rdx), %xmm8 | |
2f84: c5 7a 6f 5c 16 06 vmovdqu 6(%rsi,%rdx), %xmm11 | |
2f8a: c4 63 3d 38 44 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm8, %ymm8 | |
2f92: c4 63 25 38 5c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm11, %ymm11 | |
2f9a: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8 | |
2f9e: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11 | |
2fa2: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8 | |
2fa7: c4 c2 5d 02 e0 vphaddd %ymm8, %ymm4, %ymm4 | |
2fac: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4 | |
2fb0: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4 | |
2fb5: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
2fb9: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
2fbd: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
2fc1: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
2fc5: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0 | |
2fc9: c5 2d f5 c2 vpmaddwd %ymm2, %ymm10, %ymm8 | |
2fcd: c5 3d fe c0 vpaddd %ymm0, %ymm8, %ymm8 | |
2fd1: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
2fd5: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1 | |
2fd9: c5 2d f5 db vpmaddwd %ymm3, %ymm10, %ymm11 | |
2fdd: c5 25 fe d9 vpaddd %ymm1, %ymm11, %ymm11 | |
2fe1: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
2fe5: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
2fe9: c4 41 3d fe c6 vpaddd %ymm14, %ymm8, %ymm8 | |
2fee: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11 | |
2ff3: c4 c1 3d 72 e0 06 vpsrad $6, %ymm8, %ymm8 | |
2ff9: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11 | |
2fff: c4 41 3d 6b c0 vpackssdw %ymm8, %ymm8, %ymm8 | |
3004: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11 | |
3009: c4 63 7d 39 c3 01 vextracti128 $1, %ymm8, %xmm3 | |
300f: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4 | |
3015: c5 79 d6 07 vmovq %xmm8, (%rdi) | |
3019: c5 f9 d6 5f 08 vmovq %xmm3, 8(%rdi) | |
301e: c4 21 79 d6 1c 1f vmovq %xmm11, (%rdi,%r11) | |
3024: c4 a1 79 d6 64 1f 08 vmovq %xmm4, 8(%rdi,%r11) | |
302b: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi | |
302f: 41 83 e8 02 subl $2, %r8d | |
3033: 0f 8f c2 fe ff ff jg -318 <_dav1d_prep_8tap_16bpc_avx2.hv_w8c> | |
3039: 48 83 c0 10 addq $16, %rax | |
303d: 49 83 c2 10 addq $16, %r10 | |
3041: 45 89 c8 movl %r9d, %r8d | |
3044: 48 89 c7 movq %rax, %rdi | |
3047: 4c 89 d6 movq %r10, %rsi | |
304a: 83 e9 08 subl $8, %ecx | |
304d: 0f 8f 7c fd ff ff jg -644 <_dav1d_prep_8tap_16bpc_avx2.hv_w8l> | |
3053: 5b popq %rbx | |
3054: c5 f8 77 vzeroupper | |
3057: c3 retq | |
0000000000003058 _dav1d_prep_8tap_16bpc_avx2.hv_w88: | |
3058: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx | |
305c: c4 82 79 59 bc ca db f6 ff ff vpbroadcastq -2341(%r10,%r9,8), %xmm7 | |
3066: c4 e2 7d 20 ff vpmovsxbw %xmm7, %ymm7 | |
306b: 48 83 ee 06 subq $6, %rsi | |
306f: 48 29 de subq %rbx, %rsi | |
3072: 45 89 c1 movl %r8d, %r9d | |
3075: 48 89 f8 movq %rdi, %rax | |
3078: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d | |
307f: 41 83 fa 0c cmpl $12, %r10d | |
3083: 0f 84 7d 04 00 00 je 1149 <_dav1d_prep_8tap_16bpc_avx2.hv_w88_12bit> | |
3089: 49 89 f2 movq %rsi, %r10 | |
000000000000308c _dav1d_prep_8tap_16bpc_avx2.hv_w88l_10bit: | |
308c: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
3095: c5 fa 6f 06 vmovdqu (%rsi), %xmm0 | |
3099: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13 | |
309e: c4 e3 7d 38 46 08 01 vinserti128 $1, 8(%rsi), %ymm0, %ymm0 | |
30a5: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13 | |
30ac: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0 | |
30b0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
30b4: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0 | |
30b9: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12 | |
30be: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13 | |
30c3: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12 | |
30ca: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13 | |
30d1: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
30d5: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
30d9: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
30de: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0 | |
30e3: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0 | |
30e7: c5 fd 72 e0 02 vpsrad $2, %ymm0, %ymm0 | |
30ec: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0 | |
30f0: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1 | |
30f5: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13 | |
30fb: c4 e3 75 38 4c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm1, %ymm1 | |
3103: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13 | |
310b: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1 | |
310f: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
3113: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1 | |
3118: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12 | |
311e: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13 | |
3124: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12 | |
312c: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13 | |
3134: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
3138: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
313c: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
3141: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1 | |
3146: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1 | |
314a: c5 f5 72 e1 02 vpsrad $2, %ymm1, %ymm1 | |
314f: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1 | |
3153: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2 | |
3158: c5 7a 6f 6c 56 02 vmovdqu 2(%rsi,%rdx,2), %xmm13 | |
315e: c4 e3 6d 38 54 56 08 01 vinserti128 $1, 8(%rsi,%rdx,2), %ymm2, %ymm2 | |
3166: c4 63 15 38 6c 56 0a 01 vinserti128 $1, 10(%rsi,%rdx,2), %ymm13, %ymm13 | |
316e: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2 | |
3172: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
3176: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2 | |
317b: c5 7a 6f 64 56 04 vmovdqu 4(%rsi,%rdx,2), %xmm12 | |
3181: c5 7a 6f 6c 56 06 vmovdqu 6(%rsi,%rdx,2), %xmm13 | |
3187: c4 63 1d 38 64 56 0c 01 vinserti128 $1, 12(%rsi,%rdx,2), %ymm12, %ymm12 | |
318f: c4 63 15 38 6c 56 0e 01 vinserti128 $1, 14(%rsi,%rdx,2), %ymm13, %ymm13 | |
3197: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
319b: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
319f: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
31a4: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2 | |
31a9: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2 | |
31ad: c5 ed 72 e2 02 vpsrad $2, %ymm2, %ymm2 | |
31b2: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2 | |
31b6: 48 01 de addq %rbx, %rsi | |
31b9: c5 fa 6f 1e vmovdqu (%rsi), %xmm3 | |
31bd: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13 | |
31c2: c4 e3 65 38 5e 08 01 vinserti128 $1, 8(%rsi), %ymm3, %ymm3 | |
31c9: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13 | |
31d0: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3 | |
31d4: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
31d8: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3 | |
31dd: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12 | |
31e2: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13 | |
31e7: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12 | |
31ee: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13 | |
31f5: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
31f9: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
31fd: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
3202: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3 | |
3207: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3 | |
320b: c5 e5 72 e3 02 vpsrad $2, %ymm3, %ymm3 | |
3210: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3 | |
3214: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4 | |
3219: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13 | |
321f: c4 e3 5d 38 64 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm4, %ymm4 | |
3227: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13 | |
322f: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4 | |
3233: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
3237: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4 | |
323c: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12 | |
3242: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13 | |
3248: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12 | |
3250: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13 | |
3258: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
325c: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
3260: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
3265: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4 | |
326a: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4 | |
326e: c5 dd 72 e4 02 vpsrad $2, %ymm4, %ymm4 | |
3273: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4 | |
3277: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
327b: c5 fa 6f 2e vmovdqu (%rsi), %xmm5 | |
327f: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13 | |
3284: c4 e3 55 38 6e 08 01 vinserti128 $1, 8(%rsi), %ymm5, %ymm5 | |
328b: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13 | |
3292: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5 | |
3296: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
329a: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5 | |
329f: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12 | |
32a4: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13 | |
32a9: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12 | |
32b0: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13 | |
32b7: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
32bb: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
32bf: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
32c4: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5 | |
32c9: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5 | |
32cd: c5 d5 72 e5 02 vpsrad $2, %ymm5, %ymm5 | |
32d2: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5 | |
32d6: c5 fa 6f 34 16 vmovdqu (%rsi,%rdx), %xmm6 | |
32db: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13 | |
32e1: c4 e3 4d 38 74 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm6, %ymm6 | |
32e9: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13 | |
32f1: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6 | |
32f5: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
32f9: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6 | |
32fe: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12 | |
3304: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13 | |
330a: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12 | |
3312: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13 | |
331a: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
331e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
3322: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12 | |
3327: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6 | |
332c: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6 | |
3330: c5 cd 72 e6 02 vpsrad $2, %ymm6, %ymm6 | |
3335: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6 | |
3339: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
333d: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0 | |
3341: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1 | |
3345: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2 | |
3349: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3 | |
334d: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4 | |
3351: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5 | |
0000000000003355 _dav1d_prep_8tap_16bpc_avx2.hv_w88c_10bit: | |
3355: c5 7a 6f 26 vmovdqu (%rsi), %xmm12 | |
3359: c5 7a 6f 7e 02 vmovdqu 2(%rsi), %xmm15 | |
335e: c4 63 1d 38 66 08 01 vinserti128 $1, 8(%rsi), %ymm12, %ymm12 | |
3365: c4 63 05 38 7e 0a 01 vinserti128 $1, 10(%rsi), %ymm15, %ymm15 | |
336c: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12 | |
3370: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
3374: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12 | |
3379: c5 7a 6f 76 04 vmovdqu 4(%rsi), %xmm14 | |
337e: c5 7a 6f 7e 06 vmovdqu 6(%rsi), %xmm15 | |
3383: c4 63 0d 38 76 0c 01 vinserti128 $1, 12(%rsi), %ymm14, %ymm14 | |
338a: c4 63 05 38 7e 0e 01 vinserti128 $1, 14(%rsi), %ymm15, %ymm15 | |
3391: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
3395: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
3399: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
339e: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
33a7: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12 | |
33ac: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12 | |
33b1: c4 c1 1d 72 e4 02 vpsrad $2, %ymm12, %ymm12 | |
33b7: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12 | |
33bc: c5 7a 6f 2c 16 vmovdqu (%rsi,%rdx), %xmm13 | |
33c1: c5 7a 6f 7c 16 02 vmovdqu 2(%rsi,%rdx), %xmm15 | |
33c7: c4 63 15 38 6c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm13, %ymm13 | |
33cf: c4 63 05 38 7c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm15, %ymm15 | |
33d7: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13 | |
33db: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
33df: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13 | |
33e4: c5 7a 6f 74 16 04 vmovdqu 4(%rsi,%rdx), %xmm14 | |
33ea: c5 7a 6f 7c 16 06 vmovdqu 6(%rsi,%rdx), %xmm15 | |
33f0: c4 63 0d 38 74 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm14, %ymm14 | |
33f8: c4 63 05 38 7c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm15, %ymm15 | |
3400: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14 | |
3404: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15 | |
3408: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14 | |
340d: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15 | |
3416: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13 | |
341b: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13 | |
3420: c4 c1 15 72 e5 02 vpsrad $2, %ymm13, %ymm13 | |
3426: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13 | |
342b: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi | |
342f: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6 | |
3434: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12 | |
3439: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0 | |
343d: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14 | |
3441: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14 | |
3445: c5 fd 6f c2 vmovdqa %ymm2, %ymm0 | |
3449: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1 | |
344d: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15 | |
3451: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15 | |
3455: c5 fd 6f cb vmovdqa %ymm3, %ymm1 | |
3459: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2 | |
345d: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3 | |
3461: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14 | |
3465: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15 | |
3469: c5 fd 6f d4 vmovdqa %ymm4, %ymm2 | |
346d: c5 fd 6f dd vmovdqa %ymm5, %ymm3 | |
3471: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4 | |
3475: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5 | |
347a: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14 | |
347e: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15 | |
3482: c5 fd 6f e6 vmovdqa %ymm6, %ymm4 | |
3486: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5 | |
348b: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6 | |
3494: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14 | |
3498: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15 | |
349c: c4 c1 0d 72 e6 06 vpsrad $6, %ymm14, %ymm14 | |
34a2: c4 c1 05 72 e7 06 vpsrad $6, %ymm15, %ymm15 | |
34a8: c4 41 0d 6b f6 vpackssdw %ymm14, %ymm14, %ymm14 | |
34ad: c4 41 05 6b ff vpackssdw %ymm15, %ymm15, %ymm15 | |
34b2: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6 | |
34b7: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12 | |
34bd: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xm |