Skip to content

Instantly share code, notes, and snippets.

@lu-zero
Created December 27, 2020 12:48
Show Gist options
  • Save lu-zero/61b856697f5d8209e2908783c4c125b5 to your computer and use it in GitHub Desktop.
Save lu-zero/61b856697f5d8209e2908783c4c125b5 to your computer and use it in GitHub Desktop.
./src/libdav1d.5.dylib.p/mc16_avx2.obj: file format Mach-O 64-bit x86-64
Disassembly of section __TEXT,__text:
0000000000000000 _dav1d_put_8tap_regular_16bpc_avx2:
0: 41 ba 2d 00 00 00 movl $45, %r10d
6: 41 bb 2d 00 00 00 movl $45, %r11d
c: e9 8f 00 00 00 jmp 143 <_dav1d_put_8tap_16bpc_avx2>
11: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax)
19: 0f 1f 80 00 00 00 00 nopl (%rax)
0000000000000020 _dav1d_put_8tap_regular_smooth_16bpc_avx2:
20: 41 ba 2d 00 00 00 movl $45, %r10d
26: 41 bb bc 07 00 00 movl $1980, %r11d
2c: eb 72 jmp 114 <_dav1d_put_8tap_16bpc_avx2>
2e: 66 90 nop
0000000000000030 _dav1d_put_8tap_regular_sharp_16bpc_avx2:
30: 41 ba 2d 00 00 00 movl $45, %r10d
36: 41 bb 2d 0f 00 00 movl $3885, %r11d
3c: eb 62 jmp 98 <_dav1d_put_8tap_16bpc_avx2>
3e: 66 90 nop
0000000000000040 _dav1d_put_8tap_smooth_16bpc_avx2:
40: 41 ba bc 07 00 00 movl $1980, %r10d
46: 41 bb bc 07 00 00 movl $1980, %r11d
4c: eb 52 jmp 82 <_dav1d_put_8tap_16bpc_avx2>
4e: 66 90 nop
0000000000000050 _dav1d_put_8tap_smooth_regular_16bpc_avx2:
50: 41 ba bc 07 00 00 movl $1980, %r10d
56: 41 bb 2d 00 00 00 movl $45, %r11d
5c: eb 42 jmp 66 <_dav1d_put_8tap_16bpc_avx2>
5e: 66 90 nop
0000000000000060 _dav1d_put_8tap_smooth_sharp_16bpc_avx2:
60: 41 ba bc 07 00 00 movl $1980, %r10d
66: 41 bb 2d 0f 00 00 movl $3885, %r11d
6c: eb 32 jmp 50 <_dav1d_put_8tap_16bpc_avx2>
6e: 66 90 nop
0000000000000070 _dav1d_put_8tap_sharp_16bpc_avx2:
70: 41 ba 2d 0f 00 00 movl $3885, %r10d
76: 41 bb 2d 0f 00 00 movl $3885, %r11d
7c: eb 22 jmp 34 <_dav1d_put_8tap_16bpc_avx2>
7e: 66 90 nop
0000000000000080 _dav1d_put_8tap_sharp_regular_16bpc_avx2:
80: 41 ba 2d 0f 00 00 movl $3885, %r10d
86: 41 bb 2d 00 00 00 movl $45, %r11d
8c: eb 12 jmp 18 <_dav1d_put_8tap_16bpc_avx2>
8e: 66 90 nop
0000000000000090 _dav1d_put_8tap_sharp_smooth_16bpc_avx2:
90: 41 ba 2d 0f 00 00 movl $3885, %r10d
96: 41 bb bc 07 00 00 movl $1980, %r11d
9c: eb 02 jmp 2 <_dav1d_put_8tap_16bpc_avx2>
9e: 66 90 nop
00000000000000a0 _dav1d_put_8tap_16bpc_avx2:
a0: 53 pushq %rbx
a1: 69 44 24 10 81 40 00 00 imull $16513, 16(%rsp), %eax
a9: 44 01 d0 addl %r10d, %eax
ac: 44 69 54 24 18 81 40 00 00 imull $16513, 24(%rsp), %r10d
b5: 45 01 da addl %r11d, %r10d
b8: 4d 63 c0 movslq %r8d, %r8
bb: c4 e2 7d 79 7c 24 20 vpbroadcastw 32(%rsp), %ymm7
c2: a9 00 c0 1f 00 testl $2080768, %eax
c7: 0f 85 e6 01 00 00 jne 486 <_dav1d_put_8tap_16bpc_avx2.put_8tap_h_16bpc>
cd: 41 f7 c2 00 c0 1f 00 testl $2080768, %r10d
d4: 0f 85 45 04 00 00 jne 1093 <_dav1d_put_8tap_16bpc_avx2.put_8tap_v_16bpc>
00000000000000da _dav1d_put_8tap_16bpc_avx2.put_16bpc:
da: 4c 8d 1d b6 01 00 00 leaq 438(%rip), %r11
e1: f3 45 0f bc c0 tzcntl %r8d, %r8d
e6: 41 83 e8 01 subl $1, %r8d
ea: 4f 63 04 83 movslq (%r11,%r8,4), %r8
ee: 4d 01 d8 addq %r11, %r8
f1: 41 ff e0 jmpq *%r8
00000000000000f4 _dav1d_put_8tap_16bpc_avx2.w2:
f4: c5 f9 6e 02 vmovd (%rdx), %xmm0
f8: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1
fd: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
101: c5 f9 7e 07 vmovd %xmm0, (%rdi)
105: c5 f9 7e 0c 37 vmovd %xmm1, (%rdi,%rsi)
10a: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
10e: 41 83 e9 02 subl $2, %r9d
112: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w2>
114: 5b popq %rbx
115: c3 retq
0000000000000116 _dav1d_put_8tap_16bpc_avx2.w4:
116: c5 fa 7e 02 vmovq (%rdx), %xmm0
11a: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1
11f: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
123: c5 f9 d6 07 vmovq %xmm0, (%rdi)
127: c5 f9 d6 0c 37 vmovq %xmm1, (%rdi,%rsi)
12c: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
130: 41 83 e9 02 subl $2, %r9d
134: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w4>
136: 5b popq %rbx
137: c3 retq
0000000000000138 _dav1d_put_8tap_16bpc_avx2.w8:
138: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
13c: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
141: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
145: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
149: c5 f9 7f 0c 37 vmovdqa %xmm1, (%rdi,%rsi)
14e: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
152: 41 83 e9 02 subl $2, %r9d
156: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w8>
158: 5b popq %rbx
159: c3 retq
000000000000015a _dav1d_put_8tap_16bpc_avx2.w16:
15a: c5 fe 6f 02 vmovdqu (%rdx), %ymm0
15e: c5 fe 6f 0c 0a vmovdqu (%rdx,%rcx), %ymm1
163: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
167: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
16b: c5 fd 7f 0c 37 vmovdqa %ymm1, (%rdi,%rsi)
170: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
174: 41 83 e9 02 subl $2, %r9d
178: 7f e0 jg -32 <_dav1d_put_8tap_16bpc_avx2.w16>
17a: 5b popq %rbx
17b: c5 f8 77 vzeroupper
17e: c3 retq
000000000000017f _dav1d_put_8tap_16bpc_avx2.w32:
17f: c5 fe 6f 02 vmovdqu (%rdx), %ymm0
183: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1
188: c5 fe 6f 14 0a vmovdqu (%rdx,%rcx), %ymm2
18d: c5 fe 6f 5c 0a 20 vmovdqu 32(%rdx,%rcx), %ymm3
193: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
197: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
19b: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
1a0: c5 fd 7f 14 37 vmovdqa %ymm2, (%rdi,%rsi)
1a5: c5 fd 7f 5c 37 20 vmovdqa %ymm3, 32(%rdi,%rsi)
1ab: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
1af: 41 83 e9 02 subl $2, %r9d
1b3: 7f ca jg -54 <_dav1d_put_8tap_16bpc_avx2.w32>
1b5: 5b popq %rbx
1b6: c5 f8 77 vzeroupper
1b9: c3 retq
00000000000001ba _dav1d_put_8tap_16bpc_avx2.w64:
1ba: c5 fe 6f 02 vmovdqu (%rdx), %ymm0
1be: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1
1c3: c5 fe 6f 52 40 vmovdqu 64(%rdx), %ymm2
1c8: c5 fe 6f 5a 60 vmovdqu 96(%rdx), %ymm3
1cd: c5 fe 6f 24 0a vmovdqu (%rdx,%rcx), %ymm4
1d2: c5 fe 6f 6c 0a 20 vmovdqu 32(%rdx,%rcx), %ymm5
1d8: c5 fe 6f 74 0a 40 vmovdqu 64(%rdx,%rcx), %ymm6
1de: c5 fe 6f 7c 0a 60 vmovdqu 96(%rdx,%rcx), %ymm7
1e4: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1e8: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
1ec: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
1f1: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi)
1f6: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi)
1fb: c5 fd 7f 24 37 vmovdqa %ymm4, (%rdi,%rsi)
200: c5 fd 7f 6c 37 20 vmovdqa %ymm5, 32(%rdi,%rsi)
206: c5 fd 7f 74 37 40 vmovdqa %ymm6, 64(%rdi,%rsi)
20c: c5 fd 7f 7c 37 60 vmovdqa %ymm7, 96(%rdi,%rsi)
212: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
216: 41 83 e9 02 subl $2, %r9d
21a: 7f 9e jg -98 <_dav1d_put_8tap_16bpc_avx2.w64>
21c: 5b popq %rbx
21d: c5 f8 77 vzeroupper
220: c3 retq
0000000000000221 _dav1d_put_8tap_16bpc_avx2.w128:
221: c5 fe 6f 02 vmovdqu (%rdx), %ymm0
225: c5 fe 6f 4a 20 vmovdqu 32(%rdx), %ymm1
22a: c5 fe 6f 52 40 vmovdqu 64(%rdx), %ymm2
22f: c5 fe 6f 5a 60 vmovdqu 96(%rdx), %ymm3
234: c5 fe 6f a2 80 00 00 00 vmovdqu 128(%rdx), %ymm4
23c: c5 fe 6f aa a0 00 00 00 vmovdqu 160(%rdx), %ymm5
244: c5 fe 6f b2 c0 00 00 00 vmovdqu 192(%rdx), %ymm6
24c: c5 fe 6f ba e0 00 00 00 vmovdqu 224(%rdx), %ymm7
254: 48 01 ca addq %rcx, %rdx
257: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
25b: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
260: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi)
265: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi)
26a: c5 fd 7f a7 80 00 00 00 vmovdqa %ymm4, 128(%rdi)
272: c5 fd 7f af a0 00 00 00 vmovdqa %ymm5, 160(%rdi)
27a: c5 fd 7f b7 c0 00 00 00 vmovdqa %ymm6, 192(%rdi)
282: c5 fd 7f bf e0 00 00 00 vmovdqa %ymm7, 224(%rdi)
28a: 48 01 f7 addq %rsi, %rdi
28d: 41 ff c9 decl %r9d
290: 7f 8f jg -113 <_dav1d_put_8tap_16bpc_avx2.w128>
292: 5b popq %rbx
293: c5 f8 77 vzeroupper
296: c3 retq
0000000000000297 _dav1d_put_8tap_16bpc_avx2.jmp_tbl:
297: 5d popq %rbp
298: fe ff <unknown>
29a: ff 7f fe <unknown>
29d: ff ff <unknown>
29f: a1 fe ff ff c3 fe ff ff e8 movabsl -1657324668173942786, %eax
2a8: fe ff <unknown>
2aa: ff 23 jmpq *(%rbx)
2ac: ff ff <unknown>
2ae: ff 8a ff ff ff 41 decl 1107296255(%rdx)
00000000000002b3 _dav1d_put_8tap_16bpc_avx2.put_8tap_h_16bpc:
2b3: 41 83 f8 04 cmpl $4, %r8d
2b7: 7e 03 jle 3 <_dav1d_put_8tap_16bpc_avx2.h_use4tap>
2b9: c1 e8 07 shrl $7, %eax
00000000000002bc _dav1d_put_8tap_16bpc_avx2.h_use4tap:
2bc: 83 e0 7f andl $127, %eax
2bf: 41 f7 c2 00 c0 1f 00 testl $2080768, %r10d
2c6: 0f 85 5f 08 00 00 jne 2143 <_dav1d_put_8tap_16bpc_avx2.put_8tap_hv_16bpc>
2cc: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d
2d3: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
2dc: 41 83 fb 0c cmpl $12, %r11d
2e0: 75 09 jne 9 <_dav1d_put_8tap_16bpc_avx2.h_bits10>
2e2: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
00000000000002eb _dav1d_put_8tap_16bpc_avx2.h_bits10:
2eb: 4f 8d 14 00 leaq (%r8,%r8), %r10
2ef: 4c 8d 1d 0d 02 00 00 leaq 525(%rip), %r11
2f6: f3 45 0f bc c0 tzcntl %r8d, %r8d
2fb: 41 83 e8 01 subl $1, %r8d
2ff: 4f 63 04 83 movslq (%r11,%r8,4), %r8
303: 4d 01 d8 addq %r11, %r8
306: 41 ff e0 jmpq *%r8
0000000000000309 _dav1d_put_8tap_16bpc_avx2.h_w2:
309: 48 83 ea 02 subq $2, %rdx
30d: c5 f9 6f 25 00 00 00 00 vmovdqa (%rip), %xmm4
315: c4 c2 79 58 ac c3 16 fe ff ff vpbroadcastd -490(%r11,%rax,8), %xmm5
31f: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5
0000000000000324 _dav1d_put_8tap_16bpc_avx2.h_w2l:
324: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
328: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
32d: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
331: c4 e2 79 00 c4 vpshufb %xmm4, %xmm0, %xmm0
336: c4 e2 71 00 cc vpshufb %xmm4, %xmm1, %xmm1
33b: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0
33f: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1
343: c4 e2 79 02 c1 vphaddd %xmm1, %xmm0, %xmm0
348: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0
34c: c5 f9 72 e0 06 vpsrad $6, %xmm0, %xmm0
351: c4 e2 79 2b c0 vpackusdw %xmm0, %xmm0, %xmm0
356: c4 e2 79 3a c7 vpminuw %xmm7, %xmm0, %xmm0
35b: c5 f9 7e 07 vmovd %xmm0, (%rdi)
35f: c4 e3 79 16 04 37 01 vpextrd $1, %xmm0, (%rdi,%rsi)
366: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
36a: 41 83 e9 02 subl $2, %r9d
36e: 7f b4 jg -76 <_dav1d_put_8tap_16bpc_avx2.h_w2l>
370: 5b popq %rbx
371: c3 retq
0000000000000372 _dav1d_put_8tap_16bpc_avx2.h_w4:
372: 48 83 ea 02 subq $2, %rdx
376: c5 fd 6f 25 00 00 00 00 vmovdqa (%rip), %ymm4
37e: c4 c2 79 58 ac c3 7f fe ff ff vpbroadcastd -385(%r11,%rax,8), %xmm5
388: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
000000000000038d _dav1d_put_8tap_16bpc_avx2.h_w4l:
38d: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0
392: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1
398: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
39c: c4 e2 7d 00 c4 vpshufb %ymm4, %ymm0, %ymm0
3a1: c4 e2 75 00 cc vpshufb %ymm4, %ymm1, %ymm1
3a6: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
3aa: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
3ae: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
3b3: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
3b7: c5 fd 72 e0 06 vpsrad $6, %ymm0, %ymm0
3bc: c4 e2 7d 2b c0 vpackusdw %ymm0, %ymm0, %ymm0
3c1: c4 e2 7d 3a c7 vpminuw %ymm7, %ymm0, %ymm0
3c6: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3cc: c5 f9 7e 07 vmovd %xmm0, (%rdi)
3d0: c5 f9 7e 4f 04 vmovd %xmm1, 4(%rdi)
3d5: c4 e3 79 16 04 37 01 vpextrd $1, %xmm0, (%rdi,%rsi)
3dc: c4 e3 79 16 4c 37 04 01 vpextrd $1, %xmm1, 4(%rdi,%rsi)
3e4: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
3e8: 41 83 e9 02 subl $2, %r9d
3ec: 7f 9f jg -97 <_dav1d_put_8tap_16bpc_avx2.h_w4l>
3ee: 5b popq %rbx
3ef: c5 f8 77 vzeroupper
3f2: c3 retq
00000000000003f3 _dav1d_put_8tap_16bpc_avx2.h_w8:
3f3: 48 83 ea 06 subq $6, %rdx
3f7: c4 c2 79 59 ac c3 f6 fe ff ff vpbroadcastq -266(%r11,%rax,8), %xmm5
401: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
0000000000000406 _dav1d_put_8tap_16bpc_avx2.h_w8l:
406: 45 89 d0 movl %r10d, %r8d
0000000000000409 _dav1d_put_8tap_16bpc_avx2.h_w8c:
409: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
40d: c5 fa 6f 52 02 vmovdqu 2(%rdx), %xmm2
412: c4 e3 7d 38 04 0a 01 vinserti128 $1, (%rdx,%rcx), %ymm0, %ymm0
419: c4 e3 6d 38 54 11 02 01 vinserti128 $1, 2(%rcx,%rdx), %ymm2, %ymm2
421: c5 fa 6f 4a 04 vmovdqu 4(%rdx), %xmm1
426: c5 fa 6f 5a 06 vmovdqu 6(%rdx), %xmm3
42b: c4 e3 75 38 4c 11 04 01 vinserti128 $1, 4(%rcx,%rdx), %ymm1, %ymm1
433: c4 e3 65 38 5c 11 06 01 vinserti128 $1, 6(%rcx,%rdx), %ymm3, %ymm3
43b: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
43f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
443: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
447: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
44b: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0
450: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
455: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
45a: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
45e: c5 fd 72 e0 06 vpsrad $6, %ymm0, %ymm0
463: c5 fa 6f 4a 08 vmovdqu 8(%rdx), %xmm1
468: c5 fa 6f 5a 0a vmovdqu 10(%rdx), %xmm3
46d: c4 e3 75 38 4c 11 08 01 vinserti128 $1, 8(%rcx,%rdx), %ymm1, %ymm1
475: c4 e3 65 38 5c 11 0a 01 vinserti128 $1, 10(%rcx,%rdx), %ymm3, %ymm3
47d: c5 fa 6f 52 0c vmovdqu 12(%rdx), %xmm2
482: c5 fa 6f 62 0e vmovdqu 14(%rdx), %xmm4
487: c4 e3 6d 38 54 11 0c 01 vinserti128 $1, 12(%rcx,%rdx), %ymm2, %ymm2
48f: c4 e3 5d 38 64 11 0e 01 vinserti128 $1, 14(%rcx,%rdx), %ymm4, %ymm4
497: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
49b: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
49f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
4a3: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
4a7: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
4ac: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2
4b1: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
4b6: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
4ba: c5 f5 72 e1 06 vpsrad $6, %ymm1, %ymm1
4bf: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
4c4: c4 e2 7d 3a c7 vpminuw %ymm7, %ymm0, %ymm0
4c9: 48 83 c2 10 addq $16, %rdx
4cd: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
4d1: c4 e3 7d 39 04 37 01 vextracti128 $1, %ymm0, (%rdi,%rsi)
4d8: 48 83 c7 10 addq $16, %rdi
4dc: 41 83 e8 10 subl $16, %r8d
4e0: 0f 8f 23 ff ff ff jg -221 <_dav1d_put_8tap_16bpc_avx2.h_w8c>
4e6: 4c 29 d2 subq %r10, %rdx
4e9: 4c 29 d7 subq %r10, %rdi
4ec: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
4f0: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
4f4: 41 83 e9 02 subl $2, %r9d
4f8: 0f 8f 08 ff ff ff jg -248 <_dav1d_put_8tap_16bpc_avx2.h_w8l>
4fe: 5b popq %rbx
4ff: c5 f8 77 vzeroupper
502: c3 retq
0000000000000503 _dav1d_put_8tap_16bpc_avx2.h_jmp_tbl:
503: 06 <unknown>
504: fe ff <unknown>
506: ff 6f fe ljmpl *-2(%rdi)
509: ff ff <unknown>
50b: f0 lock
50c: fe ff <unknown>
50e: ff f0 pushq %rax
510: fe ff <unknown>
512: ff f0 pushq %rax
514: fe ff <unknown>
516: ff f0 pushq %rax
518: fe ff <unknown>
51a: ff f0 pushq %rax
51c: fe ff <unknown>
51e: ff 41 83 incl -125(%rcx)
000000000000051f _dav1d_put_8tap_16bpc_avx2.put_8tap_v_16bpc:
51f: 41 83 f9 04 cmpl $4, %r9d
523: 7e 04 jle 4 <_dav1d_put_8tap_16bpc_avx2.v_use4tap>
525: 41 c1 ea 07 shrl $7, %r10d
0000000000000529 _dav1d_put_8tap_16bpc_avx2.v_use4tap:
529: 41 83 e2 7f andl $127, %r10d
52d: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
536: 4c 8d 1d d2 05 00 00 leaq 1490(%rip), %r11
53d: 4b 8d 04 00 leaq (%r8,%r8), %rax
541: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
545: 4f 8d 94 d3 36 fa ff ff leaq -1482(%r11,%r10,8), %r10
54d: c4 42 7d 79 02 vpbroadcastw (%r10), %ymm8
552: c4 42 7d 79 4a 02 vpbroadcastw 2(%r10), %ymm9
558: c4 42 7d 79 52 04 vpbroadcastw 4(%r10), %ymm10
55e: c4 42 7d 79 5a 06 vpbroadcastw 6(%r10), %ymm11
564: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8
569: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
56e: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
573: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11
578: f3 45 0f bc c0 tzcntl %r8d, %r8d
57d: 41 83 e8 01 subl $1, %r8d
581: 4f 63 04 83 movslq (%r11,%r8,4), %r8
585: 4d 01 d8 addq %r11, %r8
588: 41 ff e0 jmpq *%r8
000000000000058b _dav1d_put_8tap_16bpc_avx2.v_w2:
58b: 41 83 f9 04 cmpl $4, %r9d
58f: 0f 8f 91 00 00 00 jg 145 <_dav1d_put_8tap_16bpc_avx2.v_w28>
595: 48 29 ca subq %rcx, %rdx
598: c5 f9 6e 02 vmovd (%rdx), %xmm0
59c: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1
5a1: c5 f9 6e 14 4a vmovd (%rdx,%rcx,2), %xmm2
5a6: 48 01 da addq %rbx, %rdx
5a9: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
5ad: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
00000000000005b1 _dav1d_put_8tap_16bpc_avx2.v_w2l:
5b1: c5 f9 6e 1a vmovd (%rdx), %xmm3
5b5: c5 f9 6e 24 0a vmovd (%rdx,%rcx), %xmm4
5ba: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
5be: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
5c2: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
5c6: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0
5ca: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5
5ce: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
5d2: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
5d6: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1
5da: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8
5de: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8
5e2: c5 f9 6f cb vmovdqa %xmm3, %xmm1
5e6: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
5ea: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
5ee: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8
5f2: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5
5f7: c4 c1 39 72 e0 06 vpsrad $6, %xmm8, %xmm8
5fd: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5
602: c4 42 39 2b c0 vpackusdw %xmm8, %xmm8, %xmm8
607: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5
60c: c4 62 39 3a c7 vpminuw %xmm7, %xmm8, %xmm8
611: c5 f9 7e 2f vmovd %xmm5, (%rdi)
615: c5 79 7e 04 37 vmovd %xmm8, (%rdi,%rsi)
61a: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
61e: 41 83 e9 02 subl $2, %r9d
622: 7f 8d jg -115 <_dav1d_put_8tap_16bpc_avx2.v_w2l>
624: 5b popq %rbx
625: c3 retq
0000000000000626 _dav1d_put_8tap_16bpc_avx2.v_w28:
626: 48 29 da subq %rbx, %rdx
629: c5 f9 6e 02 vmovd (%rdx), %xmm0
62d: c5 f9 6e 0c 0a vmovd (%rdx,%rcx), %xmm1
632: c5 f9 6e 14 4a vmovd (%rdx,%rcx,2), %xmm2
637: 48 01 da addq %rbx, %rdx
63a: c5 f9 6e 1a vmovd (%rdx), %xmm3
63e: c5 f9 6e 24 0a vmovd (%rdx,%rcx), %xmm4
643: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
647: c5 79 6e 22 vmovd (%rdx), %xmm12
64b: c5 79 6e 2c 0a vmovd (%rdx,%rcx), %xmm13
650: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
654: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
658: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
65c: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
660: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
664: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4
669: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12
000000000000066e _dav1d_put_8tap_16bpc_avx2.v_w28l:
66e: 48 29 ca subq %rcx, %rdx
671: c5 79 6e 2a vmovd (%rdx), %xmm13
675: c5 79 6e 34 0a vmovd (%rdx,%rcx), %xmm14
67a: c5 79 6e 3c 4a vmovd (%rdx,%rcx,2), %xmm15
67f: 48 01 da addq %rbx, %rdx
682: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13
687: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14
68c: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0
690: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5
694: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
698: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
69c: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1
6a0: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15
6a4: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15
6a8: c5 f9 6f cb vmovdqa %xmm3, %xmm1
6ac: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2
6b0: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3
6b5: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5
6b9: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15
6bd: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
6c1: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3
6c6: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4
6cb: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12
6d0: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5
6d4: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15
6d9: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4
6de: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12
6e3: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
6e7: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15
6eb: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5
6f0: c4 c1 01 72 e7 06 vpsrad $6, %xmm15, %xmm15
6f6: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5
6fb: c4 42 01 2b ff vpackusdw %xmm15, %xmm15, %xmm15
700: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5
705: c4 62 01 3a ff vpminuw %xmm7, %xmm15, %xmm15
70a: c5 f9 7e 2f vmovd %xmm5, (%rdi)
70e: c5 79 7e 3c 37 vmovd %xmm15, (%rdi,%rsi)
713: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
717: 41 83 e9 02 subl $2, %r9d
71b: 0f 8f 4d ff ff ff jg -179 <_dav1d_put_8tap_16bpc_avx2.v_w28l>
721: 5b popq %rbx
722: c3 retq
0000000000000723 _dav1d_put_8tap_16bpc_avx2.v_w4:
723: 41 83 f9 04 cmpl $4, %r9d
727: 0f 8f 91 00 00 00 jg 145 <_dav1d_put_8tap_16bpc_avx2.v_w48>
72d: 48 29 ca subq %rcx, %rdx
730: c5 fa 7e 02 vmovq (%rdx), %xmm0
734: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1
739: c5 fa 7e 14 4a vmovq (%rdx,%rcx,2), %xmm2
73e: 48 01 da addq %rbx, %rdx
741: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
745: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
0000000000000749 _dav1d_put_8tap_16bpc_avx2.v_w4l:
749: c5 fa 7e 1a vmovq (%rdx), %xmm3
74d: c5 fa 7e 24 0a vmovq (%rdx,%rcx), %xmm4
752: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
756: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
75a: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
75e: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0
762: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5
766: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
76a: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
76e: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1
772: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8
776: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8
77a: c5 f9 6f cb vmovdqa %xmm3, %xmm1
77e: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
782: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
786: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8
78a: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5
78f: c4 c1 39 72 e0 06 vpsrad $6, %xmm8, %xmm8
795: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5
79a: c4 42 39 2b c0 vpackusdw %xmm8, %xmm8, %xmm8
79f: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5
7a4: c4 62 39 3a c7 vpminuw %xmm7, %xmm8, %xmm8
7a9: c5 f9 d6 2f vmovq %xmm5, (%rdi)
7ad: c5 79 d6 04 37 vmovq %xmm8, (%rdi,%rsi)
7b2: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
7b6: 41 83 e9 02 subl $2, %r9d
7ba: 7f 8d jg -115 <_dav1d_put_8tap_16bpc_avx2.v_w4l>
7bc: 5b popq %rbx
7bd: c3 retq
00000000000007be _dav1d_put_8tap_16bpc_avx2.v_w48:
7be: 48 29 da subq %rbx, %rdx
7c1: c5 fa 7e 02 vmovq (%rdx), %xmm0
7c5: c5 fa 7e 0c 0a vmovq (%rdx,%rcx), %xmm1
7ca: c5 fa 7e 14 4a vmovq (%rdx,%rcx,2), %xmm2
7cf: 48 01 da addq %rbx, %rdx
7d2: c5 fa 7e 1a vmovq (%rdx), %xmm3
7d6: c5 fa 7e 24 0a vmovq (%rdx,%rcx), %xmm4
7db: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
7df: c5 7a 7e 22 vmovq (%rdx), %xmm12
7e3: c5 7a 7e 2c 0a vmovq (%rdx,%rcx), %xmm13
7e8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
7ec: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
7f0: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
7f4: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
7f8: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
7fc: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4
801: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12
0000000000000806 _dav1d_put_8tap_16bpc_avx2.v_w48l:
806: 48 29 ca subq %rcx, %rdx
809: c5 7a 7e 2a vmovq (%rdx), %xmm13
80d: c5 7a 7e 34 0a vmovq (%rdx,%rcx), %xmm14
812: c5 7a 7e 3c 4a vmovq (%rdx,%rcx,2), %xmm15
817: 48 01 da addq %rbx, %rdx
81a: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13
81f: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14
824: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0
828: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5
82c: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
830: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
834: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1
838: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15
83c: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15
840: c5 f9 6f cb vmovdqa %xmm3, %xmm1
844: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2
848: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3
84d: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5
851: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15
855: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
859: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3
85e: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4
863: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12
868: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5
86c: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15
871: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4
876: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12
87b: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
87f: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15
883: c5 d1 72 e5 06 vpsrad $6, %xmm5, %xmm5
888: c4 c1 01 72 e7 06 vpsrad $6, %xmm15, %xmm15
88e: c4 e2 51 2b ed vpackusdw %xmm5, %xmm5, %xmm5
893: c4 42 01 2b ff vpackusdw %xmm15, %xmm15, %xmm15
898: c4 e2 51 3a ef vpminuw %xmm7, %xmm5, %xmm5
89d: c4 62 01 3a ff vpminuw %xmm7, %xmm15, %xmm15
8a2: c5 f9 d6 2f vmovq %xmm5, (%rdi)
8a6: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi)
8ab: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
8af: 41 83 e9 02 subl $2, %r9d
8b3: 0f 8f 4d ff ff ff jg -179 <_dav1d_put_8tap_16bpc_avx2.v_w48l>
8b9: 5b popq %rbx
8ba: c3 retq
00000000000008bb _dav1d_put_8tap_16bpc_avx2.v_w8:
8bb: 45 89 c8 movl %r9d, %r8d
8be: 49 89 fa movq %rdi, %r10
8c1: 41 83 f9 04 cmpl $4, %r9d
8c5: 0f 8f df 00 00 00 jg 223 <_dav1d_put_8tap_16bpc_avx2.v_w88>
8cb: 48 29 ca subq %rcx, %rdx
8ce: 49 89 d3 movq %rdx, %r11
00000000000008d1 _dav1d_put_8tap_16bpc_avx2.v_w8l:
8d1: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
8d5: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
8da: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
8df: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0
8e5: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1
8eb: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2
8f1: 48 01 da addq %rbx, %rdx
8f4: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
8f8: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
00000000000008fc _dav1d_put_8tap_16bpc_avx2.v_w8c:
8fc: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
900: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
905: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3
90b: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4
911: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
915: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
919: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
91d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
921: c5 ad f5 ea vpmaddwd %ymm2, %ymm10, %ymm5
925: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5
929: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
92d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
931: c5 2d f5 c3 vpmaddwd %ymm3, %ymm10, %ymm8
935: c5 3d fe c1 vpaddd %ymm1, %ymm8, %ymm8
939: c5 fd 6f cb vmovdqa %ymm3, %ymm1
93d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
941: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5
945: c5 3d fe c6 vpaddd %ymm6, %ymm8, %ymm8
949: c5 d5 72 e5 06 vpsrad $6, %ymm5, %ymm5
94e: c4 c1 3d 72 e0 06 vpsrad $6, %ymm8, %ymm8
954: c4 e2 55 2b ed vpackusdw %ymm5, %ymm5, %ymm5
959: c4 42 3d 2b c0 vpackusdw %ymm8, %ymm8, %ymm8
95e: c4 e2 55 3a ef vpminuw %ymm7, %ymm5, %ymm5
963: c4 62 3d 3a c7 vpminuw %ymm7, %ymm8, %ymm8
968: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5
96e: c4 43 fd 00 c0 d8 vpermq $216, %ymm8, %ymm8
974: c5 fa 7f 2f vmovdqu %xmm5, (%rdi)
978: c5 7a 7f 04 37 vmovdqu %xmm8, (%rdi,%rsi)
97d: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
981: 41 83 e9 02 subl $2, %r9d
985: 0f 8f 71 ff ff ff jg -143 <_dav1d_put_8tap_16bpc_avx2.v_w8c>
98b: 49 83 c2 10 addq $16, %r10
98f: 49 83 c3 10 addq $16, %r11
993: 45 89 c1 movl %r8d, %r9d
996: 4c 89 d7 movq %r10, %rdi
999: 4c 89 da movq %r11, %rdx
99c: 83 e8 10 subl $16, %eax
99f: 0f 8f 2c ff ff ff jg -212 <_dav1d_put_8tap_16bpc_avx2.v_w8l>
9a5: 5b popq %rbx
9a6: c5 f8 77 vzeroupper
9a9: c3 retq
00000000000009aa _dav1d_put_8tap_16bpc_avx2.v_w88:
9aa: 48 29 da subq %rbx, %rdx
9ad: 49 89 d3 movq %rdx, %r11
00000000000009b0 _dav1d_put_8tap_16bpc_avx2.v_w88l:
9b0: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
9b4: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
9b9: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
9be: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0
9c4: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1
9ca: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2
9d0: 48 01 da addq %rbx, %rdx
9d3: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
9d7: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
9dc: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3
9e2: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4
9e8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
9ec: c5 7a 6f 22 vmovdqu (%rdx), %xmm12
9f0: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13
9f5: c4 43 fd 00 e4 d8 vpermq $216, %ymm12, %ymm12
9fb: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13
a01: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
a05: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
a09: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
a0d: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
a11: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
a15: c4 c1 5d 61 e4 vpunpcklwd %ymm12, %ymm4, %ymm4
a1a: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
0000000000000a1f _dav1d_put_8tap_16bpc_avx2.v_w88c:
a1f: 48 29 ca subq %rcx, %rdx
a22: c5 7a 6f 2a vmovdqu (%rdx), %xmm13
a26: c5 7a 6f 34 0a vmovdqu (%rdx,%rcx), %xmm14
a2b: c5 7a 6f 3c 4a vmovdqu (%rdx,%rcx,2), %xmm15
a30: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13
a36: c4 43 fd 00 f6 d8 vpermq $216, %ymm14, %ymm14
a3c: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15
a42: 48 01 da addq %rbx, %rdx
a45: c4 41 15 61 ee vpunpcklwd %ymm14, %ymm13, %ymm13
a4a: c4 41 0d 61 f7 vpunpcklwd %ymm15, %ymm14, %ymm14
a4f: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
a53: c5 b5 f5 ea vpmaddwd %ymm2, %ymm9, %ymm5
a57: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5
a5b: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
a5f: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
a63: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
a67: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
a6b: c5 fd 6f cb vmovdqa %ymm3, %ymm1
a6f: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
a73: c4 c1 1d f5 da vpmaddwd %ymm10, %ymm12, %ymm3
a78: c5 d5 fe ea vpaddd %ymm2, %ymm5, %ymm5
a7c: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
a80: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
a84: c4 c1 7d 6f dc vmovdqa %ymm12, %ymm3
a89: c4 c1 15 f5 e3 vpmaddwd %ymm11, %ymm13, %ymm4
a8e: c4 41 0d f5 e3 vpmaddwd %ymm11, %ymm14, %ymm12
a93: c5 d5 fe ec vpaddd %ymm4, %ymm5, %ymm5
a97: c4 41 05 fe fc vpaddd %ymm12, %ymm15, %ymm15
a9c: c4 c1 7d 6f e5 vmovdqa %ymm13, %ymm4
aa1: c4 41 7d 6f e6 vmovdqa %ymm14, %ymm12
aa6: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5
aaa: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
aae: c5 d5 72 e5 06 vpsrad $6, %ymm5, %ymm5
ab3: c4 c1 05 72 e7 06 vpsrad $6, %ymm15, %ymm15
ab9: c4 e2 55 2b ed vpackusdw %ymm5, %ymm5, %ymm5
abe: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15
ac3: c4 e2 55 3a ef vpminuw %ymm7, %ymm5, %ymm5
ac8: c4 62 05 3a ff vpminuw %ymm7, %ymm15, %ymm15
acd: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5
ad3: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15
ad9: c5 fa 7f 2f vmovdqu %xmm5, (%rdi)
add: c5 7a 7f 3c 37 vmovdqu %xmm15, (%rdi,%rsi)
ae2: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
ae6: 41 83 e9 02 subl $2, %r9d
aea: 0f 8f 2f ff ff ff jg -209 <_dav1d_put_8tap_16bpc_avx2.v_w88c>
af0: 49 83 c2 10 addq $16, %r10
af4: 49 83 c3 10 addq $16, %r11
af8: 45 89 c1 movl %r8d, %r9d
afb: 4c 89 d7 movq %r10, %rdi
afe: 4c 89 da movq %r11, %rdx
b01: 83 e8 10 subl $16, %eax
b04: 0f 8f a6 fe ff ff jg -346 <_dav1d_put_8tap_16bpc_avx2.v_w88l>
b0a: 5b popq %rbx
b0b: c5 f8 77 vzeroupper
b0e: c3 retq
0000000000000b0f _dav1d_put_8tap_16bpc_avx2.v_jmp_tbl:
b0f: 7c fa jl -6 <_dav1d_put_8tap_16bpc_avx2.v_w88c+0xec>
b11: ff ff <unknown>
b13: 14 fc adcb $-4, %al
b15: ff ff <unknown>
b17: ac lodsb (%rsi), %al
b18: fd std
b19: ff ff <unknown>
b1b: ac lodsb (%rsi), %al
b1c: fd std
b1d: ff ff <unknown>
b1f: ac lodsb (%rsi), %al
b20: fd std
b21: ff ff <unknown>
b23: ac lodsb (%rsi), %al
b24: fd std
b25: ff ff <unknown>
b27: ac lodsb (%rsi), %al
b28: fd std
b29: ff ff <unknown>
0000000000000b2b _dav1d_put_8tap_16bpc_avx2.put_8tap_hv_16bpc:
b2b: 41 83 f9 04 cmpl $4, %r9d
b2f: 7e 04 jle 4 <_dav1d_put_8tap_16bpc_avx2.hv_use4tap>
b31: 41 c1 ea 07 shrl $7, %r10d
0000000000000b35 _dav1d_put_8tap_16bpc_avx2.hv_use4tap:
b35: 41 83 e2 7f andl $127, %r10d
b39: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d
b40: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
b49: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13
b51: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14
b5a: c5 7a 7e 3d 00 00 00 00 vmovq (%rip), %xmm15
b62: 41 83 fb 0c cmpl $12, %r11d
b66: 75 22 jne 34 <_dav1d_put_8tap_16bpc_avx2.hv_bits10>
b68: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
b71: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13
b79: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14
b82: c5 7a 7e 3d 00 00 00 00 vmovq (%rip), %xmm15
0000000000000b8a _dav1d_put_8tap_16bpc_avx2.hv_bits10:
b8a: 4c 8d 1d 90 13 00 00 leaq 5008(%rip), %r11
b91: 4b 8d 9c d3 70 ec ff ff leaq -5008(%r11,%r10,8), %rbx
b99: c4 62 79 79 03 vpbroadcastw (%rbx), %xmm8
b9e: c4 62 79 79 4b 02 vpbroadcastw 2(%rbx), %xmm9
ba4: c4 62 79 79 53 04 vpbroadcastw 4(%rbx), %xmm10
baa: c4 62 79 79 5b 06 vpbroadcastw 6(%rbx), %xmm11
bb0: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8
bb5: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
bba: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
bbf: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11
bc4: 4c 89 c3 movq %r8, %rbx
bc7: f3 45 0f bc c0 tzcntl %r8d, %r8d
bcc: 41 83 e8 01 subl $1, %r8d
bd0: 4f 63 04 83 movslq (%r11,%r8,4), %r8
bd4: 4d 01 d8 addq %r11, %r8
bd7: 41 ff e0 jmpq *%r8
0000000000000bda _dav1d_put_8tap_16bpc_avx2.hv_w2:
bda: 41 83 f9 04 cmpl $4, %r9d
bde: 0f 8f 3c 01 00 00 jg 316 <_dav1d_put_8tap_16bpc_avx2.hv_w28>
be4: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
be8: c5 79 6f 05 00 00 00 00 vmovdqa (%rip), %xmm8
bf0: c4 c2 79 58 ac c3 d3 ec ff ff vpbroadcastd -4909(%r11,%rax,8), %xmm5
bfa: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5
bff: 48 83 ea 02 subq $2, %rdx
c03: 48 29 ca subq %rcx, %rdx
c06: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
c0a: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
c0f: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
c14: 48 01 da addq %rbx, %rdx
c17: c4 c2 79 00 c0 vpshufb %xmm8, %xmm0, %xmm0
c1c: c4 c2 71 00 c8 vpshufb %xmm8, %xmm1, %xmm1
c21: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0
c25: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1
c29: c4 e2 79 02 c0 vphaddd %xmm0, %xmm0, %xmm0
c2e: c4 e2 71 02 c9 vphaddd %xmm1, %xmm1, %xmm1
c33: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0
c37: c5 f1 fe ce vpaddd %xmm6, %xmm1, %xmm1
c3b: c4 c1 79 e2 c5 vpsrad %xmm13, %xmm0, %xmm0
c40: c4 c1 71 e2 cd vpsrad %xmm13, %xmm1, %xmm1
c45: c5 f9 6b c0 vpackssdw %xmm0, %xmm0, %xmm0
c49: c5 f1 6b c9 vpackssdw %xmm1, %xmm1, %xmm1
c4d: c4 c2 69 00 d0 vpshufb %xmm8, %xmm2, %xmm2
c52: c5 e9 f5 d5 vpmaddwd %xmm5, %xmm2, %xmm2
c56: c4 e2 69 02 d2 vphaddd %xmm2, %xmm2, %xmm2
c5b: c5 e9 fe d6 vpaddd %xmm6, %xmm2, %xmm2
c5f: c4 c1 69 e2 d5 vpsrad %xmm13, %xmm2, %xmm2
c64: c5 e9 6b d2 vpackssdw %xmm2, %xmm2, %xmm2
c68: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
c6c: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
0000000000000c70 _dav1d_put_8tap_16bpc_avx2.hv_w2l:
c70: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
c74: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
c79: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
c7d: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3
c82: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4
c87: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3
c8b: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4
c8f: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3
c94: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4
c99: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3
c9d: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4
ca1: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3
ca6: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4
cab: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3
caf: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4
cb3: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
cb7: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
cbb: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0
cbf: c5 29 f5 da vpmaddwd %xmm2, %xmm10, %xmm11
cc3: c5 21 fe d8 vpaddd %xmm0, %xmm11, %xmm11
cc7: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
ccb: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1
ccf: c5 29 f5 e3 vpmaddwd %xmm3, %xmm10, %xmm12
cd3: c5 19 fe e1 vpaddd %xmm1, %xmm12, %xmm12
cd7: c5 f9 6f cb vmovdqa %xmm3, %xmm1
cdb: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
cdf: c4 41 21 fe de vpaddd %xmm14, %xmm11, %xmm11
ce4: c4 41 19 fe e6 vpaddd %xmm14, %xmm12, %xmm12
ce9: c4 41 21 e2 df vpsrad %xmm15, %xmm11, %xmm11
cee: c4 41 19 e2 e7 vpsrad %xmm15, %xmm12, %xmm12
cf3: c4 42 21 2b db vpackusdw %xmm11, %xmm11, %xmm11
cf8: c4 42 19 2b e4 vpackusdw %xmm12, %xmm12, %xmm12
cfd: c4 62 21 3a df vpminuw %xmm7, %xmm11, %xmm11
d02: c4 62 19 3a e7 vpminuw %xmm7, %xmm12, %xmm12
d07: c5 79 7e 1f vmovd %xmm11, (%rdi)
d0b: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi)
d10: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
d14: 41 83 e9 02 subl $2, %r9d
d18: 0f 8f 52 ff ff ff jg -174 <_dav1d_put_8tap_16bpc_avx2.hv_w2l>
d1e: 5b popq %rbx
d1f: c3 retq
0000000000000d20 _dav1d_put_8tap_16bpc_avx2.hv_w28:
d20: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
d24: c5 79 6f 05 00 00 00 00 vmovdqa (%rip), %xmm8
d2c: c4 c2 79 58 ac c3 0f ee ff ff vpbroadcastd -4593(%r11,%rax,8), %xmm5
d36: c4 e2 79 20 ed vpmovsxbw %xmm5, %xmm5
d3b: 4f 8d 94 d3 1a ee ff ff leaq -4582(%r11,%r10,8), %r10
d43: c4 42 79 58 0a vpbroadcastd (%r10), %xmm9
d48: c4 42 79 58 52 04 vpbroadcastd 4(%r10), %xmm10
d4e: c4 42 79 20 c9 vpmovsxbw %xmm9, %xmm9
d53: c4 42 79 20 d2 vpmovsxbw %xmm10, %xmm10
d58: 48 83 ea 02 subq $2, %rdx
d5c: 48 29 da subq %rbx, %rdx
d5f: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
d63: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
d68: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
d6c: c4 c2 79 00 c0 vpshufb %xmm8, %xmm0, %xmm0
d71: c4 c2 71 00 c8 vpshufb %xmm8, %xmm1, %xmm1
d76: c5 f9 f5 c5 vpmaddwd %xmm5, %xmm0, %xmm0
d7a: c5 f1 f5 cd vpmaddwd %xmm5, %xmm1, %xmm1
d7e: c4 e2 79 02 c0 vphaddd %xmm0, %xmm0, %xmm0
d83: c4 e2 71 02 c9 vphaddd %xmm1, %xmm1, %xmm1
d88: c5 f9 fe c6 vpaddd %xmm6, %xmm0, %xmm0
d8c: c5 f1 fe ce vpaddd %xmm6, %xmm1, %xmm1
d90: c4 c1 79 e2 c5 vpsrad %xmm13, %xmm0, %xmm0
d95: c4 c1 71 e2 cd vpsrad %xmm13, %xmm1, %xmm1
d9a: c5 f9 6b c0 vpackssdw %xmm0, %xmm0, %xmm0
d9e: c5 f1 6b c9 vpackssdw %xmm1, %xmm1, %xmm1
da2: c5 fa 6f 22 vmovdqu (%rdx), %xmm4
da6: c5 fa 6f 1c 0a vmovdqu (%rdx,%rcx), %xmm3
dab: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
db0: 48 01 da addq %rbx, %rdx
db3: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4
db8: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3
dbd: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4
dc1: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3
dc5: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4
dca: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3
dcf: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4
dd3: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3
dd7: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4
ddc: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3
de1: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4
de5: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3
de9: c4 c2 69 00 d0 vpshufb %xmm8, %xmm2, %xmm2
dee: c5 e9 f5 d5 vpmaddwd %xmm5, %xmm2, %xmm2
df2: c4 e2 69 02 d2 vphaddd %xmm2, %xmm2, %xmm2
df7: c5 e9 fe d6 vpaddd %xmm6, %xmm2, %xmm2
dfb: c4 c1 69 e2 d5 vpsrad %xmm13, %xmm2, %xmm2
e00: c5 e9 6b d2 vpackssdw %xmm2, %xmm2, %xmm2
e04: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
e08: c5 f1 61 cc vpunpcklwd %xmm4, %xmm1, %xmm1
e0c: c5 d9 61 e3 vpunpcklwd %xmm3, %xmm4, %xmm4
e10: c5 e1 61 da vpunpcklwd %xmm2, %xmm3, %xmm3
e14: c5 f9 62 c4 vpunpckldq %xmm4, %xmm0, %xmm0
e18: c5 f1 62 cb vpunpckldq %xmm3, %xmm1, %xmm1
e1c: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
e20: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
e25: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
e29: c4 c2 61 00 d8 vpshufb %xmm8, %xmm3, %xmm3
e2e: c4 c2 59 00 e0 vpshufb %xmm8, %xmm4, %xmm4
e33: c5 e1 f5 dd vpmaddwd %xmm5, %xmm3, %xmm3
e37: c5 d9 f5 e5 vpmaddwd %xmm5, %xmm4, %xmm4
e3b: c4 e2 61 02 db vphaddd %xmm3, %xmm3, %xmm3
e40: c4 e2 59 02 e4 vphaddd %xmm4, %xmm4, %xmm4
e45: c5 e1 fe de vpaddd %xmm6, %xmm3, %xmm3
e49: c5 d9 fe e6 vpaddd %xmm6, %xmm4, %xmm4
e4d: c4 c1 61 e2 dd vpsrad %xmm13, %xmm3, %xmm3
e52: c4 c1 59 e2 e5 vpsrad %xmm13, %xmm4, %xmm4
e57: c5 e1 6b db vpackssdw %xmm3, %xmm3, %xmm3
e5b: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4
e5f: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
e63: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
0000000000000e67 _dav1d_put_8tap_16bpc_avx2.hv_w28l:
e67: c5 7a 6f 1a vmovdqu (%rdx), %xmm11
e6b: c5 7a 6f 24 0a vmovdqu (%rdx,%rcx), %xmm12
e70: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
e74: c4 42 21 00 d8 vpshufb %xmm8, %xmm11, %xmm11
e79: c4 42 19 00 e0 vpshufb %xmm8, %xmm12, %xmm12
e7e: c5 21 f5 dd vpmaddwd %xmm5, %xmm11, %xmm11
e82: c5 19 f5 e5 vpmaddwd %xmm5, %xmm12, %xmm12
e86: c4 42 21 02 db vphaddd %xmm11, %xmm11, %xmm11
e8b: c4 42 19 02 e4 vphaddd %xmm12, %xmm12, %xmm12
e90: c5 21 fe de vpaddd %xmm6, %xmm11, %xmm11
e94: c5 19 fe e6 vpaddd %xmm6, %xmm12, %xmm12
e98: c4 41 21 e2 dd vpsrad %xmm13, %xmm11, %xmm11
e9d: c4 41 19 e2 e5 vpsrad %xmm13, %xmm12, %xmm12
ea2: c4 41 21 6b db vpackssdw %xmm11, %xmm11, %xmm11
ea7: c4 41 19 6b e4 vpackssdw %xmm12, %xmm12, %xmm12
eac: c4 c1 59 61 e3 vpunpcklwd %xmm11, %xmm4, %xmm4
eb1: c4 41 21 61 dc vpunpcklwd %xmm12, %xmm11, %xmm11
eb6: c5 e9 62 d4 vpunpckldq %xmm4, %xmm2, %xmm2
eba: c4 c1 61 62 db vpunpckldq %xmm11, %xmm3, %xmm3
ebf: c5 31 f5 d8 vpmaddwd %xmm0, %xmm9, %xmm11
ec3: c5 a9 f5 e2 vpmaddwd %xmm2, %xmm10, %xmm4
ec7: c5 31 f5 e1 vpmaddwd %xmm1, %xmm9, %xmm12
ecb: c5 21 fe dc vpaddd %xmm4, %xmm11, %xmm11
ecf: c5 a9 f5 e3 vpmaddwd %xmm3, %xmm10, %xmm4
ed3: c5 19 fe e4 vpaddd %xmm4, %xmm12, %xmm12
ed7: c4 42 21 02 db vphaddd %xmm11, %xmm11, %xmm11
edc: c4 42 19 02 e4 vphaddd %xmm12, %xmm12, %xmm12
ee1: c4 41 21 fe de vpaddd %xmm14, %xmm11, %xmm11
ee6: c4 41 19 fe e6 vpaddd %xmm14, %xmm12, %xmm12
eeb: c4 41 21 e2 df vpsrad %xmm15, %xmm11, %xmm11
ef0: c4 41 19 e2 e7 vpsrad %xmm15, %xmm12, %xmm12
ef5: c4 42 21 2b db vpackusdw %xmm11, %xmm11, %xmm11
efa: c4 42 19 2b e4 vpackusdw %xmm12, %xmm12, %xmm12
eff: c4 62 21 3a df vpminuw %xmm7, %xmm11, %xmm11
f04: c4 62 19 3a e7 vpminuw %xmm7, %xmm12, %xmm12
f09: c5 79 7e 1f vmovd %xmm11, (%rdi)
f0d: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi)
f12: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
f16: c5 f9 70 c0 8d vpshufd $141, %xmm0, %xmm0
f1b: c5 f9 70 c9 8d vpshufd $141, %xmm1, %xmm1
f20: c5 79 70 da d8 vpshufd $216, %xmm2, %xmm11
f25: c5 79 70 e3 d8 vpshufd $216, %xmm3, %xmm12
f2a: c5 f9 70 d2 8d vpshufd $141, %xmm2, %xmm2
f2f: c5 f9 70 db 8d vpshufd $141, %xmm3, %xmm3
f34: c5 f9 6f e3 vmovdqa %xmm3, %xmm4
f38: c5 d9 72 e4 10 vpsrad $16, %xmm4, %xmm4
f3d: c5 d9 6b e4 vpackssdw %xmm4, %xmm4, %xmm4
f41: c4 c1 79 62 c3 vpunpckldq %xmm11, %xmm0, %xmm0
f46: c4 c1 71 62 cc vpunpckldq %xmm12, %xmm1, %xmm1
f4b: 41 83 e9 02 subl $2, %r9d
f4f: 0f 8f 12 ff ff ff jg -238 <_dav1d_put_8tap_16bpc_avx2.hv_w28l>
f55: 5b popq %rbx
f56: c3 retq
0000000000000f57 _dav1d_put_8tap_16bpc_avx2.hv_w4:
f57: 41 83 f9 04 cmpl $4, %r9d
f5b: 0f 8f 5b 01 00 00 jg 347 <_dav1d_put_8tap_16bpc_avx2.hv_w48>
f61: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
f65: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8
f6d: c4 c2 79 58 ac c3 50 f0 ff ff vpbroadcastd -4016(%r11,%rax,8), %xmm5
f77: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
f7c: 48 83 ea 02 subq $2, %rdx
f80: 48 29 ca subq %rcx, %rdx
f83: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0
f88: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1
f8e: c4 e2 7d 5a 14 4a vbroadcasti128 (%rdx,%rcx,2), %ymm2
f94: 48 01 da addq %rbx, %rdx
f97: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0
f9c: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1
fa1: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
fa5: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
fa9: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0
fae: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1
fb3: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
fb7: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
fbb: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
fc0: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
fc5: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
fc9: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
fcd: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2
fd2: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
fd6: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2
fdb: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
fdf: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
fe4: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
fe8: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
fec: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
0000000000000ff0 _dav1d_put_8tap_16bpc_avx2.hv_w4l:
ff0: c4 e2 7d 5a 1a vbroadcasti128 (%rdx), %ymm3
ff5: c4 e2 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm4
ffb: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
fff: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
1004: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
1009: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
100d: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
1011: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
1016: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
101b: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
101f: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
1023: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
1028: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
102d: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
1031: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
1035: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
1039: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
103d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
1041: c5 2d f5 da vpmaddwd %ymm2, %ymm10, %ymm11
1045: c5 25 fe d8 vpaddd %ymm0, %ymm11, %ymm11
1049: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
104d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
1051: c5 2d f5 e3 vpmaddwd %ymm3, %ymm10, %ymm12
1055: c5 1d fe e1 vpaddd %ymm1, %ymm12, %ymm12
1059: c5 fd 6f cb vmovdqa %ymm3, %ymm1
105d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
1061: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
1066: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12
106b: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11
1070: c4 41 1d e2 e7 vpsrad %xmm15, %ymm12, %ymm12
1075: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11
107a: c4 42 1d 2b e4 vpackusdw %ymm12, %ymm12, %ymm12
107f: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11
1084: c4 62 1d 3a e7 vpminuw %ymm7, %ymm12, %ymm12
1089: c4 63 7d 39 db 01 vextracti128 $1, %ymm11, %xmm3
108f: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4
1095: c5 79 7e 1f vmovd %xmm11, (%rdi)
1099: c5 f9 7e 5f 04 vmovd %xmm3, 4(%rdi)
109e: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi)
10a3: c5 f9 7e 64 37 04 vmovd %xmm4, 4(%rdi,%rsi)
10a9: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
10ad: 41 83 e9 02 subl $2, %r9d
10b1: 0f 8f 39 ff ff ff jg -199 <_dav1d_put_8tap_16bpc_avx2.hv_w4l>
10b7: 5b popq %rbx
10b8: c5 f8 77 vzeroupper
10bb: c3 retq
00000000000010bc _dav1d_put_8tap_16bpc_avx2.hv_w48:
10bc: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
10c0: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8
10c8: c4 c2 79 58 ac c3 ab f1 ff ff vpbroadcastd -3669(%r11,%rax,8), %xmm5
10d2: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
10d7: 4f 8d 94 d3 b6 f1 ff ff leaq -3658(%r11,%r10,8), %r10
10df: c4 42 79 58 0a vpbroadcastd (%r10), %xmm9
10e4: c4 42 79 58 52 04 vpbroadcastd 4(%r10), %xmm10
10ea: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
10ef: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
10f4: 48 83 ea 02 subq $2, %rdx
10f8: 48 29 da subq %rbx, %rdx
10fb: c4 e2 7d 5a 02 vbroadcasti128 (%rdx), %ymm0
1100: c4 e2 7d 5a 0c 0a vbroadcasti128 (%rdx,%rcx), %ymm1
1106: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
110a: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0
110f: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1
1114: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
1118: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
111c: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0
1121: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1
1126: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
112a: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
112e: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
1133: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
1138: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
113c: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
1140: c4 e2 7d 5a 22 vbroadcasti128 (%rdx), %ymm4
1145: c4 e2 7d 5a 1c 0a vbroadcasti128 (%rdx,%rcx), %ymm3
114b: c4 e2 7d 5a 14 4a vbroadcasti128 (%rdx,%rcx,2), %ymm2
1151: 48 01 da addq %rbx, %rdx
1154: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
1159: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
115e: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
1162: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
1166: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
116b: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
1170: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
1174: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
1178: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
117d: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
1182: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
1186: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
118a: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2
118f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
1193: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2
1198: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
119c: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
11a1: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
11a5: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
11a9: c5 f5 61 cc vpunpcklwd %ymm4, %ymm1, %ymm1
11ad: c5 dd 61 e3 vpunpcklwd %ymm3, %ymm4, %ymm4
11b1: c5 e5 61 da vpunpcklwd %ymm2, %ymm3, %ymm3
11b5: c5 fd 62 c4 vpunpckldq %ymm4, %ymm0, %ymm0
11b9: c5 f5 62 cb vpunpckldq %ymm3, %ymm1, %ymm1
11bd: c4 e2 7d 5a 1a vbroadcasti128 (%rdx), %ymm3
11c2: c4 e2 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm4
11c8: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
11cc: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
11d1: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
11d6: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
11da: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
11de: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
11e3: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
11e8: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
11ec: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
11f0: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
11f5: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
11fa: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
11fe: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
1202: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
1206: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
000000000000120a _dav1d_put_8tap_16bpc_avx2.hv_w48l:
120a: c4 62 7d 5a 1a vbroadcasti128 (%rdx), %ymm11
120f: c4 62 7d 5a 24 0a vbroadcasti128 (%rdx,%rcx), %ymm12
1215: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1219: c4 42 25 00 d8 vpshufb %ymm8, %ymm11, %ymm11
121e: c4 42 1d 00 e0 vpshufb %ymm8, %ymm12, %ymm12
1223: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
1227: c5 1d f5 e5 vpmaddwd %ymm5, %ymm12, %ymm12
122b: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11
1230: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12
1235: c5 25 fe de vpaddd %ymm6, %ymm11, %ymm11
1239: c5 1d fe e6 vpaddd %ymm6, %ymm12, %ymm12
123d: c4 41 25 e2 dd vpsrad %xmm13, %ymm11, %ymm11
1242: c4 41 1d e2 e5 vpsrad %xmm13, %ymm12, %ymm12
1247: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11
124c: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
1251: c4 c1 5d 61 e3 vpunpcklwd %ymm11, %ymm4, %ymm4
1256: c4 41 25 61 dc vpunpcklwd %ymm12, %ymm11, %ymm11
125b: c5 ed 62 d4 vpunpckldq %ymm4, %ymm2, %ymm2
125f: c4 c1 65 62 db vpunpckldq %ymm11, %ymm3, %ymm3
1264: c5 35 f5 d8 vpmaddwd %ymm0, %ymm9, %ymm11
1268: c5 ad f5 e2 vpmaddwd %ymm2, %ymm10, %ymm4
126c: c5 35 f5 e1 vpmaddwd %ymm1, %ymm9, %ymm12
1270: c5 25 fe dc vpaddd %ymm4, %ymm11, %ymm11
1274: c5 ad f5 e3 vpmaddwd %ymm3, %ymm10, %ymm4
1278: c5 1d fe e4 vpaddd %ymm4, %ymm12, %ymm12
127c: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11
1281: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12
1286: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
128b: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12
1290: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11
1295: c4 41 1d e2 e7 vpsrad %xmm15, %ymm12, %ymm12
129a: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11
129f: c4 42 1d 2b e4 vpackusdw %ymm12, %ymm12, %ymm12
12a4: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11
12a9: c4 62 1d 3a e7 vpminuw %ymm7, %ymm12, %ymm12
12ae: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4
12b4: c5 79 7e 1f vmovd %xmm11, (%rdi)
12b8: c5 f9 7e 67 04 vmovd %xmm4, 4(%rdi)
12bd: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4
12c3: c5 79 7e 24 37 vmovd %xmm12, (%rdi,%rsi)
12c8: c5 f9 7e 64 37 04 vmovd %xmm4, 4(%rdi,%rsi)
12ce: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
12d2: c5 fd 70 c0 8d vpshufd $141, %ymm0, %ymm0
12d7: c5 fd 70 c9 8d vpshufd $141, %ymm1, %ymm1
12dc: c5 7d 70 da d8 vpshufd $216, %ymm2, %ymm11
12e1: c5 7d 70 e3 d8 vpshufd $216, %ymm3, %ymm12
12e6: c5 fd 70 d2 8d vpshufd $141, %ymm2, %ymm2
12eb: c5 fd 70 db 8d vpshufd $141, %ymm3, %ymm3
12f0: c5 fd 6f e3 vmovdqa %ymm3, %ymm4
12f4: c5 dd 72 e4 10 vpsrad $16, %ymm4, %ymm4
12f9: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
12fd: c4 c1 7d 62 c3 vpunpckldq %ymm11, %ymm0, %ymm0
1302: c4 c1 75 62 cc vpunpckldq %ymm12, %ymm1, %ymm1
1307: 41 83 e9 02 subl $2, %r9d
130b: 0f 8f f9 fe ff ff jg -263 <_dav1d_put_8tap_16bpc_avx2.hv_w48l>
1311: 5b popq %rbx
1312: c5 f8 77 vzeroupper
1315: c3 retq
0000000000001316 _dav1d_put_8tap_16bpc_avx2.hv_w8:
1316: 49 89 d8 movq %rbx, %r8
1319: 41 83 f9 04 cmpl $4, %r9d
131d: 0f 8f b3 02 00 00 jg 691 <_dav1d_put_8tap_16bpc_avx2.hv_w88>
1323: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
1327: c4 c2 79 59 ac c3 08 f4 ff ff vpbroadcastq -3064(%r11,%rax,8), %xmm5
1331: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
1336: 48 83 ea 06 subq $6, %rdx
133a: 48 29 ca subq %rcx, %rdx
133d: 44 89 c8 movl %r9d, %eax
1340: 49 89 fa movq %rdi, %r10
1343: 49 89 d3 movq %rdx, %r11
0000000000001346 _dav1d_put_8tap_16bpc_avx2.hv_w8l:
1346: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
134a: c5 fa 6f 52 02 vmovdqu 2(%rdx), %xmm2
134f: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0
1356: c4 e3 6d 38 52 0a 01 vinserti128 $1, 10(%rdx), %ymm2, %ymm2
135d: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
1361: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
1365: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0
136a: c5 fa 6f 4a 04 vmovdqu 4(%rdx), %xmm1
136f: c5 fa 6f 52 06 vmovdqu 6(%rdx), %xmm2
1374: c4 e3 75 38 4a 0c 01 vinserti128 $1, 12(%rdx), %ymm1, %ymm1
137b: c4 e3 6d 38 52 0e 01 vinserti128 $1, 14(%rdx), %ymm2, %ymm2
1382: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
1386: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
138a: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
138f: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
1394: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
1398: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
139d: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
13a1: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
13a6: c5 fa 6f 5c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm3
13ac: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1
13b4: c4 e3 65 38 5c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm3, %ymm3
13bc: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
13c0: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
13c4: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
13c9: c5 fa 6f 54 0a 04 vmovdqu 4(%rdx,%rcx), %xmm2
13cf: c5 fa 6f 5c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm3
13d5: c4 e3 6d 38 54 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm2, %ymm2
13dd: c4 e3 65 38 5c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm3, %ymm3
13e5: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
13e9: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
13ed: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2
13f2: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
13f7: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
13fb: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
1400: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
1404: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
1409: c5 fa 6f 64 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm4
140f: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2
1417: c4 e3 5d 38 64 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm4, %ymm4
141f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
1423: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
1427: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2
142c: c5 fa 6f 5c 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm3
1432: c5 fa 6f 64 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm4
1438: c4 e3 65 38 5c 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm3, %ymm3
1440: c4 e3 5d 38 64 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm4, %ymm4
1448: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
144c: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
1450: c4 e2 65 02 dc vphaddd %ymm4, %ymm3, %ymm3
1455: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2
145a: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
145e: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
1463: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
1467: 48 01 da addq %rbx, %rdx
146a: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
146e: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
0000000000001472 _dav1d_put_8tap_16bpc_avx2.hv_w8c:
1472: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
1476: c5 7a 6f 5a 02 vmovdqu 2(%rdx), %xmm11
147b: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3
1482: c4 63 25 38 5a 0a 01 vinserti128 $1, 10(%rdx), %ymm11, %ymm11
1489: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
148d: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
1491: c4 c2 65 02 db vphaddd %ymm11, %ymm3, %ymm3
1496: c5 7a 6f 42 04 vmovdqu 4(%rdx), %xmm8
149b: c5 7a 6f 5a 06 vmovdqu 6(%rdx), %xmm11
14a0: c4 63 3d 38 42 0c 01 vinserti128 $1, 12(%rdx), %ymm8, %ymm8
14a7: c4 63 25 38 5a 0e 01 vinserti128 $1, 14(%rdx), %ymm11, %ymm11
14ae: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8
14b2: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
14b6: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8
14bb: c4 c2 65 02 d8 vphaddd %ymm8, %ymm3, %ymm3
14c0: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
14c4: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
14c9: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
14cd: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
14d2: c5 7a 6f 5c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm11
14d8: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4
14e0: c4 63 25 38 5c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm11, %ymm11
14e8: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
14ec: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
14f0: c4 c2 5d 02 e3 vphaddd %ymm11, %ymm4, %ymm4
14f5: c5 7a 6f 44 0a 04 vmovdqu 4(%rdx,%rcx), %xmm8
14fb: c5 7a 6f 5c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm11
1501: c4 63 3d 38 44 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm8, %ymm8
1509: c4 63 25 38 5c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm11, %ymm11
1511: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8
1515: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
1519: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8
151e: c4 c2 5d 02 e0 vphaddd %ymm8, %ymm4, %ymm4
1523: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
1527: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
152c: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
1530: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1534: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
1538: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
153c: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
1540: c5 2d f5 c2 vpmaddwd %ymm2, %ymm10, %ymm8
1544: c5 3d fe c0 vpaddd %ymm0, %ymm8, %ymm8
1548: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
154c: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
1550: c5 2d f5 db vpmaddwd %ymm3, %ymm10, %ymm11
1554: c5 25 fe d9 vpaddd %ymm1, %ymm11, %ymm11
1558: c5 fd 6f cb vmovdqa %ymm3, %ymm1
155c: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
1560: c4 41 3d fe c6 vpaddd %ymm14, %ymm8, %ymm8
1565: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
156a: c4 41 3d e2 c7 vpsrad %xmm15, %ymm8, %ymm8
156f: c4 41 25 e2 df vpsrad %xmm15, %ymm11, %ymm11
1574: c4 42 3d 2b c0 vpackusdw %ymm8, %ymm8, %ymm8
1579: c4 42 25 2b db vpackusdw %ymm11, %ymm11, %ymm11
157e: c4 62 3d 3a c7 vpminuw %ymm7, %ymm8, %ymm8
1583: c4 62 25 3a df vpminuw %ymm7, %ymm11, %ymm11
1588: c4 63 7d 39 c3 01 vextracti128 $1, %ymm8, %xmm3
158e: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4
1594: c5 79 d6 07 vmovq %xmm8, (%rdi)
1598: c5 f9 d6 5f 08 vmovq %xmm3, 8(%rdi)
159d: c5 79 d6 1c 37 vmovq %xmm11, (%rdi,%rsi)
15a2: c5 f9 d6 64 37 08 vmovq %xmm4, 8(%rdi,%rsi)
15a8: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
15ac: 41 83 e9 02 subl $2, %r9d
15b0: 0f 8f bc fe ff ff jg -324 <_dav1d_put_8tap_16bpc_avx2.hv_w8c>
15b6: 49 83 c2 10 addq $16, %r10
15ba: 49 83 c3 10 addq $16, %r11
15be: 41 89 c1 movl %eax, %r9d
15c1: 4c 89 d7 movq %r10, %rdi
15c4: 4c 89 da movq %r11, %rdx
15c7: 41 83 e8 08 subl $8, %r8d
15cb: 0f 8f 75 fd ff ff jg -651 <_dav1d_put_8tap_16bpc_avx2.hv_w8l>
15d1: 5b popq %rbx
15d2: c5 f8 77 vzeroupper
15d5: c3 retq
00000000000015d6 _dav1d_put_8tap_16bpc_avx2.hv_w88:
15d6: 48 8d 1c 49 leaq (%rcx,%rcx,2), %rbx
15da: c4 c2 79 59 bc c3 bb f6 ff ff vpbroadcastq -2373(%r11,%rax,8), %xmm7
15e4: c4 e2 7d 20 ff vpmovsxbw %xmm7, %ymm7
15e9: 48 83 ea 06 subq $6, %rdx
15ed: 48 29 da subq %rbx, %rdx
15f0: 44 89 c8 movl %r9d, %eax
15f3: 49 89 fa movq %rdi, %r10
15f6: f3 44 0f b8 5c 24 20 popcntl 32(%rsp), %r11d
15fd: 41 83 fb 0c cmpl $12, %r11d
1601: 0f 84 8d 04 00 00 je 1165 <_dav1d_put_8tap_16bpc_avx2.hv_w88_12bit>
1607: 49 89 d3 movq %rdx, %r11
000000000000160a _dav1d_put_8tap_16bpc_avx2.hv_w88l_10bit:
160a: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1613: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
1617: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
161c: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0
1623: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
162a: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0
162e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1632: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0
1637: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
163c: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1641: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
1648: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
164f: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1653: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1657: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
165c: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0
1661: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0
1665: c5 fd 72 e0 02 vpsrad $2, %ymm0, %ymm0
166a: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
166e: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
1673: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
1679: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1
1681: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
1689: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1
168d: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1691: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1
1696: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
169c: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
16a2: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
16aa: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
16b2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
16b6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
16ba: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
16bf: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1
16c4: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1
16c8: c5 f5 72 e1 02 vpsrad $2, %ymm1, %ymm1
16cd: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
16d1: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
16d6: c5 7a 6f 6c 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm13
16dc: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2
16e4: c4 63 15 38 6c 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm13, %ymm13
16ec: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2
16f0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
16f4: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2
16f9: c5 7a 6f 64 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm12
16ff: c5 7a 6f 6c 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm13
1705: c4 63 1d 38 64 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm12, %ymm12
170d: c4 63 15 38 6c 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm13, %ymm13
1715: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1719: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
171d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1722: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2
1727: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2
172b: c5 ed 72 e2 02 vpsrad $2, %ymm2, %ymm2
1730: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
1734: 48 01 da addq %rbx, %rdx
1737: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
173b: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
1740: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3
1747: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
174e: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3
1752: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1756: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3
175b: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
1760: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1765: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
176c: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
1773: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1777: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
177b: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1780: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3
1785: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3
1789: c5 e5 72 e3 02 vpsrad $2, %ymm3, %ymm3
178e: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
1792: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
1797: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
179d: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4
17a5: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
17ad: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4
17b1: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
17b5: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4
17ba: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
17c0: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
17c6: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
17ce: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
17d6: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
17da: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
17de: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
17e3: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4
17e8: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4
17ec: c5 dd 72 e4 02 vpsrad $2, %ymm4, %ymm4
17f1: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
17f5: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
17f9: c5 fa 6f 2a vmovdqu (%rdx), %xmm5
17fd: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
1802: c4 e3 55 38 6a 08 01 vinserti128 $1, 8(%rdx), %ymm5, %ymm5
1809: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
1810: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5
1814: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1818: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5
181d: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
1822: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1827: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
182e: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
1835: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1839: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
183d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1842: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5
1847: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5
184b: c5 d5 72 e5 02 vpsrad $2, %ymm5, %ymm5
1850: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
1854: c5 fa 6f 34 0a vmovdqu (%rdx,%rcx), %xmm6
1859: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
185f: c4 e3 4d 38 74 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm6, %ymm6
1867: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
186f: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6
1873: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1877: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6
187c: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
1882: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
1888: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
1890: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
1898: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
189c: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
18a0: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
18a5: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6
18aa: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6
18ae: c5 cd 72 e6 02 vpsrad $2, %ymm6, %ymm6
18b3: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6
18b7: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
18bb: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
18bf: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
18c3: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
18c7: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
18cb: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4
18cf: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5
00000000000018d3 _dav1d_put_8tap_16bpc_avx2.hv_w88c_10bit:
18d3: c5 7a 6f 22 vmovdqu (%rdx), %xmm12
18d7: c5 7a 6f 7a 02 vmovdqu 2(%rdx), %xmm15
18dc: c4 63 1d 38 62 08 01 vinserti128 $1, 8(%rdx), %ymm12, %ymm12
18e3: c4 63 05 38 7a 0a 01 vinserti128 $1, 10(%rdx), %ymm15, %ymm15
18ea: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
18ee: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
18f2: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12
18f7: c5 7a 6f 72 04 vmovdqu 4(%rdx), %xmm14
18fc: c5 7a 6f 7a 06 vmovdqu 6(%rdx), %xmm15
1901: c4 63 0d 38 72 0c 01 vinserti128 $1, 12(%rdx), %ymm14, %ymm14
1908: c4 63 05 38 7a 0e 01 vinserti128 $1, 14(%rdx), %ymm15, %ymm15
190f: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
1913: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1917: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
191c: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1925: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12
192a: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12
192f: c4 c1 1d 72 e4 02 vpsrad $2, %ymm12, %ymm12
1935: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
193a: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13
193f: c5 7a 6f 7c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm15
1945: c4 63 15 38 6c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm13, %ymm13
194d: c4 63 05 38 7c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm15, %ymm15
1955: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1959: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
195d: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13
1962: c5 7a 6f 74 0a 04 vmovdqu 4(%rdx,%rcx), %xmm14
1968: c5 7a 6f 7c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm15
196e: c4 63 0d 38 74 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm14, %ymm14
1976: c4 63 05 38 7c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm15, %ymm15
197e: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
1982: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1986: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
198b: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1994: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13
1999: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13
199e: c4 c1 15 72 e5 02 vpsrad $2, %ymm13, %ymm13
19a4: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13
19a9: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
19ad: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6
19b2: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
19b7: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
19bb: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14
19bf: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14
19c3: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
19c7: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
19cb: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
19cf: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
19d3: c5 fd 6f cb vmovdqa %ymm3, %ymm1
19d7: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
19db: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3
19df: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14
19e3: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
19e7: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
19eb: c5 fd 6f dd vmovdqa %ymm5, %ymm3
19ef: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4
19f3: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5
19f8: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14
19fc: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15
1a00: c5 fd 6f e6 vmovdqa %ymm6, %ymm4
1a04: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5
1a09: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
1a12: c4 62 7d 79 64 24 20 vpbroadcastw 32(%rsp), %ymm12
1a19: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14
1a1d: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
1a21: c4 c1 0d 72 e6 0a vpsrad $10, %ymm14, %ymm14
1a27: c4 c1 05 72 e7 0a vpsrad $10, %ymm15, %ymm15
1a2d: c4 42 0d 2b f6 vpackusdw %ymm14, %ymm14, %ymm14
1a32: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15
1a37: c4 42 0d 3a f4 vpminuw %ymm12, %ymm14, %ymm14
1a3c: c4 42 05 3a fc vpminuw %ymm12, %ymm15, %ymm15
1a41: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6
1a46: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12
1a4c: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13
1a52: c5 79 d6 37 vmovq %xmm14, (%rdi)
1a56: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi)
1a5b: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi)
1a60: c5 79 d6 6c 37 08 vmovq %xmm13, 8(%rdi,%rsi)
1a66: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
1a6a: 41 83 e9 02 subl $2, %r9d
1a6e: 0f 8f 5f fe ff ff jg -417 <_dav1d_put_8tap_16bpc_avx2.hv_w88c_10bit>
1a74: 49 83 c2 10 addq $16, %r10
1a78: 49 83 c3 10 addq $16, %r11
1a7c: 41 89 c1 movl %eax, %r9d
1a7f: 4c 89 d7 movq %r10, %rdi
1a82: 4c 89 da movq %r11, %rdx
1a85: 41 83 e8 08 subl $8, %r8d
1a89: 0f 8f 7b fb ff ff jg -1157 <_dav1d_put_8tap_16bpc_avx2.hv_w88l_10bit>
1a8f: 5b popq %rbx
1a90: c5 f8 77 vzeroupper
1a93: c3 retq
0000000000001a94 _dav1d_put_8tap_16bpc_avx2.hv_w88_12bit:
1a94: 49 89 d3 movq %rdx, %r11
0000000000001a97 _dav1d_put_8tap_16bpc_avx2.hv_w88l_12bit:
1a97: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1aa0: c5 fa 6f 02 vmovdqu (%rdx), %xmm0
1aa4: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
1aa9: c4 e3 7d 38 42 08 01 vinserti128 $1, 8(%rdx), %ymm0, %ymm0
1ab0: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
1ab7: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0
1abb: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1abf: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0
1ac4: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
1ac9: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1ace: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
1ad5: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
1adc: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1ae0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1ae4: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1ae9: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0
1aee: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0
1af2: c5 fd 72 e0 04 vpsrad $4, %ymm0, %ymm0
1af7: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
1afb: c5 fa 6f 0c 0a vmovdqu (%rdx,%rcx), %xmm1
1b00: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
1b06: c4 e3 75 38 4c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm1, %ymm1
1b0e: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
1b16: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1
1b1a: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1b1e: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1
1b23: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
1b29: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
1b2f: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
1b37: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
1b3f: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1b43: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1b47: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1b4c: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1
1b51: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1
1b55: c5 f5 72 e1 04 vpsrad $4, %ymm1, %ymm1
1b5a: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
1b5e: c5 fa 6f 14 4a vmovdqu (%rdx,%rcx,2), %xmm2
1b63: c5 7a 6f 6c 4a 02 vmovdqu 2(%rdx,%rcx,2), %xmm13
1b69: c4 e3 6d 38 54 4a 08 01 vinserti128 $1, 8(%rdx,%rcx,2), %ymm2, %ymm2
1b71: c4 63 15 38 6c 4a 0a 01 vinserti128 $1, 10(%rdx,%rcx,2), %ymm13, %ymm13
1b79: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2
1b7d: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1b81: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2
1b86: c5 7a 6f 64 4a 04 vmovdqu 4(%rdx,%rcx,2), %xmm12
1b8c: c5 7a 6f 6c 4a 06 vmovdqu 6(%rdx,%rcx,2), %xmm13
1b92: c4 63 1d 38 64 4a 0c 01 vinserti128 $1, 12(%rdx,%rcx,2), %ymm12, %ymm12
1b9a: c4 63 15 38 6c 4a 0e 01 vinserti128 $1, 14(%rdx,%rcx,2), %ymm13, %ymm13
1ba2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1ba6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1baa: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1baf: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2
1bb4: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2
1bb8: c5 ed 72 e2 04 vpsrad $4, %ymm2, %ymm2
1bbd: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
1bc1: 48 01 da addq %rbx, %rdx
1bc4: c5 fa 6f 1a vmovdqu (%rdx), %xmm3
1bc8: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
1bcd: c4 e3 65 38 5a 08 01 vinserti128 $1, 8(%rdx), %ymm3, %ymm3
1bd4: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
1bdb: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3
1bdf: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1be3: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3
1be8: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
1bed: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1bf2: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
1bf9: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
1c00: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1c04: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1c08: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1c0d: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3
1c12: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3
1c16: c5 e5 72 e3 04 vpsrad $4, %ymm3, %ymm3
1c1b: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
1c1f: c5 fa 6f 24 0a vmovdqu (%rdx,%rcx), %xmm4
1c24: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
1c2a: c4 e3 5d 38 64 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm4, %ymm4
1c32: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
1c3a: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4
1c3e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1c42: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4
1c47: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
1c4d: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
1c53: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
1c5b: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
1c63: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1c67: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1c6b: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1c70: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4
1c75: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4
1c79: c5 dd 72 e4 04 vpsrad $4, %ymm4, %ymm4
1c7e: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
1c82: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1c86: c5 fa 6f 2a vmovdqu (%rdx), %xmm5
1c8a: c5 7a 6f 6a 02 vmovdqu 2(%rdx), %xmm13
1c8f: c4 e3 55 38 6a 08 01 vinserti128 $1, 8(%rdx), %ymm5, %ymm5
1c96: c4 63 15 38 6a 0a 01 vinserti128 $1, 10(%rdx), %ymm13, %ymm13
1c9d: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5
1ca1: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1ca5: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5
1caa: c5 7a 6f 62 04 vmovdqu 4(%rdx), %xmm12
1caf: c5 7a 6f 6a 06 vmovdqu 6(%rdx), %xmm13
1cb4: c4 63 1d 38 62 0c 01 vinserti128 $1, 12(%rdx), %ymm12, %ymm12
1cbb: c4 63 15 38 6a 0e 01 vinserti128 $1, 14(%rdx), %ymm13, %ymm13
1cc2: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1cc6: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1cca: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1ccf: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5
1cd4: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5
1cd8: c5 d5 72 e5 04 vpsrad $4, %ymm5, %ymm5
1cdd: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
1ce1: c5 fa 6f 34 0a vmovdqu (%rdx,%rcx), %xmm6
1ce6: c5 7a 6f 6c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm13
1cec: c4 e3 4d 38 74 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm6, %ymm6
1cf4: c4 63 15 38 6c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm13, %ymm13
1cfc: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6
1d00: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1d04: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6
1d09: c5 7a 6f 64 0a 04 vmovdqu 4(%rdx,%rcx), %xmm12
1d0f: c5 7a 6f 6c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm13
1d15: c4 63 1d 38 64 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm12, %ymm12
1d1d: c4 63 15 38 6c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm13, %ymm13
1d25: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1d29: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1d2d: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
1d32: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6
1d37: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6
1d3b: c5 cd 72 e6 04 vpsrad $4, %ymm6, %ymm6
1d40: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6
1d44: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1d48: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
1d4c: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
1d50: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
1d54: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
1d58: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4
1d5c: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5
0000000000001d60 _dav1d_put_8tap_16bpc_avx2.hv_w88c_12bit:
1d60: c5 7a 6f 22 vmovdqu (%rdx), %xmm12
1d64: c5 7a 6f 7a 02 vmovdqu 2(%rdx), %xmm15
1d69: c4 63 1d 38 62 08 01 vinserti128 $1, 8(%rdx), %ymm12, %ymm12
1d70: c4 63 05 38 7a 0a 01 vinserti128 $1, 10(%rdx), %ymm15, %ymm15
1d77: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
1d7b: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1d7f: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12
1d84: c5 7a 6f 72 04 vmovdqu 4(%rdx), %xmm14
1d89: c5 7a 6f 7a 06 vmovdqu 6(%rdx), %xmm15
1d8e: c4 63 0d 38 72 0c 01 vinserti128 $1, 12(%rdx), %ymm14, %ymm14
1d95: c4 63 05 38 7a 0e 01 vinserti128 $1, 14(%rdx), %ymm15, %ymm15
1d9c: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
1da0: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1da4: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
1da9: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1db2: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12
1db7: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12
1dbc: c4 c1 1d 72 e4 04 vpsrad $4, %ymm12, %ymm12
1dc2: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
1dc7: c5 7a 6f 2c 0a vmovdqu (%rdx,%rcx), %xmm13
1dcc: c5 7a 6f 7c 0a 02 vmovdqu 2(%rdx,%rcx), %xmm15
1dd2: c4 63 15 38 6c 0a 08 01 vinserti128 $1, 8(%rdx,%rcx), %ymm13, %ymm13
1dda: c4 63 05 38 7c 0a 0a 01 vinserti128 $1, 10(%rdx,%rcx), %ymm15, %ymm15
1de2: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
1de6: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1dea: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13
1def: c5 7a 6f 74 0a 04 vmovdqu 4(%rdx,%rcx), %xmm14
1df5: c5 7a 6f 7c 0a 06 vmovdqu 6(%rdx,%rcx), %xmm15
1dfb: c4 63 0d 38 74 0a 0c 01 vinserti128 $1, 12(%rdx,%rcx), %ymm14, %ymm14
1e03: c4 63 05 38 7c 0a 0e 01 vinserti128 $1, 14(%rdx,%rcx), %ymm15, %ymm15
1e0b: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
1e0f: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
1e13: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
1e18: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
1e21: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13
1e26: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13
1e2b: c4 c1 15 72 e5 04 vpsrad $4, %ymm13, %ymm13
1e31: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13
1e36: 48 8d 14 4a leaq (%rdx,%rcx,2), %rdx
1e3a: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6
1e3f: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
1e44: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
1e48: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14
1e4c: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14
1e50: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
1e54: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
1e58: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
1e5c: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
1e60: c5 fd 6f cb vmovdqa %ymm3, %ymm1
1e64: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
1e68: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3
1e6c: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14
1e70: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
1e74: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
1e78: c5 fd 6f dd vmovdqa %ymm5, %ymm3
1e7c: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4
1e80: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5
1e85: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14
1e89: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15
1e8d: c5 fd 6f e6 vmovdqa %ymm6, %ymm4
1e91: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5
1e96: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
1e9f: c4 62 7d 79 64 24 20 vpbroadcastw 32(%rsp), %ymm12
1ea6: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14
1eaa: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
1eae: c4 c1 0d 72 e6 08 vpsrad $8, %ymm14, %ymm14
1eb4: c4 c1 05 72 e7 08 vpsrad $8, %ymm15, %ymm15
1eba: c4 42 0d 2b f6 vpackusdw %ymm14, %ymm14, %ymm14
1ebf: c4 42 05 2b ff vpackusdw %ymm15, %ymm15, %ymm15
1ec4: c4 42 0d 3a f4 vpminuw %ymm12, %ymm14, %ymm14
1ec9: c4 42 05 3a fc vpminuw %ymm12, %ymm15, %ymm15
1ece: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6
1ed3: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12
1ed9: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13
1edf: c5 79 d6 37 vmovq %xmm14, (%rdi)
1ee3: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi)
1ee8: c5 79 d6 3c 37 vmovq %xmm15, (%rdi,%rsi)
1eed: c5 79 d6 6c 37 08 vmovq %xmm13, 8(%rdi,%rsi)
1ef3: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
1ef7: 41 83 e9 02 subl $2, %r9d
1efb: 0f 8f 5f fe ff ff jg -417 <_dav1d_put_8tap_16bpc_avx2.hv_w88c_12bit>
1f01: 49 83 c2 10 addq $16, %r10
1f05: 49 83 c3 10 addq $16, %r11
1f09: 41 89 c1 movl %eax, %r9d
1f0c: 4c 89 d7 movq %r10, %rdi
1f0f: 4c 89 da movq %r11, %rdx
1f12: 41 83 e8 08 subl $8, %r8d
1f16: 0f 8f 7b fb ff ff jg -1157 <_dav1d_put_8tap_16bpc_avx2.hv_w88l_12bit>
1f1c: 5b popq %rbx
1f1d: c5 f8 77 vzeroupper
1f20: c3 retq
0000000000001f21 _dav1d_put_8tap_16bpc_avx2.hv_jmp_tbl:
1f21: b9 ec ff ff 36 movl $922746860, %ecx
1f26: f0 lock
1f27: ff ff <unknown>
1f29: f5 cmc
1f2a: f3 ff ff <unknown>
1f2d: f5 cmc
1f2e: f3 ff ff <unknown>
1f31: f5 cmc
1f32: f3 ff ff <unknown>
1f35: f5 cmc
1f36: f3 ff ff <unknown>
1f39: f5 cmc
1f3a: f3 ff ff <unknown>
1f3d: 0f 1f 00 nopl (%rax)
0000000000001f40 _dav1d_prep_8tap_regular_16bpc_avx2:
1f40: 41 ba 2d 00 00 00 movl $45, %r10d
1f46: 41 bb 2d 00 00 00 movl $45, %r11d
1f4c: e9 8f 00 00 00 jmp 143 <_dav1d_prep_8tap_16bpc_avx2>
1f51: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax)
1f59: 0f 1f 80 00 00 00 00 nopl (%rax)
0000000000001f60 _dav1d_prep_8tap_regular_smooth_16bpc_avx2:
1f60: 41 ba 2d 00 00 00 movl $45, %r10d
1f66: 41 bb bc 07 00 00 movl $1980, %r11d
1f6c: eb 72 jmp 114 <_dav1d_prep_8tap_16bpc_avx2>
1f6e: 66 90 nop
0000000000001f70 _dav1d_prep_8tap_regular_sharp_16bpc_avx2:
1f70: 41 ba 2d 00 00 00 movl $45, %r10d
1f76: 41 bb 2d 0f 00 00 movl $3885, %r11d
1f7c: eb 62 jmp 98 <_dav1d_prep_8tap_16bpc_avx2>
1f7e: 66 90 nop
0000000000001f80 _dav1d_prep_8tap_smooth_16bpc_avx2:
1f80: 41 ba bc 07 00 00 movl $1980, %r10d
1f86: 41 bb bc 07 00 00 movl $1980, %r11d
1f8c: eb 52 jmp 82 <_dav1d_prep_8tap_16bpc_avx2>
1f8e: 66 90 nop
0000000000001f90 _dav1d_prep_8tap_smooth_regular_16bpc_avx2:
1f90: 41 ba bc 07 00 00 movl $1980, %r10d
1f96: 41 bb 2d 00 00 00 movl $45, %r11d
1f9c: eb 42 jmp 66 <_dav1d_prep_8tap_16bpc_avx2>
1f9e: 66 90 nop
0000000000001fa0 _dav1d_prep_8tap_smooth_sharp_16bpc_avx2:
1fa0: 41 ba bc 07 00 00 movl $1980, %r10d
1fa6: 41 bb 2d 0f 00 00 movl $3885, %r11d
1fac: eb 32 jmp 50 <_dav1d_prep_8tap_16bpc_avx2>
1fae: 66 90 nop
0000000000001fb0 _dav1d_prep_8tap_sharp_16bpc_avx2:
1fb0: 41 ba 2d 0f 00 00 movl $3885, %r10d
1fb6: 41 bb 2d 0f 00 00 movl $3885, %r11d
1fbc: eb 22 jmp 34 <_dav1d_prep_8tap_16bpc_avx2>
1fbe: 66 90 nop
0000000000001fc0 _dav1d_prep_8tap_sharp_regular_16bpc_avx2:
1fc0: 41 ba 2d 0f 00 00 movl $3885, %r10d
1fc6: 41 bb 2d 00 00 00 movl $45, %r11d
1fcc: eb 12 jmp 18 <_dav1d_prep_8tap_16bpc_avx2>
1fce: 66 90 nop
0000000000001fd0 _dav1d_prep_8tap_sharp_smooth_16bpc_avx2:
1fd0: 41 ba 2d 0f 00 00 movl $3885, %r10d
1fd6: 41 bb bc 07 00 00 movl $1980, %r11d
1fdc: eb 02 jmp 2 <_dav1d_prep_8tap_16bpc_avx2>
1fde: 66 90 nop
0000000000001fe0 _dav1d_prep_8tap_16bpc_avx2:
1fe0: 53 pushq %rbx
1fe1: 69 44 24 10 81 40 00 00 imull $16513, 16(%rsp), %eax
1fe9: 44 01 d8 addl %r11d, %eax
1fec: 45 69 c9 81 40 00 00 imull $16513, %r9d, %r9d
1ff3: 45 01 d1 addl %r10d, %r9d
1ff6: 48 63 c9 movslq %ecx, %rcx
1ff9: 4c 8d 1c 09 leaq (%rcx,%rcx), %r11
1ffd: 41 f7 c1 00 c0 1f 00 testl $2080768, %r9d
2004: 0f 85 f8 02 00 00 jne 760 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_h_16bpc>
200a: a9 00 c0 1f 00 testl $2080768, %eax
200f: 0f 85 e9 04 00 00 jne 1257 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_v_16bpc>
0000000000002015 _dav1d_prep_8tap_16bpc_avx2.prep_16bpc:
2015: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d
201c: c4 62 7d 59 05 00 00 00 00 vpbroadcastq (%rip), %ymm8
2025: c4 62 7d 79 0d 00 00 00 00 vpbroadcastw (%rip), %ymm9
202e: 41 83 fa 0c cmpl $12, %r10d
2032: 75 09 jne 9 <_dav1d_prep_8tap_16bpc_avx2.prep_bits10>
2034: c4 62 7d 59 05 00 00 00 00 vpbroadcastq (%rip), %ymm8
000000000000203d _dav1d_prep_8tap_16bpc_avx2.prep_bits10:
203d: 4c 8d 15 a6 02 00 00 leaq 678(%rip), %r10
2044: f3 0f bc c9 tzcntl %ecx, %ecx
2048: 83 e9 02 subl $2, %ecx
204b: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx
204f: 4c 01 d1 addq %r10, %rcx
2052: ff e1 jmpq *%rcx
0000000000002054 _dav1d_prep_8tap_16bpc_avx2.w4:
2054: c5 fa 7e 06 vmovq (%rsi), %xmm0
2058: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1
205d: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2061: c4 c1 79 f1 c0 vpsllw %xmm8, %xmm0, %xmm0
2066: c4 c1 71 f1 c8 vpsllw %xmm8, %xmm1, %xmm1
206b: c4 c1 79 f9 c1 vpsubw %xmm9, %xmm0, %xmm0
2070: c4 c1 71 f9 c9 vpsubw %xmm9, %xmm1, %xmm1
2075: c5 f9 d6 07 vmovq %xmm0, (%rdi)
2079: c4 a1 79 d6 0c 1f vmovq %xmm1, (%rdi,%r11)
207f: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2083: 41 83 e8 02 subl $2, %r8d
2087: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w4>
2089: 5b popq %rbx
208a: c3 retq
000000000000208b _dav1d_prep_8tap_16bpc_avx2.w8:
208b: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
208f: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
2094: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2098: c4 c1 79 f1 c0 vpsllw %xmm8, %xmm0, %xmm0
209d: c4 c1 71 f1 c8 vpsllw %xmm8, %xmm1, %xmm1
20a2: c4 c1 79 f9 c1 vpsubw %xmm9, %xmm0, %xmm0
20a7: c4 c1 71 f9 c9 vpsubw %xmm9, %xmm1, %xmm1
20ac: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
20b0: c4 a1 79 7f 0c 1f vmovdqa %xmm1, (%rdi,%r11)
20b6: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
20ba: 41 83 e8 02 subl $2, %r8d
20be: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w8>
20c0: 5b popq %rbx
20c1: c3 retq
00000000000020c2 _dav1d_prep_8tap_16bpc_avx2.w16:
20c2: c5 fe 6f 06 vmovdqu (%rsi), %ymm0
20c6: c5 fe 6f 0c 16 vmovdqu (%rsi,%rdx), %ymm1
20cb: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
20cf: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0
20d4: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1
20d9: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0
20de: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1
20e3: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
20e7: c4 a1 7d 7f 0c 1f vmovdqa %ymm1, (%rdi,%r11)
20ed: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
20f1: 41 83 e8 02 subl $2, %r8d
20f5: 7f cb jg -53 <_dav1d_prep_8tap_16bpc_avx2.w16>
20f7: 5b popq %rbx
20f8: c5 f8 77 vzeroupper
20fb: c3 retq
00000000000020fc _dav1d_prep_8tap_16bpc_avx2.w32:
20fc: c5 fe 6f 06 vmovdqu (%rsi), %ymm0
2100: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1
2105: c5 fe 6f 14 16 vmovdqu (%rsi,%rdx), %ymm2
210a: c5 fe 6f 5c 16 20 vmovdqu 32(%rsi,%rdx), %ymm3
2110: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2114: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0
2119: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1
211e: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2
2123: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3
2128: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0
212d: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1
2132: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2
2137: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3
213c: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
2140: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
2145: c4 a1 7d 7f 14 1f vmovdqa %ymm2, (%rdi,%r11)
214b: c4 a1 7d 7f 5c 1f 20 vmovdqa %ymm3, 32(%rdi,%r11)
2152: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2156: 41 83 e8 02 subl $2, %r8d
215a: 7f a0 jg -96 <_dav1d_prep_8tap_16bpc_avx2.w32>
215c: 5b popq %rbx
215d: c5 f8 77 vzeroupper
2160: c3 retq
0000000000002161 _dav1d_prep_8tap_16bpc_avx2.w64:
2161: c5 fe 6f 06 vmovdqu (%rsi), %ymm0
2165: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1
216a: c5 fe 6f 56 40 vmovdqu 64(%rsi), %ymm2
216f: c5 fe 6f 5e 60 vmovdqu 96(%rsi), %ymm3
2174: c5 fe 6f 24 16 vmovdqu (%rsi,%rdx), %ymm4
2179: c5 fe 6f 6c 16 20 vmovdqu 32(%rsi,%rdx), %ymm5
217f: c5 fe 6f 74 16 40 vmovdqu 64(%rsi,%rdx), %ymm6
2185: c5 fe 6f 7c 16 60 vmovdqu 96(%rsi,%rdx), %ymm7
218b: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
218f: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0
2194: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1
2199: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2
219e: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3
21a3: c4 c1 5d f1 e0 vpsllw %xmm8, %ymm4, %ymm4
21a8: c4 c1 55 f1 e8 vpsllw %xmm8, %ymm5, %ymm5
21ad: c4 c1 4d f1 f0 vpsllw %xmm8, %ymm6, %ymm6
21b2: c4 c1 45 f1 f8 vpsllw %xmm8, %ymm7, %ymm7
21b7: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0
21bc: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1
21c1: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2
21c6: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3
21cb: c4 c1 5d f9 e1 vpsubw %ymm9, %ymm4, %ymm4
21d0: c4 c1 55 f9 e9 vpsubw %ymm9, %ymm5, %ymm5
21d5: c4 c1 4d f9 f1 vpsubw %ymm9, %ymm6, %ymm6
21da: c4 c1 45 f9 f9 vpsubw %ymm9, %ymm7, %ymm7
21df: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
21e3: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
21e8: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi)
21ed: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi)
21f2: c4 a1 7d 7f 24 1f vmovdqa %ymm4, (%rdi,%r11)
21f8: c4 a1 7d 7f 6c 1f 20 vmovdqa %ymm5, 32(%rdi,%r11)
21ff: c4 a1 7d 7f 74 1f 40 vmovdqa %ymm6, 64(%rdi,%r11)
2206: c4 a1 7d 7f 7c 1f 60 vmovdqa %ymm7, 96(%rdi,%r11)
220d: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2211: 41 83 e8 02 subl $2, %r8d
2215: 0f 8f 46 ff ff ff jg -186 <_dav1d_prep_8tap_16bpc_avx2.w64>
221b: 5b popq %rbx
221c: c5 f8 77 vzeroupper
221f: c3 retq
0000000000002220 _dav1d_prep_8tap_16bpc_avx2.w128:
2220: c5 fe 6f 06 vmovdqu (%rsi), %ymm0
2224: c5 fe 6f 4e 20 vmovdqu 32(%rsi), %ymm1
2229: c5 fe 6f 56 40 vmovdqu 64(%rsi), %ymm2
222e: c5 fe 6f 5e 60 vmovdqu 96(%rsi), %ymm3
2233: c5 fe 6f a6 80 00 00 00 vmovdqu 128(%rsi), %ymm4
223b: c5 fe 6f ae a0 00 00 00 vmovdqu 160(%rsi), %ymm5
2243: c5 fe 6f b6 c0 00 00 00 vmovdqu 192(%rsi), %ymm6
224b: c5 fe 6f be e0 00 00 00 vmovdqu 224(%rsi), %ymm7
2253: 48 01 d6 addq %rdx, %rsi
2256: c4 c1 7d f1 c0 vpsllw %xmm8, %ymm0, %ymm0
225b: c4 c1 75 f1 c8 vpsllw %xmm8, %ymm1, %ymm1
2260: c4 c1 6d f1 d0 vpsllw %xmm8, %ymm2, %ymm2
2265: c4 c1 65 f1 d8 vpsllw %xmm8, %ymm3, %ymm3
226a: c4 c1 5d f1 e0 vpsllw %xmm8, %ymm4, %ymm4
226f: c4 c1 55 f1 e8 vpsllw %xmm8, %ymm5, %ymm5
2274: c4 c1 4d f1 f0 vpsllw %xmm8, %ymm6, %ymm6
2279: c4 c1 45 f1 f8 vpsllw %xmm8, %ymm7, %ymm7
227e: c4 c1 7d f9 c1 vpsubw %ymm9, %ymm0, %ymm0
2283: c4 c1 75 f9 c9 vpsubw %ymm9, %ymm1, %ymm1
2288: c4 c1 6d f9 d1 vpsubw %ymm9, %ymm2, %ymm2
228d: c4 c1 65 f9 d9 vpsubw %ymm9, %ymm3, %ymm3
2292: c4 c1 5d f9 e1 vpsubw %ymm9, %ymm4, %ymm4
2297: c4 c1 55 f9 e9 vpsubw %ymm9, %ymm5, %ymm5
229c: c4 c1 4d f9 f1 vpsubw %ymm9, %ymm6, %ymm6
22a1: c4 c1 45 f9 f9 vpsubw %ymm9, %ymm7, %ymm7
22a6: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
22aa: c5 fd 7f 4f 20 vmovdqa %ymm1, 32(%rdi)
22af: c5 fd 7f 57 40 vmovdqa %ymm2, 64(%rdi)
22b4: c5 fd 7f 5f 60 vmovdqa %ymm3, 96(%rdi)
22b9: c5 fd 7f a7 80 00 00 00 vmovdqa %ymm4, 128(%rdi)
22c1: c5 fd 7f af a0 00 00 00 vmovdqa %ymm5, 160(%rdi)
22c9: c5 fd 7f b7 c0 00 00 00 vmovdqa %ymm6, 192(%rdi)
22d1: c5 fd 7f bf e0 00 00 00 vmovdqa %ymm7, 224(%rdi)
22d9: 4c 01 df addq %r11, %rdi
22dc: 41 ff c8 decl %r8d
22df: 0f 8f 3b ff ff ff jg -197 <_dav1d_prep_8tap_16bpc_avx2.w128>
22e5: 5b popq %rbx
22e6: c5 f8 77 vzeroupper
22e9: c3 retq
00000000000022ea _dav1d_prep_8tap_16bpc_avx2.jmp_tbl:
22ea: 6a fd pushq $-3
22ec: ff ff <unknown>
22ee: a1 fd ff ff d8 fd ff ff 12 movabsl 1369094277476384765, %eax
22f7: fe ff <unknown>
22f9: ff 77 fe pushq -2(%rdi)
22fc: ff ff <unknown>
22fe: 36 ff ff <unknown>
2301: ff 83 f9 04 7e 04 incl 75367673(%rbx)
0000000000002302 _dav1d_prep_8tap_16bpc_avx2.prep_8tap_h_16bpc:
2302: 83 f9 04 cmpl $4, %ecx
2305: 7e 04 jle 4 <_dav1d_prep_8tap_16bpc_avx2.h_use4tap>
2307: 41 c1 e9 07 shrl $7, %r9d
000000000000230b _dav1d_prep_8tap_16bpc_avx2.h_use4tap:
230b: 41 83 e1 7f andl $127, %r9d
230f: a9 00 c0 1f 00 testl $2080768, %eax
2314: 0f 85 41 06 00 00 jne 1601 <_dav1d_prep_8tap_16bpc_avx2.prep_8tap_hv_16bpc>
231a: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d
2321: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
232a: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7
2333: 41 83 fa 0c cmpl $12, %r10d
2337: 75 12 jne 18 <_dav1d_prep_8tap_16bpc_avx2.h_bits10>
2339: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
2342: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7
000000000000234b _dav1d_prep_8tap_16bpc_avx2.h_bits10:
234b: 48 8d 04 09 leaq (%rcx,%rcx), %rax
234f: 4c 8d 15 90 01 00 00 leaq 400(%rip), %r10
2356: f3 0f bc c9 tzcntl %ecx, %ecx
235a: 83 e9 02 subl $2, %ecx
235d: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx
2361: 4c 01 d1 addq %r10, %rcx
2364: ff e1 jmpq *%rcx
0000000000002366 _dav1d_prep_8tap_16bpc_avx2.h_w4:
2366: 48 83 ee 02 subq $2, %rsi
236a: c5 fd 6f 25 00 00 00 00 vmovdqa (%rip), %ymm4
2372: c4 82 79 58 ac ca 90 fe ff ff vpbroadcastd -368(%r10,%r9,8), %xmm5
237c: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
0000000000002381 _dav1d_prep_8tap_16bpc_avx2.h_w4l:
2381: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0
2386: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1
238c: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2390: c4 e2 7d 00 c4 vpshufb %ymm4, %ymm0, %ymm0
2395: c4 e2 75 00 cc vpshufb %ymm4, %ymm1, %ymm1
239a: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
239e: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
23a2: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
23a7: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
23ab: c5 fd e2 c7 vpsrad %xmm7, %ymm0, %ymm0
23af: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
23b3: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
23b9: c5 f9 7e 07 vmovd %xmm0, (%rdi)
23bd: c5 f9 7e 4f 04 vmovd %xmm1, 4(%rdi)
23c2: c4 a3 79 16 04 1f 01 vpextrd $1, %xmm0, (%rdi,%r11)
23c9: c4 a3 79 16 4c 1f 04 01 vpextrd $1, %xmm1, 4(%rdi,%r11)
23d1: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
23d5: 41 83 e8 02 subl $2, %r8d
23d9: 7f a6 jg -90 <_dav1d_prep_8tap_16bpc_avx2.h_w4l>
23db: 5b popq %rbx
23dc: c5 f8 77 vzeroupper
23df: c3 retq
00000000000023e0 _dav1d_prep_8tap_16bpc_avx2.h_w8:
23e0: 48 83 ee 06 subq $6, %rsi
23e4: c4 82 79 59 ac ca 00 ff ff ff vpbroadcastq -256(%r10,%r9,8), %xmm5
23ee: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
00000000000023f3 _dav1d_prep_8tap_16bpc_avx2.h_w8l:
23f3: 89 c1 movl %eax, %ecx
00000000000023f5 _dav1d_prep_8tap_16bpc_avx2.h_w8c:
23f5: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
23f9: c5 fa 6f 56 02 vmovdqu 2(%rsi), %xmm2
23fe: c4 e3 7d 38 04 16 01 vinserti128 $1, (%rsi,%rdx), %ymm0, %ymm0
2405: c4 e3 6d 38 54 32 02 01 vinserti128 $1, 2(%rdx,%rsi), %ymm2, %ymm2
240d: c5 fa 6f 4e 04 vmovdqu 4(%rsi), %xmm1
2412: c5 fa 6f 5e 06 vmovdqu 6(%rsi), %xmm3
2417: c4 e3 75 38 4c 32 04 01 vinserti128 $1, 4(%rdx,%rsi), %ymm1, %ymm1
241f: c4 e3 65 38 5c 32 06 01 vinserti128 $1, 6(%rdx,%rsi), %ymm3, %ymm3
2427: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
242b: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
242f: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2433: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2437: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0
243c: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
2441: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
2446: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
244a: c5 fd e2 c7 vpsrad %xmm7, %ymm0, %ymm0
244e: c5 fa 6f 4e 08 vmovdqu 8(%rsi), %xmm1
2453: c5 fa 6f 5e 0a vmovdqu 10(%rsi), %xmm3
2458: c4 e3 75 38 4c 32 08 01 vinserti128 $1, 8(%rdx,%rsi), %ymm1, %ymm1
2460: c4 e3 65 38 5c 32 0a 01 vinserti128 $1, 10(%rdx,%rsi), %ymm3, %ymm3
2468: c5 fa 6f 56 0c vmovdqu 12(%rsi), %xmm2
246d: c5 fa 6f 66 0e vmovdqu 14(%rsi), %xmm4
2472: c4 e3 6d 38 54 32 0c 01 vinserti128 $1, 12(%rdx,%rsi), %ymm2, %ymm2
247a: c4 e3 5d 38 64 32 0e 01 vinserti128 $1, 14(%rdx,%rsi), %ymm4, %ymm4
2482: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2486: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
248a: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
248e: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2492: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
2497: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2
249c: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
24a1: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
24a5: c5 f5 e2 cf vpsrad %xmm7, %ymm1, %ymm1
24a9: c5 fd 6b c1 vpackssdw %ymm1, %ymm0, %ymm0
24ad: 48 83 c6 10 addq $16, %rsi
24b1: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
24b5: c4 a3 7d 39 04 1f 01 vextracti128 $1, %ymm0, (%rdi,%r11)
24bc: 48 83 c7 10 addq $16, %rdi
24c0: 83 e9 10 subl $16, %ecx
24c3: 0f 8f 2c ff ff ff jg -212 <_dav1d_prep_8tap_16bpc_avx2.h_w8c>
24c9: 48 29 c6 subq %rax, %rsi
24cc: 48 29 c7 subq %rax, %rdi
24cf: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
24d3: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
24d7: 41 83 e8 02 subl $2, %r8d
24db: 0f 8f 12 ff ff ff jg -238 <_dav1d_prep_8tap_16bpc_avx2.h_w8l>
24e1: 5b popq %rbx
24e2: c5 f8 77 vzeroupper
24e5: c3 retq
00000000000024e6 _dav1d_prep_8tap_16bpc_avx2.h_jmp_tbl:
24e6: 80 fe ff cmpb $-1, %dh
24e9: ff fa <unknown>
24eb: fe ff <unknown>
24ed: ff fa <unknown>
24ef: fe ff <unknown>
24f1: ff fa <unknown>
24f3: fe ff <unknown>
24f5: ff fa <unknown>
24f7: fe ff <unknown>
24f9: ff fa <unknown>
24fb: fe ff <unknown>
24fd: ff 41 83 incl -125(%rcx)
00000000000024fe _dav1d_prep_8tap_16bpc_avx2.prep_8tap_v_16bpc:
24fe: 41 83 f8 04 cmpl $4, %r8d
2502: 7e 03 jle 3 <_dav1d_prep_8tap_16bpc_avx2.v_use4tap>
2504: c1 e8 07 shrl $7, %eax
0000000000002507 _dav1d_prep_8tap_16bpc_avx2.v_use4tap:
2507: 83 e0 7f andl $127, %eax
250a: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d
2511: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
251a: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7
2523: 41 83 fa 0c cmpl $12, %r10d
2527: 75 12 jne 18 <_dav1d_prep_8tap_16bpc_avx2.v_bits10>
2529: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
2532: c4 e2 7d 59 3d 00 00 00 00 vpbroadcastq (%rip), %ymm7
000000000000253b _dav1d_prep_8tap_16bpc_avx2.v_bits10:
253b: 4c 8d 15 01 04 00 00 leaq 1025(%rip), %r10
2542: 4c 8d 0c 09 leaq (%rcx,%rcx), %r9
2546: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx
254a: 49 8d 84 c2 07 fc ff ff leaq -1017(%r10,%rax,8), %rax
2552: c4 62 7d 79 00 vpbroadcastw (%rax), %ymm8
2557: c4 62 7d 79 48 02 vpbroadcastw 2(%rax), %ymm9
255d: c4 62 7d 79 50 04 vpbroadcastw 4(%rax), %ymm10
2563: c4 62 7d 79 58 06 vpbroadcastw 6(%rax), %ymm11
2569: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8
256e: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
2573: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
2578: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11
257d: f3 0f bc c9 tzcntl %ecx, %ecx
2581: 83 e9 02 subl $2, %ecx
2584: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx
2588: 4c 01 d1 addq %r10, %rcx
258b: ff e1 jmpq *%rcx
000000000000258d _dav1d_prep_8tap_16bpc_avx2.v_w4:
258d: 41 83 f8 04 cmpl $4, %r8d
2591: 0f 8f 84 00 00 00 jg 132 <_dav1d_prep_8tap_16bpc_avx2.v_w48>
2597: 48 29 d6 subq %rdx, %rsi
259a: c5 fa 7e 06 vmovq (%rsi), %xmm0
259e: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1
25a3: c5 fa 7e 14 56 vmovq (%rsi,%rdx,2), %xmm2
25a8: 48 01 de addq %rbx, %rsi
25ab: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
25af: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
00000000000025b3 _dav1d_prep_8tap_16bpc_avx2.v_w4l:
25b3: c5 fa 7e 1e vmovq (%rsi), %xmm3
25b7: c5 fa 7e 24 16 vmovq (%rsi,%rdx), %xmm4
25bc: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
25c0: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
25c4: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
25c8: c5 b1 f5 c0 vpmaddwd %xmm0, %xmm9, %xmm0
25cc: c5 a9 f5 ea vpmaddwd %xmm2, %xmm10, %xmm5
25d0: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
25d4: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
25d8: c5 b1 f5 c9 vpmaddwd %xmm1, %xmm9, %xmm1
25dc: c5 29 f5 c3 vpmaddwd %xmm3, %xmm10, %xmm8
25e0: c5 39 fe c1 vpaddd %xmm1, %xmm8, %xmm8
25e4: c5 f9 6f cb vmovdqa %xmm3, %xmm1
25e8: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
25ec: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
25f0: c5 39 fe c6 vpaddd %xmm6, %xmm8, %xmm8
25f4: c5 d1 e2 ef vpsrad %xmm7, %xmm5, %xmm5
25f8: c5 39 e2 c7 vpsrad %xmm7, %xmm8, %xmm8
25fc: c5 d1 6b ed vpackssdw %xmm5, %xmm5, %xmm5
2600: c4 41 39 6b c0 vpackssdw %xmm8, %xmm8, %xmm8
2605: c5 f9 d6 2f vmovq %xmm5, (%rdi)
2609: c4 21 79 d6 04 1f vmovq %xmm8, (%rdi,%r11)
260f: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2613: 41 83 e8 02 subl $2, %r8d
2617: 7f 9a jg -102 <_dav1d_prep_8tap_16bpc_avx2.v_w4l>
2619: 5b popq %rbx
261a: c3 retq
000000000000261b _dav1d_prep_8tap_16bpc_avx2.v_w48:
261b: 48 29 de subq %rbx, %rsi
261e: c5 fa 7e 06 vmovq (%rsi), %xmm0
2622: c5 fa 7e 0c 16 vmovq (%rsi,%rdx), %xmm1
2627: c5 fa 7e 14 56 vmovq (%rsi,%rdx,2), %xmm2
262c: 48 01 de addq %rbx, %rsi
262f: c5 fa 7e 1e vmovq (%rsi), %xmm3
2633: c5 fa 7e 24 16 vmovq (%rsi,%rdx), %xmm4
2638: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
263c: c5 7a 7e 26 vmovq (%rsi), %xmm12
2640: c5 7a 7e 2c 16 vmovq (%rsi,%rdx), %xmm13
2645: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2649: c5 f9 61 c1 vpunpcklwd %xmm1, %xmm0, %xmm0
264d: c5 f1 61 ca vpunpcklwd %xmm2, %xmm1, %xmm1
2651: c5 e9 61 d3 vpunpcklwd %xmm3, %xmm2, %xmm2
2655: c5 e1 61 dc vpunpcklwd %xmm4, %xmm3, %xmm3
2659: c4 c1 59 61 e4 vpunpcklwd %xmm12, %xmm4, %xmm4
265e: c4 41 19 61 e5 vpunpcklwd %xmm13, %xmm12, %xmm12
0000000000002663 _dav1d_prep_8tap_16bpc_avx2.v_w48l:
2663: 48 29 d6 subq %rdx, %rsi
2666: c5 7a 7e 2e vmovq (%rsi), %xmm13
266a: c5 7a 7e 34 16 vmovq (%rsi,%rdx), %xmm14
266f: c5 7a 7e 3c 56 vmovq (%rsi,%rdx,2), %xmm15
2674: 48 01 de addq %rbx, %rsi
2677: c4 41 11 61 ee vpunpcklwd %xmm14, %xmm13, %xmm13
267c: c4 41 09 61 f7 vpunpcklwd %xmm15, %xmm14, %xmm14
2681: c5 b9 f5 c0 vpmaddwd %xmm0, %xmm8, %xmm0
2685: c5 b1 f5 ea vpmaddwd %xmm2, %xmm9, %xmm5
2689: c5 d1 fe e8 vpaddd %xmm0, %xmm5, %xmm5
268d: c5 f9 6f c2 vmovdqa %xmm2, %xmm0
2691: c5 b9 f5 c9 vpmaddwd %xmm1, %xmm8, %xmm1
2695: c5 31 f5 fb vpmaddwd %xmm3, %xmm9, %xmm15
2699: c5 01 fe f9 vpaddd %xmm1, %xmm15, %xmm15
269d: c5 f9 6f cb vmovdqa %xmm3, %xmm1
26a1: c5 a9 f5 d4 vpmaddwd %xmm4, %xmm10, %xmm2
26a5: c4 c1 19 f5 da vpmaddwd %xmm10, %xmm12, %xmm3
26aa: c5 d1 fe ea vpaddd %xmm2, %xmm5, %xmm5
26ae: c5 01 fe fb vpaddd %xmm3, %xmm15, %xmm15
26b2: c5 f9 6f d4 vmovdqa %xmm4, %xmm2
26b6: c4 c1 79 6f dc vmovdqa %xmm12, %xmm3
26bb: c4 c1 11 f5 e3 vpmaddwd %xmm11, %xmm13, %xmm4
26c0: c4 41 09 f5 e3 vpmaddwd %xmm11, %xmm14, %xmm12
26c5: c5 d1 fe ec vpaddd %xmm4, %xmm5, %xmm5
26c9: c4 41 01 fe fc vpaddd %xmm12, %xmm15, %xmm15
26ce: c4 c1 79 6f e5 vmovdqa %xmm13, %xmm4
26d3: c4 41 79 6f e6 vmovdqa %xmm14, %xmm12
26d8: c5 d1 fe ee vpaddd %xmm6, %xmm5, %xmm5
26dc: c5 01 fe fe vpaddd %xmm6, %xmm15, %xmm15
26e0: c5 d1 e2 ef vpsrad %xmm7, %xmm5, %xmm5
26e4: c5 01 e2 ff vpsrad %xmm7, %xmm15, %xmm15
26e8: c5 d1 6b ed vpackssdw %xmm5, %xmm5, %xmm5
26ec: c4 41 01 6b ff vpackssdw %xmm15, %xmm15, %xmm15
26f1: c5 f9 d6 2f vmovq %xmm5, (%rdi)
26f5: c4 21 79 d6 3c 1f vmovq %xmm15, (%rdi,%r11)
26fb: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
26ff: 41 83 e8 02 subl $2, %r8d
2703: 0f 8f 5a ff ff ff jg -166 <_dav1d_prep_8tap_16bpc_avx2.v_w48l>
2709: 5b popq %rbx
270a: c3 retq
000000000000270b _dav1d_prep_8tap_16bpc_avx2.v_w8:
270b: 44 89 c1 movl %r8d, %ecx
270e: 48 89 f8 movq %rdi, %rax
2711: 41 83 f8 04 cmpl $4, %r8d
2715: 0f 8f cf 00 00 00 jg 207 <_dav1d_prep_8tap_16bpc_avx2.v_w88>
271b: 48 29 d6 subq %rdx, %rsi
271e: 49 89 f2 movq %rsi, %r10
0000000000002721 _dav1d_prep_8tap_16bpc_avx2.v_w8l:
2721: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
2725: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
272a: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2
272f: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0
2735: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1
273b: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2
2741: 48 01 de addq %rbx, %rsi
2744: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
2748: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
000000000000274c _dav1d_prep_8tap_16bpc_avx2.v_w8c:
274c: c5 fa 6f 1e vmovdqu (%rsi), %xmm3
2750: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4
2755: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3
275b: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4
2761: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2765: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
2769: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
276d: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
2771: c5 ad f5 ea vpmaddwd %ymm2, %ymm10, %ymm5
2775: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5
2779: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
277d: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
2781: c5 2d f5 c3 vpmaddwd %ymm3, %ymm10, %ymm8
2785: c5 3d fe c1 vpaddd %ymm1, %ymm8, %ymm8
2789: c5 fd 6f cb vmovdqa %ymm3, %ymm1
278d: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
2791: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5
2795: c5 3d fe c6 vpaddd %ymm6, %ymm8, %ymm8
2799: c5 d5 e2 ef vpsrad %xmm7, %ymm5, %ymm5
279d: c5 3d e2 c7 vpsrad %xmm7, %ymm8, %ymm8
27a1: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
27a5: c4 41 3d 6b c0 vpackssdw %ymm8, %ymm8, %ymm8
27aa: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5
27b0: c4 43 fd 00 c0 d8 vpermq $216, %ymm8, %ymm8
27b6: c5 fa 7f 2f vmovdqu %xmm5, (%rdi)
27ba: c4 21 7a 7f 04 1f vmovdqu %xmm8, (%rdi,%r11)
27c0: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
27c4: 41 83 e8 02 subl $2, %r8d
27c8: 7f 82 jg -126 <_dav1d_prep_8tap_16bpc_avx2.v_w8c>
27ca: 48 83 c0 10 addq $16, %rax
27ce: 49 83 c2 10 addq $16, %r10
27d2: 41 89 c8 movl %ecx, %r8d
27d5: 48 89 c7 movq %rax, %rdi
27d8: 4c 89 d6 movq %r10, %rsi
27db: 41 83 e9 10 subl $16, %r9d
27df: 0f 8f 3c ff ff ff jg -196 <_dav1d_prep_8tap_16bpc_avx2.v_w8l>
27e5: 5b popq %rbx
27e6: c5 f8 77 vzeroupper
27e9: c3 retq
00000000000027ea _dav1d_prep_8tap_16bpc_avx2.v_w88:
27ea: 48 29 de subq %rbx, %rsi
27ed: 49 89 f2 movq %rsi, %r10
00000000000027f0 _dav1d_prep_8tap_16bpc_avx2.v_w88l:
27f0: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
27f4: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
27f9: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2
27fe: c4 e3 fd 00 c0 d8 vpermq $216, %ymm0, %ymm0
2804: c4 e3 fd 00 c9 d8 vpermq $216, %ymm1, %ymm1
280a: c4 e3 fd 00 d2 d8 vpermq $216, %ymm2, %ymm2
2810: 48 01 de addq %rbx, %rsi
2813: c5 fa 6f 1e vmovdqu (%rsi), %xmm3
2817: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4
281c: c4 e3 fd 00 db d8 vpermq $216, %ymm3, %ymm3
2822: c4 e3 fd 00 e4 d8 vpermq $216, %ymm4, %ymm4
2828: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
282c: c5 7a 6f 26 vmovdqu (%rsi), %xmm12
2830: c5 7a 6f 2c 16 vmovdqu (%rsi,%rdx), %xmm13
2835: c4 43 fd 00 e4 d8 vpermq $216, %ymm12, %ymm12
283b: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13
2841: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2845: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
2849: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
284d: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
2851: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
2855: c4 c1 5d 61 e4 vpunpcklwd %ymm12, %ymm4, %ymm4
285a: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
000000000000285f _dav1d_prep_8tap_16bpc_avx2.v_w88c:
285f: 48 29 d6 subq %rdx, %rsi
2862: c5 7a 6f 2e vmovdqu (%rsi), %xmm13
2866: c5 7a 6f 34 16 vmovdqu (%rsi,%rdx), %xmm14
286b: c5 7a 6f 3c 56 vmovdqu (%rsi,%rdx,2), %xmm15
2870: c4 43 fd 00 ed d8 vpermq $216, %ymm13, %ymm13
2876: c4 43 fd 00 f6 d8 vpermq $216, %ymm14, %ymm14
287c: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15
2882: 48 01 de addq %rbx, %rsi
2885: c4 41 15 61 ee vpunpcklwd %ymm14, %ymm13, %ymm13
288a: c4 41 0d 61 f7 vpunpcklwd %ymm15, %ymm14, %ymm14
288f: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
2893: c5 b5 f5 ea vpmaddwd %ymm2, %ymm9, %ymm5
2897: c5 d5 fe e8 vpaddd %ymm0, %ymm5, %ymm5
289b: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
289f: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
28a3: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
28a7: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
28ab: c5 fd 6f cb vmovdqa %ymm3, %ymm1
28af: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
28b3: c4 c1 1d f5 da vpmaddwd %ymm10, %ymm12, %ymm3
28b8: c5 d5 fe ea vpaddd %ymm2, %ymm5, %ymm5
28bc: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
28c0: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
28c4: c4 c1 7d 6f dc vmovdqa %ymm12, %ymm3
28c9: c4 c1 15 f5 e3 vpmaddwd %ymm11, %ymm13, %ymm4
28ce: c4 41 0d f5 e3 vpmaddwd %ymm11, %ymm14, %ymm12
28d3: c5 d5 fe ec vpaddd %ymm4, %ymm5, %ymm5
28d7: c4 41 05 fe fc vpaddd %ymm12, %ymm15, %ymm15
28dc: c4 c1 7d 6f e5 vmovdqa %ymm13, %ymm4
28e1: c4 41 7d 6f e6 vmovdqa %ymm14, %ymm12
28e6: c5 d5 fe ee vpaddd %ymm6, %ymm5, %ymm5
28ea: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
28ee: c5 d5 e2 ef vpsrad %xmm7, %ymm5, %ymm5
28f2: c5 05 e2 ff vpsrad %xmm7, %ymm15, %ymm15
28f6: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
28fa: c4 41 05 6b ff vpackssdw %ymm15, %ymm15, %ymm15
28ff: c4 e3 fd 00 ed d8 vpermq $216, %ymm5, %ymm5
2905: c4 43 fd 00 ff d8 vpermq $216, %ymm15, %ymm15
290b: c5 fa 7f 2f vmovdqu %xmm5, (%rdi)
290f: c4 21 7a 7f 3c 1f vmovdqu %xmm15, (%rdi,%r11)
2915: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2919: 41 83 e8 02 subl $2, %r8d
291d: 0f 8f 3c ff ff ff jg -196 <_dav1d_prep_8tap_16bpc_avx2.v_w88c>
2923: 48 83 c0 10 addq $16, %rax
2927: 49 83 c2 10 addq $16, %r10
292b: 41 89 c8 movl %ecx, %r8d
292e: 48 89 c7 movq %rax, %rdi
2931: 4c 89 d6 movq %r10, %rsi
2934: 41 83 e9 10 subl $16, %r9d
2938: 0f 8f b2 fe ff ff jg -334 <_dav1d_prep_8tap_16bpc_avx2.v_w88l>
293e: 5b popq %rbx
293f: c5 f8 77 vzeroupper
2942: c3 retq
0000000000002943 _dav1d_prep_8tap_16bpc_avx2.v_jmp_tbl:
2943: 4a fc cld
2945: ff ff <unknown>
2947: c8 fd ff ff enter $-3, $-1
294b: c8 fd ff ff enter $-3, $-1
294f: c8 fd ff ff enter $-3, $-1
2953: c8 fd ff ff enter $-3, $-1
2957: c8 fd ff ff enter $-3, $-1
000000000000295b _dav1d_prep_8tap_16bpc_avx2.prep_8tap_hv_16bpc:
295b: 41 83 f8 04 cmpl $4, %r8d
295f: 7e 03 jle 3 <_dav1d_prep_8tap_16bpc_avx2.hv_use4tap>
2961: c1 e8 07 shrl $7, %eax
0000000000002964 _dav1d_prep_8tap_16bpc_avx2.hv_use4tap:
2964: 83 e0 7f andl $127, %eax
2967: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d
296e: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
2977: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13
297f: c4 62 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm14
2988: 41 83 fa 0c cmpl $12, %r10d
298c: 75 11 jne 17 <_dav1d_prep_8tap_16bpc_avx2.hv_bits10>
298e: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
2997: c5 7a 7e 2d 00 00 00 00 vmovq (%rip), %xmm13
000000000000299f _dav1d_prep_8tap_16bpc_avx2.hv_bits10:
299f: 4c 8d 15 dd 0f 00 00 leaq 4061(%rip), %r10
29a6: 49 8d 9c c2 23 f0 ff ff leaq -4061(%r10,%rax,8), %rbx
29ae: c4 62 79 79 03 vpbroadcastw (%rbx), %xmm8
29b3: c4 62 79 79 4b 02 vpbroadcastw 2(%rbx), %xmm9
29b9: c4 62 79 79 53 04 vpbroadcastw 4(%rbx), %xmm10
29bf: c4 62 79 79 5b 06 vpbroadcastw 6(%rbx), %xmm11
29c5: c4 42 7d 20 c0 vpmovsxbw %xmm8, %ymm8
29ca: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
29cf: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
29d4: c4 42 7d 20 db vpmovsxbw %xmm11, %ymm11
29d9: 48 89 cb movq %rcx, %rbx
29dc: f3 0f bc c9 tzcntl %ecx, %ecx
29e0: 83 e9 02 subl $2, %ecx
29e3: 49 63 0c 8a movslq (%r10,%rcx,4), %rcx
29e7: 4c 01 d1 addq %r10, %rcx
29ea: ff e1 jmpq *%rcx
00000000000029ec _dav1d_prep_8tap_16bpc_avx2.hv_w4:
29ec: 41 83 f8 04 cmpl $4, %r8d
29f0: 0f 8f 55 01 00 00 jg 341 <_dav1d_prep_8tap_16bpc_avx2.hv_w48>
29f6: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx
29fa: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8
2a02: c4 82 79 58 ac ca 83 f0 ff ff vpbroadcastd -3965(%r10,%r9,8), %xmm5
2a0c: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
2a11: 48 83 ee 02 subq $2, %rsi
2a15: 48 29 d6 subq %rdx, %rsi
2a18: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0
2a1d: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1
2a23: c4 e2 7d 5a 14 56 vbroadcasti128 (%rsi,%rdx,2), %ymm2
2a29: 48 01 de addq %rbx, %rsi
2a2c: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0
2a31: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1
2a36: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
2a3a: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2a3e: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0
2a43: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1
2a48: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
2a4c: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
2a50: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
2a55: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
2a5a: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
2a5e: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
2a62: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2
2a67: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2a6b: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2
2a70: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
2a74: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
2a79: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
2a7d: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
2a81: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
0000000000002a85 _dav1d_prep_8tap_16bpc_avx2.hv_w4l:
2a85: c4 e2 7d 5a 1e vbroadcasti128 (%rsi), %ymm3
2a8a: c4 e2 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm4
2a90: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2a94: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
2a99: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
2a9e: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2aa2: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2aa6: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
2aab: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
2ab0: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
2ab4: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
2ab8: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
2abd: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
2ac2: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
2ac6: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
2aca: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
2ace: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
2ad2: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
2ad6: c5 2d f5 da vpmaddwd %ymm2, %ymm10, %ymm11
2ada: c5 25 fe d8 vpaddd %ymm0, %ymm11, %ymm11
2ade: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
2ae2: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
2ae6: c5 2d f5 e3 vpmaddwd %ymm3, %ymm10, %ymm12
2aea: c5 1d fe e1 vpaddd %ymm1, %ymm12, %ymm12
2aee: c5 fd 6f cb vmovdqa %ymm3, %ymm1
2af2: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
2af6: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
2afb: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12
2b00: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11
2b06: c4 c1 1d 72 e4 06 vpsrad $6, %ymm12, %ymm12
2b0c: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11
2b11: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
2b16: c4 63 7d 39 db 01 vextracti128 $1, %ymm11, %xmm3
2b1c: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4
2b22: c5 79 7e 1f vmovd %xmm11, (%rdi)
2b26: c5 f9 7e 5f 04 vmovd %xmm3, 4(%rdi)
2b2b: c4 21 79 7e 24 1f vmovd %xmm12, (%rdi,%r11)
2b31: c4 a1 79 7e 64 1f 04 vmovd %xmm4, 4(%rdi,%r11)
2b38: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2b3c: 41 83 e8 02 subl $2, %r8d
2b40: 0f 8f 3f ff ff ff jg -193 <_dav1d_prep_8tap_16bpc_avx2.hv_w4l>
2b46: 5b popq %rbx
2b47: c5 f8 77 vzeroupper
2b4a: c3 retq
0000000000002b4b _dav1d_prep_8tap_16bpc_avx2.hv_w48:
2b4b: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx
2b4f: c5 7d 6f 05 00 00 00 00 vmovdqa (%rip), %ymm8
2b57: c4 82 79 58 ac ca d8 f1 ff ff vpbroadcastd -3624(%r10,%r9,8), %xmm5
2b61: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
2b66: 49 8d 84 c2 e3 f1 ff ff leaq -3613(%r10,%rax,8), %rax
2b6e: c4 62 79 58 08 vpbroadcastd (%rax), %xmm9
2b73: c4 62 79 58 50 04 vpbroadcastd 4(%rax), %xmm10
2b79: c4 42 7d 20 c9 vpmovsxbw %xmm9, %ymm9
2b7e: c4 42 7d 20 d2 vpmovsxbw %xmm10, %ymm10
2b83: 48 83 ee 02 subq $2, %rsi
2b87: 48 29 de subq %rbx, %rsi
2b8a: c4 e2 7d 5a 06 vbroadcasti128 (%rsi), %ymm0
2b8f: c4 e2 7d 5a 0c 16 vbroadcasti128 (%rsi,%rdx), %ymm1
2b95: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2b99: c4 c2 7d 00 c0 vpshufb %ymm8, %ymm0, %ymm0
2b9e: c4 c2 75 00 c8 vpshufb %ymm8, %ymm1, %ymm1
2ba3: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
2ba7: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2bab: c4 e2 7d 02 c0 vphaddd %ymm0, %ymm0, %ymm0
2bb0: c4 e2 75 02 c9 vphaddd %ymm1, %ymm1, %ymm1
2bb5: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
2bb9: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
2bbd: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
2bc2: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
2bc7: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
2bcb: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
2bcf: c4 e2 7d 5a 26 vbroadcasti128 (%rsi), %ymm4
2bd4: c4 e2 7d 5a 1c 16 vbroadcasti128 (%rsi,%rdx), %ymm3
2bda: c4 e2 7d 5a 14 56 vbroadcasti128 (%rsi,%rdx,2), %ymm2
2be0: 48 01 de addq %rbx, %rsi
2be3: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
2be8: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
2bed: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2bf1: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2bf5: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
2bfa: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
2bff: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
2c03: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
2c07: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
2c0c: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
2c11: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
2c15: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
2c19: c4 c2 6d 00 d0 vpshufb %ymm8, %ymm2, %ymm2
2c1e: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2c22: c4 e2 6d 02 d2 vphaddd %ymm2, %ymm2, %ymm2
2c27: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
2c2b: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
2c30: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
2c34: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
2c38: c5 f5 61 cc vpunpcklwd %ymm4, %ymm1, %ymm1
2c3c: c5 dd 61 e3 vpunpcklwd %ymm3, %ymm4, %ymm4
2c40: c5 e5 61 da vpunpcklwd %ymm2, %ymm3, %ymm3
2c44: c5 fd 62 c4 vpunpckldq %ymm4, %ymm0, %ymm0
2c48: c5 f5 62 cb vpunpckldq %ymm3, %ymm1, %ymm1
2c4c: c4 e2 7d 5a 1e vbroadcasti128 (%rsi), %ymm3
2c51: c4 e2 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm4
2c57: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2c5b: c4 c2 65 00 d8 vpshufb %ymm8, %ymm3, %ymm3
2c60: c4 c2 5d 00 e0 vpshufb %ymm8, %ymm4, %ymm4
2c65: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2c69: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2c6d: c4 e2 65 02 db vphaddd %ymm3, %ymm3, %ymm3
2c72: c4 e2 5d 02 e4 vphaddd %ymm4, %ymm4, %ymm4
2c77: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
2c7b: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
2c7f: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
2c84: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
2c89: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
2c8d: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
2c91: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
2c95: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
0000000000002c99 _dav1d_prep_8tap_16bpc_avx2.hv_w48l:
2c99: c4 62 7d 5a 1e vbroadcasti128 (%rsi), %ymm11
2c9e: c4 62 7d 5a 24 16 vbroadcasti128 (%rsi,%rdx), %ymm12
2ca4: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2ca8: c4 42 25 00 d8 vpshufb %ymm8, %ymm11, %ymm11
2cad: c4 42 1d 00 e0 vpshufb %ymm8, %ymm12, %ymm12
2cb2: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
2cb6: c5 1d f5 e5 vpmaddwd %ymm5, %ymm12, %ymm12
2cba: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11
2cbf: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12
2cc4: c5 25 fe de vpaddd %ymm6, %ymm11, %ymm11
2cc8: c5 1d fe e6 vpaddd %ymm6, %ymm12, %ymm12
2ccc: c4 41 25 e2 dd vpsrad %xmm13, %ymm11, %ymm11
2cd1: c4 41 1d e2 e5 vpsrad %xmm13, %ymm12, %ymm12
2cd6: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11
2cdb: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
2ce0: c4 c1 5d 61 e3 vpunpcklwd %ymm11, %ymm4, %ymm4
2ce5: c4 41 25 61 dc vpunpcklwd %ymm12, %ymm11, %ymm11
2cea: c5 ed 62 d4 vpunpckldq %ymm4, %ymm2, %ymm2
2cee: c4 c1 65 62 db vpunpckldq %ymm11, %ymm3, %ymm3
2cf3: c5 35 f5 d8 vpmaddwd %ymm0, %ymm9, %ymm11
2cf7: c5 ad f5 e2 vpmaddwd %ymm2, %ymm10, %ymm4
2cfb: c5 35 f5 e1 vpmaddwd %ymm1, %ymm9, %ymm12
2cff: c5 25 fe dc vpaddd %ymm4, %ymm11, %ymm11
2d03: c5 ad f5 e3 vpmaddwd %ymm3, %ymm10, %ymm4
2d07: c5 1d fe e4 vpaddd %ymm4, %ymm12, %ymm12
2d0b: c4 42 25 02 db vphaddd %ymm11, %ymm11, %ymm11
2d10: c4 42 1d 02 e4 vphaddd %ymm12, %ymm12, %ymm12
2d15: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
2d1a: c4 41 1d fe e6 vpaddd %ymm14, %ymm12, %ymm12
2d1f: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11
2d25: c4 c1 1d 72 e4 06 vpsrad $6, %ymm12, %ymm12
2d2b: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11
2d30: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
2d35: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4
2d3b: c5 79 7e 1f vmovd %xmm11, (%rdi)
2d3f: c5 f9 7e 67 04 vmovd %xmm4, 4(%rdi)
2d44: c4 63 7d 39 e4 01 vextracti128 $1, %ymm12, %xmm4
2d4a: c4 21 79 7e 24 1f vmovd %xmm12, (%rdi,%r11)
2d50: c4 a1 79 7e 64 1f 04 vmovd %xmm4, 4(%rdi,%r11)
2d57: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
2d5b: c5 fd 70 c0 8d vpshufd $141, %ymm0, %ymm0
2d60: c5 fd 70 c9 8d vpshufd $141, %ymm1, %ymm1
2d65: c5 7d 70 da d8 vpshufd $216, %ymm2, %ymm11
2d6a: c5 7d 70 e3 d8 vpshufd $216, %ymm3, %ymm12
2d6f: c5 fd 70 d2 8d vpshufd $141, %ymm2, %ymm2
2d74: c5 fd 70 db 8d vpshufd $141, %ymm3, %ymm3
2d79: c5 fd 6f e3 vmovdqa %ymm3, %ymm4
2d7d: c5 dd 72 e4 10 vpsrad $16, %ymm4, %ymm4
2d82: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
2d86: c4 c1 7d 62 c3 vpunpckldq %ymm11, %ymm0, %ymm0
2d8b: c4 c1 75 62 cc vpunpckldq %ymm12, %ymm1, %ymm1
2d90: 41 83 e8 02 subl $2, %r8d
2d94: 0f 8f ff fe ff ff jg -257 <_dav1d_prep_8tap_16bpc_avx2.hv_w48l>
2d9a: 5b popq %rbx
2d9b: c5 f8 77 vzeroupper
2d9e: c3 retq
0000000000002d9f _dav1d_prep_8tap_16bpc_avx2.hv_w8:
2d9f: 48 89 d9 movq %rbx, %rcx
2da2: 41 83 f8 04 cmpl $4, %r8d
2da6: 0f 8f ac 02 00 00 jg 684 <_dav1d_prep_8tap_16bpc_avx2.hv_w88>
2dac: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx
2db0: c4 82 79 59 ac ca 2f f4 ff ff vpbroadcastq -3025(%r10,%r9,8), %xmm5
2dba: c4 e2 7d 20 ed vpmovsxbw %xmm5, %ymm5
2dbf: 48 83 ee 06 subq $6, %rsi
2dc3: 48 29 d6 subq %rdx, %rsi
2dc6: 45 89 c1 movl %r8d, %r9d
2dc9: 48 89 f8 movq %rdi, %rax
2dcc: 49 89 f2 movq %rsi, %r10
0000000000002dcf _dav1d_prep_8tap_16bpc_avx2.hv_w8l:
2dcf: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
2dd3: c5 fa 6f 56 02 vmovdqu 2(%rsi), %xmm2
2dd8: c4 e3 7d 38 46 08 01 vinserti128 $1, 8(%rsi), %ymm0, %ymm0
2ddf: c4 e3 6d 38 56 0a 01 vinserti128 $1, 10(%rsi), %ymm2, %ymm2
2de6: c5 fd f5 c5 vpmaddwd %ymm5, %ymm0, %ymm0
2dea: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2dee: c4 e2 7d 02 c2 vphaddd %ymm2, %ymm0, %ymm0
2df3: c5 fa 6f 4e 04 vmovdqu 4(%rsi), %xmm1
2df8: c5 fa 6f 56 06 vmovdqu 6(%rsi), %xmm2
2dfd: c4 e3 75 38 4e 0c 01 vinserti128 $1, 12(%rsi), %ymm1, %ymm1
2e04: c4 e3 6d 38 56 0e 01 vinserti128 $1, 14(%rsi), %ymm2, %ymm2
2e0b: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2e0f: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2e13: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
2e18: c4 e2 7d 02 c1 vphaddd %ymm1, %ymm0, %ymm0
2e1d: c5 fd fe c6 vpaddd %ymm6, %ymm0, %ymm0
2e21: c4 c1 7d e2 c5 vpsrad %xmm13, %ymm0, %ymm0
2e26: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
2e2a: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
2e2f: c5 fa 6f 5c 16 02 vmovdqu 2(%rsi,%rdx), %xmm3
2e35: c4 e3 75 38 4c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm1, %ymm1
2e3d: c4 e3 65 38 5c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm3, %ymm3
2e45: c5 f5 f5 cd vpmaddwd %ymm5, %ymm1, %ymm1
2e49: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2e4d: c4 e2 75 02 cb vphaddd %ymm3, %ymm1, %ymm1
2e52: c5 fa 6f 54 16 04 vmovdqu 4(%rsi,%rdx), %xmm2
2e58: c5 fa 6f 5c 16 06 vmovdqu 6(%rsi,%rdx), %xmm3
2e5e: c4 e3 6d 38 54 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm2, %ymm2
2e66: c4 e3 65 38 5c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm3, %ymm3
2e6e: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2e72: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2e76: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2
2e7b: c4 e2 75 02 ca vphaddd %ymm2, %ymm1, %ymm1
2e80: c5 f5 fe ce vpaddd %ymm6, %ymm1, %ymm1
2e84: c4 c1 75 e2 cd vpsrad %xmm13, %ymm1, %ymm1
2e89: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
2e8d: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2
2e92: c5 fa 6f 64 56 02 vmovdqu 2(%rsi,%rdx,2), %xmm4
2e98: c4 e3 6d 38 54 56 08 01 vinserti128 $1, 8(%rsi,%rdx,2), %ymm2, %ymm2
2ea0: c4 e3 5d 38 64 56 0a 01 vinserti128 $1, 10(%rsi,%rdx,2), %ymm4, %ymm4
2ea8: c5 ed f5 d5 vpmaddwd %ymm5, %ymm2, %ymm2
2eac: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2eb0: c4 e2 6d 02 d4 vphaddd %ymm4, %ymm2, %ymm2
2eb5: c5 fa 6f 5c 56 04 vmovdqu 4(%rsi,%rdx,2), %xmm3
2ebb: c5 fa 6f 64 56 06 vmovdqu 6(%rsi,%rdx,2), %xmm4
2ec1: c4 e3 65 38 5c 56 0c 01 vinserti128 $1, 12(%rsi,%rdx,2), %ymm3, %ymm3
2ec9: c4 e3 5d 38 64 56 0e 01 vinserti128 $1, 14(%rsi,%rdx,2), %ymm4, %ymm4
2ed1: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2ed5: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2ed9: c4 e2 65 02 dc vphaddd %ymm4, %ymm3, %ymm3
2ede: c4 e2 6d 02 d3 vphaddd %ymm3, %ymm2, %ymm2
2ee3: c5 ed fe d6 vpaddd %ymm6, %ymm2, %ymm2
2ee7: c4 c1 6d e2 d5 vpsrad %xmm13, %ymm2, %ymm2
2eec: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
2ef0: 48 01 de addq %rbx, %rsi
2ef3: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
2ef7: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
0000000000002efb _dav1d_prep_8tap_16bpc_avx2.hv_w8c:
2efb: c5 fa 6f 1e vmovdqu (%rsi), %xmm3
2eff: c5 7a 6f 5e 02 vmovdqu 2(%rsi), %xmm11
2f04: c4 e3 65 38 5e 08 01 vinserti128 $1, 8(%rsi), %ymm3, %ymm3
2f0b: c4 63 25 38 5e 0a 01 vinserti128 $1, 10(%rsi), %ymm11, %ymm11
2f12: c5 e5 f5 dd vpmaddwd %ymm5, %ymm3, %ymm3
2f16: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
2f1a: c4 c2 65 02 db vphaddd %ymm11, %ymm3, %ymm3
2f1f: c5 7a 6f 46 04 vmovdqu 4(%rsi), %xmm8
2f24: c5 7a 6f 5e 06 vmovdqu 6(%rsi), %xmm11
2f29: c4 63 3d 38 46 0c 01 vinserti128 $1, 12(%rsi), %ymm8, %ymm8
2f30: c4 63 25 38 5e 0e 01 vinserti128 $1, 14(%rsi), %ymm11, %ymm11
2f37: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8
2f3b: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
2f3f: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8
2f44: c4 c2 65 02 d8 vphaddd %ymm8, %ymm3, %ymm3
2f49: c5 e5 fe de vpaddd %ymm6, %ymm3, %ymm3
2f4d: c4 c1 65 e2 dd vpsrad %xmm13, %ymm3, %ymm3
2f52: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
2f56: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4
2f5b: c5 7a 6f 5c 16 02 vmovdqu 2(%rsi,%rdx), %xmm11
2f61: c4 e3 5d 38 64 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm4, %ymm4
2f69: c4 63 25 38 5c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm11, %ymm11
2f71: c5 dd f5 e5 vpmaddwd %ymm5, %ymm4, %ymm4
2f75: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
2f79: c4 c2 5d 02 e3 vphaddd %ymm11, %ymm4, %ymm4
2f7e: c5 7a 6f 44 16 04 vmovdqu 4(%rsi,%rdx), %xmm8
2f84: c5 7a 6f 5c 16 06 vmovdqu 6(%rsi,%rdx), %xmm11
2f8a: c4 63 3d 38 44 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm8, %ymm8
2f92: c4 63 25 38 5c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm11, %ymm11
2f9a: c5 3d f5 c5 vpmaddwd %ymm5, %ymm8, %ymm8
2f9e: c5 25 f5 dd vpmaddwd %ymm5, %ymm11, %ymm11
2fa2: c4 42 3d 02 c3 vphaddd %ymm11, %ymm8, %ymm8
2fa7: c4 c2 5d 02 e0 vphaddd %ymm8, %ymm4, %ymm4
2fac: c5 dd fe e6 vpaddd %ymm6, %ymm4, %ymm4
2fb0: c4 c1 5d e2 e5 vpsrad %xmm13, %ymm4, %ymm4
2fb5: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
2fb9: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
2fbd: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
2fc1: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
2fc5: c5 b5 f5 c0 vpmaddwd %ymm0, %ymm9, %ymm0
2fc9: c5 2d f5 c2 vpmaddwd %ymm2, %ymm10, %ymm8
2fcd: c5 3d fe c0 vpaddd %ymm0, %ymm8, %ymm8
2fd1: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
2fd5: c5 b5 f5 c9 vpmaddwd %ymm1, %ymm9, %ymm1
2fd9: c5 2d f5 db vpmaddwd %ymm3, %ymm10, %ymm11
2fdd: c5 25 fe d9 vpaddd %ymm1, %ymm11, %ymm11
2fe1: c5 fd 6f cb vmovdqa %ymm3, %ymm1
2fe5: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
2fe9: c4 41 3d fe c6 vpaddd %ymm14, %ymm8, %ymm8
2fee: c4 41 25 fe de vpaddd %ymm14, %ymm11, %ymm11
2ff3: c4 c1 3d 72 e0 06 vpsrad $6, %ymm8, %ymm8
2ff9: c4 c1 25 72 e3 06 vpsrad $6, %ymm11, %ymm11
2fff: c4 41 3d 6b c0 vpackssdw %ymm8, %ymm8, %ymm8
3004: c4 41 25 6b db vpackssdw %ymm11, %ymm11, %ymm11
3009: c4 63 7d 39 c3 01 vextracti128 $1, %ymm8, %xmm3
300f: c4 63 7d 39 dc 01 vextracti128 $1, %ymm11, %xmm4
3015: c5 79 d6 07 vmovq %xmm8, (%rdi)
3019: c5 f9 d6 5f 08 vmovq %xmm3, 8(%rdi)
301e: c4 21 79 d6 1c 1f vmovq %xmm11, (%rdi,%r11)
3024: c4 a1 79 d6 64 1f 08 vmovq %xmm4, 8(%rdi,%r11)
302b: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
302f: 41 83 e8 02 subl $2, %r8d
3033: 0f 8f c2 fe ff ff jg -318 <_dav1d_prep_8tap_16bpc_avx2.hv_w8c>
3039: 48 83 c0 10 addq $16, %rax
303d: 49 83 c2 10 addq $16, %r10
3041: 45 89 c8 movl %r9d, %r8d
3044: 48 89 c7 movq %rax, %rdi
3047: 4c 89 d6 movq %r10, %rsi
304a: 83 e9 08 subl $8, %ecx
304d: 0f 8f 7c fd ff ff jg -644 <_dav1d_prep_8tap_16bpc_avx2.hv_w8l>
3053: 5b popq %rbx
3054: c5 f8 77 vzeroupper
3057: c3 retq
0000000000003058 _dav1d_prep_8tap_16bpc_avx2.hv_w88:
3058: 48 8d 1c 52 leaq (%rdx,%rdx,2), %rbx
305c: c4 82 79 59 bc ca db f6 ff ff vpbroadcastq -2341(%r10,%r9,8), %xmm7
3066: c4 e2 7d 20 ff vpmovsxbw %xmm7, %ymm7
306b: 48 83 ee 06 subq $6, %rsi
306f: 48 29 de subq %rbx, %rsi
3072: 45 89 c1 movl %r8d, %r9d
3075: 48 89 f8 movq %rdi, %rax
3078: f3 44 0f b8 54 24 18 popcntl 24(%rsp), %r10d
307f: 41 83 fa 0c cmpl $12, %r10d
3083: 0f 84 7d 04 00 00 je 1149 <_dav1d_prep_8tap_16bpc_avx2.hv_w88_12bit>
3089: 49 89 f2 movq %rsi, %r10
000000000000308c _dav1d_prep_8tap_16bpc_avx2.hv_w88l_10bit:
308c: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
3095: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
3099: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
309e: c4 e3 7d 38 46 08 01 vinserti128 $1, 8(%rsi), %ymm0, %ymm0
30a5: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
30ac: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0
30b0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
30b4: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0
30b9: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
30be: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
30c3: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
30ca: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
30d1: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
30d5: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
30d9: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
30de: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0
30e3: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0
30e7: c5 fd 72 e0 02 vpsrad $2, %ymm0, %ymm0
30ec: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
30f0: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
30f5: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
30fb: c4 e3 75 38 4c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm1, %ymm1
3103: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
310b: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1
310f: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3113: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1
3118: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
311e: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
3124: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
312c: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
3134: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3138: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
313c: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3141: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1
3146: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1
314a: c5 f5 72 e1 02 vpsrad $2, %ymm1, %ymm1
314f: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
3153: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2
3158: c5 7a 6f 6c 56 02 vmovdqu 2(%rsi,%rdx,2), %xmm13
315e: c4 e3 6d 38 54 56 08 01 vinserti128 $1, 8(%rsi,%rdx,2), %ymm2, %ymm2
3166: c4 63 15 38 6c 56 0a 01 vinserti128 $1, 10(%rsi,%rdx,2), %ymm13, %ymm13
316e: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2
3172: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3176: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2
317b: c5 7a 6f 64 56 04 vmovdqu 4(%rsi,%rdx,2), %xmm12
3181: c5 7a 6f 6c 56 06 vmovdqu 6(%rsi,%rdx,2), %xmm13
3187: c4 63 1d 38 64 56 0c 01 vinserti128 $1, 12(%rsi,%rdx,2), %ymm12, %ymm12
318f: c4 63 15 38 6c 56 0e 01 vinserti128 $1, 14(%rsi,%rdx,2), %ymm13, %ymm13
3197: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
319b: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
319f: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
31a4: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2
31a9: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2
31ad: c5 ed 72 e2 02 vpsrad $2, %ymm2, %ymm2
31b2: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
31b6: 48 01 de addq %rbx, %rsi
31b9: c5 fa 6f 1e vmovdqu (%rsi), %xmm3
31bd: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
31c2: c4 e3 65 38 5e 08 01 vinserti128 $1, 8(%rsi), %ymm3, %ymm3
31c9: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
31d0: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3
31d4: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
31d8: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3
31dd: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
31e2: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
31e7: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
31ee: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
31f5: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
31f9: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
31fd: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3202: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3
3207: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3
320b: c5 e5 72 e3 02 vpsrad $2, %ymm3, %ymm3
3210: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
3214: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4
3219: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
321f: c4 e3 5d 38 64 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm4, %ymm4
3227: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
322f: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4
3233: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3237: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4
323c: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
3242: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
3248: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
3250: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
3258: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
325c: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3260: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3265: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4
326a: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4
326e: c5 dd 72 e4 02 vpsrad $2, %ymm4, %ymm4
3273: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
3277: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
327b: c5 fa 6f 2e vmovdqu (%rsi), %xmm5
327f: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
3284: c4 e3 55 38 6e 08 01 vinserti128 $1, 8(%rsi), %ymm5, %ymm5
328b: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
3292: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5
3296: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
329a: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5
329f: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
32a4: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
32a9: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
32b0: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
32b7: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
32bb: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
32bf: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
32c4: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5
32c9: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5
32cd: c5 d5 72 e5 02 vpsrad $2, %ymm5, %ymm5
32d2: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
32d6: c5 fa 6f 34 16 vmovdqu (%rsi,%rdx), %xmm6
32db: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
32e1: c4 e3 4d 38 74 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm6, %ymm6
32e9: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
32f1: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6
32f5: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
32f9: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6
32fe: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
3304: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
330a: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
3312: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
331a: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
331e: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3322: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3327: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6
332c: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6
3330: c5 cd 72 e6 02 vpsrad $2, %ymm6, %ymm6
3335: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6
3339: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
333d: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
3341: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
3345: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
3349: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
334d: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4
3351: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5
0000000000003355 _dav1d_prep_8tap_16bpc_avx2.hv_w88c_10bit:
3355: c5 7a 6f 26 vmovdqu (%rsi), %xmm12
3359: c5 7a 6f 7e 02 vmovdqu 2(%rsi), %xmm15
335e: c4 63 1d 38 66 08 01 vinserti128 $1, 8(%rsi), %ymm12, %ymm12
3365: c4 63 05 38 7e 0a 01 vinserti128 $1, 10(%rsi), %ymm15, %ymm15
336c: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3370: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
3374: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12
3379: c5 7a 6f 76 04 vmovdqu 4(%rsi), %xmm14
337e: c5 7a 6f 7e 06 vmovdqu 6(%rsi), %xmm15
3383: c4 63 0d 38 76 0c 01 vinserti128 $1, 12(%rsi), %ymm14, %ymm14
338a: c4 63 05 38 7e 0e 01 vinserti128 $1, 14(%rsi), %ymm15, %ymm15
3391: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
3395: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
3399: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
339e: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
33a7: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12
33ac: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12
33b1: c4 c1 1d 72 e4 02 vpsrad $2, %ymm12, %ymm12
33b7: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
33bc: c5 7a 6f 2c 16 vmovdqu (%rsi,%rdx), %xmm13
33c1: c5 7a 6f 7c 16 02 vmovdqu 2(%rsi,%rdx), %xmm15
33c7: c4 63 15 38 6c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm13, %ymm13
33cf: c4 63 05 38 7c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm15, %ymm15
33d7: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
33db: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
33df: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13
33e4: c5 7a 6f 74 16 04 vmovdqu 4(%rsi,%rdx), %xmm14
33ea: c5 7a 6f 7c 16 06 vmovdqu 6(%rsi,%rdx), %xmm15
33f0: c4 63 0d 38 74 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm14, %ymm14
33f8: c4 63 05 38 7c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm15, %ymm15
3400: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
3404: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
3408: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
340d: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
3416: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13
341b: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13
3420: c4 c1 15 72 e5 02 vpsrad $2, %ymm13, %ymm13
3426: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13
342b: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
342f: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6
3434: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
3439: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
343d: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14
3441: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14
3445: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
3449: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
344d: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
3451: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
3455: c5 fd 6f cb vmovdqa %ymm3, %ymm1
3459: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
345d: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3
3461: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14
3465: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
3469: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
346d: c5 fd 6f dd vmovdqa %ymm5, %ymm3
3471: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4
3475: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5
347a: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14
347e: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15
3482: c5 fd 6f e6 vmovdqa %ymm6, %ymm4
3486: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5
348b: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
3494: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14
3498: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
349c: c4 c1 0d 72 e6 06 vpsrad $6, %ymm14, %ymm14
34a2: c4 c1 05 72 e7 06 vpsrad $6, %ymm15, %ymm15
34a8: c4 41 0d 6b f6 vpackssdw %ymm14, %ymm14, %ymm14
34ad: c4 41 05 6b ff vpackssdw %ymm15, %ymm15, %ymm15
34b2: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6
34b7: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12
34bd: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13
34c3: c5 79 d6 37 vmovq %xmm14, (%rdi)
34c7: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi)
34cc: c4 21 79 d6 3c 1f vmovq %xmm15, (%rdi,%r11)
34d2: c4 21 79 d6 6c 1f 08 vmovq %xmm13, 8(%rdi,%r11)
34d9: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
34dd: 41 83 e8 02 subl $2, %r8d
34e1: 0f 8f 6e fe ff ff jg -402 <_dav1d_prep_8tap_16bpc_avx2.hv_w88c_10bit>
34e7: 48 83 c0 10 addq $16, %rax
34eb: 49 83 c2 10 addq $16, %r10
34ef: 45 89 c8 movl %r9d, %r8d
34f2: 48 89 c7 movq %rax, %rdi
34f5: 4c 89 d6 movq %r10, %rsi
34f8: 83 e9 08 subl $8, %ecx
34fb: 0f 8f 8b fb ff ff jg -1141 <_dav1d_prep_8tap_16bpc_avx2.hv_w88l_10bit>
3501: 5b popq %rbx
3502: c5 f8 77 vzeroupper
3505: c3 retq
0000000000003506 _dav1d_prep_8tap_16bpc_avx2.hv_w88_12bit:
3506: 49 89 f2 movq %rsi, %r10
0000000000003509 _dav1d_prep_8tap_16bpc_avx2.hv_w88l_12bit:
3509: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
3512: c5 fa 6f 06 vmovdqu (%rsi), %xmm0
3516: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
351b: c4 e3 7d 38 46 08 01 vinserti128 $1, 8(%rsi), %ymm0, %ymm0
3522: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
3529: c5 fd f5 c7 vpmaddwd %ymm7, %ymm0, %ymm0
352d: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3531: c4 c2 7d 02 c5 vphaddd %ymm13, %ymm0, %ymm0
3536: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
353b: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
3540: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
3547: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
354e: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3552: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3556: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
355b: c4 c2 7d 02 c4 vphaddd %ymm12, %ymm0, %ymm0
3560: c5 85 fe c0 vpaddd %ymm0, %ymm15, %ymm0
3564: c5 fd 72 e0 04 vpsrad $4, %ymm0, %ymm0
3569: c5 fd 6b c0 vpackssdw %ymm0, %ymm0, %ymm0
356d: c5 fa 6f 0c 16 vmovdqu (%rsi,%rdx), %xmm1
3572: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
3578: c4 e3 75 38 4c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm1, %ymm1
3580: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
3588: c5 f5 f5 cf vpmaddwd %ymm7, %ymm1, %ymm1
358c: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3590: c4 c2 75 02 cd vphaddd %ymm13, %ymm1, %ymm1
3595: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
359b: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
35a1: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
35a9: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
35b1: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
35b5: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
35b9: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
35be: c4 c2 75 02 cc vphaddd %ymm12, %ymm1, %ymm1
35c3: c5 85 fe c9 vpaddd %ymm1, %ymm15, %ymm1
35c7: c5 f5 72 e1 04 vpsrad $4, %ymm1, %ymm1
35cc: c5 f5 6b c9 vpackssdw %ymm1, %ymm1, %ymm1
35d0: c5 fa 6f 14 56 vmovdqu (%rsi,%rdx,2), %xmm2
35d5: c5 7a 6f 6c 56 02 vmovdqu 2(%rsi,%rdx,2), %xmm13
35db: c4 e3 6d 38 54 56 08 01 vinserti128 $1, 8(%rsi,%rdx,2), %ymm2, %ymm2
35e3: c4 63 15 38 6c 56 0a 01 vinserti128 $1, 10(%rsi,%rdx,2), %ymm13, %ymm13
35eb: c5 ed f5 d7 vpmaddwd %ymm7, %ymm2, %ymm2
35ef: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
35f3: c4 c2 6d 02 d5 vphaddd %ymm13, %ymm2, %ymm2
35f8: c5 7a 6f 64 56 04 vmovdqu 4(%rsi,%rdx,2), %xmm12
35fe: c5 7a 6f 6c 56 06 vmovdqu 6(%rsi,%rdx,2), %xmm13
3604: c4 63 1d 38 64 56 0c 01 vinserti128 $1, 12(%rsi,%rdx,2), %ymm12, %ymm12
360c: c4 63 15 38 6c 56 0e 01 vinserti128 $1, 14(%rsi,%rdx,2), %ymm13, %ymm13
3614: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3618: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
361c: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3621: c4 c2 6d 02 d4 vphaddd %ymm12, %ymm2, %ymm2
3626: c5 85 fe d2 vpaddd %ymm2, %ymm15, %ymm2
362a: c5 ed 72 e2 04 vpsrad $4, %ymm2, %ymm2
362f: c5 ed 6b d2 vpackssdw %ymm2, %ymm2, %ymm2
3633: 48 01 de addq %rbx, %rsi
3636: c5 fa 6f 1e vmovdqu (%rsi), %xmm3
363a: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
363f: c4 e3 65 38 5e 08 01 vinserti128 $1, 8(%rsi), %ymm3, %ymm3
3646: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
364d: c5 e5 f5 df vpmaddwd %ymm7, %ymm3, %ymm3
3651: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3655: c4 c2 65 02 dd vphaddd %ymm13, %ymm3, %ymm3
365a: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
365f: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
3664: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
366b: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
3672: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3676: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
367a: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
367f: c4 c2 65 02 dc vphaddd %ymm12, %ymm3, %ymm3
3684: c5 85 fe db vpaddd %ymm3, %ymm15, %ymm3
3688: c5 e5 72 e3 04 vpsrad $4, %ymm3, %ymm3
368d: c5 e5 6b db vpackssdw %ymm3, %ymm3, %ymm3
3691: c5 fa 6f 24 16 vmovdqu (%rsi,%rdx), %xmm4
3696: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
369c: c4 e3 5d 38 64 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm4, %ymm4
36a4: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
36ac: c5 dd f5 e7 vpmaddwd %ymm7, %ymm4, %ymm4
36b0: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
36b4: c4 c2 5d 02 e5 vphaddd %ymm13, %ymm4, %ymm4
36b9: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
36bf: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
36c5: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
36cd: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
36d5: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
36d9: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
36dd: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
36e2: c4 c2 5d 02 e4 vphaddd %ymm12, %ymm4, %ymm4
36e7: c5 85 fe e4 vpaddd %ymm4, %ymm15, %ymm4
36eb: c5 dd 72 e4 04 vpsrad $4, %ymm4, %ymm4
36f0: c5 dd 6b e4 vpackssdw %ymm4, %ymm4, %ymm4
36f4: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
36f8: c5 fa 6f 2e vmovdqu (%rsi), %xmm5
36fc: c5 7a 6f 6e 02 vmovdqu 2(%rsi), %xmm13
3701: c4 e3 55 38 6e 08 01 vinserti128 $1, 8(%rsi), %ymm5, %ymm5
3708: c4 63 15 38 6e 0a 01 vinserti128 $1, 10(%rsi), %ymm13, %ymm13
370f: c5 d5 f5 ef vpmaddwd %ymm7, %ymm5, %ymm5
3713: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3717: c4 c2 55 02 ed vphaddd %ymm13, %ymm5, %ymm5
371c: c5 7a 6f 66 04 vmovdqu 4(%rsi), %xmm12
3721: c5 7a 6f 6e 06 vmovdqu 6(%rsi), %xmm13
3726: c4 63 1d 38 66 0c 01 vinserti128 $1, 12(%rsi), %ymm12, %ymm12
372d: c4 63 15 38 6e 0e 01 vinserti128 $1, 14(%rsi), %ymm13, %ymm13
3734: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
3738: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
373c: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
3741: c4 c2 55 02 ec vphaddd %ymm12, %ymm5, %ymm5
3746: c5 85 fe ed vpaddd %ymm5, %ymm15, %ymm5
374a: c5 d5 72 e5 04 vpsrad $4, %ymm5, %ymm5
374f: c5 d5 6b ed vpackssdw %ymm5, %ymm5, %ymm5
3753: c5 fa 6f 34 16 vmovdqu (%rsi,%rdx), %xmm6
3758: c5 7a 6f 6c 16 02 vmovdqu 2(%rsi,%rdx), %xmm13
375e: c4 e3 4d 38 74 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm6, %ymm6
3766: c4 63 15 38 6c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm13, %ymm13
376e: c5 cd f5 f7 vpmaddwd %ymm7, %ymm6, %ymm6
3772: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3776: c4 c2 4d 02 f5 vphaddd %ymm13, %ymm6, %ymm6
377b: c5 7a 6f 64 16 04 vmovdqu 4(%rsi,%rdx), %xmm12
3781: c5 7a 6f 6c 16 06 vmovdqu 6(%rsi,%rdx), %xmm13
3787: c4 63 1d 38 64 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm12, %ymm12
378f: c4 63 15 38 6c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm13, %ymm13
3797: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
379b: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
379f: c4 42 1d 02 e5 vphaddd %ymm13, %ymm12, %ymm12
37a4: c4 c2 4d 02 f4 vphaddd %ymm12, %ymm6, %ymm6
37a9: c5 85 fe f6 vpaddd %ymm6, %ymm15, %ymm6
37ad: c5 cd 72 e6 04 vpsrad $4, %ymm6, %ymm6
37b2: c5 cd 6b f6 vpackssdw %ymm6, %ymm6, %ymm6
37b6: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
37ba: c5 fd 61 c1 vpunpcklwd %ymm1, %ymm0, %ymm0
37be: c5 f5 61 ca vpunpcklwd %ymm2, %ymm1, %ymm1
37c2: c5 ed 61 d3 vpunpcklwd %ymm3, %ymm2, %ymm2
37c6: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
37ca: c5 dd 61 e5 vpunpcklwd %ymm5, %ymm4, %ymm4
37ce: c5 d5 61 ee vpunpcklwd %ymm6, %ymm5, %ymm5
00000000000037d2 _dav1d_prep_8tap_16bpc_avx2.hv_w88c_12bit:
37d2: c5 7a 6f 26 vmovdqu (%rsi), %xmm12
37d6: c5 7a 6f 7e 02 vmovdqu 2(%rsi), %xmm15
37db: c4 63 1d 38 66 08 01 vinserti128 $1, 8(%rsi), %ymm12, %ymm12
37e2: c4 63 05 38 7e 0a 01 vinserti128 $1, 10(%rsi), %ymm15, %ymm15
37e9: c5 1d f5 e7 vpmaddwd %ymm7, %ymm12, %ymm12
37ed: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
37f1: c4 42 1d 02 e7 vphaddd %ymm15, %ymm12, %ymm12
37f6: c5 7a 6f 76 04 vmovdqu 4(%rsi), %xmm14
37fb: c5 7a 6f 7e 06 vmovdqu 6(%rsi), %xmm15
3800: c4 63 0d 38 76 0c 01 vinserti128 $1, 12(%rsi), %ymm14, %ymm14
3807: c4 63 05 38 7e 0e 01 vinserti128 $1, 14(%rsi), %ymm15, %ymm15
380e: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
3812: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
3816: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
381b: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
3824: c4 42 1d 02 e6 vphaddd %ymm14, %ymm12, %ymm12
3829: c4 41 1d fe e7 vpaddd %ymm15, %ymm12, %ymm12
382e: c4 c1 1d 72 e4 04 vpsrad $4, %ymm12, %ymm12
3834: c4 41 1d 6b e4 vpackssdw %ymm12, %ymm12, %ymm12
3839: c5 7a 6f 2c 16 vmovdqu (%rsi,%rdx), %xmm13
383e: c5 7a 6f 7c 16 02 vmovdqu 2(%rsi,%rdx), %xmm15
3844: c4 63 15 38 6c 16 08 01 vinserti128 $1, 8(%rsi,%rdx), %ymm13, %ymm13
384c: c4 63 05 38 7c 16 0a 01 vinserti128 $1, 10(%rsi,%rdx), %ymm15, %ymm15
3854: c5 15 f5 ef vpmaddwd %ymm7, %ymm13, %ymm13
3858: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
385c: c4 42 15 02 ef vphaddd %ymm15, %ymm13, %ymm13
3861: c5 7a 6f 74 16 04 vmovdqu 4(%rsi,%rdx), %xmm14
3867: c5 7a 6f 7c 16 06 vmovdqu 6(%rsi,%rdx), %xmm15
386d: c4 63 0d 38 74 16 0c 01 vinserti128 $1, 12(%rsi,%rdx), %ymm14, %ymm14
3875: c4 63 05 38 7c 16 0e 01 vinserti128 $1, 14(%rsi,%rdx), %ymm15, %ymm15
387d: c5 0d f5 f7 vpmaddwd %ymm7, %ymm14, %ymm14
3881: c5 05 f5 ff vpmaddwd %ymm7, %ymm15, %ymm15
3885: c4 42 0d 02 f7 vphaddd %ymm15, %ymm14, %ymm14
388a: c4 62 7d 58 3d 00 00 00 00 vpbroadcastd (%rip), %ymm15
3893: c4 42 15 02 ee vphaddd %ymm14, %ymm13, %ymm13
3898: c4 41 15 fe ef vpaddd %ymm15, %ymm13, %ymm13
389d: c4 c1 15 72 e5 04 vpsrad $4, %ymm13, %ymm13
38a3: c4 41 15 6b ed vpackssdw %ymm13, %ymm13, %ymm13
38a8: 48 8d 34 56 leaq (%rsi,%rdx,2), %rsi
38ac: c4 c1 4d 61 f4 vpunpcklwd %ymm12, %ymm6, %ymm6
38b1: c4 41 1d 61 e5 vpunpcklwd %ymm13, %ymm12, %ymm12
38b6: c5 bd f5 c0 vpmaddwd %ymm0, %ymm8, %ymm0
38ba: c5 35 f5 f2 vpmaddwd %ymm2, %ymm9, %ymm14
38be: c5 0d fe f0 vpaddd %ymm0, %ymm14, %ymm14
38c2: c5 fd 6f c2 vmovdqa %ymm2, %ymm0
38c6: c5 bd f5 c9 vpmaddwd %ymm1, %ymm8, %ymm1
38ca: c5 35 f5 fb vpmaddwd %ymm3, %ymm9, %ymm15
38ce: c5 05 fe f9 vpaddd %ymm1, %ymm15, %ymm15
38d2: c5 fd 6f cb vmovdqa %ymm3, %ymm1
38d6: c5 ad f5 d4 vpmaddwd %ymm4, %ymm10, %ymm2
38da: c5 ad f5 dd vpmaddwd %ymm5, %ymm10, %ymm3
38de: c5 0d fe f2 vpaddd %ymm2, %ymm14, %ymm14
38e2: c5 05 fe fb vpaddd %ymm3, %ymm15, %ymm15
38e6: c5 fd 6f d4 vmovdqa %ymm4, %ymm2
38ea: c5 fd 6f dd vmovdqa %ymm5, %ymm3
38ee: c5 a5 f5 e6 vpmaddwd %ymm6, %ymm11, %ymm4
38f2: c4 c1 1d f5 eb vpmaddwd %ymm11, %ymm12, %ymm5
38f7: c5 0d fe f4 vpaddd %ymm4, %ymm14, %ymm14
38fb: c5 05 fe fd vpaddd %ymm5, %ymm15, %ymm15
38ff: c5 fd 6f e6 vmovdqa %ymm6, %ymm4
3903: c4 c1 7d 6f ec vmovdqa %ymm12, %ymm5
3908: c4 e2 7d 58 35 00 00 00 00 vpbroadcastd (%rip), %ymm6
3911: c5 0d fe f6 vpaddd %ymm6, %ymm14, %ymm14
3915: c5 05 fe fe vpaddd %ymm6, %ymm15, %ymm15
3919: c4 c1 0d 72 e6 06 vpsrad $6, %ymm14, %ymm14
391f: c4 c1 05 72 e7 06 vpsrad $6, %ymm15, %ymm15
3925: c4 41 0d 6b f6 vpackssdw %ymm14, %ymm14, %ymm14
392a: c4 41 05 6b ff vpackssdw %ymm15, %ymm15, %ymm15
392f: c4 c1 7d 6f f5 vmovdqa %ymm13, %ymm6
3934: c4 43 7d 39 f4 01 vextracti128 $1, %ymm14, %xmm12
393a: c4 43 7d 39 fd 01 vextracti128 $1, %ymm15, %xmm13
3940: c5 79 d6 37 vmovq %xmm14, (%rdi)
3944: c5 79 d6 67 08 vmovq %xmm12, 8(%rdi)
3949: c4 21 79 d6 3c 1f vmovq %xmm15, (%rdi,%r11)
394f: c4 21 79 d6 6c 1f 08 vmovq %xmm13, 8(%rdi,%r11)
3956: 4a 8d 3c 5f leaq (%rdi,%r11,2), %rdi
395a: 41 83 e8 02 subl $2, %r8d
395e: 0f 8f 6e fe ff ff jg -402 <_dav1d_prep_8tap_16bpc_avx2.hv_w88c_12bit>
3964: 48 83 c0 10 addq $16, %rax
3968: 49 83 c2 10 addq $16, %r10
396c: 45 89 c8 movl %r9d, %r8d
396f: 48 89 c7 movq %rax, %rdi
3972: 4c 89 d6 movq %r10, %rsi
3975: 83 e9 08 subl $8, %ecx
3978: 0f 8f 8b fb ff ff jg -1141 <_dav1d_prep_8tap_16bpc_avx2.hv_w88l_12bit>
397e: 5b popq %rbx
397f: c5 f8 77 vzeroupper
3982: c3 retq
0000000000003983 _dav1d_prep_8tap_16bpc_avx2.hv_jmp_tbl:
3983: 69 f0 ff ff 1c f4 imull $4095541247, %eax, %esi
3989: ff ff <unknown>
398b: 1c f4 sbbb $-12, %al
398d: ff ff <unknown>
398f: 1c f4 sbbb $-12, %al
3991: ff ff <unknown>
3993: 1c f4 sbbb $-12, %al
3995: ff ff <unknown>
3997: 1c f4 sbbb $-12, %al
3999: ff ff <unknown>
399b: 0f 1f 44 00 00 nopl (%rax,%rax)
00000000000039a0 _dav1d_avg_16bpc_avx2:
39a0: c4 e2 7d 79 1d 00 00 00 00 vpbroadcastw (%rip), %ymm3
39a9: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
39b2: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
39ba: f3 0f b8 44 24 08 popcntl 8(%rsp), %eax
39c0: 83 f8 0a cmpl $10, %eax
39c3: 74 11 je 17 <_dav1d_avg_16bpc_avx2.bits10>
39c5: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
39ce: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
00000000000039d6 _dav1d_avg_16bpc_avx2.bits10:
39d6: c4 e2 7d 79 74 24 08 vpbroadcastw 8(%rsp), %ymm6
39dd: 4f 8d 1c 00 leaq (%r8,%r8), %r11
39e1: 4c 8d 15 30 01 00 00 leaq 304(%rip), %r10
39e8: f3 45 0f bc c0 tzcntl %r8d, %r8d
39ed: 41 83 e8 02 subl $2, %r8d
39f1: 4f 63 04 82 movslq (%r10,%r8,4), %r8
39f5: 4d 01 d0 addq %r10, %r8
39f8: 41 ff e0 jmpq *%r8
00000000000039fb _dav1d_avg_16bpc_avx2.w4:
39fb: 4c 8d 14 76 leaq (%rsi,%rsi,2), %r10
00000000000039ff _dav1d_avg_16bpc_avx2.w4l:
39ff: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3a03: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3a07: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3a0b: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3a0f: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3a13: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3a17: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3a1b: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3a1f: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3a23: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3a27: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3a2c: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3a31: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3a37: c5 f9 d6 07 vmovq %xmm0, (%rdi)
3a3b: c4 e3 f9 16 04 37 01 vpextrq $1, %xmm0, (%rdi,%rsi)
3a42: c5 f9 d6 0c 77 vmovq %xmm1, (%rdi,%rsi,2)
3a47: c4 a3 f9 16 0c 17 01 vpextrq $1, %xmm1, (%rdi,%r10)
3a4e: 48 8d 3c b7 leaq (%rdi,%rsi,4), %rdi
3a52: 48 83 c2 20 addq $32, %rdx
3a56: 48 83 c1 20 addq $32, %rcx
3a5a: 41 83 e9 04 subl $4, %r9d
3a5e: 7f 9f jg -97 <_dav1d_avg_16bpc_avx2.w4l>
3a60: c5 f8 77 vzeroupper
3a63: c3 retq
0000000000003a64 _dav1d_avg_16bpc_avx2.w8:
3a64: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3a68: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3a6c: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3a70: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3a74: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3a78: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3a7c: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3a80: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3a84: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3a88: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3a8c: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3a91: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3a96: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3a9c: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
3aa0: c5 f9 7f 0c 37 vmovdqa %xmm1, (%rdi,%rsi)
3aa5: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
3aa9: 48 83 c2 20 addq $32, %rdx
3aad: 48 83 c1 20 addq $32, %rcx
3ab1: 41 83 e9 02 subl $2, %r9d
3ab5: 7f ad jg -83 <_dav1d_avg_16bpc_avx2.w8>
3ab7: c5 f8 77 vzeroupper
3aba: c3 retq
0000000000003abb _dav1d_avg_16bpc_avx2.w16:
3abb: 4d 89 d8 movq %r11, %r8
3abe: 4c 29 c6 subq %r8, %rsi
0000000000003ac1 _dav1d_avg_16bpc_avx2.w16l:
3ac1: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3ac5: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3ac9: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3acd: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3ad1: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3ad5: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3ad9: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3add: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3ae1: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3ae5: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3ae9: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3aee: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3af3: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
3af7: 48 83 c7 20 addq $32, %rdi
3afb: 48 83 c2 20 addq $32, %rdx
3aff: 48 83 c1 20 addq $32, %rcx
3b03: 41 83 e8 20 subl $32, %r8d
3b07: 7f b8 jg -72 <_dav1d_avg_16bpc_avx2.w16l>
3b09: 48 01 f7 addq %rsi, %rdi
3b0c: 4d 89 d8 movq %r11, %r8
3b0f: 41 ff c9 decl %r9d
3b12: 7f ad jg -83 <_dav1d_avg_16bpc_avx2.w16l>
3b14: c5 f8 77 vzeroupper
3b17: c3 retq
0000000000003b18 _dav1d_avg_16bpc_avx2.jmp_tbl:
3b18: e3 fe jrcxz -2 <_dav1d_avg_16bpc_avx2.jmp_tbl>
3b1a: ff ff <unknown>
3b1c: 4c ff ff <unknown>
3b1f: ff a3 ff ff ff a3 jmpq *-1543503873(%rbx)
3b25: ff ff <unknown>
3b27: ff a3 ff ff ff a3 jmpq *-1543503873(%rbx)
3b2d: ff ff <unknown>
3b2f: ff c4 incl %esp
0000000000003b30 _dav1d_w_avg_16bpc_avx2:
3b30: c4 e2 7d 79 5c 24 08 vpbroadcastw 8(%rsp), %ymm3
3b37: c4 e2 7d 79 25 00 00 00 00 vpbroadcastw (%rip), %ymm4
3b40: c5 dd f9 e3 vpsubw %ymm3, %ymm4, %ymm4
3b44: c5 e5 61 dc vpunpcklwd %ymm4, %ymm3, %ymm3
3b48: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
3b51: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
3b59: f3 44 0f b8 54 24 10 popcntl 16(%rsp), %r10d
3b60: 41 83 fa 0a cmpl $10, %r10d
3b64: 74 11 je 17 <_dav1d_w_avg_16bpc_avx2.bits10>
3b66: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
3b6f: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
0000000000003b77 _dav1d_w_avg_16bpc_avx2.bits10:
3b77: c4 e2 7d 79 74 24 10 vpbroadcastw 16(%rsp), %ymm6
3b7e: 4f 8d 1c 00 leaq (%r8,%r8), %r11
3b82: 4c 8d 15 30 01 00 00 leaq 304(%rip), %r10
3b89: f3 45 0f bc c0 tzcntl %r8d, %r8d
3b8e: 41 83 e8 02 subl $2, %r8d
3b92: 4f 63 04 82 movslq (%r10,%r8,4), %r8
3b96: 4d 01 d0 addq %r10, %r8
3b99: 41 ff e0 jmpq *%r8
0000000000003b9c _dav1d_w_avg_16bpc_avx2.w4:
3b9c: 4c 8d 14 76 leaq (%rsi,%rsi,2), %r10
0000000000003ba0 _dav1d_w_avg_16bpc_avx2.w4l:
3ba0: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3ba4: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3ba8: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3bac: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3bb0: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3bb4: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3bb8: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3bbc: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3bc0: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3bc4: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3bc8: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3bcd: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3bd2: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3bd8: c5 f9 d6 07 vmovq %xmm0, (%rdi)
3bdc: c4 e3 f9 16 04 37 01 vpextrq $1, %xmm0, (%rdi,%rsi)
3be3: c5 f9 d6 0c 77 vmovq %xmm1, (%rdi,%rsi,2)
3be8: c4 a3 f9 16 0c 17 01 vpextrq $1, %xmm1, (%rdi,%r10)
3bef: 48 8d 3c b7 leaq (%rdi,%rsi,4), %rdi
3bf3: 48 83 c2 20 addq $32, %rdx
3bf7: 48 83 c1 20 addq $32, %rcx
3bfb: 41 83 e9 04 subl $4, %r9d
3bff: 7f 9f jg -97 <_dav1d_w_avg_16bpc_avx2.w4l>
3c01: c5 f8 77 vzeroupper
3c04: c3 retq
0000000000003c05 _dav1d_w_avg_16bpc_avx2.w8:
3c05: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3c09: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3c0d: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3c11: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3c15: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3c19: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3c1d: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3c21: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3c25: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3c29: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3c2d: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3c32: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3c37: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3c3d: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
3c41: c5 f9 7f 0c 37 vmovdqa %xmm1, (%rdi,%rsi)
3c46: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
3c4a: 48 83 c2 20 addq $32, %rdx
3c4e: 48 83 c1 20 addq $32, %rcx
3c52: 41 83 e9 02 subl $2, %r9d
3c56: 7f ad jg -83 <_dav1d_w_avg_16bpc_avx2.w8>
3c58: c5 f8 77 vzeroupper
3c5b: c3 retq
0000000000003c5c _dav1d_w_avg_16bpc_avx2.w16:
3c5c: 4d 89 d8 movq %r11, %r8
3c5f: 4c 29 c6 subq %r8, %rsi
0000000000003c62 _dav1d_w_avg_16bpc_avx2.w16l:
3c62: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3c66: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3c6a: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3c6e: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3c72: c5 fd f5 c3 vpmaddwd %ymm3, %ymm0, %ymm0
3c76: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3c7a: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3c7e: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3c82: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3c86: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3c8a: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3c8f: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3c94: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
3c98: 48 83 c7 20 addq $32, %rdi
3c9c: 48 83 c2 20 addq $32, %rdx
3ca0: 48 83 c1 20 addq $32, %rcx
3ca4: 41 83 e8 20 subl $32, %r8d
3ca8: 7f b8 jg -72 <_dav1d_w_avg_16bpc_avx2.w16l>
3caa: 48 01 f7 addq %rsi, %rdi
3cad: 4d 89 d8 movq %r11, %r8
3cb0: 41 ff c9 decl %r9d
3cb3: 7f ad jg -83 <_dav1d_w_avg_16bpc_avx2.w16l>
3cb5: c5 f8 77 vzeroupper
3cb8: c3 retq
0000000000003cb9 _dav1d_w_avg_16bpc_avx2.jmp_tbl:
3cb9: e3 fe jrcxz -2 <_dav1d_w_avg_16bpc_avx2.jmp_tbl>
3cbb: ff ff <unknown>
3cbd: 4c ff ff <unknown>
3cc0: ff a3 ff ff ff a3 jmpq *-1543503873(%rbx)
3cc6: ff ff <unknown>
3cc8: ff a3 ff ff ff a3 jmpq *-1543503873(%rbx)
3cce: ff ff <unknown>
3cd0: ff 0f decl (%rdi)
3cd2: 1f <unknown>
3cd3: 84 00 testb %al, (%rax)
3cd5: 00 00 addb %al, (%rax)
3cd7: 00 00 addb %al, (%rax)
3cd9: 0f 1f 80 00 00 00 00 nopl (%rax)
0000000000003ce0 _dav1d_mask_16bpc_avx2:
3ce0: 48 8b 44 24 08 movq 8(%rsp), %rax
3ce5: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
3cee: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
3cf6: f3 44 0f b8 54 24 10 popcntl 16(%rsp), %r10d
3cfd: 41 83 fa 0a cmpl $10, %r10d
3d01: 74 11 je 17 <_dav1d_mask_16bpc_avx2.bits10>
3d03: c4 e2 7d 58 25 00 00 00 00 vpbroadcastd (%rip), %ymm4
3d0c: c5 fa 7e 2d 00 00 00 00 vmovq (%rip), %xmm5
0000000000003d14 _dav1d_mask_16bpc_avx2.bits10:
3d14: c4 e2 7d 79 74 24 10 vpbroadcastw 16(%rsp), %ymm6
3d1b: 4f 8d 1c 00 leaq (%r8,%r8), %r11
3d1f: 4c 8d 15 9a 01 00 00 leaq 410(%rip), %r10
3d26: f3 45 0f bc c0 tzcntl %r8d, %r8d
3d2b: 41 83 e8 02 subl $2, %r8d
3d2f: 4f 63 04 82 movslq (%r10,%r8,4), %r8
3d33: 4d 01 d0 addq %r10, %r8
3d36: 41 ff e0 jmpq *%r8
0000000000003d39 _dav1d_mask_16bpc_avx2.w4:
3d39: 4c 8d 14 76 leaq (%rsi,%rsi,2), %r10
0000000000003d3d _dav1d_mask_16bpc_avx2.w4l:
3d3d: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3d41: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3d45: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3d49: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3d4d: c5 f9 6f 10 vmovdqa (%rax), %xmm2
3d51: c4 e2 7d 20 d2 vpmovsxbw %xmm2, %ymm2
3d56: c4 e2 7d 79 3d 00 00 00 00 vpbroadcastw (%rip), %ymm7
3d5f: c5 c5 f9 fa vpsubw %ymm2, %ymm7, %ymm7
3d63: c5 ed 69 df vpunpckhwd %ymm7, %ymm2, %ymm3
3d67: c5 ed 61 d7 vpunpcklwd %ymm7, %ymm2, %ymm2
3d6b: c5 fd f5 c2 vpmaddwd %ymm2, %ymm0, %ymm0
3d6f: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3d73: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3d77: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3d7b: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3d7f: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3d83: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3d88: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3d8d: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3d93: c5 f9 d6 07 vmovq %xmm0, (%rdi)
3d97: c4 e3 f9 16 04 37 01 vpextrq $1, %xmm0, (%rdi,%rsi)
3d9e: c5 f9 d6 0c 77 vmovq %xmm1, (%rdi,%rsi,2)
3da3: c4 a3 f9 16 0c 17 01 vpextrq $1, %xmm1, (%rdi,%r10)
3daa: 48 8d 3c b7 leaq (%rdi,%rsi,4), %rdi
3dae: 48 83 c2 20 addq $32, %rdx
3db2: 48 83 c1 20 addq $32, %rcx
3db6: 48 83 c0 10 addq $16, %rax
3dba: 41 83 e9 04 subl $4, %r9d
3dbe: 0f 8f 79 ff ff ff jg -135 <_dav1d_mask_16bpc_avx2.w4l>
3dc4: c5 f8 77 vzeroupper
3dc7: c3 retq
0000000000003dc8 _dav1d_mask_16bpc_avx2.w8:
3dc8: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3dcc: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3dd0: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3dd4: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3dd8: c5 f9 6f 10 vmovdqa (%rax), %xmm2
3ddc: c4 e2 7d 20 d2 vpmovsxbw %xmm2, %ymm2
3de1: c4 e2 7d 79 3d 00 00 00 00 vpbroadcastw (%rip), %ymm7
3dea: c5 c5 f9 fa vpsubw %ymm2, %ymm7, %ymm7
3dee: c5 ed 69 df vpunpckhwd %ymm7, %ymm2, %ymm3
3df2: c5 ed 61 d7 vpunpcklwd %ymm7, %ymm2, %ymm2
3df6: c5 fd f5 c2 vpmaddwd %ymm2, %ymm0, %ymm0
3dfa: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3dfe: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3e02: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3e06: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3e0a: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3e0e: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3e13: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3e18: c4 e3 7d 39 c1 01 vextracti128 $1, %ymm0, %xmm1
3e1e: c5 f9 7f 07 vmovdqa %xmm0, (%rdi)
3e22: c5 f9 7f 0c 37 vmovdqa %xmm1, (%rdi,%rsi)
3e27: 48 8d 3c 77 leaq (%rdi,%rsi,2), %rdi
3e2b: 48 83 c2 20 addq $32, %rdx
3e2f: 48 83 c1 20 addq $32, %rcx
3e33: 48 83 c0 10 addq $16, %rax
3e37: 41 83 e9 02 subl $2, %r9d
3e3b: 7f 8b jg -117 <_dav1d_mask_16bpc_avx2.w8>
3e3d: c5 f8 77 vzeroupper
3e40: c3 retq
0000000000003e41 _dav1d_mask_16bpc_avx2.w16:
3e41: 4d 89 d8 movq %r11, %r8
3e44: 4c 29 c6 subq %r8, %rsi
0000000000003e47 _dav1d_mask_16bpc_avx2.w16l:
3e47: c5 fd 6f 02 vmovdqa (%rdx), %ymm0
3e4b: c5 fd 6f 11 vmovdqa (%rcx), %ymm2
3e4f: c5 fd 69 ca vpunpckhwd %ymm2, %ymm0, %ymm1
3e53: c5 fd 61 c2 vpunpcklwd %ymm2, %ymm0, %ymm0
3e57: c5 f9 6f 10 vmovdqa (%rax), %xmm2
3e5b: c4 e2 7d 20 d2 vpmovsxbw %xmm2, %ymm2
3e60: c4 e2 7d 79 3d 00 00 00 00 vpbroadcastw (%rip), %ymm7
3e69: c5 c5 f9 fa vpsubw %ymm2, %ymm7, %ymm7
3e6d: c5 ed 69 df vpunpckhwd %ymm7, %ymm2, %ymm3
3e71: c5 ed 61 d7 vpunpcklwd %ymm7, %ymm2, %ymm2
3e75: c5 fd f5 c2 vpmaddwd %ymm2, %ymm0, %ymm0
3e79: c5 f5 f5 cb vpmaddwd %ymm3, %ymm1, %ymm1
3e7d: c5 fd fe c4 vpaddd %ymm4, %ymm0, %ymm0
3e81: c5 f5 fe cc vpaddd %ymm4, %ymm1, %ymm1
3e85: c5 fd e2 c5 vpsrad %xmm5, %ymm0, %ymm0
3e89: c5 f5 e2 cd vpsrad %xmm5, %ymm1, %ymm1
3e8d: c4 e2 7d 2b c1 vpackusdw %ymm1, %ymm0, %ymm0
3e92: c4 e2 7d 3a c6 vpminuw %ymm6, %ymm0, %ymm0
3e97: c5 fd 7f 07 vmovdqa %ymm0, (%rdi)
3e9b: 48 83 c7 20 addq $32, %rdi
3e9f: 48 83 c2 20 addq $32, %rdx
3ea3: 48 83 c1 20 addq $32, %rcx
3ea7: 48 83 c0 10 addq $16, %rax
3eab: 41 83 e8 20 subl $32, %r8d
3eaf: 7f 96 jg -106 <_dav1d_mask_16bpc_avx2.w16l>
3eb1: 48 01 f7 addq %rsi, %rdi
3eb4: 4d 89 d8 movq %r11, %r8
3eb7: 41 ff c9 decl %r9d
3eba: 7f 8b jg -117 <_dav1d_mask_16bpc_avx2.w16l>
3ebc: c5 f8 77 vzeroupper
3ebf: c3 retq
0000000000003ec0 _dav1d_mask_16bpc_avx2.jmp_tbl:
3ec0: 79 fe jns -2 <_dav1d_mask_16bpc_avx2.jmp_tbl>
3ec2: ff ff <unknown>
3ec4: 08 ff orb %bh, %bh
3ec6: ff ff <unknown>
3ec8: 81 ff ff ff 81 ff cmpl $4286709759, %edi
3ece: ff ff <unknown>
3ed0: 81 ff ff ff 81 ff cmpl $4286709759, %edi
3ed6: ff ff <unknown>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment