Skip to content

Instantly share code, notes, and snippets.

@ericonr
Created July 13, 2020 17:35
Show Gist options
  • Save ericonr/d267780996c9d94ec9c139c619bfd45d to your computer and use it in GitHub Desktop.
Save ericonr/d267780996c9d94ec9c139c619bfd45d to your computer and use it in GitHub Desktop.
#0 0x00007fd49e789660 in lowbd_inv_txfm2d_add_no_identity_avx2 (input=0x5561158e4000, output=0x7fd474092d00 '\237' <repeats 200 times>..., stride=64, tx_type=0 '\000', tx_size=4 '\004',
eob=1022) at ../av1/common/x86/av1_inv_txfm_avx2.c:1608
#1 0x00007fd49e5163e9 in av1_inverse_transform_block (xd=xd@entry=0x7fd475662cc0, dqcoeff=<optimized out>, plane=plane@entry=0, tx_type=<optimized out>, tx_size=tx_size@entry=4 '\004',
dst=dst@entry=0x7fd474092d00 '\237' <repeats 200 times>..., stride=64, eob=1022, reduced_tx_set=0) at ../av1/common/idct.c:320
#2 0x00007fd49e6afe51 in dist_block_px_domain (tx_size=4 '\004', blk_col=0, blk_row=0, block=<optimized out>, plane_bsize=BLOCK_128X128, plane=0, x=0x7fd475318040, cpi=0x7fd496ed4040)
at ../av1/encoder/tx_search.c:1224
#3 search_tx_type (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, plane=plane@entry=0, block=block@entry=0, blk_row=blk_row@entry=0, blk_col=blk_col@entry=0,
plane_bsize=<optimized out>, tx_size=<optimized out>, txb_ctx=<optimized out>, ftxs_mode=<optimized out>, skip_trellis=<optimized out>, ref_best_rd=<optimized out>,
best_rd_stats=0x7fd4740a6e10, use_fast_coef_costing=<optimized out>) at ../av1/encoder/tx_search.c:2289
#4 0x00007fd49e6b3c96 in block_rd_txfm (arg=0x7fd4740a6f20, tx_size=4 '\004', plane_bsize=BLOCK_128X128, blk_col=0, blk_row=0, block=0, plane=0) at ../av1/encoder/tx_search.c:2833
#5 block_rd_txfm (plane=plane@entry=0, block=block@entry=0, blk_row=blk_row@entry=0, blk_col=blk_col@entry=0, plane_bsize=plane_bsize@entry=BLOCK_128X128, tx_size=tx_size@entry=4 '\004',
arg=0x7fd4740a6f20) at ../av1/encoder/tx_search.c:2807
#6 0x00007fd49e5feebf in av1_foreach_transformed_block_in_plane (xd=xd@entry=0x7fd475662cc0, plane_bsize=plane_bsize@entry=BLOCK_128X128, plane=plane@entry=0,
visit=visit@entry=0x7fd49e6b36a0 <block_rd_txfm>, arg=arg@entry=0x7fd4740a6f20) at ../av1/encoder/encodemb.c:545
#7 0x00007fd49e6b8e19 in av1_txfm_rd_in_plane (tx_size=4 '\004', skip_trellis=0, ftxs_mode=<optimized out>, use_fast_coef_costing=0, plane_bsize=BLOCK_128X128, plane=0,
current_rd=<optimized out>, ref_best_rd=<optimized out>, rd_stats=0x7fd4740a7100, cpi=<optimized out>, x=0x7fd475318040) at ../av1/encoder/tx_search.c:3474
#8 av1_txfm_rd_in_plane (x=0x7fd475318040, cpi=<optimized out>, rd_stats=0x7fd4740a7100, ref_best_rd=<optimized out>, current_rd=<optimized out>, plane=0, plane_bsize=BLOCK_128X128,
tx_size=4 '\004', use_fast_coef_costing=0, ftxs_mode=0 '\000', skip_trellis=0) at ../av1/encoder/tx_search.c:3443
#9 0x00007fd49e6b8fa6 in av1_uniform_txfm_yrd (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, rd_stats=rd_stats@entry=0x7fd4740a7100,
ref_best_rd=ref_best_rd@entry=9223372036854775807, bs=bs@entry=BLOCK_128X128, tx_size=tx_size@entry=4 '\004', ftxs_mode=0 '\000', skip_trellis=0) at ../av1/encoder/tx_search.c:2903
#10 0x00007fd49e6b958e in choose_tx_size_type_from_rd (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, rd_stats=rd_stats@entry=0x7fd4740a7b90,
ref_best_rd=ref_best_rd@entry=9223372036854775807, bs=BLOCK_128X128) at ../av1/encoder/tx_search.c:2780
#11 0x00007fd49e6b9936 in av1_pick_uniform_tx_size_type_yrd (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, rd_stats=rd_stats@entry=0x7fd4740a7b90, bs=bs@entry=BLOCK_128X128,
ref_best_rd=9223372036854775807) at ../av1/encoder/tx_search.c:3370
#12 0x00007fd49e6c5a4a in av1_rd_pick_intra_sby_mode (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, rate=rate@entry=0x7fd4740a8100,
rate_tokenonly=rate_tokenonly@entry=0x7fd4740a8108, distortion=distortion@entry=0x7fd4740a8118, skippable=skippable@entry=0x7fd4740a8110, bsize=BLOCK_128X128, best_rd=<optimized out>,
ctx=0x7fd475314238) at ../av1/encoder/intra_mode_search.c:2016
#13 0x00007fd49e68efca in av1_rd_pick_intra_mode_sb (cpi=cpi@entry=0x7fd496ed4040, x=x@entry=0x7fd475318040, rd_cost=rd_cost@entry=0x7fd4740a8480, bsize=bsize@entry=BLOCK_128X128,
ctx=ctx@entry=0x7fd475314238, best_rd=9223372036854775807) at ../av1/encoder/rdopt.c:2838
#14 0x00007fd49e5e1191 in hybrid_intra_mode_search (ctx=<optimized out>, bsize=<optimized out>, rd_cost=<optimized out>, x=<optimized out>, cpi=<optimized out>)
at ../av1/encoder/encodeframe.c:716
#15 pick_sb_modes (cpi=cpi@entry=0x7fd496ed4040, tile_data=tile_data@entry=0x556115726560, x=x@entry=0x7fd475318040, mi_row=mi_row@entry=32, mi_col=mi_col@entry=0,
rd_cost=rd_cost@entry=0x7fd4740a8480, partition=0 '\000', bsize=BLOCK_128X128, ctx=<optimized out>, best_rd=..., pick_mode_type=0) at ../av1/encoder/encodeframe.c:838
#16 0x00007fd49e5eb8ae in rd_pick_partition (cpi=cpi@entry=0x7fd496ed4040, td=td@entry=0x7fd475318040, tile_data=tile_data@entry=0x556115726560, tp=tp@entry=0x7fd4740ad6b0,
mi_row=mi_row@entry=32, mi_col=mi_col@entry=0, bsize=<optimized out>, max_sq_part=<optimized out>, min_sq_part=<optimized out>, rd_cost=<optimized out>, best_rdc=...,
pc_tree=<optimized out>, none_rd=<optimized out>, multi_pass_mode=<optimized out>, rect_part_win_info=<optimized out>) at ../av1/encoder/encodeframe.c:2955
#17 0x00007fd49e5f5827 in encode_rd_sb (cpi=<optimized out>, td=<optimized out>, tile_data=<optimized out>, pc_root=<optimized out>, tp=<optimized out>, mi_row=<optimized out>,
mi_col=<optimized out>, seg_skip=<optimized out>) at ../av1/encoder/encodeframe.c:4786
#18 0x00007fd49e5f9449 in encode_sb_row (tp=0x7fd4740ad6b0, mi_row=0, tile_data=0x556115726560, td=0x20, cpi=0x7fd496ed4040) at ../av1/encoder/encodeframe.c:4961
#19 av1_encode_sb_row (cpi=cpi@entry=0x7fd496ed4040, td=td@entry=0x7fd475318040, tile_row=tile_row@entry=0, tile_col=tile_col@entry=0, mi_row=mi_row@entry=32)
at ../av1/encoder/encodeframe.c:5059
#20 0x00007fd49e628fa2 in enc_row_mt_worker_hook (arg1=0x5561142e9618, unused=<optimized out>) at ../av1/encoder/ethread.c:351
#21 0x00007fd49e4d480b in execute (worker=0x5561143ccff8) at ../aom_util/aom_thread.c:163
#22 thread_loop (ptr=0x5561143ccff8) at ../aom_util/aom_thread.c:72
#23 0x00007fd4a1dd3305 in start (p=0x7fd4740ad7a8) at src/thread/pthread_create.c:192
#24 0x00007fd4a1de0a6b in __clone () at src/thread/x86_64/clone.s:22
Backtrace stopped: frame did not save the PC
rax 0x7fd49e792290 140551168533136
rbx 0x7fd474092c40 140550456552512
rcx 0x0 0
rdx 0x40 64
rsi 0x7fd474092d00 140550456552704
rdi 0x5561158e4000 93875461832704
rbp 0x7fd4740929a0 0x7fd4740929a0
rsp 0x7fd47408c960 0x7fd47408c960
r8 0x4 4
r9 0x3fe 1022
r10 0x10 16
r11 0x7fd47408a960 140550456519008
r12 0x0 0
r13 0x7fd475318040 140550475972672
r14 0x8c0 2240
r15 0x7fd496ed4040 140551041925184
rip 0x7fd49e789660 0x7fd49e789660 <lowbd_inv_txfm2d_add_no_identity_avx2+32>
eflags 0x10206 [ PF IF RF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
Dump of assembler code for function lowbd_inv_txfm2d_add_no_identity_avx2:
0x00007fd49e789640 <+0>: push %rbp
0x00007fd49e789641 <+1>: mov %rsp,%rbp
0x00007fd49e789644 <+4>: push %r15
0x00007fd49e789646 <+6>: push %r14
0x00007fd49e789648 <+8>: push %r13
0x00007fd49e78964a <+10>: push %r12
0x00007fd49e78964c <+12>: push %rbx
0x00007fd49e78964d <+13>: and $0xffffffffffffffe0,%rsp
0x00007fd49e789651 <+17>: lea -0x8000(%rsp),%r11
0x00007fd49e789659 <+25>: sub $0x1000,%rsp
=> 0x00007fd49e789660 <+32>: orq $0x0,(%rsp)
0x00007fd49e789665 <+37>: cmp %r11,%rsp
0x00007fd49e789668 <+40>: jne 0x7fd49e789659 <lowbd_inv_txfm2d_add_no_identity_avx2+25>
0x00007fd49e78966a <+42>: sub $0x9c0,%rsp
0x00007fd49e789671 <+49>: mov %rsi,0x8(%rsp)
0x00007fd49e789676 <+54>: mov %ecx,%r11d
0x00007fd49e789679 <+57>: mov %r9d,%eax
0x00007fd49e78967c <+60>: movzbl %r8b,%r8d
0x00007fd49e789680 <+64>: mov %rdi,0x108(%rsp)
0x00007fd49e789688 <+72>: mov %edx,0x10(%rsp)
0x00007fd49e78968c <+76>: mov %fs:0x28,%rsi
0x00007fd49e789695 <+85>: mov %rsi,0x89b8(%rsp)
0x00007fd49e78969d <+93>: xor %esi,%esi
0x00007fd49e78969f <+95>: cmp $0x1,%r9d
0x00007fd49e7896a3 <+99>: je 0x7fd49e78a8b6 <lowbd_inv_txfm2d_add_no_identity_avx2+4726>
0x00007fd49e7896a9 <+105>: lea 0xfc390(%rip),%rdx # 0x7fd49e885a40 <tx_size_wide_log2_eob>
0x00007fd49e7896b0 <+112>: sub $0x1,%eax
0x00007fd49e7896b3 <+115>: lea 0xfc3e6(%rip),%r9 # 0x7fd49e885aa0 <lowbd_txfm_all_1d_zeros_idx>
0x00007fd49e7896ba <+122>: mov (%rdx,%r8,4),%ecx
0x00007fd49e7896be <+126>: lea 0x162cfb(%rip),%rdx # 0x7fd49e8ec3c0 <av1_eob_to_eobxy_default>
0x00007fd49e7896c5 <+133>: mov (%rdx,%r8,8),%rdx
0x00007fd49e7896c9 <+137>: sar %cl,%eax
0x00007fd49e7896cb <+139>: cltq
0x00007fd49e7896cd <+141>: movzwl (%rdx,%rax,2),%edx
0x00007fd49e7896d1 <+145>: movzbl %dl,%ecx
0x00007fd49e7896d4 <+148>: movsbl %dh,%edx
0x00007fd49e7896d7 <+151>: lea 0x10(%rdx),%esi
0x00007fd49e7896da <+154>: movslq %edx,%rdx
0x00007fd49e7896dd <+157>: lea 0x10(%rcx),%eax
0x00007fd49e7896e0 <+160>: mov (%r9,%rcx,4),%edi
0x00007fd49e7896e4 <+164>: mov (%r9,%rdx,4),%r10d
0x00007fd49e7896e8 <+168>: sar $0x4,%eax
0x00007fd49e7896eb <+171>: sar $0x4,%esi
0x00007fd49e7896ee <+174>: lea 0x1663eb(%rip),%rdx # 0x7fd49e8efae0 <av1_inv_txfm_shift_ls>
0x00007fd49e7896f5 <+181>: movl $0x1,0x11c(%rsp)
0x00007fd49e789700 <+192>: mov (%rdx,%r8,8),%rbx
0x00007fd49e789704 <+196>: lea 0xfc5f5(%rip),%rdx # 0x7fd49e885d00 <tx_size_wide_log2>
0x00007fd49e78970b <+203>: mov (%rdx,%r8,4),%ecx
0x00007fd49e78970f <+207>: lea 0xfc58a(%rip),%rdx # 0x7fd49e885ca0 <tx_size_high_log2>
0x00007fd49e789716 <+214>: mov (%rdx,%r8,4),%edx
0x00007fd49e78971a <+218>: mov %rbx,0x18(%rsp)
0x00007fd49e78971f <+223>: sub $0x2,%ecx
0x00007fd49e789722 <+226>: lea 0x70bf7(%rip),%rbx # 0x7fd49e7fa320 <av1_inv_cos_bit_col>
0x00007fd49e789729 <+233>: movslq %ecx,%rcx
0x00007fd49e78972c <+236>: sub $0x2,%edx
0x00007fd49e78972f <+239>: lea (%rcx,%rcx,4),%r9
0x00007fd49e789733 <+243>: movslq %edx,%rdx
0x00007fd49e789736 <+246>: add %r9,%rbx
0x00007fd49e789739 <+249>: add 0x165810(%rip),%r9 # 0x7fd49e8eef50
0x00007fd49e789740 <+256>: movsbl (%rbx,%rdx,1),%ebx
0x00007fd49e789744 <+260>: mov %ebx,0x14(%rsp)
0x00007fd49e789748 <+264>: movsbl (%r9,%rdx,1),%ebx
0x00007fd49e78974d <+269>: lea 0xfc66c(%rip),%r9 # 0x7fd49e885dc0 <tx_size_wide>
0x00007fd49e789754 <+276>: mov %ebx,0x110(%rsp)
0x00007fd49e78975b <+283>: mov (%r9,%r8,4),%ebx
0x00007fd49e78975f <+287>: lea 0xfc5fa(%rip),%r9 # 0x7fd49e885d60 <tx_size_high>
0x00007fd49e789766 <+294>: mov (%r9,%r8,4),%r15d
0x00007fd49e78976a <+298>: mov $0x20,%r8d
0x00007fd49e789770 <+304>: mov %ebx,%r9d
0x00007fd49e789773 <+307>: mov %ebx,0x114(%rsp)
0x00007fd49e78977a <+314>: sar $0x4,%r9d
0x00007fd49e78977e <+318>: cmp $0x20,%ebx
0x00007fd49e789781 <+321>: mov %r15d,0x128(%rsp)
0x00007fd49e789789 <+329>: cmovle %ebx,%r8d
0x00007fd49e78978d <+333>: mov %r9d,0x12c(%rsp)
0x00007fd49e789795 <+341>: mov %r8d,0x24(%rsp)
0x00007fd49e78979a <+346>: cmp %r15d,%ebx
0x00007fd49e78979d <+349>: je 0x7fd49e7897d0 <lowbd_inv_txfm2d_add_no_identity_avx2+400>
0x00007fd49e78979f <+351>: jg 0x7fd49e789849 <lowbd_inv_txfm2d_add_no_identity_avx2+521>
0x00007fd49e7897a5 <+357>: lea (%rbx,%rbx,1),%r8d
0x00007fd49e7897a9 <+361>: cmp %r8d,%r15d
0x00007fd49e7897ac <+364>: je 0x7fd49e78a8fd <lowbd_inv_txfm2d_add_no_identity_avx2+4797>
0x00007fd49e7897b2 <+370>: lea 0x0(,%rbx,4),%r8d
0x00007fd49e7897ba <+378>: xor %ebx,%ebx
0x00007fd49e7897bc <+380>: cmp %r8d,%r15d
0x00007fd49e7897bf <+383>: setne %bl
0x00007fd49e7897c2 <+386>: lea -0x3(,%rbx,4),%ebx
0x00007fd49e7897c9 <+393>: mov %ebx,0x11c(%rsp)
0x00007fd49e7897d0 <+400>: movslq %edi,%rbx
0x00007fd49e7897d3 <+403>: movzbl %r11b,%edi
0x00007fd49e7897d7 <+407>: lea 0xfc492(%rip),%r8 # 0x7fd49e885c70 <hitx_1d_tab>
0x00007fd49e7897de <+414>: sub $0x4,%r11d
0x00007fd49e7897e2 <+418>: movzbl (%r8,%rdi,1),%r8d
0x00007fd49e7897e7 <+423>: lea (%rcx,%rcx,2),%rcx
0x00007fd49e7897eb <+427>: lea 0x1629ee(%rip),%r9 # 0x7fd49e8ec1e0 <lowbd_txfm_all_1d_zeros_w16_arr>
0x00007fd49e7897f2 <+434>: lea (%rdx,%rdx,2),%rdx
0x00007fd49e7897f6 <+438>: add %r8,%rcx
0x00007fd49e7897f9 <+441>: lea 0xfc480(%rip),%r8 # 0x7fd49e885c80 <vitx_1d_tab>
0x00007fd49e789800 <+448>: movzbl (%r8,%rdi,1),%edi
0x00007fd49e789805 <+453>: lea (%rbx,%rcx,4),%rcx
0x00007fd49e789809 <+457>: mov (%r9,%rcx,8),%rbx
0x00007fd49e78980d <+461>: movslq %r10d,%rcx
0x00007fd49e789810 <+464>: add %rdi,%rdx
0x00007fd49e789813 <+467>: lea (%rcx,%rdx,4),%rdx
0x00007fd49e789817 <+471>: mov %rbx,0x100(%rsp)
0x00007fd49e78981f <+479>: mov (%r9,%rdx,8),%rdi
0x00007fd49e789823 <+483>: mov %rdi,0xf8(%rsp)
0x00007fd49e78982b <+491>: cmp $0xb,%r11b
0x00007fd49e78982f <+495>: ja 0x7fd49e78a57a <lowbd_inv_txfm2d_add_no_identity_avx2+3898>
0x00007fd49e789835 <+501>: lea 0xfc0e4(%rip),%rcx # 0x7fd49e885920
0x00007fd49e78983c <+508>: movzbl %r11b,%r11d
0x00007fd49e789840 <+512>: movslq (%rcx,%r11,4),%rdx
0x00007fd49e789844 <+516>: add %rcx,%rdx
0x00007fd49e789847 <+519>: jmpq *%rdx
0x00007fd49e789849 <+521>: lea (%r15,%r15,1),%r8d
0x00007fd49e78984d <+525>: cmp %ebx,%r8d
0x00007fd49e789850 <+528>: setne %bl
0x00007fd49e789853 <+531>: movzbl %bl,%ebx
0x00007fd49e789856 <+534>: mov %ebx,0x11c(%rsp)
0x00007fd49e78985d <+541>: jmpq 0x7fd49e7897d0 <lowbd_inv_txfm2d_add_no_identity_avx2+400>
0x00007fd49e789862 <+546>: movl $0x0,0x118(%rsp)
0x00007fd49e78986d <+557>: movl $0x1,0x20(%rsp)
0x00007fd49e789875 <+565>: mov 0x18(%rsp),%rdi
0x00007fd49e78987a <+570>: mov $0x1,%edx
0x00007fd49e78987f <+575>: movsbl (%rdi),%ecx
0x00007fd49e789882 <+578>: add $0xf,%ecx
0x00007fd49e789885 <+581>: shl %cl,%edx
0x00007fd49e789887 <+583>: vmovd %edx,%xmm5
0x00007fd49e78988b <+587>: vpbroadcastw %xmm5,%ymm5
0x00007fd49e789890 <+592>: vmovdqa %ymm5,0x40(%rsp)
0x00007fd49e789896 <+598>: test %esi,%esi
0x00007fd49e789898 <+600>: jle 0x7fd49e78a3d0 <lowbd_inv_txfm2d_add_no_identity_avx2+3472>
0x00007fd49e78989e <+606>: sub $0x1,%esi
0x00007fd49e7898a1 <+609>: mov 0x24(%rsp),%r14d
0x00007fd49e7898a6 <+614>: sub $0x1,%eax
0x00007fd49e7898a9 <+617>: mov 0x108(%rsp),%rbx
0x00007fd49e7898b1 <+625>: shl $0x9,%rsi
0x00007fd49e7898b5 <+629>: shl $0x9,%rax
0x00007fd49e7898b9 <+633>: lea 0x9a0(%rsp),%rdi
0x00007fd49e7898c1 <+641>: movq $0x0,0x120(%rsp)
0x00007fd49e7898cd <+653>: lea 0xba0(%rsp,%rsi,1),%rsi
0x00007fd49e7898d5 <+661>: lea (%r14,%r14,2),%r9d
0x00007fd49e7898d9 <+665>: mov %r14d,%r11d
0x00007fd49e7898dc <+668>: mov %rdi,0x130(%rsp)
0x00007fd49e7898e4 <+676>: mov %rsi,0x78(%rsp)
0x00007fd49e7898e9 <+681>: lea 0x0(,%r14,4),%esi
0x00007fd49e7898f1 <+689>: lea 0x0(,%r14,8),%edx
0x00007fd49e7898f9 <+697>: shl $0x4,%r11d
0x00007fd49e7898fd <+701>: movslq %esi,%rdi
0x00007fd49e789900 <+704>: movslq %edx,%rcx
0x00007fd49e789903 <+707>: lea 0x0(,%r9,4),%r10d
0x00007fd49e78990b <+715>: movslq %r11d,%r15
0x00007fd49e78990e <+718>: add %rbx,%rdi
0x00007fd49e789911 <+721>: movslq %r10d,%r8
0x00007fd49e789914 <+724>: mov %r15,0x70(%rsp)
0x00007fd49e789919 <+729>: mov %rdi,0x80(%rsp)
0x00007fd49e789921 <+737>: lea (%rbx,%rcx,1),%rdi
0x00007fd49e789925 <+741>: mov %rdi,0x88(%rsp)
0x00007fd49e78992d <+749>: lea (%rbx,%r8,1),%rdi
0x00007fd49e789931 <+753>: mov %rdi,0x90(%rsp)
0x00007fd49e789939 <+761>: mov %rbx,%rdi
0x00007fd49e78993c <+764>: add %r15,%rdi
0x00007fd49e78993f <+767>: mov %rdi,0x98(%rsp)
0x00007fd49e789947 <+775>: lea (%rsi,%r14,1),%edi
0x00007fd49e78994b <+779>: movslq %edi,%rsi
0x00007fd49e78994e <+782>: lea (%rbx,%rsi,4),%rsi
0x00007fd49e789952 <+786>: mov %rsi,0xa0(%rsp)
0x00007fd49e78995a <+794>: lea (%r9,%r9,1),%esi
0x00007fd49e78995e <+798>: movslq %esi,%rsi
0x00007fd49e789961 <+801>: lea (%rbx,%rsi,4),%rsi
0x00007fd49e789965 <+805>: mov %rsi,0xa8(%rsp)
0x00007fd49e78996d <+813>: mov %edx,%esi
0x00007fd49e78996f <+815>: add %r14d,%edx
0x00007fd49e789972 <+818>: sub %r14d,%esi
0x00007fd49e789975 <+821>: movslq %edx,%rdx
0x00007fd49e789978 <+824>: movslq %esi,%rsi
0x00007fd49e78997b <+827>: lea (%rbx,%rsi,4),%rsi
0x00007fd49e78997f <+831>: mov %rsi,0xb0(%rsp)
0x00007fd49e789987 <+839>: lea (%rbx,%rcx,4),%rsi
0x00007fd49e78998b <+843>: lea 0x3a0(%rsp),%rcx
0x00007fd49e789993 <+851>: mov %rsi,0xb8(%rsp)
0x00007fd49e78999b <+859>: lea (%rbx,%rdx,4),%rsi
0x00007fd49e78999f <+863>: add %rcx,%rax
0x00007fd49e7899a2 <+866>: mov %rsi,0xc0(%rsp)
0x00007fd49e7899aa <+874>: lea (%rdi,%rdi,1),%esi
0x00007fd49e7899ad <+877>: movslq %esi,%rdx
0x00007fd49e7899b0 <+880>: mov %rax,0x160(%rsp)
0x00007fd49e7899b8 <+888>: mov 0x114(%rsp),%eax
0x00007fd49e7899bf <+895>: lea (%rbx,%rdx,4),%rdi
0x00007fd49e7899c3 <+899>: lea (%rsi,%r14,1),%edx
0x00007fd49e7899c7 <+903>: movslq %edx,%rdx
0x00007fd49e7899ca <+906>: mov %rdi,0xc8(%rsp)
0x00007fd49e7899d2 <+914>: sub $0x1,%eax
0x00007fd49e7899d5 <+917>: lea (%rbx,%rdx,4),%rdi
0x00007fd49e7899d9 <+921>: lea (%r10,%r14,1),%edx
0x00007fd49e7899dd <+925>: shl $0x5,%rax
0x00007fd49e7899e1 <+929>: movslq %edx,%rdx
0x00007fd49e7899e4 <+932>: lea 0x1c0(%rsp,%rax,1),%rax
0x00007fd49e7899ec <+940>: mov %rdi,0xd0(%rsp)
0x00007fd49e7899f4 <+948>: lea (%rbx,%r8,4),%rdi
0x00007fd49e7899f8 <+952>: lea (%rbx,%rdx,4),%rsi
0x00007fd49e7899fc <+956>: mov %rdi,0xd8(%rsp)
0x00007fd49e789a04 <+964>: imul $0xe,%r14d,%edx
0x00007fd49e789a08 <+968>: mov %rax,0x68(%rsp)
0x00007fd49e789a0d <+973>: movslq 0x128(%rsp),%rax
0x00007fd49e789a15 <+981>: mov %rsi,0xe0(%rsp)
0x00007fd49e789a1d <+989>: movslq %edx,%rdx
0x00007fd49e789a20 <+992>: lea (%rbx,%rdx,4),%rdi
0x00007fd49e789a24 <+996>: mov %r11d,%edx
0x00007fd49e789a27 <+999>: mov %rdi,0xe8(%rsp)
0x00007fd49e789a2f <+1007>: lea 0x1a0(%rsp),%rdi
0x00007fd49e789a37 <+1015>: sub %r14d,%edx
0x00007fd49e789a3a <+1018>: mov %rdi,0x138(%rsp)
0x00007fd49e789a42 <+1026>: mov %rax,%rdi
0x00007fd49e789a45 <+1029>: shl $0x5,%rax
0x00007fd49e789a49 <+1033>: movslq %edx,%rdx
0x00007fd49e789a4c <+1036>: mov %rax,0x38(%rsp)
0x00007fd49e789a51 <+1041>: mov 0x12c(%rsp),%eax
0x00007fd49e789a58 <+1048>: lea (%rbx,%rdx,4),%rsi
0x00007fd49e789a5c <+1052>: mov %rsi,0xf0(%rsp)
0x00007fd49e789a64 <+1060>: lea -0x1(%rax),%edx
0x00007fd49e789a67 <+1063>: mov %rdx,%rax
0x00007fd49e789a6a <+1066>: shl $0x9,%rdx
0x00007fd49e789a6e <+1070>: imul %edi,%eax
0x00007fd49e789a71 <+1073>: lea (%rdx,%rcx,1),%rsi
0x00007fd49e789a75 <+1077>: mov %edi,%edx
0x00007fd49e789a77 <+1079>: neg %edx
0x00007fd49e789a79 <+1081>: mov %rsi,0x60(%rsp)
0x00007fd49e789a7e <+1086>: movslq %edx,%rdx
0x00007fd49e789a81 <+1089>: cltq
0x00007fd49e789a83 <+1091>: shl $0x5,%rdx
0x00007fd49e789a87 <+1095>: shl $0x5,%rax
0x00007fd49e789a8b <+1099>: mov %rdx,0x28(%rsp)
0x00007fd49e789a90 <+1104>: mov %rax,0x30(%rsp)
0x00007fd49e789a95 <+1109>: nopl (%rax)
0x00007fd49e789a98 <+1112>: mov 0x120(%rsp),%rax
0x00007fd49e789aa0 <+1120>: mov 0x108(%rsp),%rbx
0x00007fd49e789aa8 <+1128>: mov 0x88(%rsp),%rdi
0x00007fd49e789ab0 <+1136>: mov 0xd8(%rsp),%rdx
0x00007fd49e789ab8 <+1144>: shl $0x2,%rax
0x00007fd49e789abc <+1148>: mov 0xa8(%rsp),%rsi
0x00007fd49e789ac4 <+1156>: mov 0xd0(%rsp),%rcx
0x00007fd49e789acc <+1164>: lea (%rbx,%rax,1),%r15
0x00007fd49e789ad0 <+1168>: mov 0x80(%rsp),%rbx
0x00007fd49e789ad8 <+1176>: lea (%rdi,%rax,1),%r13
0x00007fd49e789adc <+1180>: add %rax,%rdx
0x00007fd49e789adf <+1183>: mov 0x98(%rsp),%rdi
0x00007fd49e789ae7 <+1191>: mov %rdx,0x168(%rsp)
0x00007fd49e789aef <+1199>: lea (%rsi,%rax,1),%r10
0x00007fd49e789af3 <+1203>: add %rax,%rcx
0x00007fd49e789af6 <+1206>: lea (%rbx,%rax,1),%r14
0x00007fd49e789afa <+1210>: mov 0x90(%rsp),%rbx
0x00007fd49e789b02 <+1218>: mov 0xe0(%rsp),%rdx
0x00007fd49e789b0a <+1226>: mov 0xb8(%rsp),%rsi
0x00007fd49e789b12 <+1234>: lea (%rbx,%rax,1),%r12
0x00007fd49e789b16 <+1238>: lea (%rdi,%rax,1),%rbx
0x00007fd49e789b1a <+1242>: mov 0xa0(%rsp),%rdi
0x00007fd49e789b22 <+1250>: add %rax,%rdx
0x00007fd49e789b25 <+1253>: mov %rdx,0x170(%rsp)
0x00007fd49e789b2d <+1261>: mov 0xe8(%rsp),%rdx
0x00007fd49e789b35 <+1269>: lea (%rsi,%rax,1),%r8
0x00007fd49e789b39 <+1273>: lea (%rdi,%rax,1),%r11
0x00007fd49e789b3d <+1277>: mov 0xb0(%rsp),%rdi
0x00007fd49e789b45 <+1285>: mov 0xc8(%rsp),%rsi
0x00007fd49e789b4d <+1293>: add %rax,%rdx
0x00007fd49e789b50 <+1296>: lea (%rdi,%rax,1),%r9
0x00007fd49e789b54 <+1300>: mov 0xc0(%rsp),%rdi
0x00007fd49e789b5c <+1308>: add %rax,%rsi
0x00007fd49e789b5f <+1311>: mov %rdx,0x178(%rsp)
0x00007fd49e789b67 <+1319>: add %rax,%rdi
0x00007fd49e789b6a <+1322>: add 0xf0(%rsp),%rax
0x00007fd49e789b72 <+1330>: mov %rax,0x180(%rsp)
0x00007fd49e789b7a <+1338>: mov 0x138(%rsp),%rax
0x00007fd49e789b82 <+1346>: nopw 0x0(%rax,%rax,1)
0x00007fd49e789b88 <+1352>: add $0x40,%r15
0x00007fd49e789b8c <+1356>: add $0x40,%r14
0x00007fd49e789b90 <+1360>: add $0x40,%r13
0x00007fd49e789b94 <+1364>: add $0x40,%r12
0x00007fd49e789b98 <+1368>: add $0x40,%rbx
0x00007fd49e789b9c <+1372>: add $0x40,%r11
0x00007fd49e789ba0 <+1376>: add $0x40,%r10
0x00007fd49e789ba4 <+1380>: add $0x40,%r9
0x00007fd49e789ba8 <+1384>: add $0x40,%r8
0x00007fd49e789bac <+1388>: add $0x40,%rdi
0x00007fd49e789bb0 <+1392>: add $0x40,%rsi
0x00007fd49e789bb4 <+1396>: add $0x40,%rcx
0x00007fd49e789bb8 <+1400>: vlddqu -0x40(%r15),%ymm0
0x00007fd49e789bbe <+1406>: vpackssdw -0x20(%r15),%ymm0,%ymm0
0x00007fd49e789bc4 <+1412>: add $0x200,%rax
0x00007fd49e789bca <+1418>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789bd0 <+1424>: vmovdqa %ymm0,-0x200(%rax)
0x00007fd49e789bd8 <+1432>: vlddqu -0x40(%r14),%ymm0
0x00007fd49e789bde <+1438>: vpackssdw -0x20(%r14),%ymm0,%ymm0
0x00007fd49e789be4 <+1444>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789bea <+1450>: vmovdqa %ymm0,-0x1e0(%rax)
0x00007fd49e789bf2 <+1458>: vlddqu -0x40(%r13),%ymm0
0x00007fd49e789bf8 <+1464>: vpackssdw -0x20(%r13),%ymm0,%ymm0
0x00007fd49e789bfe <+1470>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c04 <+1476>: vmovdqa %ymm0,-0x1c0(%rax)
0x00007fd49e789c0c <+1484>: vlddqu -0x40(%r12),%ymm0
0x00007fd49e789c13 <+1491>: vpackssdw -0x20(%r12),%ymm0,%ymm0
0x00007fd49e789c1a <+1498>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c20 <+1504>: vmovdqa %ymm0,-0x1a0(%rax)
0x00007fd49e789c28 <+1512>: vlddqu -0x40(%rbx),%ymm0
0x00007fd49e789c2d <+1517>: vpackssdw -0x20(%rbx),%ymm0,%ymm0
0x00007fd49e789c32 <+1522>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c38 <+1528>: vmovdqa %ymm0,-0x180(%rax)
0x00007fd49e789c40 <+1536>: vlddqu -0x40(%r11),%ymm0
0x00007fd49e789c46 <+1542>: vpackssdw -0x20(%r11),%ymm0,%ymm0
0x00007fd49e789c4c <+1548>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c52 <+1554>: vmovdqa %ymm0,-0x160(%rax)
0x00007fd49e789c5a <+1562>: vlddqu -0x40(%r10),%ymm0
0x00007fd49e789c60 <+1568>: vpackssdw -0x20(%r10),%ymm0,%ymm0
0x00007fd49e789c66 <+1574>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c6c <+1580>: vmovdqa %ymm0,-0x140(%rax)
0x00007fd49e789c74 <+1588>: vlddqu -0x40(%r9),%ymm0
0x00007fd49e789c7a <+1594>: vpackssdw -0x20(%r9),%ymm0,%ymm0
0x00007fd49e789c80 <+1600>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789c86 <+1606>: vmovdqa %ymm0,-0x120(%rax)
0x00007fd49e789c8e <+1614>: vlddqu -0x40(%r8),%ymm0
0x00007fd49e789c94 <+1620>: vpackssdw -0x20(%r8),%ymm0,%ymm0
0x00007fd49e789c9a <+1626>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789ca0 <+1632>: vmovdqa %ymm0,-0x100(%rax)
0x00007fd49e789ca8 <+1640>: vlddqu -0x40(%rdi),%ymm0
0x00007fd49e789cad <+1645>: vpackssdw -0x20(%rdi),%ymm0,%ymm0
0x00007fd49e789cb2 <+1650>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789cb8 <+1656>: vmovdqa %ymm0,-0xe0(%rax)
0x00007fd49e789cc0 <+1664>: vlddqu -0x40(%rsi),%ymm0
0x00007fd49e789cc5 <+1669>: vpackssdw -0x20(%rsi),%ymm0,%ymm0
0x00007fd49e789cca <+1674>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789cd0 <+1680>: vmovdqa %ymm0,-0xc0(%rax)
0x00007fd49e789cd8 <+1688>: vlddqu -0x40(%rcx),%ymm0
0x00007fd49e789cdd <+1693>: vpackssdw -0x20(%rcx),%ymm0,%ymm0
0x00007fd49e789ce2 <+1698>: mov 0x168(%rsp),%rdx
0x00007fd49e789cea <+1706>: vmovdqa -0x1c0(%rax),%xmm8
0x00007fd49e789cf2 <+1714>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789cf8 <+1720>: vmovdqa -0x200(%rax),%xmm1
0x00007fd49e789d00 <+1728>: vinserti128 $0x1,-0xc0(%rax),%ymm8,%ymm13
0x00007fd49e789d0a <+1738>: vmovdqa %ymm0,-0xa0(%rax)
0x00007fd49e789d12 <+1746>: vlddqu (%rdx),%ymm0
0x00007fd49e789d16 <+1750>: vpackssdw 0x20(%rdx),%ymm0,%ymm0
0x00007fd49e789d1b <+1755>: mov 0x170(%rsp),%rdx
0x00007fd49e789d23 <+1763>: vmovdqa -0x1e0(%rax),%xmm6
0x00007fd49e789d2b <+1771>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789d31 <+1777>: vmovdqa -0x1a0(%rax),%xmm8
0x00007fd49e789d39 <+1785>: vinserti128 $0x1,-0xe0(%rax),%ymm6,%ymm6
0x00007fd49e789d43 <+1795>: vmovdqa %ymm0,-0x80(%rax)
0x00007fd49e789d48 <+1800>: vlddqu (%rdx),%ymm0
0x00007fd49e789d4c <+1804>: vpackssdw 0x20(%rdx),%ymm0,%ymm0
0x00007fd49e789d51 <+1809>: mov 0x178(%rsp),%rdx
0x00007fd49e789d59 <+1817>: vmovdqa -0x180(%rax),%xmm3
0x00007fd49e789d61 <+1825>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789d67 <+1831>: vinserti128 $0x1,-0x80(%rax),%ymm3,%ymm3
0x00007fd49e789d6e <+1838>: vinserti128 $0x1,-0x100(%rax),%ymm1,%ymm1
0x00007fd49e789d78 <+1848>: vmovdqa %ymm0,-0x60(%rax)
0x00007fd49e789d7d <+1853>: vlddqu (%rdx),%ymm0
0x00007fd49e789d81 <+1857>: vpackssdw 0x20(%rdx),%ymm0,%ymm0
0x00007fd49e789d86 <+1862>: mov 0x180(%rsp),%rdx
0x00007fd49e789d8e <+1870>: vmovdqa -0x160(%rax),%xmm9
0x00007fd49e789d96 <+1878>: vpunpcklwd %ymm6,%ymm1,%ymm15
0x00007fd49e789d9a <+1882>: vpunpckhwd %ymm6,%ymm1,%ymm1
0x00007fd49e789d9e <+1886>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789da4 <+1892>: vinserti128 $0x1,-0x60(%rax),%ymm9,%ymm9
0x00007fd49e789dab <+1899>: vinserti128 $0x1,-0xa0(%rax),%ymm8,%ymm8
0x00007fd49e789db5 <+1909>: vmovdqa %ymm0,-0x40(%rax)
0x00007fd49e789dba <+1914>: vlddqu (%rdx),%ymm0
0x00007fd49e789dbe <+1918>: vpackssdw 0x20(%rdx),%ymm0,%ymm0
0x00007fd49e789dc3 <+1923>: vmovdqa -0x140(%rax),%xmm7
0x00007fd49e789dcb <+1931>: vpunpcklwd %ymm8,%ymm13,%ymm6
0x00007fd49e789dd0 <+1936>: vpunpckhwd %ymm8,%ymm13,%ymm8
0x00007fd49e789dd5 <+1941>: vpunpcklwd %ymm9,%ymm3,%ymm13
0x00007fd49e789dda <+1946>: vpermq $0xd8,%ymm0,%ymm0
0x00007fd49e789de0 <+1952>: vpunpckhwd %ymm9,%ymm3,%ymm3
0x00007fd49e789de5 <+1957>: vmovdqa %ymm0,-0x20(%rax)
0x00007fd49e789dea <+1962>: vinserti128 $0x1,-0x40(%rax),%ymm7,%ymm14
0x00007fd49e789df1 <+1969>: vmovdqa -0x120(%rax),%xmm7
0x00007fd49e789df9 <+1977>: vmovdqa -0x1f0(%rax),%xmm12
0x00007fd49e789e01 <+1985>: vmovdqa -0x170(%rax),%xmm2
0x00007fd49e789e09 <+1993>: vinserti128 $0x1,%xmm0,%ymm7,%ymm7
0x00007fd49e789e0f <+1999>: vmovdqa -0x150(%rax),%xmm4
0x00007fd49e789e17 <+2007>: vinserti128 $0x1,-0x50(%rax),%ymm4,%ymm4
0x00007fd49e789e1e <+2014>: vpunpcklwd %ymm7,%ymm14,%ymm9
0x00007fd49e789e22 <+2018>: vpunpckhwd %ymm7,%ymm14,%ymm7
0x00007fd49e789e26 <+2022>: vpunpckldq %ymm6,%ymm15,%ymm14
0x00007fd49e789e2a <+2026>: vmovdqa -0x190(%rax),%xmm0
0x00007fd49e789e32 <+2034>: vpunpckhdq %ymm6,%ymm15,%ymm6
0x00007fd49e789e36 <+2038>: vpunpckldq %ymm9,%ymm13,%ymm15
0x00007fd49e789e3b <+2043>: vpunpckhdq %ymm9,%ymm13,%ymm9
0x00007fd49e789e40 <+2048>: vmovdqa -0x1d0(%rax),%xmm5
0x00007fd49e789e48 <+2056>: vpunpckldq %ymm8,%ymm1,%ymm13
0x00007fd49e789e4d <+2061>: vpunpckhdq %ymm8,%ymm1,%ymm1
0x00007fd49e789e52 <+2066>: vpunpckldq %ymm7,%ymm3,%ymm8
0x00007fd49e789e56 <+2070>: vmovdqa -0x1b0(%rax),%xmm11
0x00007fd49e789e5e <+2078>: vpunpckhdq %ymm7,%ymm3,%ymm3
0x00007fd49e789e62 <+2082>: vpunpcklqdq %ymm15,%ymm14,%ymm7
0x00007fd49e789e67 <+2087>: vinserti128 $0x1,-0xf0(%rax),%ymm12,%ymm12
0x00007fd49e789e71 <+2097>: vmovdqa -0x110(%rax),%xmm10
0x00007fd49e789e79 <+2105>: vmovdqa %ymm7,-0x200(%rax)
0x00007fd49e789e81 <+2113>: vpunpcklqdq %ymm8,%ymm13,%ymm7
0x00007fd49e789e86 <+2118>: vinserti128 $0x1,-0xd0(%rax),%ymm5,%ymm5
0x00007fd49e789e90 <+2128>: vinserti128 $0x1,-0x70(%rax),%ymm2,%ymm2
0x00007fd49e789e97 <+2135>: vmovdqa %ymm7,-0x180(%rax)
0x00007fd49e789e9f <+2143>: vpunpcklqdq %ymm9,%ymm6,%ymm7
0x00007fd49e789ea4 <+2148>: vpunpckhqdq %ymm9,%ymm6,%ymm6
0x00007fd49e789ea9 <+2153>: vinserti128 $0x1,-0xb0(%rax),%ymm11,%ymm11
0x00007fd49e789eb3 <+2163>: vmovdqa %ymm6,-0x1a0(%rax)
0x00007fd49e789ebb <+2171>: vpunpcklqdq %ymm3,%ymm1,%ymm6
0x00007fd49e789ebf <+2175>: vpunpckhqdq %ymm3,%ymm1,%ymm3
0x00007fd49e789ec3 <+2179>: vinserti128 $0x1,-0x90(%rax),%ymm0,%ymm0
0x00007fd49e789ecd <+2189>: vinserti128 $0x1,-0x10(%rax),%ymm10,%ymm10
0x00007fd49e789ed4 <+2196>: vmovdqa %ymm3,-0x120(%rax)
0x00007fd49e789edc <+2204>: vpunpcklwd %ymm5,%ymm12,%ymm1
0x00007fd49e789ee0 <+2208>: vpunpckhqdq %ymm8,%ymm13,%ymm13
0x00007fd49e789ee5 <+2213>: vmovdqa %ymm7,-0x1c0(%rax)
0x00007fd49e789eed <+2221>: vpunpcklwd %ymm0,%ymm11,%ymm7
0x00007fd49e789ef1 <+2225>: vpunpckhwd %ymm5,%ymm12,%ymm5
0x00007fd49e789ef5 <+2229>: vpunpckhwd %ymm0,%ymm11,%ymm0
0x00007fd49e789ef9 <+2233>: vpunpckldq %ymm7,%ymm1,%ymm8
0x00007fd49e789efd <+2237>: vpunpckhdq %ymm7,%ymm1,%ymm1
0x00007fd49e789f01 <+2241>: vmovdqa %ymm4,0x140(%rsp)
0x00007fd49e789f0a <+2250>: vmovdqa -0x130(%rax),%xmm4
0x00007fd49e789f12 <+2258>: vmovdqa 0x140(%rsp),%ymm3
0x00007fd49e789f1b <+2267>: vinserti128 $0x1,-0x30(%rax),%ymm4,%ymm4
0x00007fd49e789f22 <+2274>: vpunpckhqdq %ymm15,%ymm14,%ymm14
0x00007fd49e789f27 <+2279>: vmovdqa %ymm6,-0x140(%rax)
0x00007fd49e789f2f <+2287>: vmovdqa %ymm14,-0x1e0(%rax)
0x00007fd49e789f37 <+2295>: vpunpcklwd %ymm3,%ymm2,%ymm6
0x00007fd49e789f3b <+2299>: vpunpckhwd %ymm3,%ymm2,%ymm2
0x00007fd49e789f3f <+2303>: vpunpcklwd %ymm10,%ymm4,%ymm3
0x00007fd49e789f44 <+2308>: vmovdqa %ymm13,-0x160(%rax)
0x00007fd49e789f4c <+2316>: vpunpckldq %ymm3,%ymm6,%ymm7
0x00007fd49e789f50 <+2320>: vpunpckhwd %ymm10,%ymm4,%ymm4
0x00007fd49e789f55 <+2325>: vpunpckhdq %ymm3,%ymm6,%ymm3
0x00007fd49e789f59 <+2329>: vpunpckldq %ymm0,%ymm5,%ymm6
0x00007fd49e789f5d <+2333>: vpunpckhdq %ymm0,%ymm5,%ymm0
0x00007fd49e789f61 <+2337>: vpunpckldq %ymm4,%ymm2,%ymm5
0x00007fd49e789f65 <+2341>: vpunpckhdq %ymm4,%ymm2,%ymm2
0x00007fd49e789f69 <+2345>: vpunpcklqdq %ymm7,%ymm8,%ymm4
0x00007fd49e789f6d <+2349>: vmovdqa %ymm4,-0x100(%rax)
0x00007fd49e789f75 <+2357>: vpunpckhqdq %ymm7,%ymm8,%ymm4
0x00007fd49e789f79 <+2361>: vmovdqa %ymm4,-0xe0(%rax)
0x00007fd49e789f81 <+2369>: vpunpcklqdq %ymm5,%ymm6,%ymm4
0x00007fd49e789f85 <+2373>: vpunpckhqdq %ymm5,%ymm6,%ymm6
0x00007fd49e789f89 <+2377>: vmovdqa %ymm4,-0x80(%rax)
0x00007fd49e789f8e <+2382>: vpunpcklqdq %ymm3,%ymm1,%ymm4
0x00007fd49e789f92 <+2386>: vpunpckhqdq %ymm3,%ymm1,%ymm1
0x00007fd49e789f96 <+2390>: vmovdqa %ymm6,-0x60(%rax)
0x00007fd49e789f9b <+2395>: vmovdqa %ymm4,-0xc0(%rax)
0x00007fd49e789fa3 <+2403>: vmovdqa %ymm1,-0xa0(%rax)
0x00007fd49e789fab <+2411>: vpunpcklqdq %ymm2,%ymm0,%ymm1
0x00007fd49e789faf <+2415>: vpunpckhqdq %ymm2,%ymm0,%ymm2
0x00007fd49e789fb3 <+2419>: vmovdqa %ymm1,-0x40(%rax)
0x00007fd49e789fb8 <+2424>: vmovdqa %ymm2,-0x20(%rax)
0x00007fd49e789fbd <+2429>: addq $0x40,0x168(%rsp)
0x00007fd49e789fc6 <+2438>: addq $0x40,0x170(%rsp)
0x00007fd49e789fcf <+2447>: addq $0x40,0x178(%rsp)
0x00007fd49e789fd8 <+2456>: addq $0x40,0x180(%rsp)
0x00007fd49e789fe1 <+2465>: cmp %rax,0x160(%rsp)
0x00007fd49e789fe9 <+2473>: jne 0x7fd49e789b88 <lowbd_inv_txfm2d_add_no_identity_avx2+1352>
0x00007fd49e789fef <+2479>: mov 0x11c(%rsp),%r10d
0x00007fd49e789ff7 <+2487>: test %r10d,%r10d
0x00007fd49e789ffa <+2490>: je 0x7fd49e78a835 <lowbd_inv_txfm2d_add_no_identity_avx2+4597>
0x00007fd49e78a000 <+2496>: mov 0x138(%rsp),%rdi
0x00007fd49e78a008 <+2504>: mov 0x110(%rsp),%edx
0x00007fd49e78a00f <+2511>: mov 0x100(%rsp),%rax
0x00007fd49e78a017 <+2519>: mov %rdi,%rsi
0x00007fd49e78a01a <+2522>: vzeroupper
0x00007fd49e78a01d <+2525>: callq *%rax
0x00007fd49e78a01f <+2527>: mov 0x114(%rsp),%r9d
0x00007fd49e78a027 <+2535>: test %r9d,%r9d
0x00007fd49e78a02a <+2538>: jle 0x7fd49e78a053 <lowbd_inv_txfm2d_add_no_identity_avx2+2579>
0x00007fd49e78a02c <+2540>: mov 0x138(%rsp),%rax
0x00007fd49e78a034 <+2548>: vmovdqa 0x40(%rsp),%ymm1
0x00007fd49e78a03a <+2554>: mov 0x68(%rsp),%rdx
0x00007fd49e78a03f <+2559>: nop
0x00007fd49e78a040 <+2560>: vpmulhrsw (%rax),%ymm1,%ymm0
0x00007fd49e78a045 <+2565>: add $0x20,%rax
0x00007fd49e78a049 <+2569>: vmovdqa %ymm0,-0x20(%rax)
0x00007fd49e78a04e <+2574>: cmp %rax,%rdx
0x00007fd49e78a051 <+2577>: jne 0x7fd49e78a040 <lowbd_inv_txfm2d_add_no_identity_avx2+2560>
0x00007fd49e78a053 <+2579>: mov 0x118(%rsp),%edi
0x00007fd49e78a05a <+2586>: test %edi,%edi
0x00007fd49e78a05c <+2588>: je 0x7fd49e78a598 <lowbd_inv_txfm2d_add_no_identity_avx2+3928>
0x00007fd49e78a062 <+2594>: mov 0x12c(%rsp),%ecx
0x00007fd49e78a069 <+2601>: test %ecx,%ecx
0x00007fd49e78a06b <+2603>: jle 0x7fd49e78a3a4 <lowbd_inv_txfm2d_add_no_identity_avx2+3428>
0x00007fd49e78a071 <+2609>: mov 0x138(%rsp),%rax
0x00007fd49e78a079 <+2617>: mov 0x60(%rsp),%rcx
0x00007fd49e78a07e <+2622>: mov 0x28(%rsp),%rsi
0x00007fd49e78a083 <+2627>: mov 0x130(%rsp),%rdx
0x00007fd49e78a08b <+2635>: add 0x30(%rsp),%rdx
0x00007fd49e78a090 <+2640>: vmovdqa 0x1e0(%rax),%xmm10
0x00007fd49e78a098 <+2648>: vmovdqa 0xe0(%rax),%xmm11
0x00007fd49e78a0a0 <+2656>: add $0x200,%rax
0x00007fd49e78a0a6 <+2662>: vmovdqa -0x140(%rax),%xmm0
0x00007fd49e78a0ae <+2670>: vmovdqa -0x60(%rax),%xmm2
0x00007fd49e78a0b3 <+2675>: vinserti128 $0x1,%xmm11,%ymm10,%ymm11
0x00007fd49e78a0b9 <+2681>: vmovdqa -0x160(%rax),%xmm15
0x00007fd49e78a0c1 <+2689>: vmovdqa -0x40(%rax),%xmm10
0x00007fd49e78a0c6 <+2694>: vmovdqa -0x20(%rax),%ymm4
0x00007fd49e78a0cb <+2699>: vmovdqa -0x1a0(%rax),%xmm5
0x00007fd49e78a0d3 <+2707>: vinserti128 $0x1,%xmm15,%ymm2,%ymm15
0x00007fd49e78a0d9 <+2713>: vinserti128 $0x1,%xmm0,%ymm10,%ymm10
0x00007fd49e78a0df <+2719>: vmovdqa -0x80(%rax),%xmm2
0x00007fd49e78a0e4 <+2724>: vmovdqa -0x180(%rax),%xmm0
0x00007fd49e78a0ec <+2732>: vmovdqa -0xa0(%rax),%xmm13
0x00007fd49e78a0f4 <+2740>: vextracti128 $0x1,%ymm4,%xmm8
0x00007fd49e78a0fa <+2746>: vmovdqa -0x120(%rax),%ymm4
0x00007fd49e78a102 <+2754>: vmovdqa -0x1e0(%rax),%xmm9
0x00007fd49e78a10a <+2762>: vmovdqa -0x40(%rax),%ymm7
0x00007fd49e78a10f <+2767>: vinserti128 $0x1,%xmm0,%ymm2,%ymm2
0x00007fd49e78a115 <+2773>: vinserti128 $0x1,%xmm5,%ymm13,%ymm13
0x00007fd49e78a11b <+2779>: vmovdqa -0x1c0(%rax),%xmm0
0x00007fd49e78a123 <+2787>: vmovdqa -0xc0(%rax),%xmm5
0x00007fd49e78a12b <+2795>: vextracti128 $0x1,%ymm4,%xmm4
0x00007fd49e78a131 <+2801>: vmovdqa -0xe0(%rax),%xmm12
0x00007fd49e78a139 <+2809>: vinserti128 $0x1,%xmm4,%ymm8,%ymm8
0x00007fd49e78a13f <+2815>: vextracti128 $0x1,%ymm7,%xmm4
0x00007fd49e78a145 <+2821>: vmovdqa -0x140(%rax),%ymm7
0x00007fd49e78a14d <+2829>: vinserti128 $0x1,%xmm0,%ymm5,%ymm5
0x00007fd49e78a153 <+2835>: vmovdqa -0x200(%rax),%xmm0
0x00007fd49e78a15b <+2843>: vmovdqa -0x160(%rax),%ymm6
0x00007fd49e78a163 <+2851>: vinserti128 $0x1,%xmm9,%ymm12,%ymm12
0x00007fd49e78a169 <+2857>: vmovdqa -0x100(%rax),%xmm9
0x00007fd49e78a171 <+2865>: vextracti128 $0x1,%ymm6,%xmm1
0x00007fd49e78a177 <+2871>: vmovdqa -0x180(%rax),%ymm6
0x00007fd49e78a17f <+2879>: vinserti128 $0x1,%xmm0,%ymm9,%ymm9
0x00007fd49e78a185 <+2885>: vextracti128 $0x1,%ymm7,%xmm0
0x00007fd49e78a18b <+2891>: vmovdqa -0x60(%rax),%ymm7
0x00007fd49e78a190 <+2896>: vinserti128 $0x1,%xmm0,%ymm4,%ymm4
0x00007fd49e78a196 <+2902>: vextracti128 $0x1,%ymm6,%xmm3
0x00007fd49e78a19c <+2908>: vmovdqa -0xa0(%rax),%ymm6
0x00007fd49e78a1a4 <+2916>: vextracti128 $0x1,%ymm7,%xmm0
0x00007fd49e78a1aa <+2922>: vmovdqa -0x80(%rax),%ymm7
0x00007fd49e78a1af <+2927>: vinserti128 $0x1,%xmm1,%ymm0,%ymm0
0x00007fd49e78a1b5 <+2933>: vextracti128 $0x1,%ymm7,%xmm1
0x00007fd49e78a1bb <+2939>: vinserti128 $0x1,%xmm3,%ymm1,%ymm7
0x00007fd49e78a1c1 <+2945>: vmovdqa -0x1a0(%rax),%ymm3
0x00007fd49e78a1c9 <+2953>: vmovdqa %ymm7,0x180(%rsp)
0x00007fd49e78a1d2 <+2962>: vextracti128 $0x1,%ymm6,%xmm7
0x00007fd49e78a1d8 <+2968>: vmovdqa -0xc0(%rax),%ymm6
0x00007fd49e78a1e0 <+2976>: vextracti128 $0x1,%ymm3,%xmm1
0x00007fd49e78a1e6 <+2982>: vmovdqa -0x1c0(%rax),%ymm3
0x00007fd49e78a1ee <+2990>: vinserti128 $0x1,%xmm1,%ymm7,%ymm7
0x00007fd49e78a1f4 <+2996>: vextracti128 $0x1,%ymm6,%xmm1
0x00007fd49e78a1fa <+3002>: vmovdqa -0xe0(%rax),%ymm6
0x00007fd49e78a202 <+3010>: vextracti128 $0x1,%ymm3,%xmm3
0x00007fd49e78a208 <+3016>: vinserti128 $0x1,%xmm3,%ymm1,%ymm1
0x00007fd49e78a20e <+3022>: vextracti128 $0x1,%ymm6,%xmm3
0x00007fd49e78a214 <+3028>: vmovdqa -0x1e0(%rax),%ymm6
0x00007fd49e78a21c <+3036>: vextracti128 $0x1,%ymm6,%xmm6
0x00007fd49e78a222 <+3042>: vinserti128 $0x1,%xmm6,%ymm3,%ymm3
0x00007fd49e78a228 <+3048>: vmovdqa -0x100(%rax),%ymm6
0x00007fd49e78a230 <+3056>: vmovdqa -0x200(%rax),%ymm14
0x00007fd49e78a238 <+3064>: vextracti128 $0x1,%ymm6,%xmm6
0x00007fd49e78a23e <+3070>: vextracti128 $0x1,%ymm14,%xmm14
0x00007fd49e78a244 <+3076>: vinserti128 $0x1,%xmm14,%ymm6,%ymm6
0x00007fd49e78a24a <+3082>: vpunpcklwd %ymm10,%ymm11,%ymm14
0x00007fd49e78a24f <+3087>: vpunpckhwd %ymm10,%ymm11,%ymm10
0x00007fd49e78a254 <+3092>: vpunpcklwd %ymm2,%ymm15,%ymm11
0x00007fd49e78a258 <+3096>: vpunpckhwd %ymm2,%ymm15,%ymm2
0x00007fd49e78a25c <+3100>: vpunpcklwd %ymm5,%ymm13,%ymm15
0x00007fd49e78a260 <+3104>: vpunpckhwd %ymm5,%ymm13,%ymm5
0x00007fd49e78a264 <+3108>: vpunpcklwd %ymm9,%ymm12,%ymm13
0x00007fd49e78a269 <+3113>: vpunpckhwd %ymm9,%ymm12,%ymm9
0x00007fd49e78a26e <+3118>: vpunpckldq %ymm11,%ymm14,%ymm12
0x00007fd49e78a273 <+3123>: vpunpckhdq %ymm11,%ymm14,%ymm11
0x00007fd49e78a278 <+3128>: vpunpckldq %ymm13,%ymm15,%ymm14
0x00007fd49e78a27d <+3133>: vpunpckhdq %ymm13,%ymm15,%ymm13
0x00007fd49e78a282 <+3138>: vpunpckldq %ymm2,%ymm10,%ymm15
0x00007fd49e78a286 <+3142>: vpunpckhdq %ymm2,%ymm10,%ymm2
0x00007fd49e78a28a <+3146>: vpunpckldq %ymm9,%ymm5,%ymm10
0x00007fd49e78a28f <+3151>: vpunpckhdq %ymm9,%ymm5,%ymm5
0x00007fd49e78a294 <+3156>: vpunpcklqdq %ymm14,%ymm12,%ymm9
0x00007fd49e78a299 <+3161>: vmovdqa %ymm9,(%rdx)
0x00007fd49e78a29d <+3165>: vpunpcklqdq %ymm10,%ymm15,%ymm9
0x00007fd49e78a2a2 <+3170>: vpunpckhqdq %ymm10,%ymm15,%ymm15
0x00007fd49e78a2a7 <+3175>: vpunpckhqdq %ymm14,%ymm12,%ymm12
0x00007fd49e78a2ac <+3180>: vmovdqa %ymm9,0x80(%rdx)
0x00007fd49e78a2b4 <+3188>: vpunpcklqdq %ymm13,%ymm11,%ymm9
0x00007fd49e78a2b9 <+3193>: vpunpckhqdq %ymm13,%ymm11,%ymm11
0x00007fd49e78a2be <+3198>: vmovdqa %ymm9,0x40(%rdx)
0x00007fd49e78a2c3 <+3203>: vpunpcklqdq %ymm5,%ymm2,%ymm9
0x00007fd49e78a2c7 <+3207>: vpunpckhqdq %ymm5,%ymm2,%ymm5
0x00007fd49e78a2cb <+3211>: vpunpcklwd %ymm4,%ymm8,%ymm2
0x00007fd49e78a2cf <+3215>: vmovdqa %ymm5,0xe0(%rdx)
0x00007fd49e78a2d7 <+3223>: vpunpckhwd %ymm4,%ymm8,%ymm4
0x00007fd49e78a2db <+3227>: vmovdqa 0x180(%rsp),%ymm5
0x00007fd49e78a2e4 <+3236>: vmovdqa %ymm9,0xc0(%rdx)
0x00007fd49e78a2ec <+3244>: vpunpcklwd %ymm1,%ymm7,%ymm9
0x00007fd49e78a2f0 <+3248>: vpunpckhwd %ymm1,%ymm7,%ymm1
0x00007fd49e78a2f4 <+3252>: vpunpcklwd %ymm5,%ymm0,%ymm10
0x00007fd49e78a2f8 <+3256>: vpunpckhwd %ymm5,%ymm0,%ymm0
0x00007fd49e78a2fc <+3260>: vpunpcklwd %ymm6,%ymm3,%ymm5
0x00007fd49e78a300 <+3264>: vmovdqa %ymm12,0x20(%rdx)
0x00007fd49e78a305 <+3269>: vpunpckhwd %ymm6,%ymm3,%ymm3
0x00007fd49e78a309 <+3273>: vpunpckldq %ymm10,%ymm2,%ymm8
0x00007fd49e78a30e <+3278>: vpunpckldq %ymm5,%ymm9,%ymm7
0x00007fd49e78a312 <+3282>: vmovdqa %ymm15,0xa0(%rdx)
0x00007fd49e78a31a <+3290>: vpunpckldq %ymm0,%ymm4,%ymm6
0x00007fd49e78a31e <+3294>: vpunpckhdq %ymm0,%ymm4,%ymm0
0x00007fd49e78a322 <+3298>: vpunpckldq %ymm3,%ymm1,%ymm4
0x00007fd49e78a326 <+3302>: vmovdqa %ymm11,0x60(%rdx)
0x00007fd49e78a32b <+3307>: vpunpckhdq %ymm3,%ymm1,%ymm1
0x00007fd49e78a32f <+3311>: vpunpcklqdq %ymm7,%ymm8,%ymm3
0x00007fd49e78a333 <+3315>: vpunpckhdq %ymm10,%ymm2,%ymm2
0x00007fd49e78a338 <+3320>: vmovdqa %ymm3,0x100(%rdx)
0x00007fd49e78a340 <+3328>: vpunpckhqdq %ymm7,%ymm8,%ymm3
0x00007fd49e78a344 <+3332>: vpunpckhdq %ymm5,%ymm9,%ymm5
0x00007fd49e78a348 <+3336>: vmovdqa %ymm3,0x120(%rdx)
0x00007fd49e78a350 <+3344>: vpunpcklqdq %ymm4,%ymm6,%ymm3
0x00007fd49e78a354 <+3348>: vpunpckhqdq %ymm4,%ymm6,%ymm6
0x00007fd49e78a358 <+3352>: vmovdqa %ymm3,0x180(%rdx)
0x00007fd49e78a360 <+3360>: vpunpcklqdq %ymm5,%ymm2,%ymm3
0x00007fd49e78a364 <+3364>: vpunpckhqdq %ymm5,%ymm2,%ymm2
0x00007fd49e78a368 <+3368>: vmovdqa %ymm2,0x160(%rdx)
0x00007fd49e78a370 <+3376>: vpunpcklqdq %ymm1,%ymm0,%ymm2
0x00007fd49e78a374 <+3380>: vpunpckhqdq %ymm1,%ymm0,%ymm1
0x00007fd49e78a378 <+3384>: vmovdqa %ymm6,0x1a0(%rdx)
0x00007fd49e78a380 <+3392>: vmovdqa %ymm3,0x140(%rdx)
0x00007fd49e78a388 <+3400>: vmovdqa %ymm2,0x1c0(%rdx)
0x00007fd49e78a390 <+3408>: vmovdqa %ymm1,0x1e0(%rdx)
0x00007fd49e78a398 <+3416>: add %rsi,%rdx
0x00007fd49e78a39b <+3419>: cmp %rax,%rcx
0x00007fd49e78a39e <+3422>: jne 0x7fd49e78a090 <lowbd_inv_txfm2d_add_no_identity_avx2+2640>
0x00007fd49e78a3a4 <+3428>: addq $0x200,0x130(%rsp)
0x00007fd49e78a3b0 <+3440>: mov 0x70(%rsp),%rbx
0x00007fd49e78a3b5 <+3445>: mov 0x130(%rsp),%rax
0x00007fd49e78a3bd <+3453>: add %rbx,0x120(%rsp)
0x00007fd49e78a3c5 <+3461>: cmp %rax,0x78(%rsp)
0x00007fd49e78a3ca <+3466>: jne 0x7fd49e789a98 <lowbd_inv_txfm2d_add_no_identity_avx2+1112>
0x00007fd49e78a3d0 <+3472>: mov 0x18(%rsp),%rax
0x00007fd49e78a3d5 <+3477>: mov 0x12c(%rsp),%r11d
0x00007fd49e78a3dd <+3485>: movsbl 0x1(%rax),%ecx
0x00007fd49e78a3e1 <+3489>: mov $0x1,%eax
0x00007fd49e78a3e6 <+3494>: add $0xf,%ecx
0x00007fd49e78a3e9 <+3497>: shl %cl,%eax
0x00007fd49e78a3eb <+3499>: vmovd %eax,%xmm1
0x00007fd49e78a3ef <+3503>: vpbroadcastw %xmm1,%ymm1
0x00007fd49e78a3f4 <+3508>: test %r11d,%r11d
0x00007fd49e78a3f7 <+3511>: jle 0x7fd49e78a551 <lowbd_inv_txfm2d_add_no_identity_avx2+3857>
0x00007fd49e78a3fd <+3517>: movslq 0x128(%rsp),%r13
0x00007fd49e78a405 <+3525>: lea 0x9a0(%rsp),%r12
0x00007fd49e78a40d <+3533>: xor %ebx,%ebx
0x00007fd49e78a40f <+3535>: mov %r12,0x170(%rsp)
0x00007fd49e78a417 <+3543>: mov %r12,%r15
0x00007fd49e78a41a <+3546>: mov 0x14(%rsp),%r12d
0x00007fd49e78a41f <+3551>: mov %r13,%rax
0x00007fd49e78a422 <+3554>: shl $0x5,%r13
0x00007fd49e78a426 <+3558>: sub $0x1,%eax
0x00007fd49e78a429 <+3561>: mov %eax,0x178(%rsp)
0x00007fd49e78a430 <+3568>: shl $0x5,%rax
0x00007fd49e78a434 <+3572>: lea 0x9c0(%rsp,%rax,1),%r14
0x00007fd49e78a43c <+3580>: mov %r14,%rax
0x00007fd49e78a43f <+3583>: mov %ebx,%r14d
0x00007fd49e78a442 <+3586>: mov %rax,%rbx
0x00007fd49e78a445 <+3589>: nopl (%rax)
0x00007fd49e78a448 <+3592>: mov 0xf8(%rsp),%rax
0x00007fd49e78a450 <+3600>: mov %r12d,%edx
0x00007fd49e78a453 <+3603>: mov %r15,%rsi
0x00007fd49e78a456 <+3606>: mov %r15,%rdi
0x00007fd49e78a459 <+3609>: vmovdqa %ymm1,0x180(%rsp)
0x00007fd49e78a462 <+3618>: vzeroupper
0x00007fd49e78a465 <+3621>: callq *%rax
0x00007fd49e78a467 <+3623>: mov 0x128(%rsp),%edx
0x00007fd49e78a46e <+3630>: mov %r15,%rax
0x00007fd49e78a471 <+3633>: vmovdqa 0x180(%rsp),%ymm1
0x00007fd49e78a47a <+3642>: test %edx,%edx
0x00007fd49e78a47c <+3644>: jle 0x7fd49e78a493 <lowbd_inv_txfm2d_add_no_identity_avx2+3667>
0x00007fd49e78a47e <+3646>: xchg %ax,%ax
0x00007fd49e78a480 <+3648>: vpmulhrsw (%rax),%ymm1,%ymm0
0x00007fd49e78a485 <+3653>: add $0x20,%rax
0x00007fd49e78a489 <+3657>: vmovdqa %ymm0,-0x20(%rax)
0x00007fd49e78a48e <+3662>: cmp %rax,%rbx
0x00007fd49e78a491 <+3665>: jne 0x7fd49e78a480 <lowbd_inv_txfm2d_add_no_identity_avx2+3648>
0x00007fd49e78a493 <+3667>: add $0x1,%r14d
0x00007fd49e78a497 <+3671>: add %r13,%r15
0x00007fd49e78a49a <+3674>: add %r13,%rbx
0x00007fd49e78a49d <+3677>: cmp %r14d,0x12c(%rsp)
0x00007fd49e78a4a5 <+3685>: jne 0x7fd49e78a448 <lowbd_inv_txfm2d_add_no_identity_avx2+3592>
0x00007fd49e78a4a7 <+3687>: mov 0x20(%rsp),%esi
0x00007fd49e78a4ab <+3691>: mov 0x128(%rsp),%r10d
0x00007fd49e78a4b3 <+3699>: mov $0x0,%edx
0x00007fd49e78a4b8 <+3704>: mov 0x170(%rsp),%r12
0x00007fd49e78a4c0 <+3712>: cmp $0x1,%esi
0x00007fd49e78a4c3 <+3715>: sbb %eax,%eax
0x00007fd49e78a4c5 <+3717>: and $0x2,%eax
0x00007fd49e78a4c8 <+3720>: sub $0x1,%eax
0x00007fd49e78a4cb <+3723>: test %esi,%esi
0x00007fd49e78a4cd <+3725>: cmovne 0x178(%rsp),%edx
0x00007fd49e78a4d5 <+3733>: test %r10d,%r10d
0x00007fd49e78a4d8 <+3736>: jle 0x7fd49e78a551 <lowbd_inv_txfm2d_add_no_identity_avx2+3857>
0x00007fd49e78a4da <+3738>: mov 0x12c(%rsp),%edi
0x00007fd49e78a4e1 <+3745>: movslq %edx,%rdx
0x00007fd49e78a4e4 <+3748>: cltq
0x00007fd49e78a4e6 <+3750>: xor %r8d,%r8d
0x00007fd49e78a4e9 <+3753>: shl $0x5,%rdx
0x00007fd49e78a4ed <+3757>: shl $0x5,%rax
0x00007fd49e78a4f1 <+3761>: mov 0x8(%rsp),%r11
0x00007fd49e78a4f6 <+3766>: lea -0x1(%rdi),%r9d
0x00007fd49e78a4fa <+3770>: movslq 0x10(%rsp),%rdi
0x00007fd49e78a4ff <+3775>: add %rdx,%r12
0x00007fd49e78a502 <+3778>: mov %rax,%rsi
0x00007fd49e78a505 <+3781>: add $0x1,%r9
0x00007fd49e78a509 <+3785>: shl $0x4,%r9
0x00007fd49e78a50d <+3789>: nopl (%rax)
0x00007fd49e78a510 <+3792>: lea (%r11,%r8,1),%rax
0x00007fd49e78a514 <+3796>: mov %r12,%rcx
0x00007fd49e78a517 <+3799>: xor %edx,%edx
0x00007fd49e78a519 <+3801>: nopl 0x0(%rax)
0x00007fd49e78a520 <+3808>: vpmovzxbw (%rax),%ymm0
0x00007fd49e78a525 <+3813>: vpaddsw (%rcx),%ymm0,%ymm0
0x00007fd49e78a529 <+3817>: add $0x1,%edx
0x00007fd49e78a52c <+3820>: add %rsi,%rcx
0x00007fd49e78a52f <+3823>: vpackuswb %ymm0,%ymm0,%ymm0
0x00007fd49e78a533 <+3827>: vpermq $0xa8,%ymm0,%ymm0
0x00007fd49e78a539 <+3833>: vmovups %xmm0,(%rax)
0x00007fd49e78a53d <+3837>: add %rdi,%rax
0x00007fd49e78a540 <+3840>: cmp %edx,%r10d
0x00007fd49e78a543 <+3843>: jne 0x7fd49e78a520 <lowbd_inv_txfm2d_add_no_identity_avx2+3808>
0x00007fd49e78a545 <+3845>: add $0x10,%r8
0x00007fd49e78a549 <+3849>: add %r13,%r12
0x00007fd49e78a54c <+3852>: cmp %r8,%r9
0x00007fd49e78a54f <+3855>: jne 0x7fd49e78a510 <lowbd_inv_txfm2d_add_no_identity_avx2+3792>
0x00007fd49e78a551 <+3857>: mov 0x89b8(%rsp),%rax
0x00007fd49e78a559 <+3865>: xor %fs:0x28,%rax
0x00007fd49e78a562 <+3874>: jne 0x7fd49e78a8f5 <lowbd_inv_txfm2d_add_no_identity_avx2+4789>
0x00007fd49e78a568 <+3880>: vzeroupper
0x00007fd49e78a56b <+3883>: lea -0x28(%rbp),%rsp
0x00007fd49e78a56f <+3887>: pop %rbx
0x00007fd49e78a570 <+3888>: pop %r12
0x00007fd49e78a572 <+3890>: pop %r13
0x00007fd49e78a574 <+3892>: pop %r14
0x00007fd49e78a576 <+3894>: pop %r15
0x00007fd49e78a578 <+3896>: pop %rbp
0x00007fd49e78a579 <+3897>: retq
0x00007fd49e78a57a <+3898>: movl $0x0,0x118(%rsp)
0x00007fd49e78a585 <+3909>: movl $0x0,0x20(%rsp)
0x00007fd49e78a58d <+3917>: jmpq 0x7fd49e789875 <lowbd_inv_txfm2d_add_no_identity_avx2+565>
0x00007fd49e78a592 <+3922>: nopw 0x0(%rax,%rax,1)
0x00007fd49e78a598 <+3928>: mov 0x12c(%rsp),%esi
0x00007fd49e78a59f <+3935>: mov 0x130(%rsp),%rdx
0x00007fd49e78a5a7 <+3943>: mov 0x138(%rsp),%rax
0x00007fd49e78a5af <+3951>: test %esi,%esi
0x00007fd49e78a5b1 <+3953>: jle 0x7fd49e78a3a4 <lowbd_inv_txfm2d_add_no_identity_avx2+3428>
0x00007fd49e78a5b7 <+3959>: mov 0x60(%rsp),%rcx
0x00007fd49e78a5bc <+3964>: mov 0x38(%rsp),%rsi
0x00007fd49e78a5c1 <+3969>: nopl 0x0(%rax)
0x00007fd49e78a5c8 <+3976>: vmovdqa 0x40(%rax),%xmm8
0x00007fd49e78a5cd <+3981>: vmovdqa (%rax),%xmm1
0x00007fd49e78a5d1 <+3985>: add $0x200,%rax
0x00007fd49e78a5d7 <+3991>: vmovdqa -0x140(%rax),%xmm7
0x00007fd49e78a5df <+3999>: vmovdqa -0x1e0(%rax),%xmm6
0x00007fd49e78a5e7 <+4007>: vinserti128 $0x1,-0x40(%rax),%ymm7,%ymm14
0x00007fd49e78a5ee <+4014>: vmovdqa -0x180(%rax),%xmm3
0x00007fd49e78a5f6 <+4022>: vmovdqa -0x160(%rax),%xmm9
0x00007fd49e78a5fe <+4030>: vinserti128 $0x1,-0x80(%rax),%ymm3,%ymm3
0x00007fd49e78a605 <+4037>: vinserti128 $0x1,-0x60(%rax),%ymm9,%ymm9
0x00007fd49e78a60c <+4044>: vinserti128 $0x1,-0xc0(%rax),%ymm8,%ymm13
0x00007fd49e78a616 <+4054>: vmovdqa -0x120(%rax),%xmm7
0x00007fd49e78a61e <+4062>: vmovdqa -0x1a0(%rax),%xmm8
0x00007fd49e78a626 <+4070>: vinserti128 $0x1,-0x20(%rax),%ymm7,%ymm7
0x00007fd49e78a62d <+4077>: vinserti128 $0x1,-0xe0(%rax),%ymm6,%ymm6
0x00007fd49e78a637 <+4087>: vmovdqa -0x150(%rax),%xmm4
0x00007fd49e78a63f <+4095>: vinserti128 $0x1,-0x50(%rax),%ymm4,%ymm4
0x00007fd49e78a646 <+4102>: vmovdqa -0x1f0(%rax),%xmm12
0x00007fd49e78a64e <+4110>: vmovdqa -0x1d0(%rax),%xmm5
0x00007fd49e78a656 <+4118>: vmovdqa -0x1b0(%rax),%xmm11
0x00007fd49e78a65e <+4126>: vmovdqa -0x190(%rax),%xmm0
0x00007fd49e78a666 <+4134>: vmovdqa %ymm4,0x180(%rsp)
0x00007fd49e78a66f <+4143>: vinserti128 $0x1,-0x100(%rax),%ymm1,%ymm1
0x00007fd49e78a679 <+4153>: vinserti128 $0x1,-0xa0(%rax),%ymm8,%ymm8
0x00007fd49e78a683 <+4163>: vinserti128 $0x1,-0xf0(%rax),%ymm12,%ymm12
0x00007fd49e78a68d <+4173>: vinserti128 $0x1,-0xd0(%rax),%ymm5,%ymm5
0x00007fd49e78a697 <+4183>: vpunpcklwd %ymm6,%ymm1,%ymm15
0x00007fd49e78a69b <+4187>: vpunpckhwd %ymm6,%ymm1,%ymm1
0x00007fd49e78a69f <+4191>: vpunpcklwd %ymm8,%ymm13,%ymm6
0x00007fd49e78a6a4 <+4196>: vmovdqa -0x170(%rax),%xmm2
0x00007fd49e78a6ac <+4204>: vpunpckhwd %ymm8,%ymm13,%ymm8
0x00007fd49e78a6b1 <+4209>: vpunpcklwd %ymm9,%ymm3,%ymm13
0x00007fd49e78a6b6 <+4214>: vpunpckhwd %ymm9,%ymm3,%ymm3
0x00007fd49e78a6bb <+4219>: vmovdqa -0x130(%rax),%xmm4
0x00007fd49e78a6c3 <+4227>: vpunpcklwd %ymm7,%ymm14,%ymm9
0x00007fd49e78a6c7 <+4231>: vpunpckhwd %ymm7,%ymm14,%ymm7
0x00007fd49e78a6cb <+4235>: vpunpckldq %ymm6,%ymm15,%ymm14
0x00007fd49e78a6cf <+4239>: vmovdqa -0x110(%rax),%xmm10
0x00007fd49e78a6d7 <+4247>: vpunpckhdq %ymm6,%ymm15,%ymm6
0x00007fd49e78a6db <+4251>: vpunpckldq %ymm9,%ymm13,%ymm15
0x00007fd49e78a6e0 <+4256>: vpunpckhdq %ymm9,%ymm13,%ymm9
0x00007fd49e78a6e5 <+4261>: vpunpckldq %ymm8,%ymm1,%ymm13
0x00007fd49e78a6ea <+4266>: vpunpckhdq %ymm8,%ymm1,%ymm1
0x00007fd49e78a6ef <+4271>: vpunpckldq %ymm7,%ymm3,%ymm8
0x00007fd49e78a6f3 <+4275>: vpunpckhdq %ymm7,%ymm3,%ymm3
0x00007fd49e78a6f7 <+4279>: vpunpcklqdq %ymm15,%ymm14,%ymm7
0x00007fd49e78a6fc <+4284>: vinserti128 $0x1,-0xb0(%rax),%ymm11,%ymm11
0x00007fd49e78a706 <+4294>: vinserti128 $0x1,-0x90(%rax),%ymm0,%ymm0
0x00007fd49e78a710 <+4304>: vinserti128 $0x1,-0x70(%rax),%ymm2,%ymm2
0x00007fd49e78a717 <+4311>: vpunpckhqdq %ymm15,%ymm14,%ymm14
0x00007fd49e78a71c <+4316>: vinserti128 $0x1,-0x30(%rax),%ymm4,%ymm4
0x00007fd49e78a723 <+4323>: vinserti128 $0x1,-0x10(%rax),%ymm10,%ymm10
0x00007fd49e78a72a <+4330>: vmovdqa %ymm7,(%rdx)
0x00007fd49e78a72e <+4334>: vpunpcklqdq %ymm8,%ymm13,%ymm7
0x00007fd49e78a733 <+4339>: vmovdqa %ymm7,0x80(%rdx)
0x00007fd49e78a73b <+4347>: vpunpcklqdq %ymm9,%ymm6,%ymm7
0x00007fd49e78a740 <+4352>: vpunpckhqdq %ymm9,%ymm6,%ymm6
0x00007fd49e78a745 <+4357>: vpunpckhqdq %ymm8,%ymm13,%ymm13
0x00007fd49e78a74a <+4362>: vmovdqa %ymm6,0x60(%rdx)
0x00007fd49e78a74f <+4367>: vpunpcklqdq %ymm3,%ymm1,%ymm6
0x00007fd49e78a753 <+4371>: vpunpckhqdq %ymm3,%ymm1,%ymm3
0x00007fd49e78a757 <+4375>: vpunpcklwd %ymm5,%ymm12,%ymm1
0x00007fd49e78a75b <+4379>: vmovdqa %ymm3,0xe0(%rdx)
0x00007fd49e78a763 <+4387>: vpunpckhwd %ymm5,%ymm12,%ymm5
0x00007fd49e78a767 <+4391>: vmovdqa 0x180(%rsp),%ymm3
0x00007fd49e78a770 <+4400>: vmovdqa %ymm7,0x40(%rdx)
0x00007fd49e78a775 <+4405>: vpunpcklwd %ymm0,%ymm11,%ymm7
0x00007fd49e78a779 <+4409>: vpunpckhwd %ymm0,%ymm11,%ymm0
0x00007fd49e78a77d <+4413>: vmovdqa %ymm6,0xc0(%rdx)
0x00007fd49e78a785 <+4421>: vpunpcklwd %ymm3,%ymm2,%ymm6
0x00007fd49e78a789 <+4425>: vpunpckhwd %ymm3,%ymm2,%ymm2
0x00007fd49e78a78d <+4429>: vpunpcklwd %ymm10,%ymm4,%ymm3
0x00007fd49e78a792 <+4434>: vpunpckldq %ymm7,%ymm1,%ymm8
0x00007fd49e78a796 <+4438>: vpunpckhwd %ymm10,%ymm4,%ymm4
0x00007fd49e78a79b <+4443>: vpunpckhdq %ymm7,%ymm1,%ymm1
0x00007fd49e78a79f <+4447>: vmovdqa %ymm14,0x20(%rdx)
0x00007fd49e78a7a4 <+4452>: vpunpckldq %ymm3,%ymm6,%ymm7
0x00007fd49e78a7a8 <+4456>: vpunpckhdq %ymm3,%ymm6,%ymm3
0x00007fd49e78a7ac <+4460>: vpunpckldq %ymm0,%ymm5,%ymm6
0x00007fd49e78a7b0 <+4464>: vmovdqa %ymm13,0xa0(%rdx)
0x00007fd49e78a7b8 <+4472>: vpunpckhdq %ymm0,%ymm5,%ymm0
0x00007fd49e78a7bc <+4476>: vpunpckldq %ymm4,%ymm2,%ymm5
0x00007fd49e78a7c0 <+4480>: vpunpckhdq %ymm4,%ymm2,%ymm2
0x00007fd49e78a7c4 <+4484>: vpunpcklqdq %ymm7,%ymm8,%ymm4
0x00007fd49e78a7c8 <+4488>: vmovdqa %ymm4,0x100(%rdx)
0x00007fd49e78a7d0 <+4496>: vpunpckhqdq %ymm7,%ymm8,%ymm4
0x00007fd49e78a7d4 <+4500>: vmovdqa %ymm4,0x120(%rdx)
0x00007fd49e78a7dc <+4508>: vpunpcklqdq %ymm5,%ymm6,%ymm4
0x00007fd49e78a7e0 <+4512>: vpunpckhqdq %ymm5,%ymm6,%ymm6
0x00007fd49e78a7e4 <+4516>: vmovdqa %ymm4,0x180(%rdx)
0x00007fd49e78a7ec <+4524>: vpunpcklqdq %ymm3,%ymm1,%ymm4
0x00007fd49e78a7f0 <+4528>: vpunpckhqdq %ymm3,%ymm1,%ymm1
0x00007fd49e78a7f4 <+4532>: vmovdqa %ymm1,0x160(%rdx)
0x00007fd49e78a7fc <+4540>: vpunpcklqdq %ymm2,%ymm0,%ymm1
0x00007fd49e78a800 <+4544>: vpunpckhqdq %ymm2,%ymm0,%ymm2
0x00007fd49e78a804 <+4548>: vmovdqa %ymm6,0x1a0(%rdx)
0x00007fd49e78a80c <+4556>: vmovdqa %ymm4,0x140(%rdx)
0x00007fd49e78a814 <+4564>: vmovdqa %ymm1,0x1c0(%rdx)
0x00007fd49e78a81c <+4572>: vmovdqa %ymm2,0x1e0(%rdx)
0x00007fd49e78a824 <+4580>: add %rsi,%rdx
0x00007fd49e78a827 <+4583>: cmp %rax,%rcx
0x00007fd49e78a82a <+4586>: jne 0x7fd49e78a5c8 <lowbd_inv_txfm2d_add_no_identity_avx2+3976>
0x00007fd49e78a830 <+4592>: jmpq 0x7fd49e78a3a4 <lowbd_inv_txfm2d_add_no_identity_avx2+3428>
0x00007fd49e78a835 <+4597>: mov 0x114(%rsp),%r8d
0x00007fd49e78a83d <+4605>: test %r8d,%r8d
0x00007fd49e78a840 <+4608>: jle 0x7fd49e78a892 <lowbd_inv_txfm2d_add_no_identity_avx2+4690>
0x00007fd49e78a842 <+4610>: mov 0x138(%rsp),%rax
0x00007fd49e78a84a <+4618>: mov 0x24(%rsp),%ecx
0x00007fd49e78a84e <+4622>: xor %edx,%edx
0x00007fd49e78a850 <+4624>: add $0x1,%edx
0x00007fd49e78a853 <+4627>: vmovdqa 0xfb5c5(%rip),%ymm5 # 0x7fd49e885e20
0x00007fd49e78a85b <+4635>: add $0x20,%rax
0x00007fd49e78a85f <+4639>: vpmulhrsw -0x20(%rax),%ymm5,%ymm0
0x00007fd49e78a865 <+4645>: vmovdqa %ymm0,-0x20(%rax)
0x00007fd49e78a86a <+4650>: cmp %edx,%ecx
0x00007fd49e78a86c <+4652>: jg 0x7fd49e78a850 <lowbd_inv_txfm2d_add_no_identity_avx2+4624>
0x00007fd49e78a86e <+4654>: mov 0x138(%rsp),%rsi
0x00007fd49e78a876 <+4662>: mov 0x110(%rsp),%edx
0x00007fd49e78a87d <+4669>: mov 0x100(%rsp),%rax
0x00007fd49e78a885 <+4677>: mov %rsi,%rdi
0x00007fd49e78a888 <+4680>: vzeroupper
0x00007fd49e78a88b <+4683>: callq *%rax
0x00007fd49e78a88d <+4685>: jmpq 0x7fd49e78a02c <lowbd_inv_txfm2d_add_no_identity_avx2+2540>
0x00007fd49e78a892 <+4690>: mov 0x138(%rsp),%rdi
0x00007fd49e78a89a <+4698>: mov 0x110(%rsp),%edx
0x00007fd49e78a8a1 <+4705>: mov 0x100(%rsp),%rax
0x00007fd49e78a8a9 <+4713>: mov %rdi,%rsi
0x00007fd49e78a8ac <+4716>: vzeroupper
0x00007fd49e78a8af <+4719>: callq *%rax
0x00007fd49e78a8b1 <+4721>: jmpq 0x7fd49e78a053 <lowbd_inv_txfm2d_add_no_identity_avx2+2579>
0x00007fd49e78a8b6 <+4726>: mov $0x1,%esi
0x00007fd49e78a8bb <+4731>: xor %r10d,%r10d
0x00007fd49e78a8be <+4734>: xor %edi,%edi
0x00007fd49e78a8c0 <+4736>: jmpq 0x7fd49e7896ee <lowbd_inv_txfm2d_add_no_identity_avx2+174>
0x00007fd49e78a8c5 <+4741>: movl $0x1,0x118(%rsp)
0x00007fd49e78a8d0 <+4752>: movl $0x0,0x20(%rsp)
0x00007fd49e78a8d8 <+4760>: jmpq 0x7fd49e789875 <lowbd_inv_txfm2d_add_no_identity_avx2+565>
0x00007fd49e78a8dd <+4765>: movl $0x1,0x118(%rsp)
0x00007fd49e78a8e8 <+4776>: movl $0x1,0x20(%rsp)
0x00007fd49e78a8f0 <+4784>: jmpq 0x7fd49e789875 <lowbd_inv_txfm2d_add_no_identity_avx2+565>
0x00007fd49e78a8f5 <+4789>: vzeroupper
0x00007fd49e78a8f8 <+4792>: callq 0x7fd49e2641e0 <__stack_chk_fail@plt>
0x00007fd49e78a8fd <+4797>: movl $0x0,0x11c(%rsp)
0x00007fd49e78a908 <+4808>: jmpq 0x7fd49e7897d0 <lowbd_inv_txfm2d_add_no_identity_avx2+400>
End of assembler dump.
quit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment